revlog: skip opener options to pass sparse reading values...
marmoute - r51937:e2941c39 default
@@ -1,4047 +1,4048 b''
1 1 # localrepo.py - read/write repository class for mercurial
2 2 # coding: utf-8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9
10 10 import functools
11 11 import os
12 12 import random
13 13 import re
14 14 import sys
15 15 import time
16 16 import weakref
17 17
18 18 from concurrent import futures
19 19 from typing import (
20 20 Optional,
21 21 )
22 22
23 23 from .i18n import _
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullrev,
28 28 sha1nodeconstants,
29 29 short,
30 30 )
31 31 from . import (
32 32 bookmarks,
33 33 branchmap,
34 34 bundle2,
35 35 bundlecaches,
36 36 changegroup,
37 37 color,
38 38 commit,
39 39 context,
40 40 dirstate,
41 41 discovery,
42 42 encoding,
43 43 error,
44 44 exchange,
45 45 extensions,
46 46 filelog,
47 47 hook,
48 48 lock as lockmod,
49 49 match as matchmod,
50 50 mergestate as mergestatemod,
51 51 mergeutil,
52 52 namespaces,
53 53 narrowspec,
54 54 obsolete,
55 55 pathutil,
56 56 phases,
57 57 policy,
58 58 pushkey,
59 59 pycompat,
60 60 rcutil,
61 61 repoview,
62 62 requirements as requirementsmod,
63 63 revlog,
64 64 revset,
65 65 revsetlang,
66 66 scmutil,
67 67 sparse,
68 68 store as storemod,
69 69 subrepoutil,
70 70 tags as tagsmod,
71 71 transaction,
72 72 txnutil,
73 73 util,
74 74 vfs as vfsmod,
75 75 wireprototypes,
76 76 )
77 77
78 78 from .interfaces import (
79 79 repository,
80 80 util as interfaceutil,
81 81 )
82 82
83 83 from .utils import (
84 84 hashutil,
85 85 procutil,
86 86 stringutil,
87 87 urlutil,
88 88 )
89 89
90 90 from .revlogutils import (
91 91 concurrency_checker as revlogchecker,
92 92 constants as revlogconst,
93 93 sidedata as sidedatamod,
94 94 )
95 95
96 96 release = lockmod.release
97 97 urlerr = util.urlerr
98 98 urlreq = util.urlreq
99 99
100 100 RE_SKIP_DIRSTATE_ROLLBACK = re.compile(
101 101 b"^((dirstate|narrowspec.dirstate).*|branch$)"
102 102 )
103 103
104 104 # set of (path, vfs-location) tuples. vfs-location is:
105 105 # - 'plain' for vfs relative paths
106 106 # - '' for svfs relative paths
107 107 _cachedfiles = set()
108 108
109 109
110 110 class _basefilecache(scmutil.filecache):
111 111 """All filecache usage on repo are done for logic that should be unfiltered"""
112 112
113 113 def __get__(self, repo, type=None):
114 114 if repo is None:
115 115 return self
116 116 # proxy to unfiltered __dict__ since filtered repo has no entry
117 117 unfi = repo.unfiltered()
118 118 try:
119 119 return unfi.__dict__[self.sname]
120 120 except KeyError:
121 121 pass
122 122 return super(_basefilecache, self).__get__(unfi, type)
123 123
124 124 def set(self, repo, value):
125 125 return super(_basefilecache, self).set(repo.unfiltered(), value)
126 126
127 127
128 128 class repofilecache(_basefilecache):
129 129 """filecache for files in .hg but outside of .hg/store"""
130 130
131 131 def __init__(self, *paths):
132 132 super(repofilecache, self).__init__(*paths)
133 133 for path in paths:
134 134 _cachedfiles.add((path, b'plain'))
135 135
136 136 def join(self, obj, fname):
137 137 return obj.vfs.join(fname)
138 138
139 139
140 140 class storecache(_basefilecache):
141 141 """filecache for files in the store"""
142 142
143 143 def __init__(self, *paths):
144 144 super(storecache, self).__init__(*paths)
145 145 for path in paths:
146 146 _cachedfiles.add((path, b''))
147 147
148 148 def join(self, obj, fname):
149 149 return obj.sjoin(fname)
150 150
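# Illustrative usage sketch (not part of this file's hunk; names assumed from
# typical callers): a repository property is cached against the files it
# depends on by decorating it with one of the classes above, e.g.
#
#     @repofilecache(b'bookmarks', b'bookmarks.current')
#     def _bookmarks(self):
#         return bookmarks.bmstore(self)
#
# The cached value is then recomputed only when the tracked file(s) change on
# disk, and lookups on a filtered repo are proxied to the unfiltered one.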
151 151
152 152 class changelogcache(storecache):
153 153 """filecache for the changelog"""
154 154
155 155 def __init__(self):
156 156 super(changelogcache, self).__init__()
157 157 _cachedfiles.add((b'00changelog.i', b''))
158 158 _cachedfiles.add((b'00changelog.n', b''))
159 159
160 160 def tracked_paths(self, obj):
161 161 paths = [self.join(obj, b'00changelog.i')]
162 162 if obj.store.opener.options.get(b'persistent-nodemap', False):
163 163 paths.append(self.join(obj, b'00changelog.n'))
164 164 return paths
165 165
166 166
167 167 class manifestlogcache(storecache):
168 168 """filecache for the manifestlog"""
169 169
170 170 def __init__(self):
171 171 super(manifestlogcache, self).__init__()
172 172 _cachedfiles.add((b'00manifest.i', b''))
173 173 _cachedfiles.add((b'00manifest.n', b''))
174 174
175 175 def tracked_paths(self, obj):
176 176 paths = [self.join(obj, b'00manifest.i')]
177 177 if obj.store.opener.options.get(b'persistent-nodemap', False):
178 178 paths.append(self.join(obj, b'00manifest.n'))
179 179 return paths
180 180
181 181
182 182 class mixedrepostorecache(_basefilecache):
183 183 """filecache for a mix files in .hg/store and outside"""
184 184
185 185 def __init__(self, *pathsandlocations):
186 186 # scmutil.filecache only uses the path for passing back into our
187 187 # join(), so we can safely pass a list of paths and locations
188 188 super(mixedrepostorecache, self).__init__(*pathsandlocations)
189 189 _cachedfiles.update(pathsandlocations)
190 190
191 191 def join(self, obj, fnameandlocation):
192 192 fname, location = fnameandlocation
193 193 if location == b'plain':
194 194 return obj.vfs.join(fname)
195 195 else:
196 196 if location != b'':
197 197 raise error.ProgrammingError(
198 198 b'unexpected location: %s' % location
199 199 )
200 200 return obj.sjoin(fname)
201 201
202 202
203 203 def isfilecached(repo, name):
204 204 """check if a repo has already cached "name" filecache-ed property
205 205
206 206 This returns (cachedobj-or-None, iscached) tuple.
207 207 """
208 208 cacheentry = repo.unfiltered()._filecache.get(name, None)
209 209 if not cacheentry:
210 210 return None, False
211 211 return cacheentry.obj, True
212 212
213 213
214 214 class unfilteredpropertycache(util.propertycache):
215 215 """propertycache that apply to unfiltered repo only"""
216 216
217 217 def __get__(self, repo, type=None):
218 218 unfi = repo.unfiltered()
219 219 if unfi is repo:
220 220 return super(unfilteredpropertycache, self).__get__(unfi)
221 221 return getattr(unfi, self.name)
222 222
223 223
224 224 class filteredpropertycache(util.propertycache):
225 225 """propertycache that must take filtering in account"""
226 226
227 227 def cachevalue(self, obj, value):
228 228 object.__setattr__(obj, self.name, value)
229 229
230 230
231 231 def hasunfilteredcache(repo, name):
232 232 """check if a repo has an unfilteredpropertycache value for <name>"""
233 233 return name in vars(repo.unfiltered())
234 234
235 235
236 236 def unfilteredmethod(orig):
237 237 """decorate method that always need to be run on unfiltered version"""
238 238
239 239 @functools.wraps(orig)
240 240 def wrapper(repo, *args, **kwargs):
241 241 return orig(repo.unfiltered(), *args, **kwargs)
242 242
243 243 return wrapper
244 244
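# Illustrative usage sketch (assumed, not shown in this hunk): operations that
# must always see every changeset are declared as
#
#     @unfilteredmethod
#     def destroyed(self):
#         ...  # invalidate caches that may reference filtered-out revisions
#
# so that calls made on a filtered view transparently run on repo.unfiltered().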
245 245
246 246 moderncaps = {
247 247 b'lookup',
248 248 b'branchmap',
249 249 b'pushkey',
250 250 b'known',
251 251 b'getbundle',
252 252 b'unbundle',
253 253 }
254 254 legacycaps = moderncaps.union({b'changegroupsubset'})
255 255
256 256
257 257 @interfaceutil.implementer(repository.ipeercommandexecutor)
258 258 class localcommandexecutor:
259 259 def __init__(self, peer):
260 260 self._peer = peer
261 261 self._sent = False
262 262 self._closed = False
263 263
264 264 def __enter__(self):
265 265 return self
266 266
267 267 def __exit__(self, exctype, excvalue, exctb):
268 268 self.close()
269 269
270 270 def callcommand(self, command, args):
271 271 if self._sent:
272 272 raise error.ProgrammingError(
273 273 b'callcommand() cannot be used after sendcommands()'
274 274 )
275 275
276 276 if self._closed:
277 277 raise error.ProgrammingError(
278 278 b'callcommand() cannot be used after close()'
279 279 )
280 280
281 281 # We don't need to support anything fancy. Just call the named
282 282 # method on the peer and return a resolved future.
283 283 fn = getattr(self._peer, pycompat.sysstr(command))
284 284
285 285 f = futures.Future()
286 286
287 287 try:
288 288 result = fn(**pycompat.strkwargs(args))
289 289 except Exception:
290 290 pycompat.future_set_exception_info(f, sys.exc_info()[1:])
291 291 else:
292 292 f.set_result(result)
293 293
294 294 return f
295 295
296 296 def sendcommands(self):
297 297 self._sent = True
298 298
299 299 def close(self):
300 300 self._closed = True
301 301
302 302
303 303 @interfaceutil.implementer(repository.ipeercommands)
304 304 class localpeer(repository.peer):
305 305 '''peer for a local repo; reflects only the most recent API'''
306 306
307 307 def __init__(self, repo, caps=None, path=None, remotehidden=False):
308 308 super(localpeer, self).__init__(
309 309 repo.ui, path=path, remotehidden=remotehidden
310 310 )
311 311
312 312 if caps is None:
313 313 caps = moderncaps.copy()
314 314 if remotehidden:
315 315 self._repo = repo.filtered(b'served.hidden')
316 316 else:
317 317 self._repo = repo.filtered(b'served')
318 318 if repo._wanted_sidedata:
319 319 formatted = bundle2.format_remote_wanted_sidedata(repo)
320 320 caps.add(b'exp-wanted-sidedata=' + formatted)
321 321
322 322 self._caps = repo._restrictcapabilities(caps)
323 323
324 324 # Begin of _basepeer interface.
325 325
326 326 def url(self):
327 327 return self._repo.url()
328 328
329 329 def local(self):
330 330 return self._repo
331 331
332 332 def canpush(self):
333 333 return True
334 334
335 335 def close(self):
336 336 self._repo.close()
337 337
338 338 # End of _basepeer interface.
339 339
340 340 # Begin of _basewirecommands interface.
341 341
342 342 def branchmap(self):
343 343 return self._repo.branchmap()
344 344
345 345 def capabilities(self):
346 346 return self._caps
347 347
348 348 def get_cached_bundle_inline(self, path):
349 349 # not needed with local peer
350 350 raise NotImplementedError
351 351
352 352 def clonebundles(self):
353 353 return bundlecaches.get_manifest(self._repo)
354 354
355 355 def debugwireargs(self, one, two, three=None, four=None, five=None):
356 356 """Used to test argument passing over the wire"""
357 357 return b"%s %s %s %s %s" % (
358 358 one,
359 359 two,
360 360 pycompat.bytestr(three),
361 361 pycompat.bytestr(four),
362 362 pycompat.bytestr(five),
363 363 )
364 364
365 365 def getbundle(
366 366 self,
367 367 source,
368 368 heads=None,
369 369 common=None,
370 370 bundlecaps=None,
371 371 remote_sidedata=None,
372 372 **kwargs
373 373 ):
374 374 chunks = exchange.getbundlechunks(
375 375 self._repo,
376 376 source,
377 377 heads=heads,
378 378 common=common,
379 379 bundlecaps=bundlecaps,
380 380 remote_sidedata=remote_sidedata,
381 381 **kwargs
382 382 )[1]
383 383 cb = util.chunkbuffer(chunks)
384 384
385 385 if exchange.bundle2requested(bundlecaps):
386 386 # When requesting a bundle2, getbundle returns a stream to make the
387 387 # wire level function happier. We need to build a proper object
388 388 # from it in local peer.
389 389 return bundle2.getunbundler(self.ui, cb)
390 390 else:
391 391 return changegroup.getunbundler(b'01', cb, None)
392 392
393 393 def heads(self):
394 394 return self._repo.heads()
395 395
396 396 def known(self, nodes):
397 397 return self._repo.known(nodes)
398 398
399 399 def listkeys(self, namespace):
400 400 return self._repo.listkeys(namespace)
401 401
402 402 def lookup(self, key):
403 403 return self._repo.lookup(key)
404 404
405 405 def pushkey(self, namespace, key, old, new):
406 406 return self._repo.pushkey(namespace, key, old, new)
407 407
408 408 def stream_out(self):
409 409 raise error.Abort(_(b'cannot perform stream clone against local peer'))
410 410
411 411 def unbundle(self, bundle, heads, url):
412 412 """apply a bundle on a repo
413 413
414 414 This function handles the repo locking itself."""
415 415 try:
416 416 try:
417 417 bundle = exchange.readbundle(self.ui, bundle, None)
418 418 ret = exchange.unbundle(self._repo, bundle, heads, b'push', url)
419 419 if hasattr(ret, 'getchunks'):
420 420 # This is a bundle20 object, turn it into an unbundler.
421 421 # This little dance should be dropped eventually when the
422 422 # API is finally improved.
423 423 stream = util.chunkbuffer(ret.getchunks())
424 424 ret = bundle2.getunbundler(self.ui, stream)
425 425 return ret
426 426 except Exception as exc:
427 427 # If the exception contains output salvaged from a bundle2
428 428 # reply, we need to make sure it is printed before continuing
429 429 # to fail. So we build a bundle2 with such output and consume
430 430 # it directly.
431 431 #
432 432 # This is not very elegant but allows a "simple" solution for
433 433 # issue4594
434 434 output = getattr(exc, '_bundle2salvagedoutput', ())
435 435 if output:
436 436 bundler = bundle2.bundle20(self._repo.ui)
437 437 for out in output:
438 438 bundler.addpart(out)
439 439 stream = util.chunkbuffer(bundler.getchunks())
440 440 b = bundle2.getunbundler(self.ui, stream)
441 441 bundle2.processbundle(self._repo, b)
442 442 raise
443 443 except error.PushRaced as exc:
444 444 raise error.ResponseError(
445 445 _(b'push failed:'), stringutil.forcebytestr(exc)
446 446 )
447 447
448 448 # End of _basewirecommands interface.
449 449
450 450 # Begin of peer interface.
451 451
452 452 def commandexecutor(self):
453 453 return localcommandexecutor(self)
454 454
455 455 # End of peer interface.
456 456
457 457
458 458 @interfaceutil.implementer(repository.ipeerlegacycommands)
459 459 class locallegacypeer(localpeer):
460 460 """peer extension which implements legacy methods too; used for tests with
461 461 restricted capabilities"""
462 462
463 463 def __init__(self, repo, path=None, remotehidden=False):
464 464 super(locallegacypeer, self).__init__(
465 465 repo, caps=legacycaps, path=path, remotehidden=remotehidden
466 466 )
467 467
468 468 # Begin of baselegacywirecommands interface.
469 469
470 470 def between(self, pairs):
471 471 return self._repo.between(pairs)
472 472
473 473 def branches(self, nodes):
474 474 return self._repo.branches(nodes)
475 475
476 476 def changegroup(self, nodes, source):
477 477 outgoing = discovery.outgoing(
478 478 self._repo, missingroots=nodes, ancestorsof=self._repo.heads()
479 479 )
480 480 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
481 481
482 482 def changegroupsubset(self, bases, heads, source):
483 483 outgoing = discovery.outgoing(
484 484 self._repo, missingroots=bases, ancestorsof=heads
485 485 )
486 486 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
487 487
488 488 # End of baselegacywirecommands interface.
489 489
490 490
491 491 # Functions receiving (ui, features) that extensions can register to impact
492 492 # the ability to load repositories with custom requirements. Only
493 493 # functions defined in loaded extensions are called.
494 494 #
495 495 # The function receives a set of requirement strings that the repository
496 496 # is capable of opening. Functions will typically add elements to the
497 497 # set to reflect that the extension knows how to handle those requirements.
498 498 featuresetupfuncs = set()
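# Illustrative sketch (hypothetical extension; the requirement name is made
# up): an extension advertises support for an extra requirement by registering
# a hook here from its setup code:
#
#     def featuresetup(ui, supported):
#         supported.add(b'exp-myfeature')
#
#     def extsetup(ui):
#         localrepo.featuresetupfuncs.add(featuresetup)
#
# Only hooks defined in extensions that are actually loaded for the ui are
# invoked (see gathersupportedrequirements() below).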
499 499
500 500
501 501 def _getsharedvfs(hgvfs, requirements):
502 502 """returns the vfs object pointing to root of shared source
503 503 repo for a shared repository
504 504
505 505 hgvfs is vfs pointing at .hg/ of current repo (shared one)
506 506 requirements is a set of requirements of current repo (shared one)
507 507 """
508 508 # The ``shared`` or ``relshared`` requirements indicate the
509 509 # store lives in the path contained in the ``.hg/sharedpath`` file.
510 510 # This is an absolute path for ``shared`` and relative to
511 511 # ``.hg/`` for ``relshared``.
512 512 sharedpath = hgvfs.read(b'sharedpath').rstrip(b'\n')
513 513 if requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements:
514 514 sharedpath = util.normpath(hgvfs.join(sharedpath))
515 515
516 516 sharedvfs = vfsmod.vfs(sharedpath, realpath=True)
517 517
518 518 if not sharedvfs.exists():
519 519 raise error.RepoError(
520 520 _(b'.hg/sharedpath points to nonexistent directory %s')
521 521 % sharedvfs.base
522 522 )
523 523 return sharedvfs
524 524
525 525
526 526 def _readrequires(vfs, allowmissing):
527 527 """reads the require file present at root of this vfs
528 528 and return a set of requirements
529 529
530 530 If allowmissing is True, we suppress FileNotFoundError if raised"""
531 531 # requires file contains a newline-delimited list of
532 532 # features/capabilities the opener (us) must have in order to use
533 533 # the repository. This file was introduced in Mercurial 0.9.2,
534 534 # which means very old repositories may not have one. We assume
535 535 # a missing file translates to no requirements.
536 536 read = vfs.tryread if allowmissing else vfs.read
537 537 return set(read(b'requires').splitlines())
538 538
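# For example, a repository created with recent default settings typically has
# a .hg/requires along these lines (exact contents vary with version and
# configuration):
#
#     dotencode
#     fncache
#     generaldelta
#     revlogv1
#     sparserevlog
#     store
#
# which _readrequires() returns as a set of bytestring requirement names.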
539 539
540 540 def makelocalrepository(baseui, path: bytes, intents=None):
541 541 """Create a local repository object.
542 542
543 543 Given arguments needed to construct a local repository, this function
544 544 performs various early repository loading functionality (such as
545 545 reading the ``.hg/requires`` and ``.hg/hgrc`` files), validates that
546 546 the repository can be opened, derives a type suitable for representing
547 547 that repository, and returns an instance of it.
548 548
549 549 The returned object conforms to the ``repository.completelocalrepository``
550 550 interface.
551 551
552 552 The repository type is derived by calling a series of factory functions
553 553 for each aspect/interface of the final repository. These are defined by
554 554 ``REPO_INTERFACES``.
555 555
556 556 Each factory function is called to produce a type implementing a specific
557 557 interface. The cumulative list of returned types will be combined into a
558 558 new type and that type will be instantiated to represent the local
559 559 repository.
560 560
561 561 The factory functions each receive various state that may be consulted
562 562 as part of deriving a type.
563 563
564 564 Extensions should wrap these factory functions to customize repository type
565 565 creation. Note that an extension's wrapped function may be called even if
566 566 that extension is not loaded for the repo being constructed. Extensions
567 567 should check if their ``__name__`` appears in the
568 568 ``extensionmodulenames`` set passed to the factory function and no-op if
569 569 not.
570 570 """
571 571 ui = baseui.copy()
572 572 # Prevent copying repo configuration.
573 573 ui.copy = baseui.copy
574 574
575 575 # Working directory VFS rooted at repository root.
576 576 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
577 577
578 578 # Main VFS for .hg/ directory.
579 579 hgpath = wdirvfs.join(b'.hg')
580 580 hgvfs = vfsmod.vfs(hgpath, cacheaudited=True)
581 581 # Whether this repository is a shared one or not
582 582 shared = False
583 583 # If this repository is shared, vfs pointing to shared repo
584 584 sharedvfs = None
585 585
586 586 # The .hg/ path should exist and should be a directory. All other
587 587 # cases are errors.
588 588 if not hgvfs.isdir():
589 589 try:
590 590 hgvfs.stat()
591 591 except FileNotFoundError:
592 592 pass
593 593 except ValueError as e:
594 594 # Can be raised on Python 3.8 when path is invalid.
595 595 raise error.Abort(
596 596 _(b'invalid path %s: %s') % (path, stringutil.forcebytestr(e))
597 597 )
598 598
599 599 raise error.RepoError(_(b'repository %s not found') % path)
600 600
601 601 requirements = _readrequires(hgvfs, True)
602 602 shared = (
603 603 requirementsmod.SHARED_REQUIREMENT in requirements
604 604 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
605 605 )
606 606 storevfs = None
607 607 if shared:
608 608 # This is a shared repo
609 609 sharedvfs = _getsharedvfs(hgvfs, requirements)
610 610 storevfs = vfsmod.vfs(sharedvfs.join(b'store'))
611 611 else:
612 612 storevfs = vfsmod.vfs(hgvfs.join(b'store'))
613 613
614 614 # if .hg/requires contains the sharesafe requirement, it means
615 615 # there exists a `.hg/store/requires` too and we should read it
616 616 # NOTE: presence of SHARESAFE_REQUIREMENT implies that the store requirement
617 617 # is present. We never write SHARESAFE_REQUIREMENT for a repo if store
618 618 # is not present; refer to checkrequirementscompat() for that
619 619 #
620 620 # However, if SHARESAFE_REQUIREMENT is not present, it means that the
621 621 # repository was shared the old way. We check the share source .hg/requires
622 622 # for SHARESAFE_REQUIREMENT to detect whether the current repository needs
623 623 # to be reshared
624 624 hint = _(b"see `hg help config.format.use-share-safe` for more information")
625 625 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
626 626 if (
627 627 shared
628 628 and requirementsmod.SHARESAFE_REQUIREMENT
629 629 not in _readrequires(sharedvfs, True)
630 630 ):
631 631 mismatch_warn = ui.configbool(
632 632 b'share', b'safe-mismatch.source-not-safe.warn'
633 633 )
634 634 mismatch_config = ui.config(
635 635 b'share', b'safe-mismatch.source-not-safe'
636 636 )
637 637 mismatch_verbose_upgrade = ui.configbool(
638 638 b'share', b'safe-mismatch.source-not-safe:verbose-upgrade'
639 639 )
640 640 if mismatch_config in (
641 641 b'downgrade-allow',
642 642 b'allow',
643 643 b'downgrade-abort',
644 644 ):
645 645 # prevent cyclic import localrepo -> upgrade -> localrepo
646 646 from . import upgrade
647 647
648 648 upgrade.downgrade_share_to_non_safe(
649 649 ui,
650 650 hgvfs,
651 651 sharedvfs,
652 652 requirements,
653 653 mismatch_config,
654 654 mismatch_warn,
655 655 mismatch_verbose_upgrade,
656 656 )
657 657 elif mismatch_config == b'abort':
658 658 raise error.Abort(
659 659 _(b"share source does not support share-safe requirement"),
660 660 hint=hint,
661 661 )
662 662 else:
663 663 raise error.Abort(
664 664 _(
665 665 b"share-safe mismatch with source.\nUnrecognized"
666 666 b" value '%s' of `share.safe-mismatch.source-not-safe`"
667 667 b" set."
668 668 )
669 669 % mismatch_config,
670 670 hint=hint,
671 671 )
672 672 else:
673 673 requirements |= _readrequires(storevfs, False)
674 674 elif shared:
675 675 sourcerequires = _readrequires(sharedvfs, False)
676 676 if requirementsmod.SHARESAFE_REQUIREMENT in sourcerequires:
677 677 mismatch_config = ui.config(b'share', b'safe-mismatch.source-safe')
678 678 mismatch_warn = ui.configbool(
679 679 b'share', b'safe-mismatch.source-safe.warn'
680 680 )
681 681 mismatch_verbose_upgrade = ui.configbool(
682 682 b'share', b'safe-mismatch.source-safe:verbose-upgrade'
683 683 )
684 684 if mismatch_config in (
685 685 b'upgrade-allow',
686 686 b'allow',
687 687 b'upgrade-abort',
688 688 ):
689 689 # prevent cyclic import localrepo -> upgrade -> localrepo
690 690 from . import upgrade
691 691
692 692 upgrade.upgrade_share_to_safe(
693 693 ui,
694 694 hgvfs,
695 695 storevfs,
696 696 requirements,
697 697 mismatch_config,
698 698 mismatch_warn,
699 699 mismatch_verbose_upgrade,
700 700 )
701 701 elif mismatch_config == b'abort':
702 702 raise error.Abort(
703 703 _(
704 704 b'version mismatch: source uses share-safe'
705 705 b' functionality while the current share does not'
706 706 ),
707 707 hint=hint,
708 708 )
709 709 else:
710 710 raise error.Abort(
711 711 _(
712 712 b"share-safe mismatch with source.\nUnrecognized"
713 713 b" value '%s' of `share.safe-mismatch.source-safe` set."
714 714 )
715 715 % mismatch_config,
716 716 hint=hint,
717 717 )
718 718
719 719 # The .hg/hgrc file may load extensions or contain config options
720 720 # that influence repository construction. Attempt to load it and
721 721 # process any new extensions that it may have pulled in.
722 722 if loadhgrc(ui, wdirvfs, hgvfs, requirements, sharedvfs):
723 723 afterhgrcload(ui, wdirvfs, hgvfs, requirements)
724 724 extensions.loadall(ui)
725 725 extensions.populateui(ui)
726 726
727 727 # Set of module names of extensions loaded for this repository.
728 728 extensionmodulenames = {m.__name__ for n, m in extensions.extensions(ui)}
729 729
730 730 supportedrequirements = gathersupportedrequirements(ui)
731 731
732 732 # We first validate the requirements are known.
733 733 ensurerequirementsrecognized(requirements, supportedrequirements)
734 734
735 735 # Then we validate that the known set is reasonable to use together.
736 736 ensurerequirementscompatible(ui, requirements)
737 737
738 738 # TODO there are unhandled edge cases related to opening repositories with
739 739 # shared storage. If storage is shared, we should also test for requirements
740 740 # compatibility in the pointed-to repo. This entails loading the .hg/hgrc in
741 741 # that repo, as that repo may load extensions needed to open it. This is a
742 742 # bit complicated because we don't want the other hgrc to overwrite settings
743 743 # in this hgrc.
744 744 #
745 745 # This bug is somewhat mitigated by the fact that we copy the .hg/requires
746 746 # file when sharing repos. But if a requirement is added after the share is
747 747 # performed, thereby introducing a new requirement for the opener, we may
748 748 # not see that and could encounter a run-time error interacting with
749 749 # that shared store since it has an unknown-to-us requirement.
750 750
751 751 # At this point, we know we should be capable of opening the repository.
752 752 # Now get on with doing that.
753 753
754 754 features = set()
755 755
756 756 # The "store" part of the repository holds versioned data. How it is
757 757 # accessed is determined by various requirements. If `shared` or
758 758 # `relshared` requirements are present, this indicates current repository
759 759 # is a share and store exists in path mentioned in `.hg/sharedpath`
760 760 if shared:
761 761 storebasepath = sharedvfs.base
762 762 cachepath = sharedvfs.join(b'cache')
763 763 features.add(repository.REPO_FEATURE_SHARED_STORAGE)
764 764 else:
765 765 storebasepath = hgvfs.base
766 766 cachepath = hgvfs.join(b'cache')
767 767 wcachepath = hgvfs.join(b'wcache')
768 768
769 769 # The store has changed over time and the exact layout is dictated by
770 770 # requirements. The store interface abstracts differences across all
771 771 # of them.
772 772 store = makestore(
773 773 requirements,
774 774 storebasepath,
775 775 lambda base: vfsmod.vfs(base, cacheaudited=True),
776 776 )
777 777 hgvfs.createmode = store.createmode
778 778
779 779 storevfs = store.vfs
780 780 storevfs.options = resolvestorevfsoptions(ui, requirements, features)
781 781
782 782 if (
783 783 requirementsmod.REVLOGV2_REQUIREMENT in requirements
784 784 or requirementsmod.CHANGELOGV2_REQUIREMENT in requirements
785 785 ):
786 786 features.add(repository.REPO_FEATURE_SIDE_DATA)
787 787 # the revlogv2 docket introduced a race condition that we need to fix
788 788 features.discard(repository.REPO_FEATURE_STREAM_CLONE)
789 789
790 790 # The cache vfs is used to manage cache files.
791 791 cachevfs = vfsmod.vfs(cachepath, cacheaudited=True)
792 792 cachevfs.createmode = store.createmode
793 793 # The wcache vfs is used to manage cache files related to the working copy
794 794 wcachevfs = vfsmod.vfs(wcachepath, cacheaudited=True)
795 795 wcachevfs.createmode = store.createmode
796 796
797 797 # Now resolve the type for the repository object. We do this by repeatedly
798 798 # calling a factory function to produce types for specific aspects of the
799 799 # repo's operation. The aggregate returned types are used as base classes
800 800 # for a dynamically-derived type, which will represent our new repository.
801 801
802 802 bases = []
803 803 extrastate = {}
804 804
805 805 for iface, fn in REPO_INTERFACES:
806 806 # We pass all potentially useful state to give extensions tons of
807 807 # flexibility.
808 808 typ = fn()(
809 809 ui=ui,
810 810 intents=intents,
811 811 requirements=requirements,
812 812 features=features,
813 813 wdirvfs=wdirvfs,
814 814 hgvfs=hgvfs,
815 815 store=store,
816 816 storevfs=storevfs,
817 817 storeoptions=storevfs.options,
818 818 cachevfs=cachevfs,
819 819 wcachevfs=wcachevfs,
820 820 extensionmodulenames=extensionmodulenames,
821 821 extrastate=extrastate,
822 822 baseclasses=bases,
823 823 )
824 824
825 825 if not isinstance(typ, type):
826 826 raise error.ProgrammingError(
827 827 b'unable to construct type for %s' % iface
828 828 )
829 829
830 830 bases.append(typ)
831 831
832 832 # type() allows you to use characters in type names that wouldn't be
833 833 # recognized as Python symbols in source code. We abuse that to add
834 834 # rich information about our constructed repo.
835 835 name = pycompat.sysstr(
836 836 b'derivedrepo:%s<%s>' % (wdirvfs.base, b','.join(sorted(requirements)))
837 837 )
838 838
839 839 cls = type(name, tuple(bases), {})
840 840
841 841 return cls(
842 842 baseui=baseui,
843 843 ui=ui,
844 844 origroot=path,
845 845 wdirvfs=wdirvfs,
846 846 hgvfs=hgvfs,
847 847 requirements=requirements,
848 848 supportedrequirements=supportedrequirements,
849 849 sharedpath=storebasepath,
850 850 store=store,
851 851 cachevfs=cachevfs,
852 852 wcachevfs=wcachevfs,
853 853 features=features,
854 854 intents=intents,
855 855 )
856 856
857 857
858 858 def loadhgrc(
859 859 ui,
860 860 wdirvfs: vfsmod.vfs,
861 861 hgvfs: vfsmod.vfs,
862 862 requirements,
863 863 sharedvfs: Optional[vfsmod.vfs] = None,
864 864 ):
865 865 """Load hgrc files/content into a ui instance.
866 866
867 867 This is called during repository opening to load any additional
868 868 config files or settings relevant to the current repository.
869 869
870 870 Returns a bool indicating whether any additional configs were loaded.
871 871
872 872 Extensions should monkeypatch this function to modify how per-repo
873 873 configs are loaded. For example, an extension may wish to pull in
874 874 configs from alternate files or sources.
875 875
876 876 sharedvfs is a vfs object pointing to the source repo if the current one
877 877 is a shared one
878 878 """
879 879 if not rcutil.use_repo_hgrc():
880 880 return False
881 881
882 882 ret = False
883 883 # first load config from shared source if we have to
884 884 if requirementsmod.SHARESAFE_REQUIREMENT in requirements and sharedvfs:
885 885 try:
886 886 ui.readconfig(sharedvfs.join(b'hgrc'), root=sharedvfs.base)
887 887 ret = True
888 888 except IOError:
889 889 pass
890 890
891 891 try:
892 892 ui.readconfig(hgvfs.join(b'hgrc'), root=wdirvfs.base)
893 893 ret = True
894 894 except IOError:
895 895 pass
896 896
897 897 try:
898 898 ui.readconfig(hgvfs.join(b'hgrc-not-shared'), root=wdirvfs.base)
899 899 ret = True
900 900 except IOError:
901 901 pass
902 902
903 903 return ret
904 904
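# Illustrative sketch (hypothetical file name; wrapping done with the standard
# extensions.wrapfunction API): an extension reading per-repo config from an
# additional file could wrap loadhgrc() along these lines:
#
#     def _loadhgrc(orig, ui, wdirvfs, hgvfs, requirements, *args, **kwargs):
#         loaded = orig(ui, wdirvfs, hgvfs, requirements, *args, **kwargs)
#         try:
#             ui.readconfig(hgvfs.join(b'hgrc-extra'), root=wdirvfs.base)
#             return True
#         except IOError:
#             return loaded
#
#     extensions.wrapfunction(localrepo, 'loadhgrc', _loadhgrc)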
905 905
906 906 def afterhgrcload(ui, wdirvfs, hgvfs, requirements):
907 907 """Perform additional actions after .hg/hgrc is loaded.
908 908
909 909 This function is called during repository loading immediately after
910 910 the .hg/hgrc file is loaded and before per-repo extensions are loaded.
911 911
912 912 The function can be used to validate configs, automatically add
913 913 options (including extensions) based on requirements, etc.
914 914 """
915 915
916 916 # Map of requirements to list of extensions to load automatically when
917 917 # requirement is present.
918 918 autoextensions = {
919 919 b'git': [b'git'],
920 920 b'largefiles': [b'largefiles'],
921 921 b'lfs': [b'lfs'],
922 922 }
923 923
924 924 for requirement, names in sorted(autoextensions.items()):
925 925 if requirement not in requirements:
926 926 continue
927 927
928 928 for name in names:
929 929 if not ui.hasconfig(b'extensions', name):
930 930 ui.setconfig(b'extensions', name, b'', source=b'autoload')
931 931
932 932
933 933 def gathersupportedrequirements(ui):
934 934 """Determine the complete set of recognized requirements."""
935 935 # Start with all requirements supported by this file.
936 936 supported = set(localrepository._basesupported)
937 937
938 938 # Execute ``featuresetupfuncs`` entries if they belong to an extension
939 939 # relevant to this ui instance.
940 940 modules = {m.__name__ for n, m in extensions.extensions(ui)}
941 941
942 942 for fn in featuresetupfuncs:
943 943 if fn.__module__ in modules:
944 944 fn(ui, supported)
945 945
946 946 # Add derived requirements from registered compression engines.
947 947 for name in util.compengines:
948 948 engine = util.compengines[name]
949 949 if engine.available() and engine.revlogheader():
950 950 supported.add(b'exp-compression-%s' % name)
951 951 if engine.name() == b'zstd':
952 952 supported.add(requirementsmod.REVLOG_COMPRESSION_ZSTD)
953 953
954 954 return supported
955 955
956 956
957 957 def ensurerequirementsrecognized(requirements, supported):
958 958 """Validate that a set of local requirements is recognized.
959 959
960 960 Receives a set of requirements. Raises an ``error.RequirementError`` if
961 961 there exists any requirement in that set that currently loaded code
962 962 doesn't recognize.
965 965 """
966 966 missing = set()
967 967
968 968 for requirement in requirements:
969 969 if requirement in supported:
970 970 continue
971 971
972 972 if not requirement or not requirement[0:1].isalnum():
973 973 raise error.RequirementError(_(b'.hg/requires file is corrupt'))
974 974
975 975 missing.add(requirement)
976 976
977 977 if missing:
978 978 raise error.RequirementError(
979 979 _(b'repository requires features unknown to this Mercurial: %s')
980 980 % b' '.join(sorted(missing)),
981 981 hint=_(
982 982 b'see https://mercurial-scm.org/wiki/MissingRequirement '
983 983 b'for more information'
984 984 ),
985 985 )
986 986
987 987
988 988 def ensurerequirementscompatible(ui, requirements):
989 989 """Validates that a set of recognized requirements is mutually compatible.
990 990
991 991 Some requirements may not be compatible with others or require
992 992 config options that aren't enabled. This function is called during
993 993 repository opening to ensure that the set of requirements needed
994 994 to open a repository is sane and compatible with config options.
995 995
996 996 Extensions can monkeypatch this function to perform additional
997 997 checking.
998 998
999 999 ``error.RepoError`` should be raised on failure.
1000 1000 """
1001 1001 if (
1002 1002 requirementsmod.SPARSE_REQUIREMENT in requirements
1003 1003 and not sparse.enabled
1004 1004 ):
1005 1005 raise error.RepoError(
1006 1006 _(
1007 1007 b'repository is using sparse feature but '
1008 1008 b'sparse is not enabled; enable the '
1009 1009 b'"sparse" extensions to access'
1010 1010 )
1011 1011 )
1012 1012
1013 1013
1014 1014 def makestore(requirements, path, vfstype):
1015 1015 """Construct a storage object for a repository."""
1016 1016 if requirementsmod.STORE_REQUIREMENT in requirements:
1017 1017 if requirementsmod.FNCACHE_REQUIREMENT in requirements:
1018 1018 dotencode = requirementsmod.DOTENCODE_REQUIREMENT in requirements
1019 1019 return storemod.fncachestore(path, vfstype, dotencode)
1020 1020
1021 1021 return storemod.encodedstore(path, vfstype)
1022 1022
1023 1023 return storemod.basicstore(path, vfstype)
1024 1024
1025 1025
1026 1026 def resolvestorevfsoptions(ui, requirements, features):
1027 1027 """Resolve the options to pass to the store vfs opener.
1028 1028
1029 1029 The returned dict is used to influence behavior of the storage layer.
1030 1030 """
1031 1031 options = {}
1032 1032
1033 1033 if requirementsmod.TREEMANIFEST_REQUIREMENT in requirements:
1034 1034 options[b'treemanifest'] = True
1035 1035
1036 1036 # experimental config: format.manifestcachesize
1037 1037 manifestcachesize = ui.configint(b'format', b'manifestcachesize')
1038 1038 if manifestcachesize is not None:
1039 1039 options[b'manifestcachesize'] = manifestcachesize
1040 1040
1041 1041 # In the absence of another requirement superseding a revlog-related
1042 1042 # requirement, we have to assume the repo is using revlog version 0.
1043 1043 # This revlog format is super old and we don't bother trying to parse
1044 1044 # opener options for it because those options wouldn't do anything
1045 1045 # meaningful on such old repos.
1046 1046 if (
1047 1047 requirementsmod.REVLOGV1_REQUIREMENT in requirements
1048 1048 or requirementsmod.REVLOGV2_REQUIREMENT in requirements
1049 1049 ):
1050 1050 options.update(resolverevlogstorevfsoptions(ui, requirements, features))
1051 1051 else: # explicitly mark repo as using revlogv0
1052 1052 options[b'revlogv0'] = True
1053 1053
1054 1054 if requirementsmod.COPIESSDC_REQUIREMENT in requirements:
1055 1055 options[b'copies-storage'] = b'changeset-sidedata'
1056 1056 else:
1057 1057 writecopiesto = ui.config(b'experimental', b'copies.write-to')
1058 1058 copiesextramode = (b'changeset-only', b'compatibility')
1059 1059 if writecopiesto in copiesextramode:
1060 1060 options[b'copies-storage'] = b'extra'
1061 1061
1062 1062 return options
1063 1063
1064 1064
1065 1065 def resolverevlogstorevfsoptions(ui, requirements, features):
1066 1066 """Resolve opener options specific to revlogs."""
1067 1067
1068 1068 options = {}
1069 1069 options[b'flagprocessors'] = {}
1070 1070
1071 1071 feature_config = options[b'feature-config'] = revlog.FeatureConfig()
1072 1072 data_config = options[b'data-config'] = revlog.DataConfig()
1073 1073 delta_config = options[b'delta-config'] = revlog.DeltaConfig()
1074 1074
1075 1075 if requirementsmod.REVLOGV1_REQUIREMENT in requirements:
1076 1076 options[b'revlogv1'] = True
1077 1077 if requirementsmod.REVLOGV2_REQUIREMENT in requirements:
1078 1078 options[b'revlogv2'] = True
1079 1079 if requirementsmod.CHANGELOGV2_REQUIREMENT in requirements:
1080 1080 options[b'changelogv2'] = True
1081 1081 cmp_rank = ui.configbool(b'experimental', b'changelog-v2.compute-rank')
1082 1082 options[b'changelogv2.compute-rank'] = cmp_rank
1083 1083
1084 1084 if requirementsmod.GENERALDELTA_REQUIREMENT in requirements:
1085 1085 options[b'generaldelta'] = True
1086 1086
1087 1087 # experimental config: format.chunkcachesize
1088 1088 chunkcachesize = ui.configint(b'format', b'chunkcachesize')
1089 1089 if chunkcachesize is not None:
1090 1090 data_config.chunk_cache_size = chunkcachesize
1091 1091
1092 1092 delta_config.delta_both_parents = ui.configbool(
1093 1093 b'storage', b'revlog.optimize-delta-parent-choice'
1094 1094 )
1095 1095 delta_config.candidate_group_chunk_size = ui.configint(
1096 1096 b'storage',
1097 1097 b'revlog.delta-parent-search.candidate-group-chunk-size',
1098 1098 )
1099 1099 delta_config.debug_delta = ui.configbool(b'debug', b'revlog.debug-delta')
1100 1100
1101 1101 issue6528 = ui.configbool(b'storage', b'revlog.issue6528.fix-incoming')
1102 1102 options[b'issue6528.fix-incoming'] = issue6528
1103 1103
1104 1104 lazydelta = ui.configbool(b'storage', b'revlog.reuse-external-delta')
1105 1105 lazydeltabase = False
1106 1106 if lazydelta:
1107 1107 lazydeltabase = ui.configbool(
1108 1108 b'storage', b'revlog.reuse-external-delta-parent'
1109 1109 )
1110 1110 if lazydeltabase is None:
1111 1111 lazydeltabase = not scmutil.gddeltaconfig(ui)
1112 1112 delta_config.lazy_delta = lazydelta
1113 1113 delta_config.lazy_delta_base = lazydeltabase
1114 1114
1115 1115 chainspan = ui.configbytes(b'experimental', b'maxdeltachainspan')
1116 1116 if 0 <= chainspan:
1117 1117 delta_config.max_deltachain_span = chainspan
1118 1118
1119 1119 mmapindexthreshold = ui.configbytes(b'experimental', b'mmapindexthreshold')
1120 1120 if mmapindexthreshold is not None:
1121 1121 data_config.mmap_index_threshold = mmapindexthreshold
1122 1122
1123 1123 withsparseread = ui.configbool(b'experimental', b'sparse-read')
1124 1124 srdensitythres = float(
1125 1125 ui.config(b'experimental', b'sparse-read.density-threshold')
1126 1126 )
1127 1127 srmingapsize = ui.configbytes(b'experimental', b'sparse-read.min-gap-size')
1128 options[b'with-sparse-read'] = withsparseread
1129 options[b'sparse-read-density-threshold'] = srdensitythres
1130 options[b'sparse-read-min-gap-size'] = srmingapsize
1128 data_config.with_sparse_read = withsparseread
1129 data_config.sr_density_threshold = srdensitythres
1130 data_config.sr_min_gap_size = srmingapsize
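# With this change, the sparse-read tuning above travels on the DataConfig
# object handed to the revlogs instead of the legacy opener-options
# dictionary (the three options[b'...'] assignments that used to live here).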
1131 1131
1132 1132 sparserevlog = requirementsmod.SPARSEREVLOG_REQUIREMENT in requirements
1133 1133 delta_config.sparse_revlog = sparserevlog
1134 1134 if sparserevlog:
1135 1135 options[b'generaldelta'] = True
1136 data_config.with_sparse_read = True
1136 1137
1137 1138 maxchainlen = None
1138 1139 if sparserevlog:
1139 1140 maxchainlen = revlogconst.SPARSE_REVLOG_MAX_CHAIN_LENGTH
1140 1141 # experimental config: format.maxchainlen
1141 1142 maxchainlen = ui.configint(b'format', b'maxchainlen', maxchainlen)
1142 1143 if maxchainlen is not None:
1143 1144 delta_config.max_chain_len = maxchainlen
1144 1145
1145 1146 for r in requirements:
1146 1147 # we allow multiple compression engine requirements to co-exist because
1147 1148 # strictly speaking, revlog seems to support mixed compression styles.
1148 1149 #
1149 1150 # The compression used for new entries will be "the last one"
1150 1151 prefix = r.startswith
1151 1152 if prefix(b'revlog-compression-') or prefix(b'exp-compression-'):
1152 1153 feature_config.compression_engine = r.split(b'-', 2)[2]
1153 1154
1154 1155 zlib_level = ui.configint(b'storage', b'revlog.zlib.level')
1155 1156 if zlib_level is not None:
1156 1157 if not (0 <= zlib_level <= 9):
1157 1158 msg = _(b'invalid value for `storage.revlog.zlib.level` config: %d')
1158 1159 raise error.Abort(msg % zlib_level)
1159 1160 feature_config.compression_engine_options[b'zlib.level'] = zlib_level
1160 1161 zstd_level = ui.configint(b'storage', b'revlog.zstd.level')
1161 1162 if zstd_level is not None:
1162 1163 if not (0 <= zstd_level <= 22):
1163 1164 msg = _(b'invalid value for `storage.revlog.zstd.level` config: %d')
1164 1165 raise error.Abort(msg % zstd_level)
1165 1166 feature_config.compression_engine_options[b'zstd.level'] = zstd_level
1166 1167
1167 1168 if requirementsmod.NARROW_REQUIREMENT in requirements:
1168 1169 feature_config.enable_ellipsis = True
1169 1170
1170 1171 if ui.configbool(b'experimental', b'rust.index'):
1171 1172 options[b'rust.index'] = True
1172 1173 if requirementsmod.NODEMAP_REQUIREMENT in requirements:
1173 1174 slow_path = ui.config(
1174 1175 b'storage', b'revlog.persistent-nodemap.slow-path'
1175 1176 )
1176 1177 if slow_path not in (b'allow', b'warn', b'abort'):
1177 1178 default = ui.config_default(
1178 1179 b'storage', b'revlog.persistent-nodemap.slow-path'
1179 1180 )
1180 1181 msg = _(
1181 1182 b'unknown value for config '
1182 1183 b'"storage.revlog.persistent-nodemap.slow-path": "%s"\n'
1183 1184 )
1184 1185 ui.warn(msg % slow_path)
1185 1186 if not ui.quiet:
1186 1187 ui.warn(_(b'falling back to default value: %s\n') % default)
1187 1188 slow_path = default
1188 1189
1189 1190 msg = _(
1190 1191 b"accessing `persistent-nodemap` repository without associated "
1191 1192 b"fast implementation."
1192 1193 )
1193 1194 hint = _(
1194 1195 b"check `hg help config.format.use-persistent-nodemap` "
1195 1196 b"for details"
1196 1197 )
1197 1198 if not revlog.HAS_FAST_PERSISTENT_NODEMAP:
1198 1199 if slow_path == b'warn':
1199 1200 msg = b"warning: " + msg + b'\n'
1200 1201 ui.warn(msg)
1201 1202 if not ui.quiet:
1202 1203 hint = b'(' + hint + b')\n'
1203 1204 ui.warn(hint)
1204 1205 if slow_path == b'abort':
1205 1206 raise error.Abort(msg, hint=hint)
1206 1207 options[b'persistent-nodemap'] = True
1207 1208 if requirementsmod.DIRSTATE_V2_REQUIREMENT in requirements:
1208 1209 slow_path = ui.config(b'storage', b'dirstate-v2.slow-path')
1209 1210 if slow_path not in (b'allow', b'warn', b'abort'):
1210 1211 default = ui.config_default(b'storage', b'dirstate-v2.slow-path')
1211 1212 msg = _(b'unknown value for config "dirstate-v2.slow-path": "%s"\n')
1212 1213 ui.warn(msg % slow_path)
1213 1214 if not ui.quiet:
1214 1215 ui.warn(_(b'falling back to default value: %s\n') % default)
1215 1216 slow_path = default
1216 1217
1217 1218 msg = _(
1218 1219 b"accessing `dirstate-v2` repository without associated "
1219 1220 b"fast implementation."
1220 1221 )
1221 1222 hint = _(
1222 1223 b"check `hg help config.format.use-dirstate-v2` " b"for details"
1223 1224 )
1224 1225 if not dirstate.HAS_FAST_DIRSTATE_V2:
1225 1226 if slow_path == b'warn':
1226 1227 msg = b"warning: " + msg + b'\n'
1227 1228 ui.warn(msg)
1228 1229 if not ui.quiet:
1229 1230 hint = b'(' + hint + b')\n'
1230 1231 ui.warn(hint)
1231 1232 if slow_path == b'abort':
1232 1233 raise error.Abort(msg, hint=hint)
1233 1234 if ui.configbool(b'storage', b'revlog.persistent-nodemap.mmap'):
1234 1235 options[b'persistent-nodemap.mmap'] = True
1235 1236 if ui.configbool(b'devel', b'persistent-nodemap'):
1236 1237 options[b'devel-force-nodemap'] = True
1237 1238
1238 1239 return options
1239 1240
1240 1241
1241 1242 def makemain(**kwargs):
1242 1243 """Produce a type conforming to ``ilocalrepositorymain``."""
1243 1244 return localrepository
1244 1245
1245 1246
1246 1247 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1247 1248 class revlogfilestorage:
1248 1249 """File storage when using revlogs."""
1249 1250
1250 1251 def file(self, path):
1251 1252 if path.startswith(b'/'):
1252 1253 path = path[1:]
1253 1254
1254 1255 try_split = (
1255 1256 self.currenttransaction() is not None
1256 1257 or txnutil.mayhavepending(self.root)
1257 1258 )
1258 1259
1259 1260 return filelog.filelog(self.svfs, path, try_split=try_split)
1260 1261
1261 1262
1262 1263 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1263 1264 class revlognarrowfilestorage:
1264 1265 """File storage when using revlogs and narrow files."""
1265 1266
1266 1267 def file(self, path):
1267 1268 if path.startswith(b'/'):
1268 1269 path = path[1:]
1269 1270
1270 1271 try_split = (
1271 1272 self.currenttransaction() is not None
1272 1273 or txnutil.mayhavepending(self.root)
1273 1274 )
1274 1275 return filelog.narrowfilelog(
1275 1276 self.svfs, path, self._storenarrowmatch, try_split=try_split
1276 1277 )
1277 1278
1278 1279
1279 1280 def makefilestorage(requirements, features, **kwargs):
1280 1281 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1281 1282 features.add(repository.REPO_FEATURE_REVLOG_FILE_STORAGE)
1282 1283 features.add(repository.REPO_FEATURE_STREAM_CLONE)
1283 1284
1284 1285 if requirementsmod.NARROW_REQUIREMENT in requirements:
1285 1286 return revlognarrowfilestorage
1286 1287 else:
1287 1288 return revlogfilestorage
1288 1289
1289 1290
1290 1291 # List of repository interfaces and factory functions for them. Each
1291 1292 # will be called in order during ``makelocalrepository()`` to iteratively
1292 1293 # derive the final type for a local repository instance. We capture the
1293 1294 # function as a lambda so we don't hold a reference and the module-level
1294 1295 # functions can be wrapped.
1295 1296 REPO_INTERFACES = [
1296 1297 (repository.ilocalrepositorymain, lambda: makemain),
1297 1298 (repository.ilocalrepositoryfilestorage, lambda: makefilestorage),
1298 1299 ]
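# Illustrative sketch (hypothetical extension and mixin names): because the
# factories are looked up through lambdas, an extension can customize the
# derived repository type by wrapping a module-level factory and checking
# ``extensionmodulenames`` as the makelocalrepository() docstring asks:
#
#     def wrapped_makemain(orig, **kwargs):
#         cls = orig(**kwargs)
#         if 'myext' not in kwargs['extensionmodulenames']:
#             return cls
#         return type('myextrepo', (myextmixin, cls), {})
#
#     extensions.wrapfunction(localrepo, 'makemain', wrapped_makemain)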
1299 1300
1300 1301
1301 1302 @interfaceutil.implementer(repository.ilocalrepositorymain)
1302 1303 class localrepository:
1303 1304 """Main class for representing local repositories.
1304 1305
1305 1306 All local repositories are instances of this class.
1306 1307
1307 1308 Constructed on its own, instances of this class are not usable as
1308 1309 repository objects. To obtain a usable repository object, call
1309 1310 ``hg.repository()``, ``localrepo.instance()``, or
1310 1311 ``localrepo.makelocalrepository()``. The latter is the lowest-level.
1311 1312 ``instance()`` adds support for creating new repositories.
1312 1313 ``hg.repository()`` adds more extension integration, including calling
1313 1314 ``reposetup()``. Generally speaking, ``hg.repository()`` should be
1314 1315 used.
1315 1316 """
1316 1317
1317 1318 _basesupported = {
1318 1319 requirementsmod.ARCHIVED_PHASE_REQUIREMENT,
1319 1320 requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT,
1320 1321 requirementsmod.CHANGELOGV2_REQUIREMENT,
1321 1322 requirementsmod.COPIESSDC_REQUIREMENT,
1322 1323 requirementsmod.DIRSTATE_TRACKED_HINT_V1,
1323 1324 requirementsmod.DIRSTATE_V2_REQUIREMENT,
1324 1325 requirementsmod.DOTENCODE_REQUIREMENT,
1325 1326 requirementsmod.FNCACHE_REQUIREMENT,
1326 1327 requirementsmod.GENERALDELTA_REQUIREMENT,
1327 1328 requirementsmod.INTERNAL_PHASE_REQUIREMENT,
1328 1329 requirementsmod.NODEMAP_REQUIREMENT,
1329 1330 requirementsmod.RELATIVE_SHARED_REQUIREMENT,
1330 1331 requirementsmod.REVLOGV1_REQUIREMENT,
1331 1332 requirementsmod.REVLOGV2_REQUIREMENT,
1332 1333 requirementsmod.SHARED_REQUIREMENT,
1333 1334 requirementsmod.SHARESAFE_REQUIREMENT,
1334 1335 requirementsmod.SPARSE_REQUIREMENT,
1335 1336 requirementsmod.SPARSEREVLOG_REQUIREMENT,
1336 1337 requirementsmod.STORE_REQUIREMENT,
1337 1338 requirementsmod.TREEMANIFEST_REQUIREMENT,
1338 1339 }
1339 1340
1340 1341 # list of prefix for file which can be written without 'wlock'
1341 1342 # Extensions should extend this list when needed
1342 1343 _wlockfreeprefix = {
1343 1344 # We might consider requiring 'wlock' for the next
1344 1345 # two, but pretty much all the existing code assumes
1345 1346 # wlock is not needed so we keep them excluded for
1346 1347 # now.
1347 1348 b'hgrc',
1348 1349 b'requires',
1349 1350 # XXX cache is a complicated business; someone
1350 1351 # should investigate this in depth at some point
1351 1352 b'cache/',
1352 1353 # XXX bisect was still a bit too messy at the time
1353 1354 # this changeset was introduced. Someone should fix
1354 1355 # the remaining bit and drop this line
1355 1356 b'bisect.state',
1356 1357 }
1357 1358
1358 1359 def __init__(
1359 1360 self,
1360 1361 baseui,
1361 1362 ui,
1362 1363 origroot: bytes,
1363 1364 wdirvfs: vfsmod.vfs,
1364 1365 hgvfs: vfsmod.vfs,
1365 1366 requirements,
1366 1367 supportedrequirements,
1367 1368 sharedpath: bytes,
1368 1369 store,
1369 1370 cachevfs: vfsmod.vfs,
1370 1371 wcachevfs: vfsmod.vfs,
1371 1372 features,
1372 1373 intents=None,
1373 1374 ):
1374 1375 """Create a new local repository instance.
1375 1376
1376 1377 Most callers should use ``hg.repository()``, ``localrepo.instance()``,
1377 1378 or ``localrepo.makelocalrepository()`` for obtaining a new repository
1378 1379 object.
1379 1380
1380 1381 Arguments:
1381 1382
1382 1383 baseui
1383 1384 ``ui.ui`` instance that ``ui`` argument was based off of.
1384 1385
1385 1386 ui
1386 1387 ``ui.ui`` instance for use by the repository.
1387 1388
1388 1389 origroot
1389 1390 ``bytes`` path to working directory root of this repository.
1390 1391
1391 1392 wdirvfs
1392 1393 ``vfs.vfs`` rooted at the working directory.
1393 1394
1394 1395 hgvfs
1395 1396 ``vfs.vfs`` rooted at .hg/
1396 1397
1397 1398 requirements
1398 1399 ``set`` of bytestrings representing repository opening requirements.
1399 1400
1400 1401 supportedrequirements
1401 1402 ``set`` of bytestrings representing repository requirements that we
1402 1403 know how to open. May be a superset of ``requirements``.
1403 1404
1404 1405 sharedpath
1405 1406 ``bytes`` Defining path to storage base directory. Points to a
1406 1407 ``.hg/`` directory somewhere.
1407 1408
1408 1409 store
1409 1410 ``store.basicstore`` (or derived) instance providing access to
1410 1411 versioned storage.
1411 1412
1412 1413 cachevfs
1413 1414 ``vfs.vfs`` used for cache files.
1414 1415
1415 1416 wcachevfs
1416 1417 ``vfs.vfs`` used for cache files related to the working copy.
1417 1418
1418 1419 features
1419 1420 ``set`` of bytestrings defining features/capabilities of this
1420 1421 instance.
1421 1422
1422 1423 intents
1423 1424 ``set`` of system strings indicating what this repo will be used
1424 1425 for.
1425 1426 """
1426 1427 self.baseui = baseui
1427 1428 self.ui = ui
1428 1429 self.origroot = origroot
1429 1430 # vfs rooted at working directory.
1430 1431 self.wvfs = wdirvfs
1431 1432 self.root = wdirvfs.base
1432 1433 # vfs rooted at .hg/. Used to access most non-store paths.
1433 1434 self.vfs = hgvfs
1434 1435 self.path = hgvfs.base
1435 1436 self.requirements = requirements
1436 1437 self.nodeconstants = sha1nodeconstants
1437 1438 self.nullid = self.nodeconstants.nullid
1438 1439 self.supported = supportedrequirements
1439 1440 self.sharedpath = sharedpath
1440 1441 self.store = store
1441 1442 self.cachevfs = cachevfs
1442 1443 self.wcachevfs = wcachevfs
1443 1444 self.features = features
1444 1445
1445 1446 self.filtername = None
1446 1447
1447 1448 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1448 1449 b'devel', b'check-locks'
1449 1450 ):
1450 1451 self.vfs.audit = self._getvfsward(self.vfs.audit)
1451 1452 # A list of callbacks to shape the phase if no data were found.
1452 1453 # Callbacks are in the form: func(repo, roots) --> processed root.
1453 1454 # This list is to be filled by extensions during repo setup
1454 1455 self._phasedefaults = []
1455 1456
1456 1457 color.setup(self.ui)
1457 1458
1458 1459 self.spath = self.store.path
1459 1460 self.svfs = self.store.vfs
1460 1461 self.sjoin = self.store.join
1461 1462 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1462 1463 b'devel', b'check-locks'
1463 1464 ):
1464 1465 if hasattr(self.svfs, 'vfs'): # this is filtervfs
1465 1466 self.svfs.vfs.audit = self._getsvfsward(self.svfs.vfs.audit)
1466 1467 else: # standard vfs
1467 1468 self.svfs.audit = self._getsvfsward(self.svfs.audit)
1468 1469
1469 1470 self._dirstatevalidatewarned = False
1470 1471
1471 1472 self._branchcaches = branchmap.BranchMapCache()
1472 1473 self._revbranchcache = None
1473 1474 self._filterpats = {}
1474 1475 self._datafilters = {}
1475 1476 self._transref = self._lockref = self._wlockref = None
1476 1477
1477 1478 # A cache for various files under .hg/ that tracks file changes,
1478 1479 # (used by the filecache decorator)
1479 1480 #
1480 1481 # Maps a property name to its util.filecacheentry
1481 1482 self._filecache = {}
1482 1483
1483 1484 # hold sets of revisions to be filtered
1484 1485 # should be cleared when something might have changed the filter value:
1485 1486 # - new changesets,
1486 1487 # - phase change,
1487 1488 # - new obsolescence marker,
1488 1489 # - working directory parent change,
1489 1490 # - bookmark changes
1490 1491 self.filteredrevcache = {}
1491 1492
1492 1493 self._dirstate = None
1493 1494 # post-dirstate-status hooks
1494 1495 self._postdsstatus = []
1495 1496
1496 1497 self._pending_narrow_pats = None
1497 1498 self._pending_narrow_pats_dirstate = None
1498 1499
1499 1500 # generic mapping between names and nodes
1500 1501 self.names = namespaces.namespaces()
1501 1502
1502 1503 # Key to signature value.
1503 1504 self._sparsesignaturecache = {}
1504 1505 # Signature to cached matcher instance.
1505 1506 self._sparsematchercache = {}
1506 1507
1507 1508 self._extrafilterid = repoview.extrafilter(ui)
1508 1509
1509 1510 self.filecopiesmode = None
1510 1511 if requirementsmod.COPIESSDC_REQUIREMENT in self.requirements:
1511 1512 self.filecopiesmode = b'changeset-sidedata'
1512 1513
1513 1514 self._wanted_sidedata = set()
1514 1515 self._sidedata_computers = {}
1515 1516 sidedatamod.set_sidedata_spec_for_repo(self)
1516 1517
1517 1518 def _getvfsward(self, origfunc):
1518 1519 """build a ward for self.vfs"""
1519 1520 rref = weakref.ref(self)
1520 1521
1521 1522 def checkvfs(path, mode=None):
1522 1523 ret = origfunc(path, mode=mode)
1523 1524 repo = rref()
1524 1525 if (
1525 1526 repo is None
1526 1527 or not hasattr(repo, '_wlockref')
1527 1528 or not hasattr(repo, '_lockref')
1528 1529 ):
1529 1530 return
1530 1531 if mode in (None, b'r', b'rb'):
1531 1532 return
1532 1533 if path.startswith(repo.path):
1533 1534 # truncate name relative to the repository (.hg)
1534 1535 path = path[len(repo.path) + 1 :]
1535 1536 if path.startswith(b'cache/'):
1536 1537 msg = b'accessing cache with vfs instead of cachevfs: "%s"'
1537 1538 repo.ui.develwarn(msg % path, stacklevel=3, config=b"cache-vfs")
1538 1539 # path prefixes covered by 'lock'
1539 1540 vfs_path_prefixes = (
1540 1541 b'journal.',
1541 1542 b'undo.',
1542 1543 b'strip-backup/',
1543 1544 b'cache/',
1544 1545 )
1545 1546 if any(path.startswith(prefix) for prefix in vfs_path_prefixes):
1546 1547 if repo._currentlock(repo._lockref) is None:
1547 1548 repo.ui.develwarn(
1548 1549 b'write with no lock: "%s"' % path,
1549 1550 stacklevel=3,
1550 1551 config=b'check-locks',
1551 1552 )
1552 1553 elif repo._currentlock(repo._wlockref) is None:
1553 1554 # rest of vfs files are covered by 'wlock'
1554 1555 #
1555 1556 # exclude special files
1556 1557 for prefix in self._wlockfreeprefix:
1557 1558 if path.startswith(prefix):
1558 1559 return
1559 1560 repo.ui.develwarn(
1560 1561 b'write with no wlock: "%s"' % path,
1561 1562 stacklevel=3,
1562 1563 config=b'check-locks',
1563 1564 )
1564 1565 return ret
1565 1566
1566 1567 return checkvfs
1567 1568
1568 1569 def _getsvfsward(self, origfunc):
1569 1570 """build a ward for self.svfs"""
1570 1571 rref = weakref.ref(self)
1571 1572
1572 1573 def checksvfs(path, mode=None):
1573 1574 ret = origfunc(path, mode=mode)
1574 1575 repo = rref()
1575 1576 if repo is None or not hasattr(repo, '_lockref'):
1576 1577 return
1577 1578 if mode in (None, b'r', b'rb'):
1578 1579 return
1579 1580 if path.startswith(repo.sharedpath):
1580 1581 # truncate name relative to the repository (.hg)
1581 1582 path = path[len(repo.sharedpath) + 1 :]
1582 1583 if repo._currentlock(repo._lockref) is None:
1583 1584 repo.ui.develwarn(
1584 1585 b'write with no lock: "%s"' % path, stacklevel=4
1585 1586 )
1586 1587 return ret
1587 1588
1588 1589 return checksvfs
1589 1590
1590 1591 @property
1591 1592 def vfs_map(self):
1592 1593 return {
1593 1594 b'': self.svfs,
1594 1595 b'plain': self.vfs,
1595 1596 b'store': self.svfs,
1596 1597 }
1597 1598
1598 1599 def close(self):
1599 1600 self._writecaches()
1600 1601
1601 1602 def _writecaches(self):
1602 1603 if self._revbranchcache:
1603 1604 self._revbranchcache.write()
1604 1605
1605 1606 def _restrictcapabilities(self, caps):
1606 1607 if self.ui.configbool(b'experimental', b'bundle2-advertise'):
1607 1608 caps = set(caps)
1608 1609 capsblob = bundle2.encodecaps(
1609 1610 bundle2.getrepocaps(self, role=b'client')
1610 1611 )
1611 1612 caps.add(b'bundle2=' + urlreq.quote(capsblob))
1612 1613 if self.ui.configbool(b'experimental', b'narrow'):
1613 1614 caps.add(wireprototypes.NARROWCAP)
1614 1615 return caps
1615 1616
1616 1617 # Don't cache auditor/nofsauditor, or you'll end up with reference cycle:
1617 1618 # self -> auditor -> self._checknested -> self
1618 1619
1619 1620 @property
1620 1621 def auditor(self):
1621 1622 # This is only used by context.workingctx.match in order to
1622 1623 # detect files in subrepos.
1623 1624 return pathutil.pathauditor(self.root, callback=self._checknested)
1624 1625
1625 1626 @property
1626 1627 def nofsauditor(self):
1627 1628 # This is only used by context.basectx.match in order to detect
1628 1629 # files in subrepos.
1629 1630 return pathutil.pathauditor(
1630 1631 self.root, callback=self._checknested, realfs=False, cached=True
1631 1632 )
1632 1633
1633 1634 def _checknested(self, path):
1634 1635 """Determine if path is a legal nested repository."""
1635 1636 if not path.startswith(self.root):
1636 1637 return False
1637 1638 subpath = path[len(self.root) + 1 :]
1638 1639 normsubpath = util.pconvert(subpath)
1639 1640
1640 1641 # XXX: Checking against the current working copy is wrong in
1641 1642 # the sense that it can reject things like
1642 1643 #
1643 1644 # $ hg cat -r 10 sub/x.txt
1644 1645 #
1645 1646 # if sub/ is no longer a subrepository in the working copy
1646 1647 # parent revision.
1647 1648 #
1648 1649 # However, it can of course also allow things that would have
1649 1650 # been rejected before, such as the above cat command if sub/
1650 1651 # is a subrepository now, but was a normal directory before.
1651 1652 # The old path auditor would have rejected by mistake since it
1652 1653 # panics when it sees sub/.hg/.
1653 1654 #
1654 1655 # All in all, checking against the working copy seems sensible
1655 1656 # since we want to prevent access to nested repositories on
1656 1657 # the filesystem *now*.
1657 1658 ctx = self[None]
1658 1659 parts = util.splitpath(subpath)
1659 1660 while parts:
1660 1661 prefix = b'/'.join(parts)
1661 1662 if prefix in ctx.substate:
1662 1663 if prefix == normsubpath:
1663 1664 return True
1664 1665 else:
1665 1666 sub = ctx.sub(prefix)
1666 1667 return sub.checknested(subpath[len(prefix) + 1 :])
1667 1668 else:
1668 1669 parts.pop()
1669 1670 return False
1670 1671
1671 1672 def peer(self, path=None, remotehidden=False):
1672 1673 return localpeer(
1673 1674 self, path=path, remotehidden=remotehidden
1674 1675 ) # not cached to avoid reference cycle
1675 1676
1676 1677 def unfiltered(self):
1677 1678 """Return unfiltered version of the repository
1678 1679
1679 1680 Intended to be overwritten by filtered repo."""
1680 1681 return self
1681 1682
1682 1683 def filtered(self, name, visibilityexceptions=None):
1683 1684 """Return a filtered version of a repository
1684 1685
1685 1686 The `name` parameter is the identifier of the requested view. This
1686 1687 will return a repoview object set "exactly" to the specified view.
1687 1688
1688 1689 This function does not apply recursive filtering to a repository. For
1689 1690 example calling `repo.filtered("served")` will return a repoview using
1690 1691 the "served" view, regardless of the initial view used by `repo`.
1691 1692
1692 1693 In other words, there is always only one level of `repoview` "filtering".
1693 1694 """
1694 1695 if self._extrafilterid is not None and b'%' not in name:
1695 1696 name = name + b'%' + self._extrafilterid
1696 1697
1697 1698 cls = repoview.newtype(self.unfiltered().__class__)
1698 1699 return cls(self, name, visibilityexceptions)
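    # A minimal usage sketch of the view mechanism above, assuming `repo` is
    # a localrepository instance (names below are illustrative only):
    #
    #     served = repo.filtered(b'served')    # hides hidden and secret csets
    #     visible = repo.filtered(b'visible')  # hides only hidden csets
    #     for rev in served.revs(b'head()'):
    #         ...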
1699 1700
1700 1701 @mixedrepostorecache(
1701 1702 (b'bookmarks', b'plain'),
1702 1703 (b'bookmarks.current', b'plain'),
1703 1704 (b'bookmarks', b''),
1704 1705 (b'00changelog.i', b''),
1705 1706 )
1706 1707 def _bookmarks(self):
1707 1708 # Since the multiple files involved in the transaction cannot be
1708 1709 # written atomically (with current repository format), there is a race
1709 1710 # condition here.
1710 1711 #
1711 1712 # 1) changelog content A is read
1712 1713 # 2) outside transaction update changelog to content B
1713 1714 # 3) outside transaction update bookmark file referring to content B
1714 1715 # 4) bookmarks file content is read and filtered against changelog-A
1715 1716 #
1716 1717 # When this happens, bookmarks against nodes missing from A are dropped.
1717 1718 #
1718 1719 # Having this happen during a read is not great, but it becomes worse
1719 1720 # when it happens during a write, because the bookmarks pointing to the
1720 1721 # "unknown" nodes will be dropped for good. However, writes happen within
1721 1722 # locks. This locking makes it possible to have a race-free consistent
1722 1723 # read. For this purpose, data read from disk before locking are
1723 1724 # "invalidated" right after the locks are taken. These invalidations are
1724 1725 # "light": the `filecache` mechanism keeps the data in memory and will
1725 1726 # reuse them if the underlying files did not change. Not parsing the
1726 1727 # same data multiple times helps performance.
1727 1728 #
1728 1729 # Unfortunately, in the case described above, the files tracked by the
1729 1730 # bookmarks file cache might not have changed, but the in-memory
1730 1731 # content is still "wrong" because we used an older changelog content
1731 1732 # to process the on-disk data. So after locking, the changelog would be
1732 1733 # refreshed but `_bookmarks` would be preserved.
1733 1734 # Adding `00changelog.i` to the list of tracked files is not
1734 1735 # enough, because at the time we build the content for `_bookmarks` in
1735 1736 # (4), the changelog file has already diverged from the content used
1736 1737 # for loading `changelog` in (1)
1737 1738 #
1738 1739 # To prevent the issue, we force the changelog to be explicitly
1739 1740 # reloaded while computing `_bookmarks`. The data race can still happen
1740 1741 # without the lock (with a narrower window), but it would no longer go
1741 1742 # undetected during the lock time refresh.
1742 1743 #
1743 1744 # The new schedule is as follows:
1744 1745 #
1745 1746 # 1) filecache logic detect that `_bookmarks` needs to be computed
1746 1747 # 2) cachestat for `bookmarks` and `changelog` are captured (for book)
1747 1748 # 3) We force `changelog` filecache to be tested
1748 1749 # 4) cachestat for `changelog` are captured (for changelog)
1749 1750 # 5) `_bookmarks` is computed and cached
1750 1751 #
1751 1752 # The step in (3) ensures we have a changelog at least as recent as the
1752 1753 # cache stat computed in (1). As a result, at locking time:
1753 1754 # * if the changelog did not change since (1) -> we can reuse the data
1754 1755 # * otherwise -> the bookmarks get refreshed.
1755 1756 self._refreshchangelog()
1756 1757 return bookmarks.bmstore(self)
1757 1758
1758 1759 def _refreshchangelog(self):
1759 1760 """make sure the in memory changelog match the on-disk one"""
1760 1761 if 'changelog' in vars(self) and self.currenttransaction() is None:
1761 1762 del self.changelog
1762 1763
1763 1764 @property
1764 1765 def _activebookmark(self):
1765 1766 return self._bookmarks.active
1766 1767
1767 1768 # _phasesets depend on changelog. what we need is to call
1768 1769 # _phasecache.invalidate() if '00changelog.i' was changed, but it
1769 1770 # can't be easily expressed in filecache mechanism.
1770 1771 @storecache(b'phaseroots', b'00changelog.i')
1771 1772 def _phasecache(self):
1772 1773 return phases.phasecache(self, self._phasedefaults)
1773 1774
1774 1775 @storecache(b'obsstore')
1775 1776 def obsstore(self):
1776 1777 return obsolete.makestore(self.ui, self)
1777 1778
1778 1779 @changelogcache()
1779 1780 def changelog(repo):
1780 1781 # load dirstate before changelog to avoid race see issue6303
1781 1782 repo.dirstate.prefetch_parents()
1782 1783 return repo.store.changelog(
1783 1784 txnutil.mayhavepending(repo.root),
1784 1785 concurrencychecker=revlogchecker.get_checker(repo.ui, b'changelog'),
1785 1786 )
1786 1787
1787 1788 @manifestlogcache()
1788 1789 def manifestlog(self):
1789 1790 return self.store.manifestlog(self, self._storenarrowmatch)
1790 1791
1791 1792 @unfilteredpropertycache
1792 1793 def dirstate(self):
1793 1794 if self._dirstate is None:
1794 1795 self._dirstate = self._makedirstate()
1795 1796 else:
1796 1797 self._dirstate.refresh()
1797 1798 return self._dirstate
1798 1799
1799 1800 def _makedirstate(self):
1800 1801 """Extension point for wrapping the dirstate per-repo."""
1801 1802 sparsematchfn = None
1802 1803 if sparse.use_sparse(self):
1803 1804 sparsematchfn = lambda: sparse.matcher(self)
1804 1805 v2_req = requirementsmod.DIRSTATE_V2_REQUIREMENT
1805 1806 th = requirementsmod.DIRSTATE_TRACKED_HINT_V1
1806 1807 use_dirstate_v2 = v2_req in self.requirements
1807 1808 use_tracked_hint = th in self.requirements
1808 1809
1809 1810 return dirstate.dirstate(
1810 1811 self.vfs,
1811 1812 self.ui,
1812 1813 self.root,
1813 1814 self._dirstatevalidate,
1814 1815 sparsematchfn,
1815 1816 self.nodeconstants,
1816 1817 use_dirstate_v2,
1817 1818 use_tracked_hint=use_tracked_hint,
1818 1819 )
1819 1820
1820 1821 def _dirstatevalidate(self, node):
1821 1822 okay = True
1822 1823 try:
1823 1824 self.changelog.rev(node)
1824 1825 except error.LookupError:
1825 1826 # If the parents are unknown, it might just be because the changelog
1826 1827 # in memory is lagging behind the dirstate in memory. So try to
1827 1828 # refresh the changelog first.
1828 1829 #
1829 1830 # We only do so if we don't hold the lock; if we do hold the lock,
1830 1831 # the invalidation at that time should have taken care of this and
1831 1832 # something is very fishy.
1832 1833 if self.currentlock() is None:
1833 1834 self.invalidate()
1834 1835 try:
1835 1836 self.changelog.rev(node)
1836 1837 except error.LookupError:
1837 1838 okay = False
1838 1839 else:
1839 1840 # XXX we should consider raising an error here.
1840 1841 okay = False
1841 1842 if okay:
1842 1843 return node
1843 1844 else:
1844 1845 if not self._dirstatevalidatewarned:
1845 1846 self._dirstatevalidatewarned = True
1846 1847 self.ui.warn(
1847 1848 _(b"warning: ignoring unknown working parent %s!\n")
1848 1849 % short(node)
1849 1850 )
1850 1851 return self.nullid
1851 1852
1852 1853 @storecache(narrowspec.FILENAME)
1853 1854 def narrowpats(self):
1854 1855 """matcher patterns for this repository's narrowspec
1855 1856
1856 1857 A tuple of (includes, excludes).
1857 1858 """
1858 1859 # the narrow management should probably move into its own object
1859 1860 val = self._pending_narrow_pats
1860 1861 if val is None:
1861 1862 val = narrowspec.load(self)
1862 1863 return val
1863 1864
1864 1865 @storecache(narrowspec.FILENAME)
1865 1866 def _storenarrowmatch(self):
1866 1867 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1867 1868 return matchmod.always()
1868 1869 include, exclude = self.narrowpats
1869 1870 return narrowspec.match(self.root, include=include, exclude=exclude)
1870 1871
1871 1872 @storecache(narrowspec.FILENAME)
1872 1873 def _narrowmatch(self):
1873 1874 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1874 1875 return matchmod.always()
1875 1876 narrowspec.checkworkingcopynarrowspec(self)
1876 1877 include, exclude = self.narrowpats
1877 1878 return narrowspec.match(self.root, include=include, exclude=exclude)
1878 1879
1879 1880 def narrowmatch(self, match=None, includeexact=False):
1880 1881 """matcher corresponding the the repo's narrowspec
1881 1882
1882 1883 If `match` is given, then that will be intersected with the narrow
1883 1884 matcher.
1884 1885
1885 1886 If `includeexact` is True, then any exact matches from `match` will
1886 1887 be included even if they're outside the narrowspec.
1887 1888 """
1888 1889 if match:
1889 1890 if includeexact and not self._narrowmatch.always():
1890 1891 # do not exclude explicitly-specified paths so that they can
1891 1892 # be warned later on
1892 1893 em = matchmod.exact(match.files())
1893 1894 nm = matchmod.unionmatcher([self._narrowmatch, em])
1894 1895 return matchmod.intersectmatchers(match, nm)
1895 1896 return matchmod.intersectmatchers(match, self._narrowmatch)
1896 1897 return self._narrowmatch
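    # Sketch of combining a caller-supplied matcher with the narrowspec,
    # assuming a narrow clone (the pattern below is illustrative only):
    #
    #     m = matchmod.match(repo.root, repo.getcwd(), [b'path:src'])
    #     nm = repo.narrowmatch(m, includeexact=True)
    #     if nm(b'src/module.py'):
    #         ...  # requested by the caller and inside the narrowspec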
1897 1898
1898 1899 def setnarrowpats(self, newincludes, newexcludes):
1899 1900 narrowspec.save(self, newincludes, newexcludes)
1900 1901 self.invalidate(clearfilecache=True)
1901 1902
1902 1903 @unfilteredpropertycache
1903 1904 def _quick_access_changeid_null(self):
1904 1905 return {
1905 1906 b'null': (nullrev, self.nodeconstants.nullid),
1906 1907 nullrev: (nullrev, self.nodeconstants.nullid),
1907 1908 self.nullid: (nullrev, self.nullid),
1908 1909 }
1909 1910
1910 1911 @unfilteredpropertycache
1911 1912 def _quick_access_changeid_wc(self):
1912 1913 # also fast path access to the working copy parents
1913 1914 # however, only do it for filters that ensure the wc is visible.
1914 1915 quick = self._quick_access_changeid_null.copy()
1915 1916 cl = self.unfiltered().changelog
1916 1917 for node in self.dirstate.parents():
1917 1918 if node == self.nullid:
1918 1919 continue
1919 1920 rev = cl.index.get_rev(node)
1920 1921 if rev is None:
1921 1922 # unknown working copy parent case:
1922 1923 #
1923 1924 # skip the fast path and let higher code deal with it
1924 1925 continue
1925 1926 pair = (rev, node)
1926 1927 quick[rev] = pair
1927 1928 quick[node] = pair
1928 1929 # also add the parents of the parents
1929 1930 for r in cl.parentrevs(rev):
1930 1931 if r == nullrev:
1931 1932 continue
1932 1933 n = cl.node(r)
1933 1934 pair = (r, n)
1934 1935 quick[r] = pair
1935 1936 quick[n] = pair
1936 1937 p1node = self.dirstate.p1()
1937 1938 if p1node != self.nullid:
1938 1939 quick[b'.'] = quick[p1node]
1939 1940 return quick
1940 1941
1941 1942 @unfilteredmethod
1942 1943 def _quick_access_changeid_invalidate(self):
1943 1944 if '_quick_access_changeid_wc' in vars(self):
1944 1945 del self.__dict__['_quick_access_changeid_wc']
1945 1946
1946 1947 @property
1947 1948 def _quick_access_changeid(self):
1948 1949 """an helper dictionnary for __getitem__ calls
1949 1950
1950 1951 This contains a list of symbol we can recognise right away without
1951 1952 further processing.
1952 1953 """
1953 1954 if self.filtername in repoview.filter_has_wc:
1954 1955 return self._quick_access_changeid_wc
1955 1956 return self._quick_access_changeid_null
1956 1957
1957 1958 def __getitem__(self, changeid):
1958 1959 # dealing with special cases
1959 1960 if changeid is None:
1960 1961 return context.workingctx(self)
1961 1962 if isinstance(changeid, context.basectx):
1962 1963 return changeid
1963 1964
1964 1965 # dealing with multiple revisions
1965 1966 if isinstance(changeid, slice):
1966 1967 # wdirrev isn't contiguous so the slice shouldn't include it
1967 1968 return [
1968 1969 self[i]
1969 1970 for i in range(*changeid.indices(len(self)))
1970 1971 if i not in self.changelog.filteredrevs
1971 1972 ]
1972 1973
1973 1974 # dealing with some special values
1974 1975 quick_access = self._quick_access_changeid.get(changeid)
1975 1976 if quick_access is not None:
1976 1977 rev, node = quick_access
1977 1978 return context.changectx(self, rev, node, maybe_filtered=False)
1978 1979 if changeid == b'tip':
1979 1980 node = self.changelog.tip()
1980 1981 rev = self.changelog.rev(node)
1981 1982 return context.changectx(self, rev, node)
1982 1983
1983 1984 # dealing with arbitrary values
1984 1985 try:
1985 1986 if isinstance(changeid, int):
1986 1987 node = self.changelog.node(changeid)
1987 1988 rev = changeid
1988 1989 elif changeid == b'.':
1989 1990 # this is a hack to delay/avoid loading obsmarkers
1990 1991 # when we know that '.' won't be hidden
1991 1992 node = self.dirstate.p1()
1992 1993 rev = self.unfiltered().changelog.rev(node)
1993 1994 elif len(changeid) == self.nodeconstants.nodelen:
1994 1995 try:
1995 1996 node = changeid
1996 1997 rev = self.changelog.rev(changeid)
1997 1998 except error.FilteredLookupError:
1998 1999 changeid = hex(changeid) # for the error message
1999 2000 raise
2000 2001 except LookupError:
2001 2002 # check if it might have come from damaged dirstate
2002 2003 #
2003 2004 # XXX we could avoid the unfiltered if we had a recognizable
2004 2005 # exception for filtered changeset access
2005 2006 if (
2006 2007 self.local()
2007 2008 and changeid in self.unfiltered().dirstate.parents()
2008 2009 ):
2009 2010 msg = _(b"working directory has unknown parent '%s'!")
2010 2011 raise error.Abort(msg % short(changeid))
2011 2012 changeid = hex(changeid) # for the error message
2012 2013 raise
2013 2014
2014 2015 elif len(changeid) == 2 * self.nodeconstants.nodelen:
2015 2016 node = bin(changeid)
2016 2017 rev = self.changelog.rev(node)
2017 2018 else:
2018 2019 raise error.ProgrammingError(
2019 2020 b"unsupported changeid '%s' of type %s"
2020 2021 % (changeid, pycompat.bytestr(type(changeid)))
2021 2022 )
2022 2023
2023 2024 return context.changectx(self, rev, node)
2024 2025
2025 2026 except (error.FilteredIndexError, error.FilteredLookupError):
2026 2027 raise error.FilteredRepoLookupError(
2027 2028 _(b"filtered revision '%s'") % pycompat.bytestr(changeid)
2028 2029 )
2029 2030 except (IndexError, LookupError):
2030 2031 raise error.RepoLookupError(
2031 2032 _(b"unknown revision '%s'") % pycompat.bytestr(changeid)
2032 2033 )
2033 2034 except error.WdirUnsupported:
2034 2035 return context.workingctx(self)
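    # Summary sketch of the changeid forms accepted above (assuming `repo`
    # and a 20-byte binary `node`):
    #
    #     repo[None]       # working directory context
    #     repo[0]          # integer revision number
    #     repo[node]       # binary node
    #     repo[hex(node)]  # full hex node
    #     repo[b'tip']     # the special 'tip' symbol
    #     repo[7:10]       # slice -> list of changectx, filtered revs skipped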
2035 2036
2036 2037 def __contains__(self, changeid):
2037 2038 """True if the given changeid exists"""
2038 2039 try:
2039 2040 self[changeid]
2040 2041 return True
2041 2042 except error.RepoLookupError:
2042 2043 return False
2043 2044
2044 2045 def __nonzero__(self):
2045 2046 return True
2046 2047
2047 2048 __bool__ = __nonzero__
2048 2049
2049 2050 def __len__(self):
2050 2051 # no need to pay the cost of repoview.changelog
2051 2052 unfi = self.unfiltered()
2052 2053 return len(unfi.changelog)
2053 2054
2054 2055 def __iter__(self):
2055 2056 return iter(self.changelog)
2056 2057
2057 2058 def revs(self, expr: bytes, *args):
2058 2059 """Find revisions matching a revset.
2059 2060
2060 2061 The revset is specified as a string ``expr`` that may contain
2061 2062 %-formatting to escape certain types. See ``revsetlang.formatspec``.
2062 2063
2063 2064 Revset aliases from the configuration are not expanded. To expand
2064 2065 user aliases, consider calling ``scmutil.revrange()`` or
2065 2066 ``repo.anyrevs([expr], user=True)``.
2066 2067
2067 2068 Returns a smartset.abstractsmartset, which is a list-like interface
2068 2069 that contains integer revisions.
2069 2070 """
2070 2071 tree = revsetlang.spectree(expr, *args)
2071 2072 return revset.makematcher(tree)(self)
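    # A short sketch of the %-formatting mentioned in the docstring (see
    # revsetlang.formatspec for the full list of specifiers):
    #
    #     repo.revs(b'heads(%d::)', baserev)      # %d: integer revision
    #     repo.revs(b'ancestors(%n)', node)       # %n: binary node
    #     repo.revs(b'%ld and merge()', revlist)  # %ld: list of revisions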
2072 2073
2073 2074 def set(self, expr: bytes, *args):
2074 2075 """Find revisions matching a revset and emit changectx instances.
2075 2076
2076 2077 This is a convenience wrapper around ``revs()`` that iterates the
2077 2078 result and is a generator of changectx instances.
2078 2079
2079 2080 Revset aliases from the configuration are not expanded. To expand
2080 2081 user aliases, consider calling ``scmutil.revrange()``.
2081 2082 """
2082 2083 for r in self.revs(expr, *args):
2083 2084 yield self[r]
2084 2085
2085 2086 def anyrevs(self, specs: bytes, user=False, localalias=None):
2086 2087 """Find revisions matching one of the given revsets.
2087 2088
2088 2089 Revset aliases from the configuration are not expanded by default. To
2089 2090 expand user aliases, specify ``user=True``. To provide some local
2090 2091 definitions overriding user aliases, set ``localalias`` to
2091 2092 ``{name: definitionstring}``.
2092 2093 """
2093 2094 if specs == [b'null']:
2094 2095 return revset.baseset([nullrev])
2095 2096 if specs == [b'.']:
2096 2097 quick_data = self._quick_access_changeid.get(b'.')
2097 2098 if quick_data is not None:
2098 2099 return revset.baseset([quick_data[0]])
2099 2100 if user:
2100 2101 m = revset.matchany(
2101 2102 self.ui,
2102 2103 specs,
2103 2104 lookup=revset.lookupfn(self),
2104 2105 localalias=localalias,
2105 2106 )
2106 2107 else:
2107 2108 m = revset.matchany(None, specs, localalias=localalias)
2108 2109 return m(self)
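    # Hedged sketch: resolving user-supplied revsets with aliases enabled and
    # a local alias override (the alias below is purely illustrative):
    #
    #     revs = repo.anyrevs(
    #         [b'mine and not public()'],
    #         user=True,
    #         localalias={b'mine': b'author(alice)'},
    #     )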
2109 2110
2110 2111 def url(self) -> bytes:
2111 2112 return b'file:' + self.root
2112 2113
2113 2114 def hook(self, name, throw=False, **args):
2114 2115 """Call a hook, passing this repo instance.
2115 2116
2116 2117 This a convenience method to aid invoking hooks. Extensions likely
2117 2118 won't call this unless they have registered a custom hook or are
2118 2119 replacing code that is expected to call a hook.
2119 2120 """
2120 2121 return hook.hook(self.ui, self, name, throw, **args)
2121 2122
2122 2123 @filteredpropertycache
2123 2124 def _tagscache(self):
2124 2125 """Returns a tagscache object that contains various tags related
2125 2126 caches."""
2126 2127
2127 2128 # This simplifies its cache management by having one decorated
2128 2129 # function (this one) and the rest simply fetch things from it.
2129 2130 class tagscache:
2130 2131 def __init__(self):
2131 2132 # These two define the set of tags for this repository. tags
2132 2133 # maps tag name to node; tagtypes maps tag name to 'global' or
2133 2134 # 'local'. (Global tags are defined by .hgtags across all
2134 2135 # heads, and local tags are defined in .hg/localtags.)
2135 2136 # They constitute the in-memory cache of tags.
2136 2137 self.tags = self.tagtypes = None
2137 2138
2138 2139 self.nodetagscache = self.tagslist = None
2139 2140
2140 2141 cache = tagscache()
2141 2142 cache.tags, cache.tagtypes = self._findtags()
2142 2143
2143 2144 return cache
2144 2145
2145 2146 def tags(self):
2146 2147 '''return a mapping of tag to node'''
2147 2148 t = {}
2148 2149 if self.changelog.filteredrevs:
2149 2150 tags, tt = self._findtags()
2150 2151 else:
2151 2152 tags = self._tagscache.tags
2152 2153 rev = self.changelog.rev
2153 2154 for k, v in tags.items():
2154 2155 try:
2155 2156 # ignore tags to unknown nodes
2156 2157 rev(v)
2157 2158 t[k] = v
2158 2159 except (error.LookupError, ValueError):
2159 2160 pass
2160 2161 return t
2161 2162
2162 2163 def _findtags(self):
2163 2164 """Do the hard work of finding tags. Return a pair of dicts
2164 2165 (tags, tagtypes) where tags maps tag name to node, and tagtypes
2165 2166 maps tag name to a string like \'global\' or \'local\'.
2166 2167 Subclasses or extensions are free to add their own tags, but
2167 2168 should be aware that the returned dicts will be retained for the
2168 2169 duration of the localrepo object."""
2169 2170
2170 2171 # XXX what tagtype should subclasses/extensions use? Currently
2171 2172 # mq and bookmarks add tags, but do not set the tagtype at all.
2172 2173 # Should each extension invent its own tag type? Should there
2173 2174 # be one tagtype for all such "virtual" tags? Or is the status
2174 2175 # quo fine?
2175 2176
2176 2177 # map tag name to (node, hist)
2177 2178 alltags = tagsmod.findglobaltags(self.ui, self)
2178 2179 # map tag name to tag type
2179 2180 tagtypes = {tag: b'global' for tag in alltags}
2180 2181
2181 2182 tagsmod.readlocaltags(self.ui, self, alltags, tagtypes)
2182 2183
2183 2184 # Build the return dicts. Have to re-encode tag names because
2184 2185 # the tags module always uses UTF-8 (in order not to lose info
2185 2186 # writing to the cache), but the rest of Mercurial wants them in
2186 2187 # local encoding.
2187 2188 tags = {}
2188 2189 for name, (node, hist) in alltags.items():
2189 2190 if node != self.nullid:
2190 2191 tags[encoding.tolocal(name)] = node
2191 2192 tags[b'tip'] = self.changelog.tip()
2192 2193 tagtypes = {
2193 2194 encoding.tolocal(name): value for (name, value) in tagtypes.items()
2194 2195 }
2195 2196 return (tags, tagtypes)
2196 2197
2197 2198 def tagtype(self, tagname):
2198 2199 """
2199 2200 return the type of the given tag. result can be:
2200 2201
2201 2202 'local' : a local tag
2202 2203 'global' : a global tag
2203 2204 None : tag does not exist
2204 2205 """
2205 2206
2206 2207 return self._tagscache.tagtypes.get(tagname)
2207 2208
2208 2209 def tagslist(self):
2209 2210 '''return a list of tags ordered by revision'''
2210 2211 if not self._tagscache.tagslist:
2211 2212 l = []
2212 2213 for t, n in self.tags().items():
2213 2214 l.append((self.changelog.rev(n), t, n))
2214 2215 self._tagscache.tagslist = [(t, n) for r, t, n in sorted(l)]
2215 2216
2216 2217 return self._tagscache.tagslist
2217 2218
2218 2219 def nodetags(self, node):
2219 2220 '''return the tags associated with a node'''
2220 2221 if not self._tagscache.nodetagscache:
2221 2222 nodetagscache = {}
2222 2223 for t, n in self._tagscache.tags.items():
2223 2224 nodetagscache.setdefault(n, []).append(t)
2224 2225 for tags in nodetagscache.values():
2225 2226 tags.sort()
2226 2227 self._tagscache.nodetagscache = nodetagscache
2227 2228 return self._tagscache.nodetagscache.get(node, [])
2228 2229
2229 2230 def nodebookmarks(self, node):
2230 2231 """return the list of bookmarks pointing to the specified node"""
2231 2232 return self._bookmarks.names(node)
2232 2233
2233 2234 def branchmap(self):
2234 2235 """returns a dictionary {branch: [branchheads]} with branchheads
2235 2236 ordered by increasing revision number"""
2236 2237 return self._branchcaches[self]
2237 2238
2238 2239 @unfilteredmethod
2239 2240 def revbranchcache(self):
2240 2241 if not self._revbranchcache:
2241 2242 self._revbranchcache = branchmap.revbranchcache(self.unfiltered())
2242 2243 return self._revbranchcache
2243 2244
2244 2245 def register_changeset(self, rev, changelogrevision):
2245 2246 self.revbranchcache().setdata(rev, changelogrevision)
2246 2247
2247 2248 def branchtip(self, branch, ignoremissing=False):
2248 2249 """return the tip node for a given branch
2249 2250
2250 2251 If ignoremissing is True, then this method will not raise an error.
2251 2252 This is helpful for callers that only expect None for a missing branch
2252 2253 (e.g. namespace).
2253 2254
2254 2255 """
2255 2256 try:
2256 2257 return self.branchmap().branchtip(branch)
2257 2258 except KeyError:
2258 2259 if not ignoremissing:
2259 2260 raise error.RepoLookupError(_(b"unknown branch '%s'") % branch)
2260 2261 else:
2261 2262 pass
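    # Illustrative use of the `ignoremissing` flag handled above (the branch
    # name is hypothetical):
    #
    #     node = repo.branchtip(b'stable', ignoremissing=True)
    #     if node is None:
    #         ...  # branch does not exist; no RepoLookupError was raised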
2262 2263
2263 2264 def lookup(self, key):
2264 2265 node = scmutil.revsymbol(self, key).node()
2265 2266 if node is None:
2266 2267 raise error.RepoLookupError(_(b"unknown revision '%s'") % key)
2267 2268 return node
2268 2269
2269 2270 def lookupbranch(self, key):
2270 2271 if self.branchmap().hasbranch(key):
2271 2272 return key
2272 2273
2273 2274 return scmutil.revsymbol(self, key).branch()
2274 2275
2275 2276 def known(self, nodes):
2276 2277 cl = self.changelog
2277 2278 get_rev = cl.index.get_rev
2278 2279 filtered = cl.filteredrevs
2279 2280 result = []
2280 2281 for n in nodes:
2281 2282 r = get_rev(n)
2282 2283 resp = not (r is None or r in filtered)
2283 2284 result.append(resp)
2284 2285 return result
2285 2286
2286 2287 def local(self):
2287 2288 return self
2288 2289
2289 2290 def publishing(self):
2290 2291 # it's safe (and desirable) to trust the publish flag unconditionally
2291 2292 # so that we don't finalize changes shared between users via ssh or nfs
2292 2293 return self.ui.configbool(b'phases', b'publish', untrusted=True)
2293 2294
2294 2295 def cancopy(self):
2295 2296 # so statichttprepo's override of local() works
2296 2297 if not self.local():
2297 2298 return False
2298 2299 if not self.publishing():
2299 2300 return True
2300 2301 # if publishing we can't copy if there is filtered content
2301 2302 return not self.filtered(b'visible').changelog.filteredrevs
2302 2303
2303 2304 def shared(self):
2304 2305 '''the type of shared repository (None if not shared)'''
2305 2306 if self.sharedpath != self.path:
2306 2307 return b'store'
2307 2308 return None
2308 2309
2309 2310 def wjoin(self, f: bytes, *insidef: bytes) -> bytes:
2310 2311 return self.vfs.reljoin(self.root, f, *insidef)
2311 2312
2312 2313 def setparents(self, p1, p2=None):
2313 2314 if p2 is None:
2314 2315 p2 = self.nullid
2315 2316 self[None].setparents(p1, p2)
2316 2317 self._quick_access_changeid_invalidate()
2317 2318
2318 2319 def filectx(self, path: bytes, changeid=None, fileid=None, changectx=None):
2319 2320 """changeid must be a changeset revision, if specified.
2320 2321 fileid can be a file revision or node."""
2321 2322 return context.filectx(
2322 2323 self, path, changeid, fileid, changectx=changectx
2323 2324 )
2324 2325
2325 2326 def getcwd(self) -> bytes:
2326 2327 return self.dirstate.getcwd()
2327 2328
2328 2329 def pathto(self, f: bytes, cwd: Optional[bytes] = None) -> bytes:
2329 2330 return self.dirstate.pathto(f, cwd)
2330 2331
2331 2332 def _loadfilter(self, filter):
2332 2333 if filter not in self._filterpats:
2333 2334 l = []
2334 2335 for pat, cmd in self.ui.configitems(filter):
2335 2336 if cmd == b'!':
2336 2337 continue
2337 2338 mf = matchmod.match(self.root, b'', [pat])
2338 2339 fn = None
2339 2340 params = cmd
2340 2341 for name, filterfn in self._datafilters.items():
2341 2342 if cmd.startswith(name):
2342 2343 fn = filterfn
2343 2344 params = cmd[len(name) :].lstrip()
2344 2345 break
2345 2346 if not fn:
2346 2347 fn = lambda s, c, **kwargs: procutil.filter(s, c)
2347 2348 fn.__name__ = 'commandfilter'
2348 2349 # Wrap old filters not supporting keyword arguments
2349 2350 if not pycompat.getargspec(fn)[2]:
2350 2351 oldfn = fn
2351 2352 fn = lambda s, c, oldfn=oldfn, **kwargs: oldfn(s, c)
2352 2353 fn.__name__ = 'compat-' + oldfn.__name__
2353 2354 l.append((mf, fn, params))
2354 2355 self._filterpats[filter] = l
2355 2356 return self._filterpats[filter]
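    # The filters loaded above come from the [encode]/[decode] configuration
    # sections; a minimal hgrc sketch (the gzip rule is illustrative only):
    #
    #     [encode]
    #     *.gz = pipe: gunzip
    #
    #     [decode]
    #     *.gz = gzip
    #
    # Entries whose command is "!" are skipped, commands starting with a name
    # registered via adddatafilter() use that Python filter, and anything
    # else is run as a shell command through procutil.filter().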
2356 2357
2357 2358 def _filter(self, filterpats, filename, data):
2358 2359 for mf, fn, cmd in filterpats:
2359 2360 if mf(filename):
2360 2361 self.ui.debug(
2361 2362 b"filtering %s through %s\n"
2362 2363 % (filename, cmd or pycompat.sysbytes(fn.__name__))
2363 2364 )
2364 2365 data = fn(data, cmd, ui=self.ui, repo=self, filename=filename)
2365 2366 break
2366 2367
2367 2368 return data
2368 2369
2369 2370 @unfilteredpropertycache
2370 2371 def _encodefilterpats(self):
2371 2372 return self._loadfilter(b'encode')
2372 2373
2373 2374 @unfilteredpropertycache
2374 2375 def _decodefilterpats(self):
2375 2376 return self._loadfilter(b'decode')
2376 2377
2377 2378 def adddatafilter(self, name, filter):
2378 2379 self._datafilters[name] = filter
2379 2380
2380 2381 def wread(self, filename: bytes) -> bytes:
2381 2382 if self.wvfs.islink(filename):
2382 2383 data = self.wvfs.readlink(filename)
2383 2384 else:
2384 2385 data = self.wvfs.read(filename)
2385 2386 return self._filter(self._encodefilterpats, filename, data)
2386 2387
2387 2388 def wwrite(
2388 2389 self,
2389 2390 filename: bytes,
2390 2391 data: bytes,
2391 2392 flags: bytes,
2392 2393 backgroundclose=False,
2393 2394 **kwargs
2394 2395 ) -> int:
2395 2396 """write ``data`` into ``filename`` in the working directory
2396 2397
2397 2398 This returns the length of the written (maybe decoded) data.
2398 2399 """
2399 2400 data = self._filter(self._decodefilterpats, filename, data)
2400 2401 if b'l' in flags:
2401 2402 self.wvfs.symlink(data, filename)
2402 2403 else:
2403 2404 self.wvfs.write(
2404 2405 filename, data, backgroundclose=backgroundclose, **kwargs
2405 2406 )
2406 2407 if b'x' in flags:
2407 2408 self.wvfs.setflags(filename, False, True)
2408 2409 else:
2409 2410 self.wvfs.setflags(filename, False, False)
2410 2411 return len(data)
2411 2412
2412 2413 def wwritedata(self, filename: bytes, data: bytes) -> bytes:
2413 2414 return self._filter(self._decodefilterpats, filename, data)
2414 2415
2415 2416 def currenttransaction(self):
2416 2417 """return the current transaction or None if non exists"""
2417 2418 if self._transref:
2418 2419 tr = self._transref()
2419 2420 else:
2420 2421 tr = None
2421 2422
2422 2423 if tr and tr.running():
2423 2424 return tr
2424 2425 return None
2425 2426
2426 2427 def transaction(self, desc, report=None):
2427 2428 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
2428 2429 b'devel', b'check-locks'
2429 2430 ):
2430 2431 if self._currentlock(self._lockref) is None:
2431 2432 raise error.ProgrammingError(b'transaction requires locking')
2432 2433 tr = self.currenttransaction()
2433 2434 if tr is not None:
2434 2435 return tr.nest(name=desc)
2435 2436
2436 2437 # abort here if the journal already exists
2437 2438 if self.svfs.exists(b"journal"):
2438 2439 raise error.RepoError(
2439 2440 _(b"abandoned transaction found"),
2440 2441 hint=_(b"run 'hg recover' to clean up transaction"),
2441 2442 )
2442 2443
2443 2444 # At that point your dirstate should be clean:
2444 2445 #
2445 2446 # - If you don't have the wlock, why would you still have a dirty
2446 2447 # dirstate ?
2447 2448 #
2448 2449 # - If you hold the wlock, you should not be opening a transaction in
2449 2450 # the middle of a `dirstate.changing_*` block. The transaction needs to
2450 2451 # be open before that and wrap the change-context.
2451 2452 #
2452 2453 # - If you are not within a `dirstate.changing_*` context, why is our
2453 2454 # dirstate dirty?
2454 2455 if self.dirstate._dirty:
2455 2456 m = "cannot open a transaction with a dirty dirstate"
2456 2457 raise error.ProgrammingError(m)
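        # A hedged sketch of the calling pattern implied by the checks above:
        # take the locks first, open the transaction, and only then enter a
        # dirstate.changing_* context:
        #
        #     with repo.wlock(), repo.lock():
        #         with repo.transaction(b'my-operation') as tr:
        #             with repo.dirstate.changing_parents(repo):
        #                 ...  # mutate store and dirstate here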
2457 2458
2458 2459 idbase = b"%.40f#%f" % (random.random(), time.time())
2459 2460 ha = hex(hashutil.sha1(idbase).digest())
2460 2461 txnid = b'TXN:' + ha
2461 2462 self.hook(b'pretxnopen', throw=True, txnname=desc, txnid=txnid)
2462 2463
2463 2464 self._writejournal(desc)
2464 2465 if report:
2465 2466 rp = report
2466 2467 else:
2467 2468 rp = self.ui.warn
2468 2469 vfsmap = self.vfs_map
2469 2470 # we must avoid cyclic reference between repo and transaction.
2470 2471 reporef = weakref.ref(self)
2471 2472 # Code to track tag movement
2472 2473 #
2473 2474 # Since tags are all handled as file content, it is actually quite hard
2474 2475 # to track these movements from a code perspective. So we fall back to
2475 2476 # tracking at the repository level. One could envision tracking changes
2476 2477 # to the '.hgtags' file through changegroup application, but that fails
2477 2478 # to cope with cases where a transaction exposes new heads without a
2478 2479 # changegroup being involved (eg: phase movement).
2479 2480 #
2480 2481 # For now, we gate the feature behind a flag since it likely comes
2481 2482 # with performance impacts. The current code runs more often than needed
2482 2483 # and does not use caches as much as it could. The current focus is on
2483 2484 # the behavior of the feature, so we disable it by default. The flag
2484 2485 # will be removed when we are happy with the performance impact.
2485 2486 #
2486 2487 # Once this feature is no longer experimental move the following
2487 2488 # documentation to the appropriate help section:
2488 2489 #
2489 2490 # The ``HG_TAG_MOVED`` variable will be set if the transaction touched
2490 2491 # tags (new or changed or deleted tags). In addition the details of
2491 2492 # these changes are made available in a file at:
2492 2493 # ``REPOROOT/.hg/changes/tags.changes``.
2493 2494 # Make sure you check for HG_TAG_MOVED before reading that file as it
2494 2495 # might exist from a previous transaction even if no tags were touched
2495 2496 # in this one. Changes are recorded in a line-based format::
2496 2497 #
2497 2498 # <action> <hex-node> <tag-name>\n
2498 2499 #
2499 2500 # Actions are defined as follows:
2500 2501 # "-R": tag is removed,
2501 2502 # "+A": tag is added,
2502 2503 # "-M": tag is moved (old value),
2503 2504 # "+M": tag is moved (new value),
2504 2505 tracktags = lambda x: None
2505 2506 # experimental config: experimental.hook-track-tags
2506 2507 shouldtracktags = self.ui.configbool(
2507 2508 b'experimental', b'hook-track-tags'
2508 2509 )
2509 2510 if desc != b'strip' and shouldtracktags:
2510 2511 oldheads = self.changelog.headrevs()
2511 2512
2512 2513 def tracktags(tr2):
2513 2514 repo = reporef()
2514 2515 assert repo is not None # help pytype
2515 2516 oldfnodes = tagsmod.fnoderevs(repo.ui, repo, oldheads)
2516 2517 newheads = repo.changelog.headrevs()
2517 2518 newfnodes = tagsmod.fnoderevs(repo.ui, repo, newheads)
2518 2519 # notes: we compare lists here.
2519 2520 # As we do it only once, building sets would not be cheaper
2520 2521 changes = tagsmod.difftags(repo.ui, repo, oldfnodes, newfnodes)
2521 2522 if changes:
2522 2523 tr2.hookargs[b'tag_moved'] = b'1'
2523 2524 with repo.vfs(
2524 2525 b'changes/tags.changes', b'w', atomictemp=True
2525 2526 ) as changesfile:
2526 2527 # note: we do not register the file with the transaction
2527 2528 # because we need it to still exist when the transaction
2528 2529 # is closed (for txnclose hooks)
2529 2530 tagsmod.writediff(changesfile, changes)
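        # Hedged sketch of consuming the file written above from a txnclose
        # hook (path handling and parsing are illustrative only):
        #
        #     if os.environ.get('HG_TAG_MOVED'):
        #         with open('.hg/changes/tags.changes', 'rb') as fp:
        #             for line in fp:
        #                 action, node, tag = line.rstrip(b'\n').split(b' ', 2)
        #                 ...  # action is one of b'-R', b'+A', b'-M', b'+M'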
2530 2531
2531 2532 def validate(tr2):
2532 2533 """will run pre-closing hooks"""
2533 2534 # XXX the transaction API is a bit lacking here so we take a hacky
2534 2535 # path for now
2535 2536 #
2536 2537 # We cannot add this as a "pending" hook since the 'tr.hookargs'
2537 2538 # dict is copied before these run. In addition, we need the data
2538 2539 # available to in-memory hooks too.
2539 2540 #
2540 2541 # Moreover, we also need to make sure this runs before txnclose
2541 2542 # hooks and there is no "pending" mechanism that would execute
2542 2543 # logic only if hooks are about to run.
2543 2544 #
2544 2545 # Fixing this limitation of the transaction is also needed to track
2545 2546 # other families of changes (bookmarks, phases, obsolescence).
2546 2547 #
2547 2548 # This will have to be fixed before we remove the experimental
2548 2549 # gating.
2549 2550 tracktags(tr2)
2550 2551 repo = reporef()
2551 2552 assert repo is not None # help pytype
2552 2553
2553 2554 singleheadopt = (b'experimental', b'single-head-per-branch')
2554 2555 singlehead = repo.ui.configbool(*singleheadopt)
2555 2556 if singlehead:
2556 2557 singleheadsub = repo.ui.configsuboptions(*singleheadopt)[1]
2557 2558 accountclosed = singleheadsub.get(
2558 2559 b"account-closed-heads", False
2559 2560 )
2560 2561 if singleheadsub.get(b"public-changes-only", False):
2561 2562 filtername = b"immutable"
2562 2563 else:
2563 2564 filtername = b"visible"
2564 2565 scmutil.enforcesinglehead(
2565 2566 repo, tr2, desc, accountclosed, filtername
2566 2567 )
2567 2568 if hook.hashook(repo.ui, b'pretxnclose-bookmark'):
2568 2569 for name, (old, new) in sorted(
2569 2570 tr.changes[b'bookmarks'].items()
2570 2571 ):
2571 2572 args = tr.hookargs.copy()
2572 2573 args.update(bookmarks.preparehookargs(name, old, new))
2573 2574 repo.hook(
2574 2575 b'pretxnclose-bookmark',
2575 2576 throw=True,
2576 2577 **pycompat.strkwargs(args)
2577 2578 )
2578 2579 if hook.hashook(repo.ui, b'pretxnclose-phase'):
2579 2580 cl = repo.unfiltered().changelog
2580 2581 for revs, (old, new) in tr.changes[b'phases']:
2581 2582 for rev in revs:
2582 2583 args = tr.hookargs.copy()
2583 2584 node = hex(cl.node(rev))
2584 2585 args.update(phases.preparehookargs(node, old, new))
2585 2586 repo.hook(
2586 2587 b'pretxnclose-phase',
2587 2588 throw=True,
2588 2589 **pycompat.strkwargs(args)
2589 2590 )
2590 2591
2591 2592 repo.hook(
2592 2593 b'pretxnclose', throw=True, **pycompat.strkwargs(tr.hookargs)
2593 2594 )
2594 2595
2595 2596 def releasefn(tr, success):
2596 2597 repo = reporef()
2597 2598 if repo is None:
2598 2599 # If the repo has been GC'd (and this release function is being
2599 2600 # called from transaction.__del__), there's not much we can do,
2600 2601 # so just leave the unfinished transaction there and let the
2601 2602 # user run `hg recover`.
2602 2603 return
2603 2604 if success:
2604 2605 # this should be explicitly invoked here, because
2605 2606 # in-memory changes aren't written out when closing the
2606 2607 # transaction, if tr.addfilegenerator (via
2607 2608 # dirstate.write or so) isn't invoked while the
2608 2609 # transaction is running
2609 2610 repo.dirstate.write(None)
2610 2611 else:
2611 2612 # discard all changes (including ones already written
2612 2613 # out) in this transaction
2613 2614 repo.invalidate(clearfilecache=True)
2614 2615
2615 2616 tr = transaction.transaction(
2616 2617 rp,
2617 2618 self.svfs,
2618 2619 vfsmap,
2619 2620 b"journal",
2620 2621 b"undo",
2621 2622 lambda: None,
2622 2623 self.store.createmode,
2623 2624 validator=validate,
2624 2625 releasefn=releasefn,
2625 2626 checkambigfiles=_cachedfiles,
2626 2627 name=desc,
2627 2628 )
2628 2629 for vfs_id, path in self._journalfiles():
2629 2630 tr.add_journal(vfs_id, path)
2630 2631 tr.changes[b'origrepolen'] = len(self)
2631 2632 tr.changes[b'obsmarkers'] = set()
2632 2633 tr.changes[b'phases'] = []
2633 2634 tr.changes[b'bookmarks'] = {}
2634 2635
2635 2636 tr.hookargs[b'txnid'] = txnid
2636 2637 tr.hookargs[b'txnname'] = desc
2637 2638 tr.hookargs[b'changes'] = tr.changes
2638 2639 # note: writing the fncache only during finalize means that the file is
2639 2640 # outdated when running hooks. As fncache is used for streaming clone,
2640 2641 # this is not expected to break anything that happens during the hooks.
2641 2642 tr.addfinalize(b'flush-fncache', self.store.write)
2642 2643
2643 2644 def txnclosehook(tr2):
2644 2645 """To be run if transaction is successful, will schedule a hook run"""
2645 2646 # Don't reference tr2 in hook() so we don't hold a reference.
2646 2647 # This reduces memory consumption when there are multiple
2647 2648 # transactions per lock. This can likely go away if issue5045
2648 2649 # fixes the function accumulation.
2649 2650 hookargs = tr2.hookargs
2650 2651
2651 2652 def hookfunc(unused_success):
2652 2653 repo = reporef()
2653 2654 assert repo is not None # help pytype
2654 2655
2655 2656 if hook.hashook(repo.ui, b'txnclose-bookmark'):
2656 2657 bmchanges = sorted(tr.changes[b'bookmarks'].items())
2657 2658 for name, (old, new) in bmchanges:
2658 2659 args = tr.hookargs.copy()
2659 2660 args.update(bookmarks.preparehookargs(name, old, new))
2660 2661 repo.hook(
2661 2662 b'txnclose-bookmark',
2662 2663 throw=False,
2663 2664 **pycompat.strkwargs(args)
2664 2665 )
2665 2666
2666 2667 if hook.hashook(repo.ui, b'txnclose-phase'):
2667 2668 cl = repo.unfiltered().changelog
2668 2669 phasemv = sorted(
2669 2670 tr.changes[b'phases'], key=lambda r: r[0][0]
2670 2671 )
2671 2672 for revs, (old, new) in phasemv:
2672 2673 for rev in revs:
2673 2674 args = tr.hookargs.copy()
2674 2675 node = hex(cl.node(rev))
2675 2676 args.update(phases.preparehookargs(node, old, new))
2676 2677 repo.hook(
2677 2678 b'txnclose-phase',
2678 2679 throw=False,
2679 2680 **pycompat.strkwargs(args)
2680 2681 )
2681 2682
2682 2683 repo.hook(
2683 2684 b'txnclose', throw=False, **pycompat.strkwargs(hookargs)
2684 2685 )
2685 2686
2686 2687 repo = reporef()
2687 2688 assert repo is not None # help pytype
2688 2689 repo._afterlock(hookfunc)
2689 2690
2690 2691 tr.addfinalize(b'txnclose-hook', txnclosehook)
2691 2692 # Include a leading "-" to make it happen before the transaction summary
2692 2693 # reports registered via scmutil.registersummarycallback() whose names
2693 2694 # are 00-txnreport etc. That way, the caches will be warm when the
2694 2695 # callbacks run.
2695 2696 tr.addpostclose(b'-warm-cache', self._buildcacheupdater(tr))
2696 2697
2697 2698 def txnaborthook(tr2):
2698 2699 """To be run if transaction is aborted"""
2699 2700 repo = reporef()
2700 2701 assert repo is not None # help pytype
2701 2702 repo.hook(
2702 2703 b'txnabort', throw=False, **pycompat.strkwargs(tr2.hookargs)
2703 2704 )
2704 2705
2705 2706 tr.addabort(b'txnabort-hook', txnaborthook)
2706 2707 # avoid eager cache invalidation. in-memory data should be identical
2707 2708 # to stored data if transaction has no error.
2708 2709 tr.addpostclose(b'refresh-filecachestats', self._refreshfilecachestats)
2709 2710 self._transref = weakref.ref(tr)
2710 2711 scmutil.registersummarycallback(self, tr, desc)
2711 2712 # This only exists to deal with rollback's need to have viable
2712 2713 # parents at the end of the operation. So back up viable parents at the
2713 2714 # time of this operation.
2714 2715 #
2715 2716 # We only do it when the `wlock` is taken, otherwise others might be
2716 2717 # altering the dirstate under us.
2717 2718 #
2718 2719 # This is really not a great way to do this (first, because we cannot
2719 2720 # always do it). More viable alternatives exist:
2720 2721 #
2721 2722 # - backing up only the working copy parents in a dedicated file and
2722 2723 # doing a clean "keep-update" to them on `hg rollback`.
2723 2724 #
2724 2725 # - slightly changing the behavior and applying logic similar to "hg
2725 2726 # strip" to pick a working copy destination on `hg rollback`
2726 2727 if self.currentwlock() is not None:
2727 2728 ds = self.dirstate
2728 2729 if not self.vfs.exists(b'branch'):
2729 2730 # force a file to be written if none exists
2730 2731 ds.setbranch(b'default', None)
2731 2732
2732 2733 def backup_dirstate(tr):
2733 2734 for f in ds.all_file_names():
2734 2735 # hardlink backup is okay because `dirstate` is always
2735 2736 # atomically written and possible data files are append-only
2736 2737 # and resistant to trailing data.
2737 2738 tr.addbackup(f, hardlink=True, location=b'plain')
2738 2739
2739 2740 tr.addvalidator(b'dirstate-backup', backup_dirstate)
2740 2741 return tr
2741 2742
2742 2743 def _journalfiles(self):
2743 2744 return (
2744 2745 (self.svfs, b'journal'),
2745 2746 (self.vfs, b'journal.desc'),
2746 2747 )
2747 2748
2748 2749 def undofiles(self):
2749 2750 return [(vfs, undoname(x)) for vfs, x in self._journalfiles()]
2750 2751
2751 2752 @unfilteredmethod
2752 2753 def _writejournal(self, desc):
2753 2754 self.vfs.write(b"journal.desc", b"%d\n%s\n" % (len(self), desc))
2754 2755
2755 2756 def recover(self):
2756 2757 with self.lock():
2757 2758 if self.svfs.exists(b"journal"):
2758 2759 self.ui.status(_(b"rolling back interrupted transaction\n"))
2759 2760 vfsmap = self.vfs_map
2760 2761 transaction.rollback(
2761 2762 self.svfs,
2762 2763 vfsmap,
2763 2764 b"journal",
2764 2765 self.ui.warn,
2765 2766 checkambigfiles=_cachedfiles,
2766 2767 )
2767 2768 self.invalidate()
2768 2769 return True
2769 2770 else:
2770 2771 self.ui.warn(_(b"no interrupted transaction available\n"))
2771 2772 return False
2772 2773
2773 2774 def rollback(self, dryrun=False, force=False):
2774 2775 wlock = lock = None
2775 2776 try:
2776 2777 wlock = self.wlock()
2777 2778 lock = self.lock()
2778 2779 if self.svfs.exists(b"undo"):
2779 2780 return self._rollback(dryrun, force)
2780 2781 else:
2781 2782 self.ui.warn(_(b"no rollback information available\n"))
2782 2783 return 1
2783 2784 finally:
2784 2785 release(lock, wlock)
2785 2786
2786 2787 @unfilteredmethod # Until we get smarter cache management
2787 2788 def _rollback(self, dryrun, force):
2788 2789 ui = self.ui
2789 2790
2790 2791 parents = self.dirstate.parents()
2791 2792 try:
2792 2793 args = self.vfs.read(b'undo.desc').splitlines()
2793 2794 (oldlen, desc, detail) = (int(args[0]), args[1], None)
2794 2795 if len(args) >= 3:
2795 2796 detail = args[2]
2796 2797 oldtip = oldlen - 1
2797 2798
2798 2799 if detail and ui.verbose:
2799 2800 msg = _(
2800 2801 b'repository tip rolled back to revision %d'
2801 2802 b' (undo %s: %s)\n'
2802 2803 ) % (oldtip, desc, detail)
2803 2804 else:
2804 2805 msg = _(
2805 2806 b'repository tip rolled back to revision %d (undo %s)\n'
2806 2807 ) % (oldtip, desc)
2807 2808 parentgone = any(self[p].rev() > oldtip for p in parents)
2808 2809 except IOError:
2809 2810 msg = _(b'rolling back unknown transaction\n')
2810 2811 desc = None
2811 2812 parentgone = True
2812 2813
2813 2814 if not force and self[b'.'] != self[b'tip'] and desc == b'commit':
2814 2815 raise error.Abort(
2815 2816 _(
2816 2817 b'rollback of last commit while not checked out '
2817 2818 b'may lose data'
2818 2819 ),
2819 2820 hint=_(b'use -f to force'),
2820 2821 )
2821 2822
2822 2823 ui.status(msg)
2823 2824 if dryrun:
2824 2825 return 0
2825 2826
2826 2827 self.destroying()
2827 2828 vfsmap = self.vfs_map
2828 2829 skip_journal_pattern = None
2829 2830 if not parentgone:
2830 2831 skip_journal_pattern = RE_SKIP_DIRSTATE_ROLLBACK
2831 2832 transaction.rollback(
2832 2833 self.svfs,
2833 2834 vfsmap,
2834 2835 b'undo',
2835 2836 ui.warn,
2836 2837 checkambigfiles=_cachedfiles,
2837 2838 skip_journal_pattern=skip_journal_pattern,
2838 2839 )
2839 2840 self.invalidate()
2840 2841 self.dirstate.invalidate()
2841 2842
2842 2843 if parentgone:
2843 2844 # replace this with some explicit parent update in the future.
2844 2845 has_node = self.changelog.index.has_node
2845 2846 if not all(has_node(p) for p in self.dirstate._pl):
2846 2847 # There was no dirstate to backup initially, we need to drop
2847 2848 # the existing one.
2848 2849 with self.dirstate.changing_parents(self):
2849 2850 self.dirstate.setparents(self.nullid)
2850 2851 self.dirstate.clear()
2851 2852
2852 2853 parents = tuple([p.rev() for p in self[None].parents()])
2853 2854 if len(parents) > 1:
2854 2855 ui.status(
2855 2856 _(
2856 2857 b'working directory now based on '
2857 2858 b'revisions %d and %d\n'
2858 2859 )
2859 2860 % parents
2860 2861 )
2861 2862 else:
2862 2863 ui.status(
2863 2864 _(b'working directory now based on revision %d\n') % parents
2864 2865 )
2865 2866 mergestatemod.mergestate.clean(self)
2866 2867
2867 2868 # TODO: if we know which new heads may result from this rollback, pass
2868 2869 # them to destroy(), which will prevent the branchhead cache from being
2869 2870 # invalidated.
2870 2871 self.destroyed()
2871 2872 return 0
2872 2873
2873 2874 def _buildcacheupdater(self, newtransaction):
2874 2875 """called during transaction to build the callback updating cache
2875 2876
2876 2877 Lives on the repository to help extension who might want to augment
2877 2878 this logic. For this purpose, the created transaction is passed to the
2878 2879 method.
2879 2880 """
2880 2881 # we must avoid cyclic reference between repo and transaction.
2881 2882 reporef = weakref.ref(self)
2882 2883
2883 2884 def updater(tr):
2884 2885 repo = reporef()
2885 2886 assert repo is not None # help pytype
2886 2887 repo.updatecaches(tr)
2887 2888
2888 2889 return updater
2889 2890
2890 2891 @unfilteredmethod
2891 2892 def updatecaches(self, tr=None, full=False, caches=None):
2892 2893 """warm appropriate caches
2893 2894
2894 2895 If this function is called after a transaction closed, the transaction
2895 2896 will be available in the 'tr' argument. This can be used to selectively
2896 2897 update caches relevant to the changes in that transaction.
2897 2898
2898 2899 If 'full' is set, make sure all caches the function knows about have
2899 2900 up-to-date data, even the ones usually loaded more lazily.
2900 2901
2901 2902 The `full` argument can take a special "post-clone" value. In this case
2902 2903 the cache warming is done after a clone and some of the slower caches
2903 2904 might be skipped, namely the `.fnodetags` one. This argument is 5.8
2904 2905 specific as we plan for a cleaner way to deal with this in 5.9.
2905 2906 """
2906 2907 if tr is not None and tr.hookargs.get(b'source') == b'strip':
2907 2908 # During strip, many caches are invalid but
2908 2909 # a later call to `destroyed` will refresh them.
2909 2910 return
2910 2911
2911 2912 unfi = self.unfiltered()
2912 2913
2913 2914 if full:
2914 2915 msg = (
2915 2916 "`full` argument for `repo.updatecaches` is deprecated\n"
2916 2917 "(use `caches=repository.CACHE_ALL` instead)"
2917 2918 )
2918 2919 self.ui.deprecwarn(msg, b"5.9")
2919 2920 caches = repository.CACHES_ALL
2920 2921 if full == b"post-clone":
2921 2922 caches = repository.CACHES_POST_CLONE
2922 2923 caches = repository.CACHES_ALL
2923 2924 elif caches is None:
2924 2925 caches = repository.CACHES_DEFAULT
2925 2926
2926 2927 if repository.CACHE_BRANCHMAP_SERVED in caches:
2927 2928 if tr is None or tr.changes[b'origrepolen'] < len(self):
2928 2929 # accessing the 'served' branchmap should refresh all the others,
2929 2930 self.ui.debug(b'updating the branch cache\n')
2930 2931 self.filtered(b'served').branchmap()
2931 2932 self.filtered(b'served.hidden').branchmap()
2932 2933 # flush all possibly delayed write.
2933 2934 self._branchcaches.write_delayed(self)
2934 2935
2935 2936 if repository.CACHE_CHANGELOG_CACHE in caches:
2936 2937 self.changelog.update_caches(transaction=tr)
2937 2938
2938 2939 if repository.CACHE_MANIFESTLOG_CACHE in caches:
2939 2940 self.manifestlog.update_caches(transaction=tr)
2940 2941 for entry in self.store.walk():
2941 2942 if not entry.is_revlog:
2942 2943 continue
2943 2944 if not entry.is_manifestlog:
2944 2945 continue
2945 2946 manifestrevlog = entry.get_revlog_instance(self).get_revlog()
2946 2947 if manifestrevlog is not None:
2947 2948 manifestrevlog.update_caches(transaction=tr)
2948 2949
2949 2950 if repository.CACHE_REV_BRANCH in caches:
2950 2951 rbc = unfi.revbranchcache()
2951 2952 for r in unfi.changelog:
2952 2953 rbc.branchinfo(r)
2953 2954 rbc.write()
2954 2955
2955 2956 if repository.CACHE_FULL_MANIFEST in caches:
2956 2957 # ensure the working copy parents are in the manifestfulltextcache
2957 2958 for ctx in self[b'.'].parents():
2958 2959 ctx.manifest() # accessing the manifest is enough
2959 2960
2960 2961 if repository.CACHE_FILE_NODE_TAGS in caches:
2961 2962 # accessing fnode cache warms the cache
2962 2963 tagsmod.fnoderevs(self.ui, unfi, unfi.changelog.revs())
2963 2964
2964 2965 if repository.CACHE_TAGS_DEFAULT in caches:
2965 2966 # accessing tags warm the cache
2966 2967 self.tags()
2967 2968 if repository.CACHE_TAGS_SERVED in caches:
2968 2969 self.filtered(b'served').tags()
2969 2970
2970 2971 if repository.CACHE_BRANCHMAP_ALL in caches:
2971 2972 # The CACHE_BRANCHMAP_ALL updates lazily-loaded caches immediately,
2972 2973 # so we're forcing a write to cause these caches to be warmed up
2973 2974 # even if they haven't explicitly been requested yet (if they've
2974 2975 # never been used by hg, they won't ever have been written, even if
2975 2976 # they're a subset of another kind of cache that *has* been used).
2976 2977 for filt in repoview.filtertable.keys():
2977 2978 filtered = self.filtered(filt)
2978 2979 filtered.branchmap().write(filtered)
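    # Hedged usage sketch: callers that want specific caches warmed can pass
    # a caches set instead of the deprecated `full` flag:
    #
    #     from mercurial.interfaces import repository
    #     repo.updatecaches(caches=repository.CACHES_ALL)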
2979 2980
2980 2981 def invalidatecaches(self):
2981 2982 if '_tagscache' in vars(self):
2982 2983 # can't use delattr on proxy
2983 2984 del self.__dict__['_tagscache']
2984 2985
2985 2986 self._branchcaches.clear()
2986 2987 self.invalidatevolatilesets()
2987 2988 self._sparsesignaturecache.clear()
2988 2989
2989 2990 def invalidatevolatilesets(self):
2990 2991 self.filteredrevcache.clear()
2991 2992 obsolete.clearobscaches(self)
2992 2993 self._quick_access_changeid_invalidate()
2993 2994
2994 2995 def invalidatedirstate(self):
2995 2996 """Invalidates the dirstate, causing the next call to dirstate
2996 2997 to check if it was modified since the last time it was read,
2997 2998 rereading it if it has.
2998 2999
2999 3000 This differs from dirstate.invalidate() in that it doesn't always
3000 3001 reread the dirstate. Use dirstate.invalidate() if you want to
3001 3002 explicitly read the dirstate again (i.e. restoring it to a previous
3002 3003 known good state)."""
3003 3004 unfi = self.unfiltered()
3004 3005 if 'dirstate' in unfi.__dict__:
3005 3006 assert not self.dirstate.is_changing_any
3006 3007 del unfi.__dict__['dirstate']
3007 3008
3008 3009 def invalidate(self, clearfilecache=False):
3009 3010 """Invalidates both store and non-store parts other than dirstate
3010 3011
3011 3012 If a transaction is running, invalidation of store is omitted,
3012 3013 because discarding in-memory changes might cause inconsistency
3013 3014 (e.g. incomplete fncache causes unintentional failure, but
3014 3015 redundant one doesn't).
3015 3016 """
3016 3017 unfiltered = self.unfiltered() # all file caches are stored unfiltered
3017 3018 for k in list(self._filecache.keys()):
3018 3019 if (
3019 3020 k == b'changelog'
3020 3021 and self.currenttransaction()
3021 3022 and self.changelog._delayed
3022 3023 ):
3023 3024 # The changelog object may store unwritten revisions. We don't
3024 3025 # want to lose them.
3025 3026 # TODO: Solve the problem instead of working around it.
3026 3027 continue
3027 3028
3028 3029 if clearfilecache:
3029 3030 del self._filecache[k]
3030 3031 try:
3031 3032 # XXX ideally, the key would be a unicode string to match the
3032 3033                 # fact that it refers to an attribute name. However, changing
3033 3034                 # this was a bit of scope creep compared to the series cleaning
3034 3035                 # up del/set/getattr, so we kept things simple here.
3035 3036 delattr(unfiltered, pycompat.sysstr(k))
3036 3037 except AttributeError:
3037 3038 pass
3038 3039 self.invalidatecaches()
3039 3040 if not self.currenttransaction():
3040 3041 # TODO: Changing contents of store outside transaction
3041 3042 # causes inconsistency. We should make in-memory store
3042 3043 # changes detectable, and abort if changed.
3043 3044 self.store.invalidatecaches()
3044 3045
3045 3046 def invalidateall(self):
3046 3047 """Fully invalidates both store and non-store parts, causing the
3047 3048 subsequent operation to reread any outside changes."""
3048 3049 # extension should hook this to invalidate its caches
3049 3050 self.invalidate()
3050 3051 self.invalidatedirstate()
3051 3052
3052 3053 @unfilteredmethod
3053 3054 def _refreshfilecachestats(self, tr):
3054 3055 """Reload stats of cached files so that they are flagged as valid"""
3055 3056 for k, ce in self._filecache.items():
3056 3057 k = pycompat.sysstr(k)
3057 3058 if k == 'dirstate' or k not in self.__dict__:
3058 3059 continue
3059 3060 ce.refresh()
3060 3061
3061 3062 def _lock(
3062 3063 self,
3063 3064 vfs,
3064 3065 lockname,
3065 3066 wait,
3066 3067 releasefn,
3067 3068 acquirefn,
3068 3069 desc,
3069 3070 ):
3070 3071 timeout = 0
3071 3072 warntimeout = 0
3072 3073 if wait:
3073 3074 timeout = self.ui.configint(b"ui", b"timeout")
3074 3075 warntimeout = self.ui.configint(b"ui", b"timeout.warn")
3075 3076 # internal config: ui.signal-safe-lock
3076 3077 signalsafe = self.ui.configbool(b'ui', b'signal-safe-lock')
3077 3078
3078 3079 l = lockmod.trylock(
3079 3080 self.ui,
3080 3081 vfs,
3081 3082 lockname,
3082 3083 timeout,
3083 3084 warntimeout,
3084 3085 releasefn=releasefn,
3085 3086 acquirefn=acquirefn,
3086 3087 desc=desc,
3087 3088 signalsafe=signalsafe,
3088 3089 )
3089 3090 return l
3090 3091
3091 3092 def _afterlock(self, callback):
3092 3093 """add a callback to be run when the repository is fully unlocked
3093 3094
3094 3095 The callback will be executed when the outermost lock is released
3095 3096 (with wlock being higher level than 'lock')."""
3096 3097 for ref in (self._wlockref, self._lockref):
3097 3098 l = ref and ref()
3098 3099 if l and l.held:
3099 3100 l.postrelease.append(callback)
3100 3101 break
3101 3102         else: # no lock has been found.
3102 3103 callback(True)
3103 3104
3104 3105 def lock(self, wait=True):
3105 3106         """Lock the repository store (.hg/store) and return the lock. Use this
3106 3107         before modifying the store (e.g. committing or stripping). If you are
3107 3108         opening a transaction, get a lock as well.
3108 3109
3109 3110         If both 'lock' and 'wlock' must be acquired, ensure you always acquire
3110 3111 'wlock' first to avoid a dead-lock hazard."""
3111 3112 l = self._currentlock(self._lockref)
3112 3113 if l is not None:
3113 3114 l.lock()
3114 3115 return l
3115 3116
3116 3117 l = self._lock(
3117 3118 vfs=self.svfs,
3118 3119 lockname=b"lock",
3119 3120 wait=wait,
3120 3121 releasefn=None,
3121 3122 acquirefn=self.invalidate,
3122 3123 desc=_(b'repository %s') % self.origroot,
3123 3124 )
3124 3125 self._lockref = weakref.ref(l)
3125 3126 return l
3126 3127
3127 3128 def wlock(self, wait=True):
3128 3129 """Lock the non-store parts of the repository (everything under
3129 3130         .hg except .hg/store) and return the lock.
3130 3131
3131 3132 Use this before modifying files in .hg.
3132 3133
3133 3134         If both 'lock' and 'wlock' must be acquired, ensure you always acquire
3134 3135 'wlock' first to avoid a dead-lock hazard."""
3135 3136 l = self._wlockref() if self._wlockref else None
3136 3137 if l is not None and l.held:
3137 3138 l.lock()
3138 3139 return l
3139 3140
3140 3141 # We do not need to check for non-waiting lock acquisition. Such
3141 3142 # acquisition would not cause dead-lock as they would just fail.
3142 3143 if wait and (
3143 3144 self.ui.configbool(b'devel', b'all-warnings')
3144 3145 or self.ui.configbool(b'devel', b'check-locks')
3145 3146 ):
3146 3147 if self._currentlock(self._lockref) is not None:
3147 3148 self.ui.develwarn(b'"wlock" acquired after "lock"')
3148 3149
3149 3150 def unlock():
3150 3151 if self.dirstate.is_changing_any:
3151 3152 msg = b"wlock release in the middle of a changing parents"
3152 3153 self.ui.develwarn(msg)
3153 3154 self.dirstate.invalidate()
3154 3155 else:
3155 3156 if self.dirstate._dirty:
3156 3157 msg = b"dirty dirstate on wlock release"
3157 3158 self.ui.develwarn(msg)
3158 3159 self.dirstate.write(None)
3159 3160
3160 3161 unfi = self.unfiltered()
3161 3162 if 'dirstate' in unfi.__dict__:
3162 3163 del unfi.__dict__['dirstate']
3163 3164
3164 3165 l = self._lock(
3165 3166 self.vfs,
3166 3167 b"wlock",
3167 3168 wait,
3168 3169 unlock,
3169 3170 self.invalidatedirstate,
3170 3171 _(b'working directory of %s') % self.origroot,
3171 3172 )
3172 3173 self._wlockref = weakref.ref(l)
3173 3174 return l
3174 3175
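    # Illustrative usage sketch (not part of the original file): when both
    # locks are needed, take 'wlock' before 'lock', mirroring what commit()
    # does below:
    #
    #     with repo.wlock(), repo.lock():
    #         with repo.transaction(b'example'):
    #             ...  # modify the store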
3175 3176 def _currentlock(self, lockref):
3176 3177 """Returns the lock if it's held, or None if it's not."""
3177 3178 if lockref is None:
3178 3179 return None
3179 3180 l = lockref()
3180 3181 if l is None or not l.held:
3181 3182 return None
3182 3183 return l
3183 3184
3184 3185 def currentwlock(self):
3185 3186 """Returns the wlock if it's held, or None if it's not."""
3186 3187 return self._currentlock(self._wlockref)
3187 3188
3188 3189 def currentlock(self):
3189 3190 """Returns the lock if it's held, or None if it's not."""
3190 3191 return self._currentlock(self._lockref)
3191 3192
3192 3193 def checkcommitpatterns(self, wctx, match, status, fail):
3193 3194 """check for commit arguments that aren't committable"""
3194 3195 if match.isexact() or match.prefix():
3195 3196 matched = set(status.modified + status.added + status.removed)
3196 3197
3197 3198 for f in match.files():
3198 3199 f = self.dirstate.normalize(f)
3199 3200 if f == b'.' or f in matched or f in wctx.substate:
3200 3201 continue
3201 3202 if f in status.deleted:
3202 3203 fail(f, _(b'file not found!'))
3203 3204 # Is it a directory that exists or used to exist?
3204 3205 if self.wvfs.isdir(f) or wctx.p1().hasdir(f):
3205 3206 d = f + b'/'
3206 3207 for mf in matched:
3207 3208 if mf.startswith(d):
3208 3209 break
3209 3210 else:
3210 3211 fail(f, _(b"no match under directory!"))
3211 3212 elif f not in self.dirstate:
3212 3213 fail(f, _(b"file not tracked!"))
3213 3214
3214 3215 @unfilteredmethod
3215 3216 def commit(
3216 3217 self,
3217 3218 text=b"",
3218 3219 user=None,
3219 3220 date=None,
3220 3221 match=None,
3221 3222 force=False,
3222 3223 editor=None,
3223 3224 extra=None,
3224 3225 ):
3225 3226 """Add a new revision to current repository.
3226 3227
3227 3228 Revision information is gathered from the working directory,
3228 3229 match can be used to filter the committed files. If editor is
3229 3230 supplied, it is called to get a commit message.
3230 3231 """
3231 3232 if extra is None:
3232 3233 extra = {}
3233 3234
3234 3235 def fail(f, msg):
3235 3236 raise error.InputError(b'%s: %s' % (f, msg))
3236 3237
3237 3238 if not match:
3238 3239 match = matchmod.always()
3239 3240
3240 3241 if not force:
3241 3242 match.bad = fail
3242 3243
3243 3244 # lock() for recent changelog (see issue4368)
3244 3245 with self.wlock(), self.lock():
3245 3246 wctx = self[None]
3246 3247 merge = len(wctx.parents()) > 1
3247 3248
3248 3249 if not force and merge and not match.always():
3249 3250 raise error.Abort(
3250 3251 _(
3251 3252 b'cannot partially commit a merge '
3252 3253 b'(do not specify files or patterns)'
3253 3254 )
3254 3255 )
3255 3256
3256 3257 status = self.status(match=match, clean=force)
3257 3258 if force:
3258 3259 status.modified.extend(
3259 3260 status.clean
3260 3261 ) # mq may commit clean files
3261 3262
3262 3263 # check subrepos
3263 3264 subs, commitsubs, newstate = subrepoutil.precommit(
3264 3265 self.ui, wctx, status, match, force=force
3265 3266 )
3266 3267
3267 3268 # make sure all explicit patterns are matched
3268 3269 if not force:
3269 3270 self.checkcommitpatterns(wctx, match, status, fail)
3270 3271
3271 3272 cctx = context.workingcommitctx(
3272 3273 self, status, text, user, date, extra
3273 3274 )
3274 3275
3275 3276 ms = mergestatemod.mergestate.read(self)
3276 3277 mergeutil.checkunresolved(ms)
3277 3278
3278 3279 # internal config: ui.allowemptycommit
3279 3280 if cctx.isempty() and not self.ui.configbool(
3280 3281 b'ui', b'allowemptycommit'
3281 3282 ):
3282 3283 self.ui.debug(b'nothing to commit, clearing merge state\n')
3283 3284 ms.reset()
3284 3285 return None
3285 3286
3286 3287 if merge and cctx.deleted():
3287 3288 raise error.Abort(_(b"cannot commit merge with missing files"))
3288 3289
3289 3290 if editor:
3290 3291 cctx._text = editor(self, cctx, subs)
3291 3292 edited = text != cctx._text
3292 3293
3293 3294 # Save commit message in case this transaction gets rolled back
3294 3295 # (e.g. by a pretxncommit hook). Leave the content alone on
3295 3296 # the assumption that the user will use the same editor again.
3296 3297 msg_path = self.savecommitmessage(cctx._text)
3297 3298
3298 3299 # commit subs and write new state
3299 3300 if subs:
3300 3301 uipathfn = scmutil.getuipathfn(self)
3301 3302 for s in sorted(commitsubs):
3302 3303 sub = wctx.sub(s)
3303 3304 self.ui.status(
3304 3305 _(b'committing subrepository %s\n')
3305 3306 % uipathfn(subrepoutil.subrelpath(sub))
3306 3307 )
3307 3308 sr = sub.commit(cctx._text, user, date)
3308 3309 newstate[s] = (newstate[s][0], sr)
3309 3310 subrepoutil.writestate(self, newstate)
3310 3311
3311 3312 p1, p2 = self.dirstate.parents()
3312 3313 hookp1, hookp2 = hex(p1), (p2 != self.nullid and hex(p2) or b'')
3313 3314 try:
3314 3315 self.hook(
3315 3316 b"precommit", throw=True, parent1=hookp1, parent2=hookp2
3316 3317 )
3317 3318 with self.transaction(b'commit'):
3318 3319 ret = self.commitctx(cctx, True)
3319 3320 # update bookmarks, dirstate and mergestate
3320 3321 bookmarks.update(self, [p1, p2], ret)
3321 3322 cctx.markcommitted(ret)
3322 3323 ms.reset()
3323 3324 except: # re-raises
3324 3325 if edited:
3325 3326 self.ui.write(
3326 3327 _(b'note: commit message saved in %s\n') % msg_path
3327 3328 )
3328 3329 self.ui.write(
3329 3330 _(
3330 3331 b"note: use 'hg commit --logfile "
3331 3332 b"%s --edit' to reuse it\n"
3332 3333 )
3333 3334 % msg_path
3334 3335 )
3335 3336 raise
3336 3337
3337 3338 def commithook(unused_success):
3338 3339             # hack for commands that use a temporary commit (e.g. histedit)
3339 3340             # the temporary commit got stripped before the hook release
3340 3341 if self.changelog.hasnode(ret):
3341 3342 self.hook(
3342 3343 b"commit", node=hex(ret), parent1=hookp1, parent2=hookp2
3343 3344 )
3344 3345
3345 3346 self._afterlock(commithook)
3346 3347 return ret
3347 3348
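    # Illustrative usage sketch (not part of the original file): a minimal
    # programmatic commit. commit() takes wlock and lock itself, so no extra
    # locking is needed; the message and user below are arbitrary.
    #
    #     node = repo.commit(text=b'example commit', user=b'someone')
    #     if node is None:
    #         ...  # nothing to commit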
3348 3349 @unfilteredmethod
3349 3350 def commitctx(self, ctx, error=False, origctx=None):
3350 3351 return commit.commitctx(self, ctx, error=error, origctx=origctx)
3351 3352
3352 3353 @unfilteredmethod
3353 3354 def destroying(self):
3354 3355 """Inform the repository that nodes are about to be destroyed.
3355 3356 Intended for use by strip and rollback, so there's a common
3356 3357 place for anything that has to be done before destroying history.
3357 3358
3358 3359 This is mostly useful for saving state that is in memory and waiting
3359 3360 to be flushed when the current lock is released. Because a call to
3360 3361 destroyed is imminent, the repo will be invalidated causing those
3361 3362 changes to stay in memory (waiting for the next unlock), or vanish
3362 3363 completely.
3363 3364 """
3364 3365 # When using the same lock to commit and strip, the phasecache is left
3365 3366 # dirty after committing. Then when we strip, the repo is invalidated,
3366 3367 # causing those changes to disappear.
3367 3368 if '_phasecache' in vars(self):
3368 3369 self._phasecache.write()
3369 3370
3370 3371 @unfilteredmethod
3371 3372 def destroyed(self):
3372 3373 """Inform the repository that nodes have been destroyed.
3373 3374 Intended for use by strip and rollback, so there's a common
3374 3375 place for anything that has to be done after destroying history.
3375 3376 """
3376 3377 # When one tries to:
3377 3378 # 1) destroy nodes thus calling this method (e.g. strip)
3378 3379 # 2) use phasecache somewhere (e.g. commit)
3379 3380 #
3380 3381 # then 2) will fail because the phasecache contains nodes that were
3381 3382 # removed. We can either remove phasecache from the filecache,
3382 3383 # causing it to reload next time it is accessed, or simply filter
3383 3384 # the removed nodes now and write the updated cache.
3384 3385 self._phasecache.filterunknown(self)
3385 3386 self._phasecache.write()
3386 3387
3387 3388 # refresh all repository caches
3388 3389 self.updatecaches()
3389 3390
3390 3391 # Ensure the persistent tag cache is updated. Doing it now
3391 3392 # means that the tag cache only has to worry about destroyed
3392 3393 # heads immediately after a strip/rollback. That in turn
3393 3394 # guarantees that "cachetip == currenttip" (comparing both rev
3394 3395 # and node) always means no nodes have been added or destroyed.
3395 3396
3396 3397 # XXX this is suboptimal when qrefresh'ing: we strip the current
3397 3398 # head, refresh the tag cache, then immediately add a new head.
3398 3399 # But I think doing it this way is necessary for the "instant
3399 3400 # tag cache retrieval" case to work.
3400 3401 self.invalidate()
3401 3402
3402 3403 def status(
3403 3404 self,
3404 3405 node1=b'.',
3405 3406 node2=None,
3406 3407 match=None,
3407 3408 ignored=False,
3408 3409 clean=False,
3409 3410 unknown=False,
3410 3411 listsubrepos=False,
3411 3412 ):
3412 3413 '''a convenience method that calls node1.status(node2)'''
3413 3414 return self[node1].status(
3414 3415 node2, match, ignored, clean, unknown, listsubrepos
3415 3416 )
3416 3417
3417 3418 def addpostdsstatus(self, ps):
3418 3419 """Add a callback to run within the wlock, at the point at which status
3419 3420 fixups happen.
3420 3421
3421 3422 On status completion, callback(wctx, status) will be called with the
3422 3423 wlock held, unless the dirstate has changed from underneath or the wlock
3423 3424 couldn't be grabbed.
3424 3425
3425 3426 Callbacks should not capture and use a cached copy of the dirstate --
3426 3427 it might change in the meanwhile. Instead, they should access the
3427 3428 dirstate via wctx.repo().dirstate.
3428 3429
3429 3430 This list is emptied out after each status run -- extensions should
3430 3431         make sure they add to this list each time dirstate.status is called.
3431 3432 Extensions should also make sure they don't call this for statuses
3432 3433 that don't involve the dirstate.
3433 3434 """
3434 3435
3435 3436 # The list is located here for uniqueness reasons -- it is actually
3436 3437 # managed by the workingctx, but that isn't unique per-repo.
3437 3438 self._postdsstatus.append(ps)
3438 3439
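    # Illustrative usage sketch (not part of the original file): an extension
    # registering a post-dirstate-status fixup; the callback name is
    # hypothetical.
    #
    #     def _fixup(wctx, status):
    #         # runs with the wlock held; use wctx.repo().dirstate, not a
    #         # cached dirstate reference
    #         ...
    #
    #     repo.addpostdsstatus(_fixup)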
3439 3440 def postdsstatus(self):
3440 3441 """Used by workingctx to get the list of post-dirstate-status hooks."""
3441 3442 return self._postdsstatus
3442 3443
3443 3444 def clearpostdsstatus(self):
3444 3445 """Used by workingctx to clear post-dirstate-status hooks."""
3445 3446 del self._postdsstatus[:]
3446 3447
3447 3448 def heads(self, start=None):
3448 3449 if start is None:
3449 3450 cl = self.changelog
3450 3451 headrevs = reversed(cl.headrevs())
3451 3452 return [cl.node(rev) for rev in headrevs]
3452 3453
3453 3454 heads = self.changelog.heads(start)
3454 3455 # sort the output in rev descending order
3455 3456 return sorted(heads, key=self.changelog.rev, reverse=True)
3456 3457
3457 3458 def branchheads(self, branch=None, start=None, closed=False):
3458 3459 """return a (possibly filtered) list of heads for the given branch
3459 3460
3460 3461 Heads are returned in topological order, from newest to oldest.
3461 3462 If branch is None, use the dirstate branch.
3462 3463 If start is not None, return only heads reachable from start.
3463 3464 If closed is True, return heads that are marked as closed as well.
3464 3465 """
3465 3466 if branch is None:
3466 3467 branch = self[None].branch()
3467 3468 branches = self.branchmap()
3468 3469 if not branches.hasbranch(branch):
3469 3470 return []
3470 3471 # the cache returns heads ordered lowest to highest
3471 3472 bheads = list(reversed(branches.branchheads(branch, closed=closed)))
3472 3473 if start is not None:
3473 3474 # filter out the heads that cannot be reached from startrev
3474 3475 fbheads = set(self.changelog.nodesbetween([start], bheads)[2])
3475 3476 bheads = [h for h in bheads if h in fbheads]
3476 3477 return bheads
3477 3478
3478 3479 def branches(self, nodes):
3479 3480 if not nodes:
3480 3481 nodes = [self.changelog.tip()]
3481 3482 b = []
3482 3483 for n in nodes:
3483 3484 t = n
3484 3485 while True:
3485 3486 p = self.changelog.parents(n)
3486 3487 if p[1] != self.nullid or p[0] == self.nullid:
3487 3488 b.append((t, n, p[0], p[1]))
3488 3489 break
3489 3490 n = p[0]
3490 3491 return b
3491 3492
3492 3493 def between(self, pairs):
3493 3494 r = []
3494 3495
3495 3496 for top, bottom in pairs:
3496 3497 n, l, i = top, [], 0
3497 3498 f = 1
3498 3499
3499 3500 while n != bottom and n != self.nullid:
3500 3501 p = self.changelog.parents(n)[0]
3501 3502 if i == f:
3502 3503 l.append(n)
3503 3504 f = f * 2
3504 3505 n = p
3505 3506 i += 1
3506 3507
3507 3508 r.append(l)
3508 3509
3509 3510 return r
3510 3511
3511 3512 def checkpush(self, pushop):
3512 3513 """Extensions can override this function if additional checks have
3513 3514 to be performed before pushing, or call it if they override push
3514 3515 command.
3515 3516 """
3516 3517
3517 3518 @unfilteredpropertycache
3518 3519 def prepushoutgoinghooks(self):
3519 3520         """Return a util.hooks instance whose hooks are called with a
3520 3521         pushop (carrying repo, remote and outgoing) before pushing changesets.
3521 3522 """
3522 3523 return util.hooks()
3523 3524
3524 3525 def pushkey(self, namespace, key, old, new):
3525 3526 try:
3526 3527 tr = self.currenttransaction()
3527 3528 hookargs = {}
3528 3529 if tr is not None:
3529 3530 hookargs.update(tr.hookargs)
3530 3531 hookargs = pycompat.strkwargs(hookargs)
3531 3532 hookargs['namespace'] = namespace
3532 3533 hookargs['key'] = key
3533 3534 hookargs['old'] = old
3534 3535 hookargs['new'] = new
3535 3536 self.hook(b'prepushkey', throw=True, **hookargs)
3536 3537 except error.HookAbort as exc:
3537 3538 self.ui.write_err(_(b"pushkey-abort: %s\n") % exc)
3538 3539 if exc.hint:
3539 3540 self.ui.write_err(_(b"(%s)\n") % exc.hint)
3540 3541 return False
3541 3542 self.ui.debug(b'pushing key for "%s:%s"\n' % (namespace, key))
3542 3543 ret = pushkey.push(self, namespace, key, old, new)
3543 3544
3544 3545 def runhook(unused_success):
3545 3546 self.hook(
3546 3547 b'pushkey',
3547 3548 namespace=namespace,
3548 3549 key=key,
3549 3550 old=old,
3550 3551 new=new,
3551 3552 ret=ret,
3552 3553 )
3553 3554
3554 3555 self._afterlock(runhook)
3555 3556 return ret
3556 3557
3557 3558 def listkeys(self, namespace):
3558 3559 self.hook(b'prelistkeys', throw=True, namespace=namespace)
3559 3560 self.ui.debug(b'listing keys for "%s"\n' % namespace)
3560 3561 values = pushkey.list(self, namespace)
3561 3562 self.hook(b'listkeys', namespace=namespace, values=values)
3562 3563 return values
3563 3564
3564 3565 def debugwireargs(self, one, two, three=None, four=None, five=None):
3565 3566 '''used to test argument passing over the wire'''
3566 3567 return b"%s %s %s %s %s" % (
3567 3568 one,
3568 3569 two,
3569 3570 pycompat.bytestr(three),
3570 3571 pycompat.bytestr(four),
3571 3572 pycompat.bytestr(five),
3572 3573 )
3573 3574
3574 3575 def savecommitmessage(self, text):
3575 3576 fp = self.vfs(b'last-message.txt', b'wb')
3576 3577 try:
3577 3578 fp.write(text)
3578 3579 finally:
3579 3580 fp.close()
3580 3581 return self.pathto(fp.name[len(self.root) + 1 :])
3581 3582
3582 3583 def register_wanted_sidedata(self, category):
3583 3584 if repository.REPO_FEATURE_SIDE_DATA not in self.features:
3584 3585 # Only revlogv2 repos can want sidedata.
3585 3586 return
3586 3587 self._wanted_sidedata.add(pycompat.bytestr(category))
3587 3588
3588 3589 def register_sidedata_computer(
3589 3590 self, kind, category, keys, computer, flags, replace=False
3590 3591 ):
3591 3592 if kind not in revlogconst.ALL_KINDS:
3592 3593 msg = _(b"unexpected revlog kind '%s'.")
3593 3594 raise error.ProgrammingError(msg % kind)
3594 3595 category = pycompat.bytestr(category)
3595 3596 already_registered = category in self._sidedata_computers.get(kind, [])
3596 3597 if already_registered and not replace:
3597 3598 msg = _(
3598 3599 b"cannot register a sidedata computer twice for category '%s'."
3599 3600 )
3600 3601 raise error.ProgrammingError(msg % category)
3601 3602 if replace and not already_registered:
3602 3603 msg = _(
3603 3604 b"cannot replace a sidedata computer that isn't registered "
3604 3605 b"for category '%s'."
3605 3606 )
3606 3607 raise error.ProgrammingError(msg % category)
3607 3608 self._sidedata_computers.setdefault(kind, {})
3608 3609 self._sidedata_computers[kind][category] = (keys, computer, flags)
3609 3610
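# Illustrative registration sketch (not part of the original file): an
# extension wanting extra sidedata on the changelog might do something along
# these lines; the category, keys, computer and flags are all hypothetical.
#
#     repo.register_wanted_sidedata(b'exp-my-category')
#     repo.register_sidedata_computer(
#         revlogconst.KIND_CHANGELOG,  # which revlog kind this applies to
#         b'exp-my-category',          # sidedata category
#         my_keys,                     # sidedata keys the computer produces
#         my_computer,                 # callable computing the sidedata
#         my_flags,                    # revision flags tied to this category
#     )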
3610 3611
3611 3612 def undoname(fn: bytes) -> bytes:
3612 3613 base, name = os.path.split(fn)
3613 3614 assert name.startswith(b'journal')
3614 3615 return os.path.join(base, name.replace(b'journal', b'undo', 1))
3615 3616
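# A minimal sketch (not part of the original module) of the journal -> undo
# renaming performed by undoname(); plain basenames keep it platform neutral.
def _undoname_example():
    assert undoname(b'journal') == b'undo'
    assert undoname(b'journal.dirstate') == b'undo.dirstate'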
3616 3617
3617 3618 def instance(ui, path: bytes, create, intents=None, createopts=None):
3618 3619 # prevent cyclic import localrepo -> upgrade -> localrepo
3619 3620 from . import upgrade
3620 3621
3621 3622 localpath = urlutil.urllocalpath(path)
3622 3623 if create:
3623 3624 createrepository(ui, localpath, createopts=createopts)
3624 3625
3625 3626 def repo_maker():
3626 3627 return makelocalrepository(ui, localpath, intents=intents)
3627 3628
3628 3629 repo = repo_maker()
3629 3630 repo = upgrade.may_auto_upgrade(repo, repo_maker)
3630 3631 return repo
3631 3632
3632 3633
3633 3634 def islocal(path: bytes) -> bool:
3634 3635 return True
3635 3636
3636 3637
3637 3638 def defaultcreateopts(ui, createopts=None):
3638 3639 """Populate the default creation options for a repository.
3639 3640
3640 3641 A dictionary of explicitly requested creation options can be passed
3641 3642 in. Missing keys will be populated.
3642 3643 """
3643 3644 createopts = dict(createopts or {})
3644 3645
3645 3646 if b'backend' not in createopts:
3646 3647 # experimental config: storage.new-repo-backend
3647 3648 createopts[b'backend'] = ui.config(b'storage', b'new-repo-backend')
3648 3649
3649 3650 return createopts
3650 3651
3651 3652
3652 3653 def clone_requirements(ui, createopts, srcrepo):
3653 3654 """clone the requirements of a local repo for a local clone
3654 3655
3655 3656 The store requirements are unchanged while the working copy requirements
3656 3657     depend on the configuration
3657 3658 """
3658 3659 target_requirements = set()
3659 3660 if not srcrepo.requirements:
3660 3661 # this is a legacy revlog "v0" repository, we cannot do anything fancy
3661 3662 # with it.
3662 3663 return target_requirements
3663 3664 createopts = defaultcreateopts(ui, createopts=createopts)
3664 3665 for r in newreporequirements(ui, createopts):
3665 3666 if r in requirementsmod.WORKING_DIR_REQUIREMENTS:
3666 3667 target_requirements.add(r)
3667 3668
3668 3669 for r in srcrepo.requirements:
3669 3670 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS:
3670 3671 target_requirements.add(r)
3671 3672 return target_requirements
3672 3673
3673 3674
3674 3675 def newreporequirements(ui, createopts):
3675 3676 """Determine the set of requirements for a new local repository.
3676 3677
3677 3678 Extensions can wrap this function to specify custom requirements for
3678 3679 new repositories.
3679 3680 """
3680 3681
3681 3682 if b'backend' not in createopts:
3682 3683 raise error.ProgrammingError(
3683 3684 b'backend key not present in createopts; '
3684 3685 b'was defaultcreateopts() called?'
3685 3686 )
3686 3687
3687 3688 if createopts[b'backend'] != b'revlogv1':
3688 3689 raise error.Abort(
3689 3690 _(
3690 3691 b'unable to determine repository requirements for '
3691 3692 b'storage backend: %s'
3692 3693 )
3693 3694 % createopts[b'backend']
3694 3695 )
3695 3696
3696 3697 requirements = {requirementsmod.REVLOGV1_REQUIREMENT}
3697 3698 if ui.configbool(b'format', b'usestore'):
3698 3699 requirements.add(requirementsmod.STORE_REQUIREMENT)
3699 3700 if ui.configbool(b'format', b'usefncache'):
3700 3701 requirements.add(requirementsmod.FNCACHE_REQUIREMENT)
3701 3702 if ui.configbool(b'format', b'dotencode'):
3702 3703 requirements.add(requirementsmod.DOTENCODE_REQUIREMENT)
3703 3704
3704 3705 compengines = ui.configlist(b'format', b'revlog-compression')
3705 3706 for compengine in compengines:
3706 3707 if compengine in util.compengines:
3707 3708 engine = util.compengines[compengine]
3708 3709 if engine.available() and engine.revlogheader():
3709 3710 break
3710 3711 else:
3711 3712 raise error.Abort(
3712 3713 _(
3713 3714 b'compression engines %s defined by '
3714 3715 b'format.revlog-compression not available'
3715 3716 )
3716 3717 % b', '.join(b'"%s"' % e for e in compengines),
3717 3718 hint=_(
3718 3719 b'run "hg debuginstall" to list available '
3719 3720 b'compression engines'
3720 3721 ),
3721 3722 )
3722 3723
3723 3724 # zlib is the historical default and doesn't need an explicit requirement.
3724 3725 if compengine == b'zstd':
3725 3726 requirements.add(b'revlog-compression-zstd')
3726 3727 elif compengine != b'zlib':
3727 3728 requirements.add(b'exp-compression-%s' % compengine)
3728 3729
3729 3730 if scmutil.gdinitconfig(ui):
3730 3731 requirements.add(requirementsmod.GENERALDELTA_REQUIREMENT)
3731 3732 if ui.configbool(b'format', b'sparse-revlog'):
3732 3733 requirements.add(requirementsmod.SPARSEREVLOG_REQUIREMENT)
3733 3734
3734 3735 # experimental config: format.use-dirstate-v2
3735 3736 # Keep this logic in sync with `has_dirstate_v2()` in `tests/hghave.py`
3736 3737 if ui.configbool(b'format', b'use-dirstate-v2'):
3737 3738 requirements.add(requirementsmod.DIRSTATE_V2_REQUIREMENT)
3738 3739
3739 3740 # experimental config: format.exp-use-copies-side-data-changeset
3740 3741 if ui.configbool(b'format', b'exp-use-copies-side-data-changeset'):
3741 3742 requirements.add(requirementsmod.CHANGELOGV2_REQUIREMENT)
3742 3743 requirements.add(requirementsmod.COPIESSDC_REQUIREMENT)
3743 3744 if ui.configbool(b'experimental', b'treemanifest'):
3744 3745 requirements.add(requirementsmod.TREEMANIFEST_REQUIREMENT)
3745 3746
3746 3747 changelogv2 = ui.config(b'format', b'exp-use-changelog-v2')
3747 3748 if changelogv2 == b'enable-unstable-format-and-corrupt-my-data':
3748 3749 requirements.add(requirementsmod.CHANGELOGV2_REQUIREMENT)
3749 3750
3750 3751 revlogv2 = ui.config(b'experimental', b'revlogv2')
3751 3752 if revlogv2 == b'enable-unstable-format-and-corrupt-my-data':
3752 3753 requirements.discard(requirementsmod.REVLOGV1_REQUIREMENT)
3753 3754 requirements.add(requirementsmod.REVLOGV2_REQUIREMENT)
3754 3755 # experimental config: format.internal-phase
3755 3756 if ui.configbool(b'format', b'use-internal-phase'):
3756 3757 requirements.add(requirementsmod.INTERNAL_PHASE_REQUIREMENT)
3757 3758
3758 3759 # experimental config: format.exp-archived-phase
3759 3760 if ui.configbool(b'format', b'exp-archived-phase'):
3760 3761 requirements.add(requirementsmod.ARCHIVED_PHASE_REQUIREMENT)
3761 3762
3762 3763 if createopts.get(b'narrowfiles'):
3763 3764 requirements.add(requirementsmod.NARROW_REQUIREMENT)
3764 3765
3765 3766 if createopts.get(b'lfs'):
3766 3767 requirements.add(b'lfs')
3767 3768
3768 3769 if ui.configbool(b'format', b'bookmarks-in-store'):
3769 3770 requirements.add(requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT)
3770 3771
3771 3772 # The feature is disabled unless a fast implementation is available.
3772 3773 persistent_nodemap_default = policy.importrust('revlog') is not None
3773 3774 if ui.configbool(
3774 3775 b'format', b'use-persistent-nodemap', persistent_nodemap_default
3775 3776 ):
3776 3777 requirements.add(requirementsmod.NODEMAP_REQUIREMENT)
3777 3778
3778 3779 # if share-safe is enabled, let's create the new repository with the new
3779 3780 # requirement
3780 3781 if ui.configbool(b'format', b'use-share-safe'):
3781 3782 requirements.add(requirementsmod.SHARESAFE_REQUIREMENT)
3782 3783
3783 3784     # if we are creating a share-repo¹ we have to handle requirements
3784 3785 # differently.
3785 3786 #
3786 3787 # [1] (i.e. reusing the store from another repository, just having a
3787 3788 # working copy)
3788 3789 if b'sharedrepo' in createopts:
3789 3790 source_requirements = set(createopts[b'sharedrepo'].requirements)
3790 3791
3791 3792 if requirementsmod.SHARESAFE_REQUIREMENT not in source_requirements:
3792 3793 # share to an old school repository, we have to copy the
3793 3794 # requirements and hope for the best.
3794 3795 requirements = source_requirements
3795 3796             # We have control over the working copy only, so "copy" the non
3796 3797 # We have control on the working copy only, so "copy" the non
3797 3798 # working copy part over, ignoring previous logic.
3798 3799 to_drop = set()
3799 3800 for req in requirements:
3800 3801 if req in requirementsmod.WORKING_DIR_REQUIREMENTS:
3801 3802 continue
3802 3803 if req in source_requirements:
3803 3804 continue
3804 3805 to_drop.add(req)
3805 3806 requirements -= to_drop
3806 3807 requirements |= source_requirements
3807 3808
3808 3809 if createopts.get(b'sharedrelative'):
3809 3810 requirements.add(requirementsmod.RELATIVE_SHARED_REQUIREMENT)
3810 3811 else:
3811 3812 requirements.add(requirementsmod.SHARED_REQUIREMENT)
3812 3813
3813 3814 if ui.configbool(b'format', b'use-dirstate-tracked-hint'):
3814 3815 version = ui.configint(b'format', b'use-dirstate-tracked-hint.version')
3815 3816 msg = _(b"ignoring unknown tracked key version: %d\n")
3816 3817 hint = _(
3817 3818 b"see `hg help config.format.use-dirstate-tracked-hint-version"
3818 3819 )
3819 3820 if version != 1:
3820 3821 ui.warn(msg % version, hint=hint)
3821 3822 else:
3822 3823 requirements.add(requirementsmod.DIRSTATE_TRACKED_HINT_V1)
3823 3824
3824 3825 return requirements
3825 3826
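# A hedged sketch (not part of the original module) of how an extension can
# wrap newreporequirements() to add its own requirement, as the docstring
# above suggests; the requirement and config names are hypothetical.
def _wrapped_newreporequirements(orig, ui, createopts):
    requirements = orig(ui, createopts)
    if ui.configbool(b'myext', b'use-fancy-store'):
        requirements.add(b'exp-myext-fancy-store')
    return requirements


# The wrapper would typically be installed from the extension's uisetup():
#     extensions.wrapfunction(
#         localrepo, 'newreporequirements', _wrapped_newreporequirements
#     )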
3826 3827
3827 3828 def checkrequirementscompat(ui, requirements):
3828 3829 """Checks compatibility of repository requirements enabled and disabled.
3829 3830
3830 3831     Returns a set of requirements which need to be dropped because dependent
3831 3832     requirements are not enabled. Also warns users about it.
3832 3833
3833 3834 dropped = set()
3834 3835
3835 3836 if requirementsmod.STORE_REQUIREMENT not in requirements:
3836 3837 if requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT in requirements:
3837 3838 ui.warn(
3838 3839 _(
3839 3840 b'ignoring enabled \'format.bookmarks-in-store\' config '
3840 3841                 b'because it is incompatible with disabled '
3841 3842 b'\'format.usestore\' config\n'
3842 3843 )
3843 3844 )
3844 3845 dropped.add(requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT)
3845 3846
3846 3847 if (
3847 3848 requirementsmod.SHARED_REQUIREMENT in requirements
3848 3849 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
3849 3850 ):
3850 3851 raise error.Abort(
3851 3852 _(
3852 3853 b"cannot create shared repository as source was created"
3853 3854 b" with 'format.usestore' config disabled"
3854 3855 )
3855 3856 )
3856 3857
3857 3858 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
3858 3859 if ui.hasconfig(b'format', b'use-share-safe'):
3859 3860 msg = _(
3860 3861 b"ignoring enabled 'format.use-share-safe' config because "
3861 3862 b"it is incompatible with disabled 'format.usestore'"
3862 3863 b" config\n"
3863 3864 )
3864 3865 ui.warn(msg)
3865 3866 dropped.add(requirementsmod.SHARESAFE_REQUIREMENT)
3866 3867
3867 3868 return dropped
3868 3869
3869 3870
3870 3871 def filterknowncreateopts(ui, createopts):
3871 3872 """Filters a dict of repo creation options against options that are known.
3872 3873
3873 3874 Receives a dict of repo creation options and returns a dict of those
3874 3875 options that we don't know how to handle.
3875 3876
3876 3877 This function is called as part of repository creation. If the
3877 3878 returned dict contains any items, repository creation will not
3878 3879 be allowed, as it means there was a request to create a repository
3879 3880 with options not recognized by loaded code.
3880 3881
3881 3882 Extensions can wrap this function to filter out creation options
3882 3883 they know how to handle.
3883 3884 """
3884 3885 known = {
3885 3886 b'backend',
3886 3887 b'lfs',
3887 3888 b'narrowfiles',
3888 3889 b'sharedrepo',
3889 3890 b'sharedrelative',
3890 3891 b'shareditems',
3891 3892 b'shallowfilestore',
3892 3893 }
3893 3894
3894 3895 return {k: v for k, v in createopts.items() if k not in known}
3895 3896
3896 3897
3897 3898 def createrepository(ui, path: bytes, createopts=None, requirements=None):
3898 3899 """Create a new repository in a vfs.
3899 3900
3900 3901 ``path`` path to the new repo's working directory.
3901 3902 ``createopts`` options for the new repository.
3902 3903     ``requirements`` predefined set of requirements.
3903 3904 (incompatible with ``createopts``)
3904 3905
3905 3906 The following keys for ``createopts`` are recognized:
3906 3907
3907 3908 backend
3908 3909 The storage backend to use.
3909 3910 lfs
3910 3911 Repository will be created with ``lfs`` requirement. The lfs extension
3911 3912 will automatically be loaded when the repository is accessed.
3912 3913 narrowfiles
3913 3914 Set up repository to support narrow file storage.
3914 3915 sharedrepo
3915 3916 Repository object from which storage should be shared.
3916 3917 sharedrelative
3917 3918 Boolean indicating if the path to the shared repo should be
3918 3919 stored as relative. By default, the pointer to the "parent" repo
3919 3920 is stored as an absolute path.
3920 3921 shareditems
3921 3922 Set of items to share to the new repository (in addition to storage).
3922 3923 shallowfilestore
3923 3924 Indicates that storage for files should be shallow (not all ancestor
3924 3925 revisions are known).
3925 3926 """
3926 3927
3927 3928 if requirements is not None:
3928 3929 if createopts is not None:
3929 3930 msg = b'cannot specify both createopts and requirements'
3930 3931 raise error.ProgrammingError(msg)
3931 3932 createopts = {}
3932 3933 else:
3933 3934 createopts = defaultcreateopts(ui, createopts=createopts)
3934 3935
3935 3936 unknownopts = filterknowncreateopts(ui, createopts)
3936 3937
3937 3938 if not isinstance(unknownopts, dict):
3938 3939 raise error.ProgrammingError(
3939 3940 b'filterknowncreateopts() did not return a dict'
3940 3941 )
3941 3942
3942 3943 if unknownopts:
3943 3944 raise error.Abort(
3944 3945 _(
3945 3946 b'unable to create repository because of unknown '
3946 3947 b'creation option: %s'
3947 3948 )
3948 3949 % b', '.join(sorted(unknownopts)),
3949 3950 hint=_(b'is a required extension not loaded?'),
3950 3951 )
3951 3952
3952 3953 requirements = newreporequirements(ui, createopts=createopts)
3953 3954 requirements -= checkrequirementscompat(ui, requirements)
3954 3955
3955 3956 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
3956 3957
3957 3958 hgvfs = vfsmod.vfs(wdirvfs.join(b'.hg'))
3958 3959 if hgvfs.exists():
3959 3960 raise error.RepoError(_(b'repository %s already exists') % path)
3960 3961
3961 3962 if b'sharedrepo' in createopts:
3962 3963 sharedpath = createopts[b'sharedrepo'].sharedpath
3963 3964
3964 3965 if createopts.get(b'sharedrelative'):
3965 3966 try:
3966 3967 sharedpath = os.path.relpath(sharedpath, hgvfs.base)
3967 3968 sharedpath = util.pconvert(sharedpath)
3968 3969 except (IOError, ValueError) as e:
3969 3970 # ValueError is raised on Windows if the drive letters differ
3970 3971 # on each path.
3971 3972 raise error.Abort(
3972 3973 _(b'cannot calculate relative path'),
3973 3974 hint=stringutil.forcebytestr(e),
3974 3975 )
3975 3976
3976 3977 if not wdirvfs.exists():
3977 3978 wdirvfs.makedirs()
3978 3979
3979 3980 hgvfs.makedir(notindexed=True)
3980 3981 if b'sharedrepo' not in createopts:
3981 3982 hgvfs.mkdir(b'cache')
3982 3983 hgvfs.mkdir(b'wcache')
3983 3984
3984 3985 has_store = requirementsmod.STORE_REQUIREMENT in requirements
3985 3986 if has_store and b'sharedrepo' not in createopts:
3986 3987 hgvfs.mkdir(b'store')
3987 3988
3988 3989 # We create an invalid changelog outside the store so very old
3989 3990 # Mercurial versions (which didn't know about the requirements
3990 3991 # file) encounter an error on reading the changelog. This
3991 3992 # effectively locks out old clients and prevents them from
3992 3993 # mucking with a repo in an unknown format.
3993 3994 #
3994 3995 # The revlog header has version 65535, which won't be recognized by
3995 3996 # such old clients.
3996 3997 hgvfs.append(
3997 3998 b'00changelog.i',
3998 3999 b'\0\0\xFF\xFF dummy changelog to prevent using the old repo '
3999 4000 b'layout',
4000 4001 )
4001 4002
4002 4003 # Filter the requirements into working copy and store ones
4003 4004 wcreq, storereq = scmutil.filterrequirements(requirements)
4004 4005 # write working copy ones
4005 4006 scmutil.writerequires(hgvfs, wcreq)
4006 4007 # If there are store requirements and the current repository
4007 4008 # is not a shared one, write stored requirements
4008 4009 # For new shared repository, we don't need to write the store
4009 4010 # requirements as they are already present in store requires
4010 4011 if storereq and b'sharedrepo' not in createopts:
4011 4012 storevfs = vfsmod.vfs(hgvfs.join(b'store'), cacheaudited=True)
4012 4013 scmutil.writerequires(storevfs, storereq)
4013 4014
4014 4015 # Write out file telling readers where to find the shared store.
4015 4016 if b'sharedrepo' in createopts:
4016 4017 hgvfs.write(b'sharedpath', sharedpath)
4017 4018
4018 4019 if createopts.get(b'shareditems'):
4019 4020 shared = b'\n'.join(sorted(createopts[b'shareditems'])) + b'\n'
4020 4021 hgvfs.write(b'shared', shared)
4021 4022
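# A minimal sketch (not part of the original module): create a repository on
# disk and open it afterwards; the path is hypothetical and the ui object is
# assumed to come from the caller.
def _example_create_and_open(ui, path=b'/tmp/example-repo'):
    createrepository(ui, path, createopts={b'lfs': True})
    return instance(ui, path, create=False)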
4022 4023
4023 4024 def poisonrepository(repo):
4024 4025 """Poison a repository instance so it can no longer be used."""
4025 4026 # Perform any cleanup on the instance.
4026 4027 repo.close()
4027 4028
4028 4029 # Our strategy is to replace the type of the object with one that
4029 4030 # has all attribute lookups result in error.
4030 4031 #
4031 4032 # But we have to allow the close() method because some constructors
4032 4033 # of repos call close() on repo references.
4033 4034 class poisonedrepository:
4034 4035 def __getattribute__(self, item):
4035 4036 if item == 'close':
4036 4037 return object.__getattribute__(self, item)
4037 4038
4038 4039 raise error.ProgrammingError(
4039 4040 b'repo instances should not be used after unshare'
4040 4041 )
4041 4042
4042 4043 def close(self):
4043 4044 pass
4044 4045
4045 4046 # We may have a repoview, which intercepts __setattr__. So be sure
4046 4047 # we operate at the lowest level possible.
4047 4048 object.__setattr__(repo, '__class__', poisonedrepository)
@@ -1,3719 +1,3708 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes constraints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance consideration (and a
168 168 # wheelbarrow of other slowness source)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281     """Hold configuration values about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285     # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305     # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311     """Hold configuration values about how new deltas are computed
312 312
313 313     Some attributes are duplicated from DataConfig to help keep each object
314 314     self-contained.
315 315 """
316 316
317 317 # can delta be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parent
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate group by chunk of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
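# A hedged sketch (not part of the original module): the sparse-read tunables
# travel as a DataConfig instance (picked up from
# opener.options[b'data-config'] in revlog.__init__ below) rather than as
# individual opener options; the values here are arbitrary examples.
def _example_sparse_read_config():
    conf = DataConfig(
        with_sparse_read=True,
        sr_density_threshold=0.25,
        sr_min_gap_size=131072,
    )
    return conf.copy()  # copy() from _Config keeps each revlog independent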
339 339
340 340 class revlog:
341 341 """
342 342 the underlying revision storage object
343 343
344 344 A revlog consists of two parts, an index and the revision data.
345 345
346 346 The index is a file with a fixed record size containing
347 347 information on each revision, including its nodeid (hash), the
348 348 nodeids of its parents, the position and offset of its data within
349 349 the data file, and the revision it's based on. Finally, each entry
350 350 contains a linkrev entry that can serve as a pointer to external
351 351 data.
352 352
353 353 The revision data itself is a linear collection of data chunks.
354 354 Each chunk represents a revision and is usually represented as a
355 355 delta against the previous chunk. To bound lookup time, runs of
356 356 deltas are limited to about 2 times the length of the original
357 357 version data. This makes retrieval of a version proportional to
358 358 its size, or O(1) relative to the number of revisions.
359 359
360 360 Both pieces of the revlog are written to in an append-only
361 361 fashion, which means we never need to rewrite a file to insert or
362 362 remove data, and can use some simple techniques to avoid the need
363 363 for locking while reading.
364 364
365 365 If checkambig, indexfile is opened with checkambig=True at
366 366 writing, to avoid file stat ambiguity.
367 367
368 368 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 369 index will be mmapped rather than read if it is larger than the
370 370 configured threshold.
371 371
372 372 If censorable is True, the revlog can have censored revisions.
373 373
374 374 If `upperboundcomp` is not None, this is the expected maximal gain from
375 375 compression for the data content.
376 376
377 377 `concurrencychecker` is an optional function that receives 3 arguments: a
378 378 file handle, a filename, and an expected position. It should check whether
379 379 the current position in the file handle is valid, and log/warn/fail (by
380 380 raising).
381 381
382 382     See mercurial/revlogutils/constants.py for details about the content of an
383 383 index entry.
384 384 """
385 385
386 386 _flagserrorclass = error.RevlogError
387 387
388 388 @staticmethod
389 389 def is_inline_index(header_bytes):
390 390 """Determine if a revlog is inline from the initial bytes of the index"""
391 391 header = INDEX_HEADER.unpack(header_bytes)[0]
392 392
393 393 _format_flags = header & ~0xFFFF
394 394 _format_version = header & 0xFFFF
395 395
396 396 features = FEATURES_BY_VERSION[_format_version]
397 397 return features[b'inline'](_format_flags)
398 398
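    # Illustrative usage sketch (not part of the original file): given an open
    # index file object `fp`, the first INDEX_HEADER.size bytes are enough to
    # decide whether the revlog is inline:
    #
    #     inline = revlog.is_inline_index(fp.read(INDEX_HEADER.size))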
399 399 def __init__(
400 400 self,
401 401 opener,
402 402 target,
403 403 radix,
404 404 postfix=None, # only exist for `tmpcensored` now
405 405 checkambig=False,
406 406 mmaplargeindex=False,
407 407 censorable=False,
408 408 upperboundcomp=None,
409 409 persistentnodemap=False,
410 410 concurrencychecker=None,
411 411 trypending=False,
412 412 try_split=False,
413 413 canonical_parent_order=True,
414 414 ):
415 415 """
416 416 create a revlog object
417 417
418 418 opener is a function that abstracts the file opening operation
419 419 and can be used to implement COW semantics or the like.
420 420
421 421         `target`: a (KIND, ID) tuple that identifies the content stored in
422 422         this revlog. It helps the rest of the code understand what the revlog
423 423         is about without having to resort to heuristics and index filename
424 424         analysis. Note that this must reliably be set by normal code, but
425 425         test, debug, or performance measurement code might not set this to an
426 426         accurate value.
427 427 """
428 428 self.upperboundcomp = upperboundcomp
429 429
430 430 self.radix = radix
431 431
432 432 self._docket_file = None
433 433 self._indexfile = None
434 434 self._datafile = None
435 435 self._sidedatafile = None
436 436 self._nodemap_file = None
437 437 self.postfix = postfix
438 438 self._trypending = trypending
439 439 self._try_split = try_split
440 440 self.opener = opener
441 441 if persistentnodemap:
442 442 self._nodemap_file = nodemaputil.get_nodemap_file(self)
443 443
444 444 assert target[0] in ALL_KINDS
445 445 assert len(target) == 2
446 446 self.target = target
447 447 if b'feature-config' in self.opener.options:
448 448 self.feature_config = self.opener.options[b'feature-config'].copy()
449 449 else:
450 450 self.feature_config = FeatureConfig()
451 451 self.feature_config.censorable = censorable
452 452 self.feature_config.canonical_parent_order = canonical_parent_order
453 453 if b'data-config' in self.opener.options:
454 454 self.data_config = self.opener.options[b'data-config'].copy()
455 455 else:
456 456 self.data_config = DataConfig()
457 457 self.data_config.check_ambig = checkambig
458 458 self.data_config.mmap_large_index = mmaplargeindex
459 459 if b'delta-config' in self.opener.options:
460 460 self.delta_config = self.opener.options[b'delta-config'].copy()
461 461 else:
462 462 self.delta_config = DeltaConfig()
463 463
464 464 # 3-tuple of (node, rev, text) for a raw revision.
465 465 self._revisioncache = None
466 466 # Maps rev to chain base rev.
467 467 self._chainbasecache = util.lrucachedict(100)
468 468 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
469 469 self._chunkcache = (0, b'')
470 470
471 471 self.index = None
472 472 self._docket = None
473 473 self._nodemap_docket = None
474 474 # Mapping of partial identifiers to full nodes.
475 475 self._pcache = {}
476 476
477 477         # other optional features
478 478
479 479 # Make copy of flag processors so each revlog instance can support
480 480 # custom flags.
481 481 self._flagprocessors = dict(flagutil.flagprocessors)
482 482
483 483 # 3-tuple of file handles being used for active writing.
484 484 self._writinghandles = None
485 485 # prevent nesting of addgroup
486 486 self._adding_group = None
487 487
488 488 self._loadindex()
489 489
490 490 self._concurrencychecker = concurrencychecker
491 491
492 492 @property
493 493 def _generaldelta(self):
494 494 """temporary compatibility proxy"""
495 495 return self.delta_config.general_delta
496 496
497 497 @property
498 498 def _checkambig(self):
499 499 """temporary compatibility proxy"""
500 500 return self.data_config.check_ambig
501 501
502 502 @property
503 503 def _mmaplargeindex(self):
504 504 """temporary compatibility proxy"""
505 505 return self.data_config.mmap_large_index
506 506
507 507 @property
508 508 def _censorable(self):
509 509 """temporary compatibility proxy"""
510 510 return self.feature_config.censorable
511 511
512 512 @property
513 513 def _chunkcachesize(self):
514 514 """temporary compatibility proxy"""
515 515 return self.data_config.chunk_cache_size
516 516
517 517 @property
518 518 def _maxchainlen(self):
519 519 """temporary compatibility proxy"""
520 520 return self.delta_config.max_chain_len
521 521
522 522 @property
523 523 def _deltabothparents(self):
524 524 """temporary compatibility proxy"""
525 525 return self.delta_config.delta_both_parents
526 526
527 527 @property
528 528 def _candidate_group_chunk_size(self):
529 529 """temporary compatibility proxy"""
530 530 return self.delta_config.candidate_group_chunk_size
531 531
532 532 @property
533 533 def _debug_delta(self):
534 534 """temporary compatibility proxy"""
535 535 return self.delta_config.debug_delta
536 536
537 537 @property
538 538 def _compengine(self):
539 539 """temporary compatibility proxy"""
540 540 return self.feature_config.compression_engine
541 541
542 542 @property
543 543 def _compengineopts(self):
544 544 """temporary compatibility proxy"""
545 545 return self.feature_config.compression_engine_options
546 546
547 547 @property
548 548 def _maxdeltachainspan(self):
549 549 """temporary compatibility proxy"""
550 550 return self.delta_config.max_deltachain_span
551 551
552 552 @property
553 553 def _withsparseread(self):
554 554 """temporary compatibility proxy"""
555 555 return self.data_config.with_sparse_read
556 556
557 557 @property
558 558 def _sparserevlog(self):
559 559 """temporary compatibility proxy"""
560 560 return self.delta_config.sparse_revlog
561 561
562 562 @property
563 563 def hassidedata(self):
564 564 """temporary compatibility proxy"""
565 565 return self.feature_config.has_side_data
566 566
567 567 @property
568 568 def _srdensitythreshold(self):
569 569 """temporary compatibility proxy"""
570 570 return self.data_config.sr_density_threshold
571 571
572 572 @property
573 573 def _srmingapsize(self):
574 574 """temporary compatibility proxy"""
575 575 return self.data_config.sr_min_gap_size
576 576
577 577 @property
578 578 def _compute_rank(self):
579 579 """temporary compatibility proxy"""
580 580 return self.feature_config.compute_rank
581 581
582 582 @property
583 583 def canonical_parent_order(self):
584 584 """temporary compatibility proxy"""
585 585 return self.feature_config.canonical_parent_order
586 586
587 587 @property
588 588 def _lazydelta(self):
589 589 """temporary compatibility proxy"""
590 590 return self.delta_config.lazy_delta
591 591
592 592 @property
593 593 def _lazydeltabase(self):
594 594 """temporary compatibility proxy"""
595 595 return self.delta_config.lazy_delta_base
596 596
597 597 def _init_opts(self):
598 598         """process options (from above/config) to set up the associated default revlog mode
599 599
600 600         These values might be adjusted once the on-disk information is actually read.
601 601
602 602 The relevant values are returned for use in _loadindex().
603 603
604 604 * newversionflags:
605 605 version header to use if we need to create a new revlog
606 606
607 607 * mmapindexthreshold:
608 608             minimal index size at which to start using mmap
609 609
610 610 * force_nodemap:
611 611 force the usage of a "development" version of the nodemap code
612 612 """
613 613 opts = self.opener.options
614 614
615 615 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
616 616 new_header = CHANGELOGV2
617 617 compute_rank = opts.get(b'changelogv2.compute-rank', True)
618 618 self.feature_config.compute_rank = compute_rank
619 619 elif b'revlogv2' in opts:
620 620 new_header = REVLOGV2
621 621 elif b'revlogv1' in opts:
622 622 new_header = REVLOGV1 | FLAG_INLINE_DATA
623 623 if b'generaldelta' in opts:
624 624 new_header |= FLAG_GENERALDELTA
625 625 elif b'revlogv0' in self.opener.options:
626 626 new_header = REVLOGV0
627 627 else:
628 628 new_header = REVLOG_DEFAULT_VERSION
629 629
630 630 mmapindexthreshold = None
631 631 if self._mmaplargeindex:
632 632 mmapindexthreshold = self.data_config.mmap_index_threshold
633 if self.delta_config.sparse_revlog:
634 # sparse-revlog forces sparse-read
635 self.data_config.with_sparse_read = True
636 elif b'with-sparse-read' in opts:
637 self.data_config.with_sparse_read = bool(opts[b'with-sparse-read'])
638 if b'sparse-read-density-threshold' in opts:
639 self.data_config.sr_density_threshold = opts[
640 b'sparse-read-density-threshold'
641 ]
642 if b'sparse-read-min-gap-size' in opts:
643 self.data_config.sr_min_gap_size = opts[b'sparse-read-min-gap-size']
644 633 if self.feature_config.enable_ellipsis:
645 634 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
646 635
647 636 # revlog v0 doesn't have flag processors
648 637 for flag, processor in opts.get(b'flagprocessors', {}).items():
649 638 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
650 639
651 640 chunk_cache_size = self.data_config.chunk_cache_size
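        # The chunk cache size must be a positive power of two;
        # ``x & (x - 1) == 0`` only holds for powers of two.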
652 641 if chunk_cache_size <= 0:
653 642 raise error.RevlogError(
654 643 _(b'revlog chunk cache size %r is not greater than 0')
655 644 % chunk_cache_size
656 645 )
657 646 elif chunk_cache_size & (chunk_cache_size - 1):
658 647 raise error.RevlogError(
659 648 _(b'revlog chunk cache size %r is not a power of 2')
660 649 % chunk_cache_size
661 650 )
662 651 force_nodemap = opts.get(b'devel-force-nodemap', False)
663 652 return new_header, mmapindexthreshold, force_nodemap
664 653
665 654 def _get_data(self, filepath, mmap_threshold, size=None):
666 655 """return a file content with or without mmap
667 656
668 657         If the file is missing, return the empty string"""
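        # Use mmap for files at or above the threshold (when one is given);
        # otherwise fall back to a regular read.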
669 658 try:
670 659 with self.opener(filepath) as fp:
671 660 if mmap_threshold is not None:
672 661 file_size = self.opener.fstat(fp).st_size
673 662 if file_size >= mmap_threshold:
674 663 if size is not None:
675 664                             # avoid potential mmap crash
676 665 size = min(file_size, size)
677 666 # TODO: should .close() to release resources without
678 667 # relying on Python GC
679 668 if size is None:
680 669 return util.buffer(util.mmapread(fp))
681 670 else:
682 671 return util.buffer(util.mmapread(fp, size))
683 672 if size is None:
684 673 return fp.read()
685 674 else:
686 675 return fp.read(size)
687 676 except FileNotFoundError:
688 677 return b''
689 678
690 679 def get_streams(self, max_linkrev, force_inline=False):
691 680 """return a list of streams that represent this revlog
692 681
693 682 This is used by stream-clone to do bytes to bytes copies of a repository.
694 683
695 684 This streams data for all revisions that refer to a changelog revision up
696 685 to `max_linkrev`.
697 686
698 687 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
699 688
700 689         It returns a list of three-tuples:
701 690
702 691 [
703 692 (filename, bytes_stream, stream_size),
704 693 …
705 694 ]
706 695 """
707 696 n = len(self)
708 697 index = self.index
709 698 while n > 0:
710 699 linkrev = index[n - 1][4]
711 700 if linkrev < max_linkrev:
712 701 break
713 702 # note: this loop will rarely go through multiple iterations, since
714 703 # it only traverses commits created during the current streaming
715 704 # pull operation.
716 705 #
717 706             # If this becomes a problem, using a binary search should cap the
718 707 # runtime of this.
719 708 n = n - 1
720 709 if n == 0:
721 710 # no data to send
722 711 return []
723 712 index_size = n * index.entry_size
724 713 data_size = self.end(n - 1)
725 714
726 715         # XXX we might have been split (or stripped) since the object
727 716         # initialization. We need to close this race too, by having a way to
728 717         # pre-open the files we feed to the revlog and never closing them before
729 718         # we are done streaming.
730 719
731 720 if self._inline:
732 721
733 722 def get_stream():
734 723 with self._indexfp() as fp:
735 724 yield None
736 725 size = index_size + data_size
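                # payloads up to 64 KiB are returned in a single read; larger
                # ones are streamed chunk by chunk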
737 726 if size <= 65536:
738 727 yield fp.read(size)
739 728 else:
740 729 yield from util.filechunkiter(fp, limit=size)
741 730
742 731 inline_stream = get_stream()
743 732 next(inline_stream)
744 733 return [
745 734 (self._indexfile, inline_stream, index_size + data_size),
746 735 ]
747 736 elif force_inline:
748 737
749 738 def get_stream():
750 739 with self.reading():
751 740 yield None
752 741
753 742 for rev in range(n):
754 743 idx = self.index.entry_binary(rev)
755 744 if rev == 0 and self._docket is None:
756 745 # re-inject the inline flag
757 746 header = self._format_flags
758 747 header |= self._format_version
759 748 header |= FLAG_INLINE_DATA
760 749 header = self.index.pack_header(header)
761 750 idx = header + idx
762 751 yield idx
763 752 yield self._getsegmentforrevs(rev, rev)[1]
764 753
765 754 inline_stream = get_stream()
766 755 next(inline_stream)
767 756 return [
768 757 (self._indexfile, inline_stream, index_size + data_size),
769 758 ]
770 759 else:
771 760
772 761 def get_index_stream():
773 762 with self._indexfp() as fp:
774 763 yield None
775 764 if index_size <= 65536:
776 765 yield fp.read(index_size)
777 766 else:
778 767 yield from util.filechunkiter(fp, limit=index_size)
779 768
780 769 def get_data_stream():
781 770 with self._datafp() as fp:
782 771 yield None
783 772 if data_size <= 65536:
784 773 yield fp.read(data_size)
785 774 else:
786 775 yield from util.filechunkiter(fp, limit=data_size)
787 776
788 777 index_stream = get_index_stream()
789 778 next(index_stream)
790 779 data_stream = get_data_stream()
791 780 next(data_stream)
792 781 return [
793 782 (self._datafile, data_stream, data_size),
794 783 (self._indexfile, index_stream, index_size),
795 784 ]
796 785
797 786 def _loadindex(self, docket=None):
798 787
799 788 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
800 789
801 790 if self.postfix is not None:
802 791 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
803 792 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
804 793 entry_point = b'%s.i.a' % self.radix
805 794 elif self._try_split and self.opener.exists(self._split_index_file):
806 795 entry_point = self._split_index_file
807 796 else:
808 797 entry_point = b'%s.i' % self.radix
809 798
810 799 if docket is not None:
811 800 self._docket = docket
812 801 self._docket_file = entry_point
813 802 else:
814 803 self._initempty = True
815 804 entry_data = self._get_data(entry_point, mmapindexthreshold)
816 805 if len(entry_data) > 0:
817 806 header = INDEX_HEADER.unpack(entry_data[:4])[0]
818 807 self._initempty = False
819 808 else:
820 809 header = new_header
821 810
822 811 self._format_flags = header & ~0xFFFF
823 812 self._format_version = header & 0xFFFF
824 813
825 814 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
826 815 if supported_flags is None:
827 816 msg = _(b'unknown version (%d) in revlog %s')
828 817 msg %= (self._format_version, self.display_id)
829 818 raise error.RevlogError(msg)
830 819 elif self._format_flags & ~supported_flags:
831 820 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
832 821 display_flag = self._format_flags >> 16
833 822 msg %= (display_flag, self._format_version, self.display_id)
834 823 raise error.RevlogError(msg)
835 824
836 825 features = FEATURES_BY_VERSION[self._format_version]
837 826 self._inline = features[b'inline'](self._format_flags)
838 827 self.delta_config.general_delta = features[b'generaldelta'](
839 828 self._format_flags
840 829 )
841 830 self.feature_config.has_side_data = features[b'sidedata']
842 831
843 832 if not features[b'docket']:
844 833 self._indexfile = entry_point
845 834 index_data = entry_data
846 835 else:
847 836 self._docket_file = entry_point
848 837 if self._initempty:
849 838 self._docket = docketutil.default_docket(self, header)
850 839 else:
851 840 self._docket = docketutil.parse_docket(
852 841 self, entry_data, use_pending=self._trypending
853 842 )
854 843
855 844 if self._docket is not None:
856 845 self._indexfile = self._docket.index_filepath()
857 846 index_data = b''
858 847 index_size = self._docket.index_end
859 848 if index_size > 0:
860 849 index_data = self._get_data(
861 850 self._indexfile, mmapindexthreshold, size=index_size
862 851 )
863 852 if len(index_data) < index_size:
864 853 msg = _(b'too few index data for %s: got %d, expected %d')
865 854 msg %= (self.display_id, len(index_data), index_size)
866 855 raise error.RevlogError(msg)
867 856
868 857 self._inline = False
869 858 # generaldelta implied by version 2 revlogs.
870 859 self.delta_config.general_delta = True
871 860 # the logic for persistent nodemap will be dealt with within the
872 861 # main docket, so disable it for now.
873 862 self._nodemap_file = None
874 863
875 864 if self._docket is not None:
876 865 self._datafile = self._docket.data_filepath()
877 866 self._sidedatafile = self._docket.sidedata_filepath()
878 867 elif self.postfix is None:
879 868 self._datafile = b'%s.d' % self.radix
880 869 else:
881 870 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
882 871
883 872 self.nodeconstants = sha1nodeconstants
884 873 self.nullid = self.nodeconstants.nullid
885 874
886 875 # sparse-revlog can't be on without general-delta (issue6056)
887 876 if not self._generaldelta:
888 877 self.delta_config.sparse_revlog = False
889 878
890 879 self._storedeltachains = True
891 880
892 881 devel_nodemap = (
893 882 self._nodemap_file
894 883 and force_nodemap
895 884 and parse_index_v1_nodemap is not None
896 885 )
897 886
898 887 use_rust_index = False
899 888 if rustrevlog is not None:
900 889 if self._nodemap_file is not None:
901 890 use_rust_index = True
902 891 else:
903 892 use_rust_index = self.opener.options.get(b'rust.index')
904 893
905 894 self._parse_index = parse_index_v1
906 895 if self._format_version == REVLOGV0:
907 896 self._parse_index = revlogv0.parse_index_v0
908 897 elif self._format_version == REVLOGV2:
909 898 self._parse_index = parse_index_v2
910 899 elif self._format_version == CHANGELOGV2:
911 900 self._parse_index = parse_index_cl_v2
912 901 elif devel_nodemap:
913 902 self._parse_index = parse_index_v1_nodemap
914 903 elif use_rust_index:
915 904 self._parse_index = parse_index_v1_mixed
916 905 try:
917 906 d = self._parse_index(index_data, self._inline)
918 907 index, chunkcache = d
919 908 use_nodemap = (
920 909 not self._inline
921 910 and self._nodemap_file is not None
922 911 and hasattr(index, 'update_nodemap_data')
923 912 )
924 913 if use_nodemap:
925 914 nodemap_data = nodemaputil.persisted_data(self)
926 915 if nodemap_data is not None:
927 916 docket = nodemap_data[0]
928 917 if (
929 918 len(d[0]) > docket.tip_rev
930 919 and d[0][docket.tip_rev][7] == docket.tip_node
931 920 ):
932 921 # no changelog tampering
933 922 self._nodemap_docket = docket
934 923 index.update_nodemap_data(*nodemap_data)
935 924 except (ValueError, IndexError):
936 925 raise error.RevlogError(
937 926 _(b"index %s is corrupted") % self.display_id
938 927 )
939 928 self.index = index
940 929 self._segmentfile = randomaccessfile.randomaccessfile(
941 930 self.opener,
942 931 (self._indexfile if self._inline else self._datafile),
943 932 self._chunkcachesize,
944 933 chunkcache,
945 934 )
946 935 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
947 936 self.opener,
948 937 self._sidedatafile,
949 938 self._chunkcachesize,
950 939 )
951 940 # revnum -> (chain-length, sum-delta-length)
952 941 self._chaininfocache = util.lrucachedict(500)
953 942 # revlog header -> revlog compressor
954 943 self._decompressors = {}
955 944
956 945 def get_revlog(self):
957 946 """simple function to mirror API of other not-really-revlog API"""
958 947 return self
959 948
960 949 @util.propertycache
961 950 def revlog_kind(self):
962 951 return self.target[0]
963 952
964 953 @util.propertycache
965 954 def display_id(self):
966 955         """The public facing "ID" of the revlog that we use in messages"""
967 956 if self.revlog_kind == KIND_FILELOG:
968 957 # Reference the file without the "data/" prefix, so it is familiar
969 958 # to the user.
970 959 return self.target[1]
971 960 else:
972 961 return self.radix
973 962
974 963 def _get_decompressor(self, t):
975 964 try:
976 965 compressor = self._decompressors[t]
977 966 except KeyError:
978 967 try:
979 968 engine = util.compengines.forrevlogheader(t)
980 969 compressor = engine.revlogcompressor(self._compengineopts)
981 970 self._decompressors[t] = compressor
982 971 except KeyError:
983 972 raise error.RevlogError(
984 973 _(b'unknown compression type %s') % binascii.hexlify(t)
985 974 )
986 975 return compressor
987 976
988 977 @util.propertycache
989 978 def _compressor(self):
990 979 engine = util.compengines[self._compengine]
991 980 return engine.revlogcompressor(self._compengineopts)
992 981
993 982 @util.propertycache
994 983 def _decompressor(self):
995 984 """the default decompressor"""
996 985 if self._docket is None:
997 986 return None
998 987 t = self._docket.default_compression_header
999 988 c = self._get_decompressor(t)
1000 989 return c.decompress
1001 990
1002 991 def _indexfp(self):
1003 992 """file object for the revlog's index file"""
1004 993 return self.opener(self._indexfile, mode=b"r")
1005 994
1006 995 def __index_write_fp(self):
1007 996         # You should not use this directly; use `_writing` instead
1008 997 try:
1009 998 f = self.opener(
1010 999 self._indexfile, mode=b"r+", checkambig=self._checkambig
1011 1000 )
1012 1001 if self._docket is None:
1013 1002 f.seek(0, os.SEEK_END)
1014 1003 else:
1015 1004 f.seek(self._docket.index_end, os.SEEK_SET)
1016 1005 return f
1017 1006 except FileNotFoundError:
1018 1007 return self.opener(
1019 1008 self._indexfile, mode=b"w+", checkambig=self._checkambig
1020 1009 )
1021 1010
1022 1011 def __index_new_fp(self):
1023 1012 # You should not use this unless you are upgrading from inline revlog
1024 1013 return self.opener(
1025 1014 self._indexfile,
1026 1015 mode=b"w",
1027 1016 checkambig=self._checkambig,
1028 1017 atomictemp=True,
1029 1018 )
1030 1019
1031 1020 def _datafp(self, mode=b'r'):
1032 1021 """file object for the revlog's data file"""
1033 1022 return self.opener(self._datafile, mode=mode)
1034 1023
1035 1024 @contextlib.contextmanager
1036 1025 def _sidedatareadfp(self):
1037 1026 """file object suitable to read sidedata"""
1038 1027 if self._writinghandles:
1039 1028 yield self._writinghandles[2]
1040 1029 else:
1041 1030 with self.opener(self._sidedatafile) as fp:
1042 1031 yield fp
1043 1032
1044 1033 def tiprev(self):
1045 1034 return len(self.index) - 1
1046 1035
1047 1036 def tip(self):
1048 1037 return self.node(self.tiprev())
1049 1038
1050 1039 def __contains__(self, rev):
1051 1040 return 0 <= rev < len(self)
1052 1041
1053 1042 def __len__(self):
1054 1043 return len(self.index)
1055 1044
1056 1045 def __iter__(self):
1057 1046 return iter(range(len(self)))
1058 1047
1059 1048 def revs(self, start=0, stop=None):
1060 1049 """iterate over all rev in this revlog (from start to stop)"""
1061 1050 return storageutil.iterrevs(len(self), start=start, stop=stop)
1062 1051
1063 1052 def hasnode(self, node):
1064 1053 try:
1065 1054 self.rev(node)
1066 1055 return True
1067 1056 except KeyError:
1068 1057 return False
1069 1058
1070 1059 def _candelta(self, baserev, rev):
1071 1060 """whether two revisions (baserev, rev) can be delta-ed or not"""
1072 1061 # Disable delta if either rev requires a content-changing flag
1073 1062 # processor (ex. LFS). This is because such flag processor can alter
1074 1063 # the rawtext content that the delta will be based on, and two clients
1075 1064 # could have a same revlog node with different flags (i.e. different
1076 1065 # rawtext contents) and the delta could be incompatible.
1077 1066 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1078 1067 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1079 1068 ):
1080 1069 return False
1081 1070 return True
1082 1071
1083 1072 def update_caches(self, transaction):
1084 1073 """update on disk cache
1085 1074
1086 1075 If a transaction is passed, the update may be delayed to transaction
1087 1076 commit."""
1088 1077 if self._nodemap_file is not None:
1089 1078 if transaction is None:
1090 1079 nodemaputil.update_persistent_nodemap(self)
1091 1080 else:
1092 1081 nodemaputil.setup_persistent_nodemap(transaction, self)
1093 1082
1094 1083 def clearcaches(self):
1095 1084 """Clear in-memory caches"""
1096 1085 self._revisioncache = None
1097 1086 self._chainbasecache.clear()
1098 1087 self._segmentfile.clear_cache()
1099 1088 self._segmentfile_sidedata.clear_cache()
1100 1089 self._pcache = {}
1101 1090 self._nodemap_docket = None
1102 1091 self.index.clearcaches()
1103 1092         # The python code is the one responsible for validating the docket, so we
1104 1093         # end up having to refresh it here.
1105 1094 use_nodemap = (
1106 1095 not self._inline
1107 1096 and self._nodemap_file is not None
1108 1097 and hasattr(self.index, 'update_nodemap_data')
1109 1098 )
1110 1099 if use_nodemap:
1111 1100 nodemap_data = nodemaputil.persisted_data(self)
1112 1101 if nodemap_data is not None:
1113 1102 self._nodemap_docket = nodemap_data[0]
1114 1103 self.index.update_nodemap_data(*nodemap_data)
1115 1104
1116 1105 def rev(self, node):
1117 1106 """return the revision number associated with a <nodeid>"""
1118 1107 try:
1119 1108 return self.index.rev(node)
1120 1109 except TypeError:
1121 1110 raise
1122 1111 except error.RevlogError:
1123 1112 # parsers.c radix tree lookup failed
1124 1113 if (
1125 1114 node == self.nodeconstants.wdirid
1126 1115 or node in self.nodeconstants.wdirfilenodeids
1127 1116 ):
1128 1117 raise error.WdirUnsupported
1129 1118 raise error.LookupError(node, self.display_id, _(b'no node'))
1130 1119
1131 1120 # Accessors for index entries.
1132 1121
1133 1122 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1134 1123 # are flags.
1135 1124 def start(self, rev):
1136 1125 return int(self.index[rev][0] >> 16)
1137 1126
1138 1127 def sidedata_cut_off(self, rev):
1139 1128 sd_cut_off = self.index[rev][8]
1140 1129 if sd_cut_off != 0:
1141 1130 return sd_cut_off
1142 1131 # This is some annoying dance, because entries without sidedata
1143 1132         # currently use 0 as their offset. (instead of previous-offset +
1144 1133 # previous-size)
1145 1134 #
1146 1135         # We should reconsider this sidedata → 0 sidedata_offset policy.
1147 1136 # In the meantime, we need this.
1148 1137 while 0 <= rev:
1149 1138 e = self.index[rev]
1150 1139 if e[9] != 0:
1151 1140 return e[8] + e[9]
1152 1141 rev -= 1
1153 1142 return 0
1154 1143
1155 1144 def flags(self, rev):
1156 1145 return self.index[rev][0] & 0xFFFF
1157 1146
1158 1147 def length(self, rev):
1159 1148 return self.index[rev][1]
1160 1149
1161 1150 def sidedata_length(self, rev):
1162 1151 if not self.hassidedata:
1163 1152 return 0
1164 1153 return self.index[rev][9]
1165 1154
1166 1155 def rawsize(self, rev):
1167 1156 """return the length of the uncompressed text for a given revision"""
1168 1157 l = self.index[rev][2]
1169 1158 if l >= 0:
1170 1159 return l
1171 1160
1172 1161 t = self.rawdata(rev)
1173 1162 return len(t)
1174 1163
1175 1164 def size(self, rev):
1176 1165 """length of non-raw text (processed by a "read" flag processor)"""
1177 1166 # fast path: if no "read" flag processor could change the content,
1178 1167 # size is rawsize. note: ELLIPSIS is known to not change the content.
1179 1168 flags = self.flags(rev)
1180 1169 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1181 1170 return self.rawsize(rev)
1182 1171
1183 1172 return len(self.revision(rev))
1184 1173
1185 1174 def fast_rank(self, rev):
1186 1175 """Return the rank of a revision if already known, or None otherwise.
1187 1176
1188 1177 The rank of a revision is the size of the sub-graph it defines as a
1189 1178 head. Equivalently, the rank of a revision `r` is the size of the set
1190 1179 `ancestors(r)`, `r` included.
1191 1180
1192 1181 This method returns the rank retrieved from the revlog in constant
1193 1182 time. It makes no attempt at computing unknown values for versions of
1194 1183 the revlog which do not persist the rank.
1195 1184 """
1196 1185 rank = self.index[rev][ENTRY_RANK]
1197 1186 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1198 1187 return None
1199 1188 if rev == nullrev:
1200 1189 return 0 # convention
1201 1190 return rank
1202 1191
1203 1192 def chainbase(self, rev):
1204 1193 base = self._chainbasecache.get(rev)
1205 1194 if base is not None:
1206 1195 return base
1207 1196
1208 1197 index = self.index
1209 1198 iterrev = rev
1210 1199 base = index[iterrev][3]
1211 1200 while base != iterrev:
1212 1201 iterrev = base
1213 1202 base = index[iterrev][3]
1214 1203
1215 1204 self._chainbasecache[rev] = base
1216 1205 return base
1217 1206
1218 1207 def linkrev(self, rev):
1219 1208 return self.index[rev][4]
1220 1209
1221 1210 def parentrevs(self, rev):
1222 1211 try:
1223 1212 entry = self.index[rev]
1224 1213 except IndexError:
1225 1214 if rev == wdirrev:
1226 1215 raise error.WdirUnsupported
1227 1216 raise
1228 1217
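        # With canonical parent order, a null first parent is reported in the
        # second slot so that the non-null parent (if any) always comes first.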
1229 1218 if self.canonical_parent_order and entry[5] == nullrev:
1230 1219 return entry[6], entry[5]
1231 1220 else:
1232 1221 return entry[5], entry[6]
1233 1222
1234 1223 # fast parentrevs(rev) where rev isn't filtered
1235 1224 _uncheckedparentrevs = parentrevs
1236 1225
1237 1226 def node(self, rev):
1238 1227 try:
1239 1228 return self.index[rev][7]
1240 1229 except IndexError:
1241 1230 if rev == wdirrev:
1242 1231 raise error.WdirUnsupported
1243 1232 raise
1244 1233
1245 1234 # Derived from index values.
1246 1235
1247 1236 def end(self, rev):
1248 1237 return self.start(rev) + self.length(rev)
1249 1238
1250 1239 def parents(self, node):
1251 1240 i = self.index
1252 1241 d = i[self.rev(node)]
1253 1242 # inline node() to avoid function call overhead
1254 1243 if self.canonical_parent_order and d[5] == self.nullid:
1255 1244 return i[d[6]][7], i[d[5]][7]
1256 1245 else:
1257 1246 return i[d[5]][7], i[d[6]][7]
1258 1247
1259 1248 def chainlen(self, rev):
1260 1249 return self._chaininfo(rev)[0]
1261 1250
1262 1251 def _chaininfo(self, rev):
1263 1252 chaininfocache = self._chaininfocache
1264 1253 if rev in chaininfocache:
1265 1254 return chaininfocache[rev]
1266 1255 index = self.index
1267 1256 generaldelta = self._generaldelta
1268 1257 iterrev = rev
1269 1258 e = index[iterrev]
1270 1259 clen = 0
1271 1260 compresseddeltalen = 0
1272 1261 while iterrev != e[3]:
1273 1262 clen += 1
1274 1263 compresseddeltalen += e[1]
1275 1264 if generaldelta:
1276 1265 iterrev = e[3]
1277 1266 else:
1278 1267 iterrev -= 1
1279 1268 if iterrev in chaininfocache:
1280 1269 t = chaininfocache[iterrev]
1281 1270 clen += t[0]
1282 1271 compresseddeltalen += t[1]
1283 1272 break
1284 1273 e = index[iterrev]
1285 1274 else:
1286 1275 # Add text length of base since decompressing that also takes
1287 1276 # work. For cache hits the length is already included.
1288 1277 compresseddeltalen += e[1]
1289 1278 r = (clen, compresseddeltalen)
1290 1279 chaininfocache[rev] = r
1291 1280 return r
1292 1281
1293 1282 def _deltachain(self, rev, stoprev=None):
1294 1283 """Obtain the delta chain for a revision.
1295 1284
1296 1285 ``stoprev`` specifies a revision to stop at. If not specified, we
1297 1286 stop at the base of the chain.
1298 1287
1299 1288 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1300 1289 revs in ascending order and ``stopped`` is a bool indicating whether
1301 1290 ``stoprev`` was hit.
1302 1291 """
1303 1292 # Try C implementation.
1304 1293 try:
1305 1294 return self.index.deltachain(rev, stoprev, self._generaldelta)
1306 1295 except AttributeError:
1307 1296 pass
1308 1297
1309 1298 chain = []
1310 1299
1311 1300 # Alias to prevent attribute lookup in tight loop.
1312 1301 index = self.index
1313 1302 generaldelta = self._generaldelta
1314 1303
1315 1304 iterrev = rev
1316 1305 e = index[iterrev]
1317 1306 while iterrev != e[3] and iterrev != stoprev:
1318 1307 chain.append(iterrev)
1319 1308 if generaldelta:
1320 1309 iterrev = e[3]
1321 1310 else:
1322 1311 iterrev -= 1
1323 1312 e = index[iterrev]
1324 1313
1325 1314 if iterrev == stoprev:
1326 1315 stopped = True
1327 1316 else:
1328 1317 chain.append(iterrev)
1329 1318 stopped = False
1330 1319
1331 1320 chain.reverse()
1332 1321 return chain, stopped
1333 1322
1334 1323 def ancestors(self, revs, stoprev=0, inclusive=False):
1335 1324 """Generate the ancestors of 'revs' in reverse revision order.
1336 1325 Does not generate revs lower than stoprev.
1337 1326
1338 1327 See the documentation for ancestor.lazyancestors for more details."""
1339 1328
1340 1329 # first, make sure start revisions aren't filtered
1341 1330 revs = list(revs)
1342 1331 checkrev = self.node
1343 1332 for r in revs:
1344 1333 checkrev(r)
1345 1334 # and we're sure ancestors aren't filtered as well
1346 1335
1347 1336 if rustancestor is not None and self.index.rust_ext_compat:
1348 1337 lazyancestors = rustancestor.LazyAncestors
1349 1338 arg = self.index
1350 1339 else:
1351 1340 lazyancestors = ancestor.lazyancestors
1352 1341 arg = self._uncheckedparentrevs
1353 1342 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1354 1343
1355 1344 def descendants(self, revs):
1356 1345 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1357 1346
1358 1347 def findcommonmissing(self, common=None, heads=None):
1359 1348 """Return a tuple of the ancestors of common and the ancestors of heads
1360 1349 that are not ancestors of common. In revset terminology, we return the
1361 1350 tuple:
1362 1351
1363 1352 ::common, (::heads) - (::common)
1364 1353
1365 1354 The list is sorted by revision number, meaning it is
1366 1355 topologically sorted.
1367 1356
1368 1357 'heads' and 'common' are both lists of node IDs. If heads is
1369 1358 not supplied, uses all of the revlog's heads. If common is not
1370 1359 supplied, uses nullid."""
1371 1360 if common is None:
1372 1361 common = [self.nullid]
1373 1362 if heads is None:
1374 1363 heads = self.heads()
1375 1364
1376 1365 common = [self.rev(n) for n in common]
1377 1366 heads = [self.rev(n) for n in heads]
1378 1367
1379 1368 # we want the ancestors, but inclusive
1380 1369 class lazyset:
1381 1370 def __init__(self, lazyvalues):
1382 1371 self.addedvalues = set()
1383 1372 self.lazyvalues = lazyvalues
1384 1373
1385 1374 def __contains__(self, value):
1386 1375 return value in self.addedvalues or value in self.lazyvalues
1387 1376
1388 1377 def __iter__(self):
1389 1378 added = self.addedvalues
1390 1379 for r in added:
1391 1380 yield r
1392 1381 for r in self.lazyvalues:
1393 1382 if not r in added:
1394 1383 yield r
1395 1384
1396 1385 def add(self, value):
1397 1386 self.addedvalues.add(value)
1398 1387
1399 1388 def update(self, values):
1400 1389 self.addedvalues.update(values)
1401 1390
1402 1391 has = lazyset(self.ancestors(common))
1403 1392 has.add(nullrev)
1404 1393 has.update(common)
1405 1394
1406 1395 # take all ancestors from heads that aren't in has
1407 1396 missing = set()
1408 1397 visit = collections.deque(r for r in heads if r not in has)
1409 1398 while visit:
1410 1399 r = visit.popleft()
1411 1400 if r in missing:
1412 1401 continue
1413 1402 else:
1414 1403 missing.add(r)
1415 1404 for p in self.parentrevs(r):
1416 1405 if p not in has:
1417 1406 visit.append(p)
1418 1407 missing = list(missing)
1419 1408 missing.sort()
1420 1409 return has, [self.node(miss) for miss in missing]
1421 1410
1422 1411 def incrementalmissingrevs(self, common=None):
1423 1412 """Return an object that can be used to incrementally compute the
1424 1413 revision numbers of the ancestors of arbitrary sets that are not
1425 1414 ancestors of common. This is an ancestor.incrementalmissingancestors
1426 1415 object.
1427 1416
1428 1417 'common' is a list of revision numbers. If common is not supplied, uses
1429 1418 nullrev.
1430 1419 """
1431 1420 if common is None:
1432 1421 common = [nullrev]
1433 1422
1434 1423 if rustancestor is not None and self.index.rust_ext_compat:
1435 1424 return rustancestor.MissingAncestors(self.index, common)
1436 1425 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1437 1426
1438 1427 def findmissingrevs(self, common=None, heads=None):
1439 1428 """Return the revision numbers of the ancestors of heads that
1440 1429 are not ancestors of common.
1441 1430
1442 1431 More specifically, return a list of revision numbers corresponding to
1443 1432 nodes N such that every N satisfies the following constraints:
1444 1433
1445 1434 1. N is an ancestor of some node in 'heads'
1446 1435 2. N is not an ancestor of any node in 'common'
1447 1436
1448 1437 The list is sorted by revision number, meaning it is
1449 1438 topologically sorted.
1450 1439
1451 1440 'heads' and 'common' are both lists of revision numbers. If heads is
1452 1441 not supplied, uses all of the revlog's heads. If common is not
1453 1442 supplied, uses nullid."""
1454 1443 if common is None:
1455 1444 common = [nullrev]
1456 1445 if heads is None:
1457 1446 heads = self.headrevs()
1458 1447
1459 1448 inc = self.incrementalmissingrevs(common=common)
1460 1449 return inc.missingancestors(heads)
1461 1450
1462 1451 def findmissing(self, common=None, heads=None):
1463 1452 """Return the ancestors of heads that are not ancestors of common.
1464 1453
1465 1454 More specifically, return a list of nodes N such that every N
1466 1455 satisfies the following constraints:
1467 1456
1468 1457 1. N is an ancestor of some node in 'heads'
1469 1458 2. N is not an ancestor of any node in 'common'
1470 1459
1471 1460 The list is sorted by revision number, meaning it is
1472 1461 topologically sorted.
1473 1462
1474 1463 'heads' and 'common' are both lists of node IDs. If heads is
1475 1464 not supplied, uses all of the revlog's heads. If common is not
1476 1465 supplied, uses nullid."""
1477 1466 if common is None:
1478 1467 common = [self.nullid]
1479 1468 if heads is None:
1480 1469 heads = self.heads()
1481 1470
1482 1471 common = [self.rev(n) for n in common]
1483 1472 heads = [self.rev(n) for n in heads]
1484 1473
1485 1474 inc = self.incrementalmissingrevs(common=common)
1486 1475 return [self.node(r) for r in inc.missingancestors(heads)]
1487 1476
1488 1477 def nodesbetween(self, roots=None, heads=None):
1489 1478 """Return a topological path from 'roots' to 'heads'.
1490 1479
1491 1480 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1492 1481 topologically sorted list of all nodes N that satisfy both of
1493 1482 these constraints:
1494 1483
1495 1484 1. N is a descendant of some node in 'roots'
1496 1485 2. N is an ancestor of some node in 'heads'
1497 1486
1498 1487 Every node is considered to be both a descendant and an ancestor
1499 1488 of itself, so every reachable node in 'roots' and 'heads' will be
1500 1489 included in 'nodes'.
1501 1490
1502 1491 'outroots' is the list of reachable nodes in 'roots', i.e., the
1503 1492 subset of 'roots' that is returned in 'nodes'. Likewise,
1504 1493 'outheads' is the subset of 'heads' that is also in 'nodes'.
1505 1494
1506 1495 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1507 1496 unspecified, uses nullid as the only root. If 'heads' is
1508 1497 unspecified, uses list of all of the revlog's heads."""
1509 1498 nonodes = ([], [], [])
1510 1499 if roots is not None:
1511 1500 roots = list(roots)
1512 1501 if not roots:
1513 1502 return nonodes
1514 1503 lowestrev = min([self.rev(n) for n in roots])
1515 1504 else:
1516 1505 roots = [self.nullid] # Everybody's a descendant of nullid
1517 1506 lowestrev = nullrev
1518 1507 if (lowestrev == nullrev) and (heads is None):
1519 1508 # We want _all_ the nodes!
1520 1509 return (
1521 1510 [self.node(r) for r in self],
1522 1511 [self.nullid],
1523 1512 list(self.heads()),
1524 1513 )
1525 1514 if heads is None:
1526 1515 # All nodes are ancestors, so the latest ancestor is the last
1527 1516 # node.
1528 1517 highestrev = len(self) - 1
1529 1518 # Set ancestors to None to signal that every node is an ancestor.
1530 1519 ancestors = None
1531 1520 # Set heads to an empty dictionary for later discovery of heads
1532 1521 heads = {}
1533 1522 else:
1534 1523 heads = list(heads)
1535 1524 if not heads:
1536 1525 return nonodes
1537 1526 ancestors = set()
1538 1527 # Turn heads into a dictionary so we can remove 'fake' heads.
1539 1528 # Also, later we will be using it to filter out the heads we can't
1540 1529 # find from roots.
1541 1530 heads = dict.fromkeys(heads, False)
1542 1531 # Start at the top and keep marking parents until we're done.
1543 1532 nodestotag = set(heads)
1544 1533 # Remember where the top was so we can use it as a limit later.
1545 1534 highestrev = max([self.rev(n) for n in nodestotag])
1546 1535 while nodestotag:
1547 1536 # grab a node to tag
1548 1537 n = nodestotag.pop()
1549 1538 # Never tag nullid
1550 1539 if n == self.nullid:
1551 1540 continue
1552 1541 # A node's revision number represents its place in a
1553 1542 # topologically sorted list of nodes.
1554 1543 r = self.rev(n)
1555 1544 if r >= lowestrev:
1556 1545 if n not in ancestors:
1557 1546 # If we are possibly a descendant of one of the roots
1558 1547 # and we haven't already been marked as an ancestor
1559 1548 ancestors.add(n) # Mark as ancestor
1560 1549 # Add non-nullid parents to list of nodes to tag.
1561 1550 nodestotag.update(
1562 1551 [p for p in self.parents(n) if p != self.nullid]
1563 1552 )
1564 1553 elif n in heads: # We've seen it before, is it a fake head?
1565 1554 # So it is, real heads should not be the ancestors of
1566 1555 # any other heads.
1567 1556 heads.pop(n)
1568 1557 if not ancestors:
1569 1558 return nonodes
1570 1559 # Now that we have our set of ancestors, we want to remove any
1571 1560 # roots that are not ancestors.
1572 1561
1573 1562 # If one of the roots was nullid, everything is included anyway.
1574 1563 if lowestrev > nullrev:
1575 1564 # But, since we weren't, let's recompute the lowest rev to not
1576 1565 # include roots that aren't ancestors.
1577 1566
1578 1567 # Filter out roots that aren't ancestors of heads
1579 1568 roots = [root for root in roots if root in ancestors]
1580 1569 # Recompute the lowest revision
1581 1570 if roots:
1582 1571 lowestrev = min([self.rev(root) for root in roots])
1583 1572 else:
1584 1573 # No more roots? Return empty list
1585 1574 return nonodes
1586 1575 else:
1587 1576 # We are descending from nullid, and don't need to care about
1588 1577 # any other roots.
1589 1578 lowestrev = nullrev
1590 1579 roots = [self.nullid]
1591 1580 # Transform our roots list into a set.
1592 1581 descendants = set(roots)
1593 1582 # Also, keep the original roots so we can filter out roots that aren't
1594 1583 # 'real' roots (i.e. are descended from other roots).
1595 1584 roots = descendants.copy()
1596 1585 # Our topologically sorted list of output nodes.
1597 1586 orderedout = []
1598 1587 # Don't start at nullid since we don't want nullid in our output list,
1599 1588 # and if nullid shows up in descendants, empty parents will look like
1600 1589 # they're descendants.
1601 1590 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1602 1591 n = self.node(r)
1603 1592 isdescendant = False
1604 1593 if lowestrev == nullrev: # Everybody is a descendant of nullid
1605 1594 isdescendant = True
1606 1595 elif n in descendants:
1607 1596 # n is already a descendant
1608 1597 isdescendant = True
1609 1598 # This check only needs to be done here because all the roots
1610 1599                 # will start being marked as descendants before the loop.
1611 1600 if n in roots:
1612 1601 # If n was a root, check if it's a 'real' root.
1613 1602 p = tuple(self.parents(n))
1614 1603 # If any of its parents are descendants, it's not a root.
1615 1604 if (p[0] in descendants) or (p[1] in descendants):
1616 1605 roots.remove(n)
1617 1606 else:
1618 1607 p = tuple(self.parents(n))
1619 1608                 # A node is a descendant if either of its parents is a
1620 1609                 # descendant. (We seeded the descendants set with the roots
1621 1610 # up there, remember?)
1622 1611 if (p[0] in descendants) or (p[1] in descendants):
1623 1612 descendants.add(n)
1624 1613 isdescendant = True
1625 1614 if isdescendant and ((ancestors is None) or (n in ancestors)):
1626 1615 # Only include nodes that are both descendants and ancestors.
1627 1616 orderedout.append(n)
1628 1617 if (ancestors is not None) and (n in heads):
1629 1618 # We're trying to figure out which heads are reachable
1630 1619 # from roots.
1631 1620 # Mark this head as having been reached
1632 1621 heads[n] = True
1633 1622 elif ancestors is None:
1634 1623 # Otherwise, we're trying to discover the heads.
1635 1624 # Assume this is a head because if it isn't, the next step
1636 1625 # will eventually remove it.
1637 1626 heads[n] = True
1638 1627 # But, obviously its parents aren't.
1639 1628 for p in self.parents(n):
1640 1629 heads.pop(p, None)
1641 1630 heads = [head for head, flag in heads.items() if flag]
1642 1631 roots = list(roots)
1643 1632 assert orderedout
1644 1633 assert roots
1645 1634 assert heads
1646 1635 return (orderedout, roots, heads)
1647 1636
1648 1637 def headrevs(self, revs=None):
1649 1638 if revs is None:
1650 1639 try:
1651 1640 return self.index.headrevs()
1652 1641 except AttributeError:
1653 1642 return self._headrevs()
1654 1643 if rustdagop is not None and self.index.rust_ext_compat:
1655 1644 return rustdagop.headrevs(self.index, revs)
1656 1645 return dagop.headrevs(revs, self._uncheckedparentrevs)
1657 1646
1658 1647 def computephases(self, roots):
1659 1648 return self.index.computephasesmapsets(roots)
1660 1649
1661 1650 def _headrevs(self):
1662 1651 count = len(self)
1663 1652 if not count:
1664 1653 return [nullrev]
1665 1654 # we won't iter over filtered rev so nobody is a head at start
1666 1655 ishead = [0] * (count + 1)
1667 1656 index = self.index
1668 1657 for r in self:
1669 1658             ishead[r] = 1 # I may be a head
1670 1659 e = index[r]
1671 1660             ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1672 1661 return [r for r, val in enumerate(ishead) if val]
1673 1662
1674 1663 def heads(self, start=None, stop=None):
1675 1664 """return the list of all nodes that have no children
1676 1665
1677 1666 if start is specified, only heads that are descendants of
1678 1667 start will be returned
1679 1668 if stop is specified, it will consider all the revs from stop
1680 1669 as if they had no children
1681 1670 """
1682 1671 if start is None and stop is None:
1683 1672 if not len(self):
1684 1673 return [self.nullid]
1685 1674 return [self.node(r) for r in self.headrevs()]
1686 1675
1687 1676 if start is None:
1688 1677 start = nullrev
1689 1678 else:
1690 1679 start = self.rev(start)
1691 1680
1692 1681 stoprevs = {self.rev(n) for n in stop or []}
1693 1682
1694 1683 revs = dagop.headrevssubset(
1695 1684 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1696 1685 )
1697 1686
1698 1687 return [self.node(rev) for rev in revs]
1699 1688
1700 1689 def children(self, node):
1701 1690 """find the children of a given node"""
1702 1691 c = []
1703 1692 p = self.rev(node)
1704 1693 for r in self.revs(start=p + 1):
1705 1694 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1706 1695 if prevs:
1707 1696 for pr in prevs:
1708 1697 if pr == p:
1709 1698 c.append(self.node(r))
1710 1699 elif p == nullrev:
1711 1700 c.append(self.node(r))
1712 1701 return c
1713 1702
1714 1703 def commonancestorsheads(self, a, b):
1715 1704 """calculate all the heads of the common ancestors of nodes a and b"""
1716 1705 a, b = self.rev(a), self.rev(b)
1717 1706 ancs = self._commonancestorsheads(a, b)
1718 1707 return pycompat.maplist(self.node, ancs)
1719 1708
1720 1709 def _commonancestorsheads(self, *revs):
1721 1710 """calculate all the heads of the common ancestors of revs"""
1722 1711 try:
1723 1712 ancs = self.index.commonancestorsheads(*revs)
1724 1713 except (AttributeError, OverflowError): # C implementation failed
1725 1714 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1726 1715 return ancs
1727 1716
1728 1717 def isancestor(self, a, b):
1729 1718 """return True if node a is an ancestor of node b
1730 1719
1731 1720 A revision is considered an ancestor of itself."""
1732 1721 a, b = self.rev(a), self.rev(b)
1733 1722 return self.isancestorrev(a, b)
1734 1723
1735 1724 def isancestorrev(self, a, b):
1736 1725 """return True if revision a is an ancestor of revision b
1737 1726
1738 1727 A revision is considered an ancestor of itself.
1739 1728
1740 1729 The implementation of this is trivial but the use of
1741 1730 reachableroots is not."""
1742 1731 if a == nullrev:
1743 1732 return True
1744 1733 elif a == b:
1745 1734 return True
1746 1735 elif a > b:
1747 1736 return False
1748 1737 return bool(self.reachableroots(a, [b], [a], includepath=False))
1749 1738
1750 1739 def reachableroots(self, minroot, heads, roots, includepath=False):
1751 1740 """return (heads(::(<roots> and <roots>::<heads>)))
1752 1741
1753 1742 If includepath is True, return (<roots>::<heads>)."""
1754 1743 try:
1755 1744 return self.index.reachableroots2(
1756 1745 minroot, heads, roots, includepath
1757 1746 )
1758 1747 except AttributeError:
1759 1748 return dagop._reachablerootspure(
1760 1749 self.parentrevs, minroot, roots, heads, includepath
1761 1750 )
1762 1751
1763 1752 def ancestor(self, a, b):
1764 1753 """calculate the "best" common ancestor of nodes a and b"""
1765 1754
1766 1755 a, b = self.rev(a), self.rev(b)
1767 1756 try:
1768 1757 ancs = self.index.ancestors(a, b)
1769 1758 except (AttributeError, OverflowError):
1770 1759 ancs = ancestor.ancestors(self.parentrevs, a, b)
1771 1760 if ancs:
1772 1761 # choose a consistent winner when there's a tie
1773 1762 return min(map(self.node, ancs))
1774 1763 return self.nullid
1775 1764
1776 1765 def _match(self, id):
1777 1766 if isinstance(id, int):
1778 1767 # rev
1779 1768 return self.node(id)
1780 1769 if len(id) == self.nodeconstants.nodelen:
1781 1770 # possibly a binary node
1782 1771 # odds of a binary node being all hex in ASCII are 1 in 10**25
1783 1772 try:
1784 1773 node = id
1785 1774 self.rev(node) # quick search the index
1786 1775 return node
1787 1776 except error.LookupError:
1788 1777 pass # may be partial hex id
1789 1778 try:
1790 1779 # str(rev)
1791 1780 rev = int(id)
1792 1781 if b"%d" % rev != id:
1793 1782 raise ValueError
1794 1783 if rev < 0:
1795 1784 rev = len(self) + rev
1796 1785 if rev < 0 or rev >= len(self):
1797 1786 raise ValueError
1798 1787 return self.node(rev)
1799 1788 except (ValueError, OverflowError):
1800 1789 pass
1801 1790 if len(id) == 2 * self.nodeconstants.nodelen:
1802 1791 try:
1803 1792 # a full hex nodeid?
1804 1793 node = bin(id)
1805 1794 self.rev(node)
1806 1795 return node
1807 1796 except (binascii.Error, error.LookupError):
1808 1797 pass
1809 1798
1810 1799 def _partialmatch(self, id):
1811 1800         # we don't care about wdirfilenodeids as they should always be full hashes
1812 1801 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1813 1802 ambiguous = False
1814 1803 try:
1815 1804 partial = self.index.partialmatch(id)
1816 1805 if partial and self.hasnode(partial):
1817 1806 if maybewdir:
1818 1807 # single 'ff...' match in radix tree, ambiguous with wdir
1819 1808 ambiguous = True
1820 1809 else:
1821 1810 return partial
1822 1811 elif maybewdir:
1823 1812 # no 'ff...' match in radix tree, wdir identified
1824 1813 raise error.WdirUnsupported
1825 1814 else:
1826 1815 return None
1827 1816 except error.RevlogError:
1828 1817 # parsers.c radix tree lookup gave multiple matches
1829 1818 # fast path: for unfiltered changelog, radix tree is accurate
1830 1819 if not getattr(self, 'filteredrevs', None):
1831 1820 ambiguous = True
1832 1821 # fall through to slow path that filters hidden revisions
1833 1822 except (AttributeError, ValueError):
1834 1823 # we are pure python, or key is not hex
1835 1824 pass
1836 1825 if ambiguous:
1837 1826 raise error.AmbiguousPrefixLookupError(
1838 1827 id, self.display_id, _(b'ambiguous identifier')
1839 1828 )
1840 1829
1841 1830 if id in self._pcache:
1842 1831 return self._pcache[id]
1843 1832
1844 1833 if len(id) <= 40:
1845 1834 # hex(node)[:...]
1846 1835 l = len(id) // 2 * 2 # grab an even number of digits
1847 1836 try:
1848 1837 # we're dropping the last digit, so let's check that it's hex,
1849 1838 # to avoid the expensive computation below if it's not
1850 1839 if len(id) % 2 > 0:
1851 1840 if not (id[-1] in hexdigits):
1852 1841 return None
1853 1842 prefix = bin(id[:l])
1854 1843 except binascii.Error:
1855 1844 pass
1856 1845 else:
1857 1846 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1858 1847 nl = [
1859 1848 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1860 1849 ]
1861 1850 if self.nodeconstants.nullhex.startswith(id):
1862 1851 nl.append(self.nullid)
1863 1852 if len(nl) > 0:
1864 1853 if len(nl) == 1 and not maybewdir:
1865 1854 self._pcache[id] = nl[0]
1866 1855 return nl[0]
1867 1856 raise error.AmbiguousPrefixLookupError(
1868 1857 id, self.display_id, _(b'ambiguous identifier')
1869 1858 )
1870 1859 if maybewdir:
1871 1860 raise error.WdirUnsupported
1872 1861 return None
1873 1862
1874 1863 def lookup(self, id):
1875 1864 """locate a node based on:
1876 1865 - revision number or str(revision number)
1877 1866 - nodeid or subset of hex nodeid
1878 1867 """
1879 1868 n = self._match(id)
1880 1869 if n is not None:
1881 1870 return n
1882 1871 n = self._partialmatch(id)
1883 1872 if n:
1884 1873 return n
1885 1874
1886 1875 raise error.LookupError(id, self.display_id, _(b'no match found'))
1887 1876
1888 1877 def shortest(self, node, minlength=1):
1889 1878 """Find the shortest unambiguous prefix that matches node."""
1890 1879
1891 1880 def isvalid(prefix):
1892 1881 try:
1893 1882 matchednode = self._partialmatch(prefix)
1894 1883 except error.AmbiguousPrefixLookupError:
1895 1884 return False
1896 1885 except error.WdirUnsupported:
1897 1886 # single 'ff...' match
1898 1887 return True
1899 1888 if matchednode is None:
1900 1889 raise error.LookupError(node, self.display_id, _(b'no node'))
1901 1890 return True
1902 1891
1903 1892 def maybewdir(prefix):
1904 1893 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1905 1894
1906 1895 hexnode = hex(node)
1907 1896
1908 1897 def disambiguate(hexnode, minlength):
1909 1898 """Disambiguate against wdirid."""
1910 1899 for length in range(minlength, len(hexnode) + 1):
1911 1900 prefix = hexnode[:length]
1912 1901 if not maybewdir(prefix):
1913 1902 return prefix
1914 1903
1915 1904 if not getattr(self, 'filteredrevs', None):
1916 1905 try:
1917 1906 length = max(self.index.shortest(node), minlength)
1918 1907 return disambiguate(hexnode, length)
1919 1908 except error.RevlogError:
1920 1909 if node != self.nodeconstants.wdirid:
1921 1910 raise error.LookupError(
1922 1911 node, self.display_id, _(b'no node')
1923 1912 )
1924 1913 except AttributeError:
1925 1914 # Fall through to pure code
1926 1915 pass
1927 1916
1928 1917 if node == self.nodeconstants.wdirid:
1929 1918 for length in range(minlength, len(hexnode) + 1):
1930 1919 prefix = hexnode[:length]
1931 1920 if isvalid(prefix):
1932 1921 return prefix
1933 1922
1934 1923 for length in range(minlength, len(hexnode) + 1):
1935 1924 prefix = hexnode[:length]
1936 1925 if isvalid(prefix):
1937 1926 return disambiguate(hexnode, length)
1938 1927
1939 1928 def cmp(self, node, text):
1940 1929 """compare text with a given file revision
1941 1930
1942 1931 returns True if text is different than what is stored.
1943 1932 """
1944 1933 p1, p2 = self.parents(node)
1945 1934 return storageutil.hashrevisionsha1(text, p1, p2) != node
1946 1935
1947 1936 def _getsegmentforrevs(self, startrev, endrev):
1948 1937 """Obtain a segment of raw data corresponding to a range of revisions.
1949 1938
1950 1939         Accepts the start and end revisions.
1953 1942
1954 1943 Requests for data may be satisfied by a cache.
1955 1944
1956 1945 Returns a 2-tuple of (offset, data) for the requested range of
1957 1946 revisions. Offset is the integer offset from the beginning of the
1958 1947 revlog and data is a str or buffer of the raw byte data.
1959 1948
1960 1949 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1961 1950 to determine where each revision's data begins and ends.
1962 1951 """
1963 1952 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1964 1953 # (functions are expensive).
1965 1954 index = self.index
1966 1955 istart = index[startrev]
1967 1956 start = int(istart[0] >> 16)
1968 1957 if startrev == endrev:
1969 1958 end = start + istart[1]
1970 1959 else:
1971 1960 iend = index[endrev]
1972 1961 end = int(iend[0] >> 16) + iend[1]
1973 1962
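        # Inline revlogs interleave index entries with revision data, so the
        # byte range must be shifted by the index entries stored before it.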
1974 1963 if self._inline:
1975 1964 start += (startrev + 1) * self.index.entry_size
1976 1965 end += (endrev + 1) * self.index.entry_size
1977 1966 length = end - start
1978 1967
1979 1968 return start, self._segmentfile.read_chunk(start, length)
1980 1969
1981 1970 def _chunk(self, rev):
1982 1971 """Obtain a single decompressed chunk for a revision.
1983 1972
1984 1973         Accepts an integer revision.
1987 1976
1988 1977 Returns a str holding uncompressed data for the requested revision.
1989 1978 """
1990 1979 compression_mode = self.index[rev][10]
1991 1980 data = self._getsegmentforrevs(rev, rev)[1]
1992 1981 if compression_mode == COMP_MODE_PLAIN:
1993 1982 return data
1994 1983 elif compression_mode == COMP_MODE_DEFAULT:
1995 1984 return self._decompressor(data)
1996 1985 elif compression_mode == COMP_MODE_INLINE:
1997 1986 return self.decompress(data)
1998 1987 else:
1999 1988 msg = b'unknown compression mode %d'
2000 1989 msg %= compression_mode
2001 1990 raise error.RevlogError(msg)
2002 1991
2003 1992 def _chunks(self, revs, targetsize=None):
2004 1993 """Obtain decompressed chunks for the specified revisions.
2005 1994
2006 1995 Accepts an iterable of numeric revisions that are assumed to be in
2007 1996         ascending order.
2010 1999
2011 2000 This function is similar to calling ``self._chunk()`` multiple times,
2012 2001 but is faster.
2013 2002
2014 2003 Returns a list with decompressed data for each requested revision.
2015 2004 """
2016 2005 if not revs:
2017 2006 return []
2018 2007 start = self.start
2019 2008 length = self.length
2020 2009 inline = self._inline
2021 2010 iosize = self.index.entry_size
2022 2011 buffer = util.buffer
2023 2012
2024 2013 l = []
2025 2014 ladd = l.append
2026 2015
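        # With sparse reads enabled, split the revision range into denser
        # slices so that large unrelated gaps are not read from disk.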
2027 2016 if not self._withsparseread:
2028 2017 slicedchunks = (revs,)
2029 2018 else:
2030 2019 slicedchunks = deltautil.slicechunk(
2031 2020 self, revs, targetsize=targetsize
2032 2021 )
2033 2022
2034 2023 for revschunk in slicedchunks:
2035 2024 firstrev = revschunk[0]
2036 2025 # Skip trailing revisions with empty diff
2037 2026 for lastrev in revschunk[::-1]:
2038 2027 if length(lastrev) != 0:
2039 2028 break
2040 2029
2041 2030 try:
2042 2031 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2043 2032 except OverflowError:
2044 2033 # issue4215 - we can't cache a run of chunks greater than
2045 2034 # 2G on Windows
2046 2035 return [self._chunk(rev) for rev in revschunk]
2047 2036
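# Each chunk is exposed as a zero-copy ``util.buffer`` slice of the
# single segment read above, located by subtracting the segment's
# starting offset from the chunk's absolute position.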
2048 2037 decomp = self.decompress
2049 2038 # self._decompressor might be None, but will not be used in that case
2050 2039 def_decomp = self._decompressor
2051 2040 for rev in revschunk:
2052 2041 chunkstart = start(rev)
2053 2042 if inline:
2054 2043 chunkstart += (rev + 1) * iosize
2055 2044 chunklength = length(rev)
2056 2045 comp_mode = self.index[rev][10]
2057 2046 c = buffer(data, chunkstart - offset, chunklength)
2058 2047 if comp_mode == COMP_MODE_PLAIN:
2059 2048 ladd(c)
2060 2049 elif comp_mode == COMP_MODE_INLINE:
2061 2050 ladd(decomp(c))
2062 2051 elif comp_mode == COMP_MODE_DEFAULT:
2063 2052 ladd(def_decomp(c))
2064 2053 else:
2065 2054 msg = b'unknown compression mode %d'
2066 2055 msg %= comp_mode
2067 2056 raise error.RevlogError(msg)
2068 2057
2069 2058 return l
2070 2059
2071 2060 def deltaparent(self, rev):
2072 2061 """return deltaparent of the given revision"""
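# entry[3] holds the delta base recorded in the index; a base equal to
# the revision itself denotes a full snapshot. With general delta the
# base may be any earlier revision, otherwise deltas are implicitly
# against the previous revision.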
2073 2062 base = self.index[rev][3]
2074 2063 if base == rev:
2075 2064 return nullrev
2076 2065 elif self._generaldelta:
2077 2066 return base
2078 2067 else:
2079 2068 return rev - 1
2080 2069
2081 2070 def issnapshot(self, rev):
2082 2071 """tells whether rev is a snapshot"""
2083 2072 if not self._sparserevlog:
2084 2073 return self.deltaparent(rev) == nullrev
2085 2074 elif hasattr(self.index, 'issnapshot'):
2086 2075 # directly assign the method to cache the testing and access
2087 2076 self.issnapshot = self.index.issnapshot
2088 2077 return self.issnapshot(rev)
2089 2078 if rev == nullrev:
2090 2079 return True
2091 2080 entry = self.index[rev]
2092 2081 base = entry[3]
2093 2082 if base == rev:
2094 2083 return True
2095 2084 if base == nullrev:
2096 2085 return True
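# A revision only counts as an intermediate snapshot if its delta base
# is not one of its parents. Parents whose stored diff is empty are
# first replaced by their own delta parent (they are identical to it),
# and if the base is still not a parent the revision is a snapshot
# provided its base is one as well.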
2097 2086 p1 = entry[5]
2098 2087 while self.length(p1) == 0:
2099 2088 b = self.deltaparent(p1)
2100 2089 if b == p1:
2101 2090 break
2102 2091 p1 = b
2103 2092 p2 = entry[6]
2104 2093 while self.length(p2) == 0:
2105 2094 b = self.deltaparent(p2)
2106 2095 if b == p2:
2107 2096 break
2108 2097 p2 = b
2109 2098 if base == p1 or base == p2:
2110 2099 return False
2111 2100 return self.issnapshot(base)
2112 2101
2113 2102 def snapshotdepth(self, rev):
2114 2103 """number of snapshot in the chain before this one"""
2115 2104 if not self.issnapshot(rev):
2116 2105 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2117 2106 return len(self._deltachain(rev)[0]) - 1
2118 2107
2119 2108 def revdiff(self, rev1, rev2):
2120 2109 """return or calculate a delta between two revisions
2121 2110
2122 2111 The delta calculated is in binary form and is intended to be written to
2123 2112 revlog data directly. So this function needs raw revision data.
2124 2113 """
2125 2114 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2126 2115 return bytes(self._chunk(rev2))
2127 2116
2128 2117 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2129 2118
2130 2119 def revision(self, nodeorrev):
2131 2120 """return an uncompressed revision of a given node or revision
2132 2121 number.
2133 2122 """
2134 2123 return self._revisiondata(nodeorrev)
2135 2124
2136 2125 def sidedata(self, nodeorrev):
2137 2126 """a map of extra data related to the changeset but not part of the hash
2138 2127
2139 2128 This function currently returns a dictionary. However, a more advanced
2140 2129 mapping object will likely be used in the future for more
2141 2130 efficient/lazy code.
2142 2131 """
2143 2132 # deal with <nodeorrev> argument type
2144 2133 if isinstance(nodeorrev, int):
2145 2134 rev = nodeorrev
2146 2135 else:
2147 2136 rev = self.rev(nodeorrev)
2148 2137 return self._sidedata(rev)
2149 2138
2150 2139 def _revisiondata(self, nodeorrev, raw=False):
2151 2140 # deal with <nodeorrev> argument type
2152 2141 if isinstance(nodeorrev, int):
2153 2142 rev = nodeorrev
2154 2143 node = self.node(rev)
2155 2144 else:
2156 2145 node = nodeorrev
2157 2146 rev = None
2158 2147
2159 2148 # fast path the special `nullid` rev
2160 2149 if node == self.nullid:
2161 2150 return b""
2162 2151
2163 2152 # ``rawtext`` is the text as stored inside the revlog. Might be the
2164 2153 # revision or might need to be processed to retrieve the revision.
2165 2154 rev, rawtext, validated = self._rawtext(node, rev)
2166 2155
2167 2156 if raw and validated:
2168 2157 # if we don't want to process the raw text and the raw
2169 2158 # text is already cached, we can exit early.
2170 2159 return rawtext
2171 2160 if rev is None:
2172 2161 rev = self.rev(node)
2173 2162 # the revlog's flags for this revision
2174 2163 # (they usually alter its state or content)
2175 2164 flags = self.flags(rev)
2176 2165
2177 2166 if validated and flags == REVIDX_DEFAULT_FLAGS:
2178 2167 # no extra flags set, no flag processor runs, text = rawtext
2179 2168 return rawtext
2180 2169
2181 2170 if raw:
2182 2171 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2183 2172 text = rawtext
2184 2173 else:
2185 2174 r = flagutil.processflagsread(self, rawtext, flags)
2186 2175 text, validatehash = r
2187 2176 if validatehash:
2188 2177 self.checkhash(text, node, rev=rev)
2189 2178 if not validated:
2190 2179 self._revisioncache = (node, rev, rawtext)
2191 2180
2192 2181 return text
2193 2182
2194 2183 def _rawtext(self, node, rev):
2195 2184 """return the possibly unvalidated rawtext for a revision
2196 2185
2197 2186 returns (rev, rawtext, validated)
2198 2187 """
2199 2188
2200 2189 # revision in the cache (could be useful to apply delta)
2201 2190 cachedrev = None
2202 2191 # An intermediate text to apply deltas to
2203 2192 basetext = None
2204 2193
2205 2194 # Check if we have the entry in cache
2206 2195 # The cache entry looks like (node, rev, rawtext)
2207 2196 if self._revisioncache:
2208 2197 if self._revisioncache[0] == node:
2209 2198 return (rev, self._revisioncache[2], True)
2210 2199 cachedrev = self._revisioncache[1]
2211 2200
2212 2201 if rev is None:
2213 2202 rev = self.rev(node)
2214 2203
2215 2204 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2216 2205 if stopped:
2217 2206 basetext = self._revisioncache[2]
2218 2207
2219 2208 # drop cache to save memory, the caller is expected to
2220 2209 # update self._revisioncache after validating the text
2221 2210 self._revisioncache = None
2222 2211
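# Heuristic bound used when slicing the delta chain for sparse reads:
# cap each read at roughly four times the expected size of the final
# rawtext.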
2223 2212 targetsize = None
2224 2213 rawsize = self.index[rev][2]
2225 2214 if 0 <= rawsize:
2226 2215 targetsize = 4 * rawsize
2227 2216
2228 2217 bins = self._chunks(chain, targetsize=targetsize)
2229 2218 if basetext is None:
2230 2219 basetext = bytes(bins[0])
2231 2220 bins = bins[1:]
2232 2221
2233 2222 rawtext = mdiff.patches(basetext, bins)
2234 2223 del basetext # let us have a chance to free memory early
2235 2224 return (rev, rawtext, False)
2236 2225
2237 2226 def _sidedata(self, rev):
2238 2227 """Return the sidedata for a given revision number."""
2239 2228 index_entry = self.index[rev]
2240 2229 sidedata_offset = index_entry[8]
2241 2230 sidedata_size = index_entry[9]
2242 2231
2243 2232 if self._inline:
2244 2233 sidedata_offset += self.index.entry_size * (1 + rev)
2245 2234 if sidedata_size == 0:
2246 2235 return {}
2247 2236
2248 2237 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2249 2238 filename = self._sidedatafile
2250 2239 end = self._docket.sidedata_end
2251 2240 offset = sidedata_offset
2252 2241 length = sidedata_size
2253 2242 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2254 2243 raise error.RevlogError(m)
2255 2244
2256 2245 comp_segment = self._segmentfile_sidedata.read_chunk(
2257 2246 sidedata_offset, sidedata_size
2258 2247 )
2259 2248
2260 2249 comp = self.index[rev][11]
2261 2250 if comp == COMP_MODE_PLAIN:
2262 2251 segment = comp_segment
2263 2252 elif comp == COMP_MODE_DEFAULT:
2264 2253 segment = self._decompressor(comp_segment)
2265 2254 elif comp == COMP_MODE_INLINE:
2266 2255 segment = self.decompress(comp_segment)
2267 2256 else:
2268 2257 msg = b'unknown compression mode %d'
2269 2258 msg %= comp
2270 2259 raise error.RevlogError(msg)
2271 2260
2272 2261 sidedata = sidedatautil.deserialize_sidedata(segment)
2273 2262 return sidedata
2274 2263
2275 2264 def rawdata(self, nodeorrev):
2276 2265 """return an uncompressed raw data of a given node or revision number."""
2277 2266 return self._revisiondata(nodeorrev, raw=True)
2278 2267
2279 2268 def hash(self, text, p1, p2):
2280 2269 """Compute a node hash.
2281 2270
2282 2271 Available as a function so that subclasses can replace the hash
2283 2272 as needed.
2284 2273 """
2285 2274 return storageutil.hashrevisionsha1(text, p1, p2)
2286 2275
2287 2276 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2288 2277 """Check node hash integrity.
2289 2278
2290 2279 Available as a function so that subclasses can extend hash mismatch
2291 2280 behaviors as needed.
2292 2281 """
2293 2282 try:
2294 2283 if p1 is None and p2 is None:
2295 2284 p1, p2 = self.parents(node)
2296 2285 if node != self.hash(text, p1, p2):
2297 2286 # Clear the revision cache on hash failure. The revision cache
2298 2287 # only stores the raw revision and clearing the cache does have
2299 2288 # the side-effect that we won't have a cache hit when the raw
2300 2289 # revision data is accessed. But this case should be rare and
2301 2290 # it is extra work to teach the cache about the hash
2302 2291 # verification state.
2303 2292 if self._revisioncache and self._revisioncache[0] == node:
2304 2293 self._revisioncache = None
2305 2294
2306 2295 revornode = rev
2307 2296 if revornode is None:
2308 2297 revornode = templatefilters.short(hex(node))
2309 2298 raise error.RevlogError(
2310 2299 _(b"integrity check failed on %s:%s")
2311 2300 % (self.display_id, pycompat.bytestr(revornode))
2312 2301 )
2313 2302 except error.RevlogError:
2314 2303 if self._censorable and storageutil.iscensoredtext(text):
2315 2304 raise error.CensoredNodeError(self.display_id, node, text)
2316 2305 raise
2317 2306
2318 2307 @property
2319 2308 def _split_index_file(self):
2320 2309 """the path where to expect the index of an ongoing splitting operation
2321 2310
2322 2311 The file will only exist if a splitting operation is in progress, but
2323 2312 it is always expected at the same location."""
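# Illustrative example (hypothetical names): a radix of ``data/foo``
# maps to ``data-s/foo.i`` below, while a store-root radix such as the
# changelog's simply gets an ``.i.s`` extension.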
2324 2313 parts = self.radix.split(b'/')
2325 2314 if len(parts) > 1:
2326 2315 # adds a '-s' suffix to the ``data/`` or ``meta/`` base directory
2327 2316 head = parts[0] + b'-s'
2328 2317 mids = parts[1:-1]
2329 2318 tail = parts[-1] + b'.i'
2330 2319 pieces = [head] + mids + [tail]
2331 2320 return b'/'.join(pieces)
2332 2321 else:
2333 2322 # the revlog is stored at the root of the store (changelog or
2334 2323 # manifest), no risk of collision.
2335 2324 return self.radix + b'.i.s'
2336 2325
2337 2326 def _enforceinlinesize(self, tr, side_write=True):
2338 2327 """Check if the revlog is too big for inline and convert if so.
2339 2328
2340 2329 This should be called after revisions are added to the revlog. If the
2341 2330 revlog has grown too large to be an inline revlog, it will convert it
2342 2331 to use multiple index and data files.
2343 2332 """
2344 2333 tiprev = len(self) - 1
2345 2334 total_size = self.start(tiprev) + self.length(tiprev)
2346 2335 if not self._inline or total_size < _maxinline:
2347 2336 return
2348 2337
2349 2338 troffset = tr.findoffset(self._indexfile)
2350 2339 if troffset is None:
2351 2340 raise error.RevlogError(
2352 2341 _(b"%s not found in the transaction") % self._indexfile
2353 2342 )
2354 2343 if troffset:
2355 2344 tr.addbackup(self._indexfile, for_offset=True)
2356 2345 tr.add(self._datafile, 0)
2357 2346
2358 2347 existing_handles = False
2359 2348 if self._writinghandles is not None:
2360 2349 existing_handles = True
2361 2350 fp = self._writinghandles[0]
2362 2351 fp.flush()
2363 2352 fp.close()
2364 2353 # We can't use the cached file handle after close(). So prevent
2365 2354 # its usage.
2366 2355 self._writinghandles = None
2367 2356 self._segmentfile.writing_handle = None
2368 2357 # No need to deal with sidedata writing handle as it is only
2369 2358 # relevant with revlog-v2 which is never inline, not reaching
2370 2359 # this code
2371 2360 if side_write:
2372 2361 old_index_file_path = self._indexfile
2373 2362 new_index_file_path = self._split_index_file
2374 2363 opener = self.opener
2375 2364 weak_self = weakref.ref(self)
2376 2365
2377 2366 # the "split" index replace the real index when the transaction is finalized
2378 2367 def finalize_callback(tr):
2379 2368 opener.rename(
2380 2369 new_index_file_path,
2381 2370 old_index_file_path,
2382 2371 checkambig=True,
2383 2372 )
2384 2373 maybe_self = weak_self()
2385 2374 if maybe_self is not None:
2386 2375 maybe_self._indexfile = old_index_file_path
2387 2376
2388 2377 def abort_callback(tr):
2389 2378 maybe_self = weak_self()
2390 2379 if maybe_self is not None:
2391 2380 maybe_self._indexfile = old_index_file_path
2392 2381
2393 2382 tr.registertmp(new_index_file_path)
2394 2383 if self.target[1] is not None:
2395 2384 callback_id = b'000-revlog-split-%d-%s' % self.target
2396 2385 else:
2397 2386 callback_id = b'000-revlog-split-%d' % self.target[0]
2398 2387 tr.addfinalize(callback_id, finalize_callback)
2399 2388 tr.addabort(callback_id, abort_callback)
2400 2389
2401 2390 new_dfh = self._datafp(b'w+')
2402 2391 new_dfh.truncate(0) # drop any potentially existing data
2403 2392 try:
2404 2393 with self.reading():
2405 2394 for r in self:
2406 2395 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2407 2396 new_dfh.flush()
2408 2397
2409 2398 if side_write:
2410 2399 self._indexfile = new_index_file_path
2411 2400 with self.__index_new_fp() as fp:
2412 2401 self._format_flags &= ~FLAG_INLINE_DATA
2413 2402 self._inline = False
2414 2403 for i in self:
2415 2404 e = self.index.entry_binary(i)
2416 2405 if i == 0 and self._docket is None:
2417 2406 header = self._format_flags | self._format_version
2418 2407 header = self.index.pack_header(header)
2419 2408 e = header + e
2420 2409 fp.write(e)
2421 2410 if self._docket is not None:
2422 2411 self._docket.index_end = fp.tell()
2423 2412
2424 2413 # If we don't use side-write, the temp file replaces the real
2425 2414 # index when we exit the context manager
2426 2415
2427 2416 nodemaputil.setup_persistent_nodemap(tr, self)
2428 2417 self._segmentfile = randomaccessfile.randomaccessfile(
2429 2418 self.opener,
2430 2419 self._datafile,
2431 2420 self._chunkcachesize,
2432 2421 )
2433 2422
2434 2423 if existing_handles:
2435 2424 # switched from inline to conventional; reopen the index
2436 2425 ifh = self.__index_write_fp()
2437 2426 self._writinghandles = (ifh, new_dfh, None)
2438 2427 self._segmentfile.writing_handle = new_dfh
2439 2428 new_dfh = None
2440 2429 # No need to deal with sidedata writing handle as it is only
2441 2430 # relevant with revlog-v2 which is never inline, not reaching
2442 2431 # this code
2443 2432 finally:
2444 2433 if new_dfh is not None:
2445 2434 new_dfh.close()
2446 2435
2447 2436 def _nodeduplicatecallback(self, transaction, node):
2448 2437 """called when trying to add a node already stored."""
2449 2438
2450 2439 @contextlib.contextmanager
2451 2440 def reading(self):
2452 2441 """Context manager that keeps data and sidedata files open for reading"""
2453 2442 if len(self.index) == 0:
2454 2443 yield # nothing to be read
2455 2444 else:
2456 2445 with self._segmentfile.reading():
2457 2446 with self._segmentfile_sidedata.reading():
2458 2447 yield
2459 2448
2460 2449 @contextlib.contextmanager
2461 2450 def _writing(self, transaction):
2462 2451 if self._trypending:
2463 2452 msg = b'try to write in a `trypending` revlog: %s'
2464 2453 msg %= self.display_id
2465 2454 raise error.ProgrammingError(msg)
2466 2455 if self._writinghandles is not None:
2467 2456 yield
2468 2457 else:
2469 2458 ifh = dfh = sdfh = None
2470 2459 try:
2471 2460 r = len(self)
2472 2461 # opening the data file.
2473 2462 dsize = 0
2474 2463 if r:
2475 2464 dsize = self.end(r - 1)
2476 2465 dfh = None
2477 2466 if not self._inline:
2478 2467 try:
2479 2468 dfh = self._datafp(b"r+")
2480 2469 if self._docket is None:
2481 2470 dfh.seek(0, os.SEEK_END)
2482 2471 else:
2483 2472 dfh.seek(self._docket.data_end, os.SEEK_SET)
2484 2473 except FileNotFoundError:
2485 2474 dfh = self._datafp(b"w+")
2486 2475 transaction.add(self._datafile, dsize)
2487 2476 if self._sidedatafile is not None:
2488 2477 # revlog-v2 does not inline, help Pytype
2489 2478 assert dfh is not None
2490 2479 try:
2491 2480 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2492 2481 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2493 2482 except FileNotFoundError:
2494 2483 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2495 2484 transaction.add(
2496 2485 self._sidedatafile, self._docket.sidedata_end
2497 2486 )
2498 2487
2499 2488 # opening the index file.
2500 2489 isize = r * self.index.entry_size
2501 2490 ifh = self.__index_write_fp()
2502 2491 if self._inline:
2503 2492 transaction.add(self._indexfile, dsize + isize)
2504 2493 else:
2505 2494 transaction.add(self._indexfile, isize)
2506 2495 # exposing all file handles for writing.
2507 2496 self._writinghandles = (ifh, dfh, sdfh)
2508 2497 self._segmentfile.writing_handle = ifh if self._inline else dfh
2509 2498 self._segmentfile_sidedata.writing_handle = sdfh
2510 2499 yield
2511 2500 if self._docket is not None:
2512 2501 self._write_docket(transaction)
2513 2502 finally:
2514 2503 self._writinghandles = None
2515 2504 self._segmentfile.writing_handle = None
2516 2505 self._segmentfile_sidedata.writing_handle = None
2517 2506 if dfh is not None:
2518 2507 dfh.close()
2519 2508 if sdfh is not None:
2520 2509 sdfh.close()
2521 2510 # closing the index file last to avoid exposing references to
2522 2511 # potentially unflushed data content.
2523 2512 if ifh is not None:
2524 2513 ifh.close()
2525 2514
2526 2515 def _write_docket(self, transaction):
2527 2516 """write the current docket on disk
2528 2517
2529 2518 Exists as a method to help the changelog implement its transaction logic
2530 2519
2531 2520 We could also imagine using the same transaction logic for all revlogs
2532 2521 since dockets are cheap.
2533 2522 self._docket.write(transaction)
2534 2523
2535 2524 def addrevision(
2536 2525 self,
2537 2526 text,
2538 2527 transaction,
2539 2528 link,
2540 2529 p1,
2541 2530 p2,
2542 2531 cachedelta=None,
2543 2532 node=None,
2544 2533 flags=REVIDX_DEFAULT_FLAGS,
2545 2534 deltacomputer=None,
2546 2535 sidedata=None,
2547 2536 ):
2548 2537 """add a revision to the log
2549 2538
2550 2539 text - the revision data to add
2551 2540 transaction - the transaction object used for rollback
2552 2541 link - the linkrev data to add
2553 2542 p1, p2 - the parent nodeids of the revision
2554 2543 cachedelta - an optional precomputed delta
2555 2544 node - nodeid of revision; typically node is not specified, and it is
2556 2545 computed by default as hash(text, p1, p2), however subclasses might
2557 2546 use a different hashing method (and override checkhash() in that case)
2558 2547 flags - the known flags to set on the revision
2559 2548 deltacomputer - an optional deltacomputer instance shared between
2560 2549 multiple calls
2561 2550 """
2562 2551 if link == nullrev:
2563 2552 raise error.RevlogError(
2564 2553 _(b"attempted to add linkrev -1 to %s") % self.display_id
2565 2554 )
2566 2555
2567 2556 if sidedata is None:
2568 2557 sidedata = {}
2569 2558 elif sidedata and not self.hassidedata:
2570 2559 raise error.ProgrammingError(
2571 2560 _(b"trying to add sidedata to a revlog who don't support them")
2572 2561 )
2573 2562
2574 2563 if flags:
2575 2564 node = node or self.hash(text, p1, p2)
2576 2565
2577 2566 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2578 2567
2579 2568 # If the flag processor modifies the revision data, ignore any provided
2580 2569 # cachedelta.
2581 2570 if rawtext != text:
2582 2571 cachedelta = None
2583 2572
2584 2573 if len(rawtext) > _maxentrysize:
2585 2574 raise error.RevlogError(
2586 2575 _(
2587 2576 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2588 2577 )
2589 2578 % (self.display_id, len(rawtext))
2590 2579 )
2591 2580
2592 2581 node = node or self.hash(rawtext, p1, p2)
2593 2582 rev = self.index.get_rev(node)
2594 2583 if rev is not None:
2595 2584 return rev
2596 2585
2597 2586 if validatehash:
2598 2587 self.checkhash(rawtext, node, p1=p1, p2=p2)
2599 2588
2600 2589 return self.addrawrevision(
2601 2590 rawtext,
2602 2591 transaction,
2603 2592 link,
2604 2593 p1,
2605 2594 p2,
2606 2595 node,
2607 2596 flags,
2608 2597 cachedelta=cachedelta,
2609 2598 deltacomputer=deltacomputer,
2610 2599 sidedata=sidedata,
2611 2600 )
2612 2601
2613 2602 def addrawrevision(
2614 2603 self,
2615 2604 rawtext,
2616 2605 transaction,
2617 2606 link,
2618 2607 p1,
2619 2608 p2,
2620 2609 node,
2621 2610 flags,
2622 2611 cachedelta=None,
2623 2612 deltacomputer=None,
2624 2613 sidedata=None,
2625 2614 ):
2626 2615 """add a raw revision with known flags, node and parents
2627 2616 useful when reusing a revision not stored in this revlog (ex: received
2628 2617 over wire, or read from an external bundle).
2629 2618 """
2630 2619 with self._writing(transaction):
2631 2620 return self._addrevision(
2632 2621 node,
2633 2622 rawtext,
2634 2623 transaction,
2635 2624 link,
2636 2625 p1,
2637 2626 p2,
2638 2627 flags,
2639 2628 cachedelta,
2640 2629 deltacomputer=deltacomputer,
2641 2630 sidedata=sidedata,
2642 2631 )
2643 2632
2644 2633 def compress(self, data):
2645 2634 """Generate a possibly-compressed representation of data."""
2646 2635 if not data:
2647 2636 return b'', data
2648 2637
2649 2638 compressed = self._compressor.compress(data)
2650 2639
2651 2640 if compressed:
2652 2641 # The revlog compressor added the header in the returned data.
2653 2642 return b'', compressed
2654 2643
2655 2644 if data[0:1] == b'\0':
2656 2645 return b'', data
2657 2646 return b'u', data
2658 2647
2659 2648 def decompress(self, data):
2660 2649 """Decompress a revlog chunk.
2661 2650
2662 2651 The chunk is expected to begin with a header identifying the
2663 2652 format type so it can be routed to an appropriate decompressor.
2664 2653 """
2665 2654 if not data:
2666 2655 return data
2667 2656
2668 2657 # Revlogs are read much more frequently than they are written and many
2669 2658 # chunks only take microseconds to decompress, so performance is
2670 2659 # important here.
2671 2660 #
2672 2661 # We can make a few assumptions about revlogs:
2673 2662 #
2674 2663 # 1) the majority of chunks will be compressed (as opposed to inline
2675 2664 # raw data).
2676 2665 # 2) decompressing *any* data will likely be at least 10x slower than
2677 2666 # returning raw inline data.
2678 2667 # 3) we want to prioritize common and officially supported compression
2679 2668 # engines
2680 2669 #
2681 2670 # It follows that we want to optimize for "decompress compressed data
2682 2671 # when encoded with common and officially supported compression engines"
2683 2672 # case over "raw data" and "data encoded by less common or non-official
2684 2673 # compression engines." That is why we have the inline lookup first
2685 2674 # followed by the compengines lookup.
2686 2675 #
2687 2676 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2688 2677 # compressed chunks. And this matters for changelog and manifest reads.
2689 2678 t = data[0:1]
2690 2679
2691 2680 if t == b'x':
2692 2681 try:
2693 2682 return _zlibdecompress(data)
2694 2683 except zlib.error as e:
2695 2684 raise error.RevlogError(
2696 2685 _(b'revlog decompress error: %s')
2697 2686 % stringutil.forcebytestr(e)
2698 2687 )
2699 2688 # '\0' is more common than 'u' so it goes first.
2700 2689 elif t == b'\0':
2701 2690 return data
2702 2691 elif t == b'u':
2703 2692 return util.buffer(data, 1)
2704 2693
2705 2694 compressor = self._get_decompressor(t)
2706 2695
2707 2696 return compressor.decompress(data)
2708 2697
2709 2698 def _addrevision(
2710 2699 self,
2711 2700 node,
2712 2701 rawtext,
2713 2702 transaction,
2714 2703 link,
2715 2704 p1,
2716 2705 p2,
2717 2706 flags,
2718 2707 cachedelta,
2719 2708 alwayscache=False,
2720 2709 deltacomputer=None,
2721 2710 sidedata=None,
2722 2711 ):
2723 2712 """internal function to add revisions to the log
2724 2713
2725 2714 see addrevision for argument descriptions.
2726 2715
2727 2716 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2728 2717
2729 2718 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2730 2719 be used.
2731 2720
2732 2721 invariants:
2733 2722 - rawtext is optional (can be None); if not set, cachedelta must be set.
2734 2723 if both are set, they must correspond to each other.
2735 2724 """
2736 2725 if node == self.nullid:
2737 2726 raise error.RevlogError(
2738 2727 _(b"%s: attempt to add null revision") % self.display_id
2739 2728 )
2740 2729 if (
2741 2730 node == self.nodeconstants.wdirid
2742 2731 or node in self.nodeconstants.wdirfilenodeids
2743 2732 ):
2744 2733 raise error.RevlogError(
2745 2734 _(b"%s: attempt to add wdir revision") % self.display_id
2746 2735 )
2747 2736 if self._writinghandles is None:
2748 2737 msg = b'adding revision outside `revlog._writing` context'
2749 2738 raise error.ProgrammingError(msg)
2750 2739
2751 2740 btext = [rawtext]
2752 2741
2753 2742 curr = len(self)
2754 2743 prev = curr - 1
2755 2744
2756 2745 offset = self._get_data_offset(prev)
2757 2746
2758 2747 if self._concurrencychecker:
2759 2748 ifh, dfh, sdfh = self._writinghandles
2760 2749 # XXX no checking for the sidedata file
2761 2750 if self._inline:
2762 2751 # offset is "as if" it were in the .d file, so we need to add on
2763 2752 # the size of the entry metadata.
2764 2753 self._concurrencychecker(
2765 2754 ifh, self._indexfile, offset + curr * self.index.entry_size
2766 2755 )
2767 2756 else:
2768 2757 # Entries in the .i are a consistent size.
2769 2758 self._concurrencychecker(
2770 2759 ifh, self._indexfile, curr * self.index.entry_size
2771 2760 )
2772 2761 self._concurrencychecker(dfh, self._datafile, offset)
2773 2762
2774 2763 p1r, p2r = self.rev(p1), self.rev(p2)
2775 2764
2776 2765 # full versions are inserted when the needed deltas
2777 2766 # become comparable to the uncompressed text
2778 2767 if rawtext is None:
2779 2768 # need the rawtext size before it was changed by flag processors,
2780 2769 # which is the non-raw size. Use revlog explicitly to avoid filelog's
2781 2770 # extra logic that might remove metadata size.
2782 2771 textlen = mdiff.patchedsize(
2783 2772 revlog.size(self, cachedelta[0]), cachedelta[1]
2784 2773 )
2785 2774 else:
2786 2775 textlen = len(rawtext)
2787 2776
2788 2777 if deltacomputer is None:
2789 2778 write_debug = None
2790 2779 if self._debug_delta:
2791 2780 write_debug = transaction._report
2792 2781 deltacomputer = deltautil.deltacomputer(
2793 2782 self, write_debug=write_debug
2794 2783 )
2795 2784
2796 2785 if cachedelta is not None and len(cachedelta) == 2:
2797 2786 # If the cached delta has no information about how it should be
2798 2787 # reused, add the default reuse instruction according to the
2799 2788 # revlog's configuration.
2800 2789 if self._generaldelta and self._lazydeltabase:
2801 2790 delta_base_reuse = DELTA_BASE_REUSE_TRY
2802 2791 else:
2803 2792 delta_base_reuse = DELTA_BASE_REUSE_NO
2804 2793 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2805 2794
2806 2795 revinfo = revlogutils.revisioninfo(
2807 2796 node,
2808 2797 p1,
2809 2798 p2,
2810 2799 btext,
2811 2800 textlen,
2812 2801 cachedelta,
2813 2802 flags,
2814 2803 )
2815 2804
2816 2805 deltainfo = deltacomputer.finddeltainfo(revinfo)
2817 2806
2818 2807 compression_mode = COMP_MODE_INLINE
2819 2808 if self._docket is not None:
2820 2809 default_comp = self._docket.default_compression_header
2821 2810 r = deltautil.delta_compression(default_comp, deltainfo)
2822 2811 compression_mode, deltainfo = r
2823 2812
2824 2813 sidedata_compression_mode = COMP_MODE_INLINE
2825 2814 if sidedata and self.hassidedata:
2826 2815 sidedata_compression_mode = COMP_MODE_PLAIN
2827 2816 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2828 2817 sidedata_offset = self._docket.sidedata_end
2829 2818 h, comp_sidedata = self.compress(serialized_sidedata)
2830 2819 if (
2831 2820 h != b'u'
2832 2821 and comp_sidedata[0:1] != b'\0'
2833 2822 and len(comp_sidedata) < len(serialized_sidedata)
2834 2823 ):
2835 2824 assert not h
2836 2825 if (
2837 2826 comp_sidedata[0:1]
2838 2827 == self._docket.default_compression_header
2839 2828 ):
2840 2829 sidedata_compression_mode = COMP_MODE_DEFAULT
2841 2830 serialized_sidedata = comp_sidedata
2842 2831 else:
2843 2832 sidedata_compression_mode = COMP_MODE_INLINE
2844 2833 serialized_sidedata = comp_sidedata
2845 2834 else:
2846 2835 serialized_sidedata = b""
2847 2836 # Don't store the offset if the sidedata is empty, that way
2848 2837 # we can easily detect empty sidedata and they will be no different
2849 2838 # than ones we manually add.
2850 2839 sidedata_offset = 0
2851 2840
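# The rank of a revision is the number of revisions in its ancestor
# set, itself included. For a merge it is derived from the rank of the
# higher-numbered parent plus the number of ancestors reachable only
# through the other parent, plus one for the new revision itself.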
2852 2841 rank = RANK_UNKNOWN
2853 2842 if self._compute_rank:
2854 2843 if (p1r, p2r) == (nullrev, nullrev):
2855 2844 rank = 1
2856 2845 elif p1r != nullrev and p2r == nullrev:
2857 2846 rank = 1 + self.fast_rank(p1r)
2858 2847 elif p1r == nullrev and p2r != nullrev:
2859 2848 rank = 1 + self.fast_rank(p2r)
2860 2849 else: # merge node
2861 2850 if rustdagop is not None and self.index.rust_ext_compat:
2862 2851 rank = rustdagop.rank(self.index, p1r, p2r)
2863 2852 else:
2864 2853 pmin, pmax = sorted((p1r, p2r))
2865 2854 rank = 1 + self.fast_rank(pmax)
2866 2855 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2867 2856
2868 2857 e = revlogutils.entry(
2869 2858 flags=flags,
2870 2859 data_offset=offset,
2871 2860 data_compressed_length=deltainfo.deltalen,
2872 2861 data_uncompressed_length=textlen,
2873 2862 data_compression_mode=compression_mode,
2874 2863 data_delta_base=deltainfo.base,
2875 2864 link_rev=link,
2876 2865 parent_rev_1=p1r,
2877 2866 parent_rev_2=p2r,
2878 2867 node_id=node,
2879 2868 sidedata_offset=sidedata_offset,
2880 2869 sidedata_compressed_length=len(serialized_sidedata),
2881 2870 sidedata_compression_mode=sidedata_compression_mode,
2882 2871 rank=rank,
2883 2872 )
2884 2873
2885 2874 self.index.append(e)
2886 2875 entry = self.index.entry_binary(curr)
2887 2876 if curr == 0 and self._docket is None:
2888 2877 header = self._format_flags | self._format_version
2889 2878 header = self.index.pack_header(header)
2890 2879 entry = header + entry
2891 2880 self._writeentry(
2892 2881 transaction,
2893 2882 entry,
2894 2883 deltainfo.data,
2895 2884 link,
2896 2885 offset,
2897 2886 serialized_sidedata,
2898 2887 sidedata_offset,
2899 2888 )
2900 2889
2901 2890 rawtext = btext[0]
2902 2891
2903 2892 if alwayscache and rawtext is None:
2904 2893 rawtext = deltacomputer.buildtext(revinfo)
2905 2894
2906 2895 if type(rawtext) == bytes: # only accept immutable objects
2907 2896 self._revisioncache = (node, curr, rawtext)
2908 2897 self._chainbasecache[curr] = deltainfo.chainbase
2909 2898 return curr
2910 2899
2911 2900 def _get_data_offset(self, prev):
2912 2901 """Returns the current offset in the (in-transaction) data file.
2913 2902 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
2914 2903 file to store that information: since sidedata can be rewritten to the
2915 2904 end of the data file within a transaction, you can have cases where, for
2916 2905 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2917 2906 to `n - 1`'s sidedata being written after `n`'s data.
2918 2907
2919 2908 TODO cache this in a docket file before getting out of experimental."""
2920 2909 if self._docket is None:
2921 2910 return self.end(prev)
2922 2911 else:
2923 2912 return self._docket.data_end
2924 2913
2925 2914 def _writeentry(
2926 2915 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2927 2916 ):
2928 2917 # Files opened in a+ mode have inconsistent behavior on various
2929 2918 # platforms. Windows requires that a file positioning call be made
2930 2919 # when the file handle transitions between reads and writes. See
2931 2920 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2932 2921 # platforms, Python or the platform itself can be buggy. Some versions
2933 2922 # of Solaris have been observed to not append at the end of the file
2934 2923 # if the file was seeked to before the end. See issue4943 for more.
2935 2924 #
2936 2925 # We work around this issue by inserting a seek() before writing.
2937 2926 # Note: This is likely not necessary on Python 3. However, because
2938 2927 # the file handle is reused for reads and may be seeked there, we need
2939 2928 # to be careful before changing this.
2940 2929 if self._writinghandles is None:
2941 2930 msg = b'adding revision outside `revlog._writing` context'
2942 2931 raise error.ProgrammingError(msg)
2943 2932 ifh, dfh, sdfh = self._writinghandles
2944 2933 if self._docket is None:
2945 2934 ifh.seek(0, os.SEEK_END)
2946 2935 else:
2947 2936 ifh.seek(self._docket.index_end, os.SEEK_SET)
2948 2937 if dfh:
2949 2938 if self._docket is None:
2950 2939 dfh.seek(0, os.SEEK_END)
2951 2940 else:
2952 2941 dfh.seek(self._docket.data_end, os.SEEK_SET)
2953 2942 if sdfh:
2954 2943 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2955 2944
2956 2945 curr = len(self) - 1
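# Two write paths: for non-inline revlogs the entry goes to the index
# file while the delta data (and any sidedata) go to their own files;
# inline revlogs append the entry immediately followed by its data to
# the index file, and may be converted out of inline form right after
# if they grew past the size threshold.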
2957 2946 if not self._inline:
2958 2947 transaction.add(self._datafile, offset)
2959 2948 if self._sidedatafile:
2960 2949 transaction.add(self._sidedatafile, sidedata_offset)
2961 2950 transaction.add(self._indexfile, curr * len(entry))
2962 2951 if data[0]:
2963 2952 dfh.write(data[0])
2964 2953 dfh.write(data[1])
2965 2954 if sidedata:
2966 2955 sdfh.write(sidedata)
2967 2956 ifh.write(entry)
2968 2957 else:
2969 2958 offset += curr * self.index.entry_size
2970 2959 transaction.add(self._indexfile, offset)
2971 2960 ifh.write(entry)
2972 2961 ifh.write(data[0])
2973 2962 ifh.write(data[1])
2974 2963 assert not sidedata
2975 2964 self._enforceinlinesize(transaction)
2976 2965 if self._docket is not None:
2977 2966 # revlog-v2 always has 3 writing handles, help Pytype
2978 2967 wh1 = self._writinghandles[0]
2979 2968 wh2 = self._writinghandles[1]
2980 2969 wh3 = self._writinghandles[2]
2981 2970 assert wh1 is not None
2982 2971 assert wh2 is not None
2983 2972 assert wh3 is not None
2984 2973 self._docket.index_end = wh1.tell()
2985 2974 self._docket.data_end = wh2.tell()
2986 2975 self._docket.sidedata_end = wh3.tell()
2987 2976
2988 2977 nodemaputil.setup_persistent_nodemap(transaction, self)
2989 2978
2990 2979 def addgroup(
2991 2980 self,
2992 2981 deltas,
2993 2982 linkmapper,
2994 2983 transaction,
2995 2984 alwayscache=False,
2996 2985 addrevisioncb=None,
2997 2986 duplicaterevisioncb=None,
2998 2987 debug_info=None,
2999 2988 delta_base_reuse_policy=None,
3000 2989 ):
3001 2990 """
3002 2991 add a delta group
3003 2992
3004 2993 given a set of deltas, add them to the revision log. the
3005 2994 first delta is against its parent, which should be in our
3006 2995 log, the rest are against the previous delta.
3007 2996
3008 2997 If ``addrevisioncb`` is defined, it will be called with arguments of
3009 2998 this revlog and the node that was added.
3010 2999 """
3011 3000
3012 3001 if self._adding_group:
3013 3002 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3014 3003
3015 3004 # read the default delta-base reuse policy from revlog config if the
3016 3005 # group did not specify one.
3017 3006 if delta_base_reuse_policy is None:
3018 3007 if self._generaldelta and self._lazydeltabase:
3019 3008 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3020 3009 else:
3021 3010 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3022 3011
3023 3012 self._adding_group = True
3024 3013 empty = True
3025 3014 try:
3026 3015 with self._writing(transaction):
3027 3016 write_debug = None
3028 3017 if self._debug_delta:
3029 3018 write_debug = transaction._report
3030 3019 deltacomputer = deltautil.deltacomputer(
3031 3020 self,
3032 3021 write_debug=write_debug,
3033 3022 debug_info=debug_info,
3034 3023 )
3035 3024 # loop through our set of deltas
3036 3025 for data in deltas:
3037 3026 (
3038 3027 node,
3039 3028 p1,
3040 3029 p2,
3041 3030 linknode,
3042 3031 deltabase,
3043 3032 delta,
3044 3033 flags,
3045 3034 sidedata,
3046 3035 ) = data
3047 3036 link = linkmapper(linknode)
3048 3037 flags = flags or REVIDX_DEFAULT_FLAGS
3049 3038
3050 3039 rev = self.index.get_rev(node)
3051 3040 if rev is not None:
3052 3041 # this can happen if two branches make the same change
3053 3042 self._nodeduplicatecallback(transaction, rev)
3054 3043 if duplicaterevisioncb:
3055 3044 duplicaterevisioncb(self, rev)
3056 3045 empty = False
3057 3046 continue
3058 3047
3059 3048 for p in (p1, p2):
3060 3049 if not self.index.has_node(p):
3061 3050 raise error.LookupError(
3062 3051 p, self.radix, _(b'unknown parent')
3063 3052 )
3064 3053
3065 3054 if not self.index.has_node(deltabase):
3066 3055 raise error.LookupError(
3067 3056 deltabase, self.display_id, _(b'unknown delta base')
3068 3057 )
3069 3058
3070 3059 baserev = self.rev(deltabase)
3071 3060
3072 3061 if baserev != nullrev and self.iscensored(baserev):
3073 3062 # if base is censored, delta must be full replacement in a
3074 3063 # single patch operation
3075 3064 hlen = struct.calcsize(b">lll")
3076 3065 oldlen = self.rawsize(baserev)
3077 3066 newlen = len(delta) - hlen
3078 3067 if delta[:hlen] != mdiff.replacediffheader(
3079 3068 oldlen, newlen
3080 3069 ):
3081 3070 raise error.CensoredBaseError(
3082 3071 self.display_id, self.node(baserev)
3083 3072 )
3084 3073
3085 3074 if not flags and self._peek_iscensored(baserev, delta):
3086 3075 flags |= REVIDX_ISCENSORED
3087 3076
3088 3077 # We assume consumers of addrevisioncb will want to retrieve
3089 3078 # the added revision, which will require a call to
3090 3079 # revision(). revision() will fast path if there is a cache
3091 3080 # hit. So, we tell _addrevision() to always cache in this case.
3092 3081 # We're only using addgroup() in the context of changegroup
3093 3082 # generation so the revision data can always be handled as raw
3094 3083 # by the flagprocessor.
3095 3084 rev = self._addrevision(
3096 3085 node,
3097 3086 None,
3098 3087 transaction,
3099 3088 link,
3100 3089 p1,
3101 3090 p2,
3102 3091 flags,
3103 3092 (baserev, delta, delta_base_reuse_policy),
3104 3093 alwayscache=alwayscache,
3105 3094 deltacomputer=deltacomputer,
3106 3095 sidedata=sidedata,
3107 3096 )
3108 3097
3109 3098 if addrevisioncb:
3110 3099 addrevisioncb(self, rev)
3111 3100 empty = False
3112 3101 finally:
3113 3102 self._adding_group = False
3114 3103 return not empty
3115 3104
3116 3105 def iscensored(self, rev):
3117 3106 """Check if a file revision is censored."""
3118 3107 if not self._censorable:
3119 3108 return False
3120 3109
3121 3110 return self.flags(rev) & REVIDX_ISCENSORED
3122 3111
3123 3112 def _peek_iscensored(self, baserev, delta):
3124 3113 """Quickly check if a delta produces a censored revision."""
3125 3114 if not self._censorable:
3126 3115 return False
3127 3116
3128 3117 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3129 3118
3130 3119 def getstrippoint(self, minlink):
3131 3120 """find the minimum rev that must be stripped to strip the linkrev
3132 3121
3133 3122 Returns a tuple containing the minimum rev and a set of all revs that
3134 3123 have linkrevs that will be broken by this strip.
3135 3124 """
3136 3125 return storageutil.resolvestripinfo(
3137 3126 minlink,
3138 3127 len(self) - 1,
3139 3128 self.headrevs(),
3140 3129 self.linkrev,
3141 3130 self.parentrevs,
3142 3131 )
3143 3132
3144 3133 def strip(self, minlink, transaction):
3145 3134 """truncate the revlog on the first revision with a linkrev >= minlink
3146 3135
3147 3136 This function is called when we're stripping revision minlink and
3148 3137 its descendants from the repository.
3149 3138
3150 3139 We have to remove all revisions with linkrev >= minlink, because
3151 3140 the equivalent changelog revisions will be renumbered after the
3152 3141 strip.
3153 3142
3154 3143 So we truncate the revlog on the first of these revisions, and
3155 3144 trust that the caller has saved the revisions that shouldn't be
3156 3145 removed and that it'll re-add them after this truncation.
3157 3146 """
3158 3147 if len(self) == 0:
3159 3148 return
3160 3149
3161 3150 rev, _ = self.getstrippoint(minlink)
3162 3151 if rev == len(self):
3163 3152 return
3164 3153
3165 3154 # first truncate the files on disk
3166 3155 data_end = self.start(rev)
3167 3156 if not self._inline:
3168 3157 transaction.add(self._datafile, data_end)
3169 3158 end = rev * self.index.entry_size
3170 3159 else:
3171 3160 end = data_end + (rev * self.index.entry_size)
3172 3161
3173 3162 if self._sidedatafile:
3174 3163 sidedata_end = self.sidedata_cut_off(rev)
3175 3164 transaction.add(self._sidedatafile, sidedata_end)
3176 3165
3177 3166 transaction.add(self._indexfile, end)
3178 3167 if self._docket is not None:
3179 3168 # XXX we could leverage the docket while stripping. However it is
3180 3169 # not powerful enough at the time of this comment
3181 3170 self._docket.index_end = end
3182 3171 self._docket.data_end = data_end
3183 3172 self._docket.sidedata_end = sidedata_end
3184 3173 self._docket.write(transaction, stripping=True)
3185 3174
3186 3175 # then reset internal state in memory to forget those revisions
3187 3176 self._revisioncache = None
3188 3177 self._chaininfocache = util.lrucachedict(500)
3189 3178 self._segmentfile.clear_cache()
3190 3179 self._segmentfile_sidedata.clear_cache()
3191 3180
3192 3181 del self.index[rev:-1]
3193 3182
3194 3183 def checksize(self):
3195 3184 """Check size of index and data files
3196 3185
3197 3186 return a (dd, di) tuple.
3198 3187 - dd: extra bytes for the "data" file
3199 3188 - di: extra bytes for the "index" file
3200 3189
3201 3190 A healthy revlog will return (0, 0).
3202 3191 """
3203 3192 expected = 0
3204 3193 if len(self):
3205 3194 expected = max(0, self.end(len(self) - 1))
3206 3195
3207 3196 try:
3208 3197 with self._datafp() as f:
3209 3198 f.seek(0, io.SEEK_END)
3210 3199 actual = f.tell()
3211 3200 dd = actual - expected
3212 3201 except FileNotFoundError:
3213 3202 dd = 0
3214 3203
3215 3204 try:
3216 3205 f = self.opener(self._indexfile)
3217 3206 f.seek(0, io.SEEK_END)
3218 3207 actual = f.tell()
3219 3208 f.close()
3220 3209 s = self.index.entry_size
3221 3210 i = max(0, actual // s)
3222 3211 di = actual - (i * s)
3223 3212 if self._inline:
3224 3213 databytes = 0
3225 3214 for r in self:
3226 3215 databytes += max(0, self.length(r))
3227 3216 dd = 0
3228 3217 di = actual - len(self) * s - databytes
3229 3218 except FileNotFoundError:
3230 3219 di = 0
3231 3220
3232 3221 return (dd, di)
3233 3222
3234 3223 def files(self):
3235 3224 res = [self._indexfile]
3236 3225 if self._docket_file is None:
3237 3226 if not self._inline:
3238 3227 res.append(self._datafile)
3239 3228 else:
3240 3229 res.append(self._docket_file)
3241 3230 res.extend(self._docket.old_index_filepaths(include_empty=False))
3242 3231 if self._docket.data_end:
3243 3232 res.append(self._datafile)
3244 3233 res.extend(self._docket.old_data_filepaths(include_empty=False))
3245 3234 if self._docket.sidedata_end:
3246 3235 res.append(self._sidedatafile)
3247 3236 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3248 3237 return res
3249 3238
3250 3239 def emitrevisions(
3251 3240 self,
3252 3241 nodes,
3253 3242 nodesorder=None,
3254 3243 revisiondata=False,
3255 3244 assumehaveparentrevisions=False,
3256 3245 deltamode=repository.CG_DELTAMODE_STD,
3257 3246 sidedata_helpers=None,
3258 3247 debug_info=None,
3259 3248 ):
3260 3249 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3261 3250 raise error.ProgrammingError(
3262 3251 b'unhandled value for nodesorder: %s' % nodesorder
3263 3252 )
3264 3253
3265 3254 if nodesorder is None and not self._generaldelta:
3266 3255 nodesorder = b'storage'
3267 3256
3268 3257 if (
3269 3258 not self._storedeltachains
3270 3259 and deltamode != repository.CG_DELTAMODE_PREV
3271 3260 ):
3272 3261 deltamode = repository.CG_DELTAMODE_FULL
3273 3262
3274 3263 return storageutil.emitrevisions(
3275 3264 self,
3276 3265 nodes,
3277 3266 nodesorder,
3278 3267 revlogrevisiondelta,
3279 3268 deltaparentfn=self.deltaparent,
3280 3269 candeltafn=self._candelta,
3281 3270 rawsizefn=self.rawsize,
3282 3271 revdifffn=self.revdiff,
3283 3272 flagsfn=self.flags,
3284 3273 deltamode=deltamode,
3285 3274 revisiondata=revisiondata,
3286 3275 assumehaveparentrevisions=assumehaveparentrevisions,
3287 3276 sidedata_helpers=sidedata_helpers,
3288 3277 debug_info=debug_info,
3289 3278 )
3290 3279
3291 3280 DELTAREUSEALWAYS = b'always'
3292 3281 DELTAREUSESAMEREVS = b'samerevs'
3293 3282 DELTAREUSENEVER = b'never'
3294 3283
3295 3284 DELTAREUSEFULLADD = b'fulladd'
3296 3285
3297 3286 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3298 3287
3299 3288 def clone(
3300 3289 self,
3301 3290 tr,
3302 3291 destrevlog,
3303 3292 addrevisioncb=None,
3304 3293 deltareuse=DELTAREUSESAMEREVS,
3305 3294 forcedeltabothparents=None,
3306 3295 sidedata_helpers=None,
3307 3296 ):
3308 3297 """Copy this revlog to another, possibly with format changes.
3309 3298
3310 3299 The destination revlog will contain the same revisions and nodes.
3311 3300 However, it may not be bit-for-bit identical due to e.g. delta encoding
3312 3301 differences.
3313 3302
3314 3303 The ``deltareuse`` argument control how deltas from the existing revlog
3315 3304 are preserved in the destination revlog. The argument can have the
3316 3305 following values:
3317 3306
3318 3307 DELTAREUSEALWAYS
3319 3308 Deltas will always be reused (if possible), even if the destination
3320 3309 revlog would not select the same revisions for the delta. This is the
3321 3310 fastest mode of operation.
3322 3311 DELTAREUSESAMEREVS
3323 3312 Deltas will be reused if the destination revlog would pick the same
3324 3313 revisions for the delta. This mode strikes a balance between speed
3325 3314 and optimization.
3326 3315 DELTAREUSENEVER
3327 3316 Deltas will never be reused. This is the slowest mode of execution.
3328 3317 This mode can be used to recompute deltas (e.g. if the diff/delta
3329 3318 algorithm changes).
3330 3319 DELTAREUSEFULLADD
3331 3320 Revisions will be re-added as if they were new content. This is
3332 3321 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3333 3322 e.g. large file detection and handling.
3334 3323
3335 3324 Delta computation can be slow, so the choice of delta reuse policy can
3336 3325 significantly affect run time.
3337 3326
3338 3327 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3339 3328 two extremes. Deltas will be reused if they are appropriate. But if the
3340 3329 delta could choose a better revision, it will do so. This means if you
3341 3330 are converting a non-generaldelta revlog to a generaldelta revlog,
3342 3331 deltas will be recomputed if the delta's parent isn't a parent of the
3343 3332 revision.
3344 3333
3345 3334 In addition to the delta policy, the ``forcedeltabothparents``
3346 3335 argument controls whether to force computing deltas against both parents
3347 3336 for merges. If not specified, the destination revlog's current setting is used.
3348 3337
3349 3338 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3350 3339 `sidedata_helpers`.
3351 3340 """
3352 3341 if deltareuse not in self.DELTAREUSEALL:
3353 3342 raise ValueError(
3354 3343 _(b'value for deltareuse invalid: %s') % deltareuse
3355 3344 )
3356 3345
3357 3346 if len(destrevlog):
3358 3347 raise ValueError(_(b'destination revlog is not empty'))
3359 3348
3360 3349 if getattr(self, 'filteredrevs', None):
3361 3350 raise ValueError(_(b'source revlog has filtered revisions'))
3362 3351 if getattr(destrevlog, 'filteredrevs', None):
3363 3352 raise ValueError(_(b'destination revlog has filtered revisions'))
3364 3353
3365 3354 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3366 3355 # if possible.
3367 3356 old_delta_config = destrevlog.delta_config
3368 3357 destrevlog.delta_config = destrevlog.delta_config.copy()
3369 3358
3370 3359 try:
3371 3360 if deltareuse == self.DELTAREUSEALWAYS:
3372 3361 destrevlog.delta_config.lazy_delta_base = True
3373 3362 destrevlog.delta_config.lazy_delta = True
3374 3363 elif deltareuse == self.DELTAREUSESAMEREVS:
3375 3364 destrevlog.delta_config.lazy_delta_base = False
3376 3365 destrevlog.delta_config.lazy_delta = True
3377 3366 elif deltareuse == self.DELTAREUSENEVER:
3378 3367 destrevlog.delta_config.lazy_delta_base = False
3379 3368 destrevlog.delta_config.lazy_delta = False
3380 3369
3381 3370 delta_both_parents = (
3382 3371 forcedeltabothparents or old_delta_config.delta_both_parents
3383 3372 )
3384 3373 destrevlog.delta_config.delta_both_parents = delta_both_parents
3385 3374
3386 3375 with self.reading():
3387 3376 self._clone(
3388 3377 tr,
3389 3378 destrevlog,
3390 3379 addrevisioncb,
3391 3380 deltareuse,
3392 3381 forcedeltabothparents,
3393 3382 sidedata_helpers,
3394 3383 )
3395 3384
3396 3385 finally:
3397 3386 destrevlog.delta_config = old_delta_config
3398 3387
3399 3388 def _clone(
3400 3389 self,
3401 3390 tr,
3402 3391 destrevlog,
3403 3392 addrevisioncb,
3404 3393 deltareuse,
3405 3394 forcedeltabothparents,
3406 3395 sidedata_helpers,
3407 3396 ):
3408 3397 """perform the core duty of `revlog.clone` after parameter processing"""
3409 3398 write_debug = None
3410 3399 if self._debug_delta:
3411 3400 write_debug = tr._report
3412 3401 deltacomputer = deltautil.deltacomputer(
3413 3402 destrevlog,
3414 3403 write_debug=write_debug,
3415 3404 )
3416 3405 index = self.index
3417 3406 for rev in self:
3418 3407 entry = index[rev]
3419 3408
3420 3409 # Some classes override linkrev to take filtered revs into
3421 3410 # account. Use raw entry from index.
3422 3411 flags = entry[0] & 0xFFFF
3423 3412 linkrev = entry[4]
3424 3413 p1 = index[entry[5]][7]
3425 3414 p2 = index[entry[6]][7]
3426 3415 node = entry[7]
3427 3416
3428 3417 # (Possibly) reuse the delta from the revlog if allowed and
3429 3418 # the revlog chunk is a delta.
3430 3419 cachedelta = None
3431 3420 rawtext = None
3432 3421 if deltareuse == self.DELTAREUSEFULLADD:
3433 3422 text = self._revisiondata(rev)
3434 3423 sidedata = self.sidedata(rev)
3435 3424
3436 3425 if sidedata_helpers is not None:
3437 3426 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3438 3427 self, sidedata_helpers, sidedata, rev
3439 3428 )
3440 3429 flags = flags | new_flags[0] & ~new_flags[1]
3441 3430
3442 3431 destrevlog.addrevision(
3443 3432 text,
3444 3433 tr,
3445 3434 linkrev,
3446 3435 p1,
3447 3436 p2,
3448 3437 cachedelta=cachedelta,
3449 3438 node=node,
3450 3439 flags=flags,
3451 3440 deltacomputer=deltacomputer,
3452 3441 sidedata=sidedata,
3453 3442 )
3454 3443 else:
3455 3444 if destrevlog._lazydelta:
3456 3445 dp = self.deltaparent(rev)
3457 3446 if dp != nullrev:
3458 3447 cachedelta = (dp, bytes(self._chunk(rev)))
3459 3448
3460 3449 sidedata = None
3461 3450 if not cachedelta:
3462 3451 rawtext = self._revisiondata(rev)
3463 3452 sidedata = self.sidedata(rev)
3464 3453 if sidedata is None:
3465 3454 sidedata = self.sidedata(rev)
3466 3455
3467 3456 if sidedata_helpers is not None:
3468 3457 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3469 3458 self, sidedata_helpers, sidedata, rev
3470 3459 )
3471 3460 flags = flags | new_flags[0] & ~new_flags[1]
3472 3461
3473 3462 with destrevlog._writing(tr):
3474 3463 destrevlog._addrevision(
3475 3464 node,
3476 3465 rawtext,
3477 3466 tr,
3478 3467 linkrev,
3479 3468 p1,
3480 3469 p2,
3481 3470 flags,
3482 3471 cachedelta,
3483 3472 deltacomputer=deltacomputer,
3484 3473 sidedata=sidedata,
3485 3474 )
3486 3475
3487 3476 if addrevisioncb:
3488 3477 addrevisioncb(self, rev, node)
3489 3478
3490 3479 def censorrevision(self, tr, censornode, tombstone=b''):
3491 3480 if self._format_version == REVLOGV0:
3492 3481 raise error.RevlogError(
3493 3482 _(b'cannot censor with version %d revlogs')
3494 3483 % self._format_version
3495 3484 )
3496 3485 elif self._format_version == REVLOGV1:
3497 3486 rewrite.v1_censor(self, tr, censornode, tombstone)
3498 3487 else:
3499 3488 rewrite.v2_censor(self, tr, censornode, tombstone)
3500 3489
3501 3490 def verifyintegrity(self, state):
3502 3491 """Verifies the integrity of the revlog.
3503 3492
3504 3493 Yields ``revlogproblem`` instances describing problems that are
3505 3494 found.
3506 3495 """
3507 3496 dd, di = self.checksize()
3508 3497 if dd:
3509 3498 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3510 3499 if di:
3511 3500 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3512 3501
3513 3502 version = self._format_version
3514 3503
3515 3504 # The verifier tells us what version revlog we should be.
3516 3505 if version != state[b'expectedversion']:
3517 3506 yield revlogproblem(
3518 3507 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3519 3508 % (self.display_id, version, state[b'expectedversion'])
3520 3509 )
3521 3510
3522 3511 state[b'skipread'] = set()
3523 3512 state[b'safe_renamed'] = set()
3524 3513
3525 3514 for rev in self:
3526 3515 node = self.node(rev)
3527 3516
3528 3517 # Verify contents. 4 cases to care about:
3529 3518 #
3530 3519 # common: the most common case
3531 3520 # rename: with a rename
3532 3521 # meta: file content starts with b'\1\n', the metadata
3533 3522 # header defined in filelog.py, but without a rename
3534 3523 # ext: content stored externally
3535 3524 #
3536 3525 # More formally, their differences are shown below:
3537 3526 #
3538 3527 # | common | rename | meta | ext
3539 3528 # -------------------------------------------------------
3540 3529 # flags() | 0 | 0 | 0 | not 0
3541 3530 # renamed() | False | True | False | ?
3542 3531 # rawtext[0:2]=='\1\n'| False | True | True | ?
3543 3532 #
3544 3533 # "rawtext" means the raw text stored in revlog data, which
3545 3534 # could be retrieved by "rawdata(rev)". "text"
3546 3535 # mentioned below is "revision(rev)".
3547 3536 #
3548 3537 # There are 3 different lengths stored physically:
3549 3538 # 1. L1: rawsize, stored in revlog index
3550 3539 # 2. L2: len(rawtext), stored in revlog data
3551 3540 # 3. L3: len(text), stored in revlog data if flags==0, or
3552 3541 # possibly somewhere else if flags!=0
3553 3542 #
3554 3543 # L1 should be equal to L2. L3 could be different from them.
3555 3544 # "text" may or may not affect commit hash depending on flag
3556 3545 # processors (see flagutil.addflagprocessor).
3557 3546 #
3558 3547 # | common | rename | meta | ext
3559 3548 # -------------------------------------------------
3560 3549 # rawsize() | L1 | L1 | L1 | L1
3561 3550 # size() | L1 | L2-LM | L1(*) | L1 (?)
3562 3551 # len(rawtext) | L2 | L2 | L2 | L2
3563 3552 # len(text) | L2 | L2 | L2 | L3
3564 3553 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3565 3554 #
3566 3555 # LM: length of metadata, depending on rawtext
3567 3556 # (*): not ideal, see comment in filelog.size
3568 3557 # (?): could be "- len(meta)" if the resolved content has
3569 3558 # rename metadata
3570 3559 #
3571 3560 # Checks needed to be done:
3572 3561 # 1. length check: L1 == L2, in all cases.
3573 3562 # 2. hash check: depending on flag processor, we may need to
3574 3563 # use either "text" (external), or "rawtext" (in revlog).
3575 3564
3576 3565 try:
3577 3566 skipflags = state.get(b'skipflags', 0)
3578 3567 if skipflags:
3579 3568 skipflags &= self.flags(rev)
3580 3569
3581 3570 _verify_revision(self, skipflags, state, node)
3582 3571
3583 3572 l1 = self.rawsize(rev)
3584 3573 l2 = len(self.rawdata(node))
3585 3574
3586 3575 if l1 != l2:
3587 3576 yield revlogproblem(
3588 3577 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3589 3578 node=node,
3590 3579 )
3591 3580
3592 3581 except error.CensoredNodeError:
3593 3582 if state[b'erroroncensored']:
3594 3583 yield revlogproblem(
3595 3584 error=_(b'censored file data'), node=node
3596 3585 )
3597 3586 state[b'skipread'].add(node)
3598 3587 except Exception as e:
3599 3588 yield revlogproblem(
3600 3589 error=_(b'unpacking %s: %s')
3601 3590 % (short(node), stringutil.forcebytestr(e)),
3602 3591 node=node,
3603 3592 )
3604 3593 state[b'skipread'].add(node)
3605 3594
3606 3595 def storageinfo(
3607 3596 self,
3608 3597 exclusivefiles=False,
3609 3598 sharedfiles=False,
3610 3599 revisionscount=False,
3611 3600 trackedsize=False,
3612 3601 storedsize=False,
3613 3602 ):
3614 3603 d = {}
3615 3604
3616 3605 if exclusivefiles:
3617 3606 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3618 3607 if not self._inline:
3619 3608 d[b'exclusivefiles'].append((self.opener, self._datafile))
3620 3609
3621 3610 if sharedfiles:
3622 3611 d[b'sharedfiles'] = []
3623 3612
3624 3613 if revisionscount:
3625 3614 d[b'revisionscount'] = len(self)
3626 3615
3627 3616 if trackedsize:
3628 3617 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3629 3618
3630 3619 if storedsize:
3631 3620 d[b'storedsize'] = sum(
3632 3621 self.opener.stat(path).st_size for path in self.files()
3633 3622 )
3634 3623
3635 3624 return d
3636 3625
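A minimal usage sketch, assuming a loaded localrepo instance: repo.file() returning a filelog backed by this storage is standard Mercurial API, but the paths and the surrounding setup are made up for illustration.

total = 0
for path in (b'a.txt', b'dir/b.txt'):  # hypothetical tracked paths
    info = repo.file(path).storageinfo(trackedsize=True, storedsize=True)
    # 'trackedsize' sums rawsize over revisions; 'storedsize' stats the on-disk files
    total += info.get(b'storedsize', 0)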
3637 3626 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3638 3627 if not self.hassidedata:
3639 3628 return
3640 3629 # revlog formats with sidedata support do not support inline data
3641 3630 assert not self._inline
3642 3631 if not helpers[1] and not helpers[2]:
3643 3632 # Nothing to generate or remove
3644 3633 return
3645 3634
3646 3635 new_entries = []
3647 3636 # append the new sidedata
3648 3637 with self._writing(transaction):
3649 3638 ifh, dfh, sdfh = self._writinghandles
3650 3639 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3651 3640
3652 3641 current_offset = sdfh.tell()
3653 3642 for rev in range(startrev, endrev + 1):
3654 3643 entry = self.index[rev]
3655 3644 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3656 3645 store=self,
3657 3646 sidedata_helpers=helpers,
3658 3647 sidedata={},
3659 3648 rev=rev,
3660 3649 )
3661 3650
3662 3651 serialized_sidedata = sidedatautil.serialize_sidedata(
3663 3652 new_sidedata
3664 3653 )
3665 3654
3666 3655 sidedata_compression_mode = COMP_MODE_INLINE
3667 3656 if serialized_sidedata and self.hassidedata:
3668 3657 sidedata_compression_mode = COMP_MODE_PLAIN
3669 3658 h, comp_sidedata = self.compress(serialized_sidedata)
3670 3659 if (
3671 3660 h != b'u'
3672 3661 and comp_sidedata[0] != b'\0'
3673 3662 and len(comp_sidedata) < len(serialized_sidedata)
3674 3663 ):
3675 3664 assert not h
3676 3665 if (
3677 3666 comp_sidedata[0]
3678 3667 == self._docket.default_compression_header
3679 3668 ):
3680 3669 sidedata_compression_mode = COMP_MODE_DEFAULT
3681 3670 serialized_sidedata = comp_sidedata
3682 3671 else:
3683 3672 sidedata_compression_mode = COMP_MODE_INLINE
3684 3673 serialized_sidedata = comp_sidedata
3685 3674 if entry[8] != 0 or entry[9] != 0:
3686 3675 # rewriting entries that already have sidedata is not
3687 3676 # supported yet, because it introduces garbage data in the
3688 3677 # revlog.
3689 3678 msg = b"rewriting existing sidedata is not supported yet"
3690 3679 raise error.Abort(msg)
3691 3680
3692 3681 # Apply (potential) flags to add and to remove after running
3693 3682 # the sidedata helpers
3694 3683 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3695 3684 entry_update = (
3696 3685 current_offset,
3697 3686 len(serialized_sidedata),
3698 3687 new_offset_flags,
3699 3688 sidedata_compression_mode,
3700 3689 )
3701 3690
3702 3691 # the sidedata computation might have moved the file cursors around
3703 3692 sdfh.seek(current_offset, os.SEEK_SET)
3704 3693 sdfh.write(serialized_sidedata)
3705 3694 new_entries.append(entry_update)
3706 3695 current_offset += len(serialized_sidedata)
3707 3696 self._docket.sidedata_end = sdfh.tell()
3708 3697
3709 3698 # rewrite the new index entries
3710 3699 ifh.seek(startrev * self.index.entry_size)
3711 3700 for i, e in enumerate(new_entries):
3712 3701 rev = startrev + i
3713 3702 self.index.replace_sidedata_info(rev, *e)
3714 3703 packed = self.index.entry_binary(rev)
3715 3704 if rev == 0 and self._docket is None:
3716 3705 header = self._format_flags | self._format_version
3717 3706 header = self.index.pack_header(header)
3718 3707 packed = header + packed
3719 3708 ifh.write(packed)
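One detail above that is easy to misread is the flag update new_offset_flags = entry[0] | flags[0] & ~flags[1]: in Python, & binds tighter than |. A small self-contained sketch with made-up flag values, purely illustrative:

entry_offset_flags = 0b0100           # existing offset+flags word (made-up value)
flags_to_add, flags_to_remove = 0b0011, 0b0001
# evaluates as entry_offset_flags | (flags_to_add & ~flags_to_remove)
new_offset_flags = entry_offset_flags | flags_to_add & ~flags_to_remove
assert new_offset_flags == 0b0110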