revlog: create the revlog object at the repository level...
marmoute
r51924:133f5a54 default
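The substantive change in this diff sits in resolverevlogstorevfsoptions(): the repository now builds the revlog configuration objects while resolving the store-vfs opener options, rather than leaving that entirely to the revlog layer. A minimal sketch of the new option entries, assuming only what the three added lines show (the surrounding option resolution is unchanged, and how the objects are consumed later is not part of the hunk shown here):

    from mercurial import revlog

    options = {}
    # New in this changeset: ready-made config objects stored alongside the
    # existing flat option keys (b'revlogv1', b'generaldelta', ...).
    feature_config = options[b'feature-config'] = revlog.FeatureConfig()
    data_config = options[b'data-config'] = revlog.DataConfig()
    delta_config = options[b'delta-config'] = revlog.DeltaConfig()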
@@ -1,4043 +1,4047 b''
1 1 # localrepo.py - read/write repository class for mercurial
2 2 # coding: utf-8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9
10 10 import functools
11 11 import os
12 12 import random
13 13 import re
14 14 import sys
15 15 import time
16 16 import weakref
17 17
18 18 from concurrent import futures
19 19 from typing import (
20 20 Optional,
21 21 )
22 22
23 23 from .i18n import _
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullrev,
28 28 sha1nodeconstants,
29 29 short,
30 30 )
31 31 from . import (
32 32 bookmarks,
33 33 branchmap,
34 34 bundle2,
35 35 bundlecaches,
36 36 changegroup,
37 37 color,
38 38 commit,
39 39 context,
40 40 dirstate,
41 41 discovery,
42 42 encoding,
43 43 error,
44 44 exchange,
45 45 extensions,
46 46 filelog,
47 47 hook,
48 48 lock as lockmod,
49 49 match as matchmod,
50 50 mergestate as mergestatemod,
51 51 mergeutil,
52 52 namespaces,
53 53 narrowspec,
54 54 obsolete,
55 55 pathutil,
56 56 phases,
57 57 policy,
58 58 pushkey,
59 59 pycompat,
60 60 rcutil,
61 61 repoview,
62 62 requirements as requirementsmod,
63 63 revlog,
64 64 revset,
65 65 revsetlang,
66 66 scmutil,
67 67 sparse,
68 68 store as storemod,
69 69 subrepoutil,
70 70 tags as tagsmod,
71 71 transaction,
72 72 txnutil,
73 73 util,
74 74 vfs as vfsmod,
75 75 wireprototypes,
76 76 )
77 77
78 78 from .interfaces import (
79 79 repository,
80 80 util as interfaceutil,
81 81 )
82 82
83 83 from .utils import (
84 84 hashutil,
85 85 procutil,
86 86 stringutil,
87 87 urlutil,
88 88 )
89 89
90 90 from .revlogutils import (
91 91 concurrency_checker as revlogchecker,
92 92 constants as revlogconst,
93 93 sidedata as sidedatamod,
94 94 )
95 95
96 96 release = lockmod.release
97 97 urlerr = util.urlerr
98 98 urlreq = util.urlreq
99 99
100 100 RE_SKIP_DIRSTATE_ROLLBACK = re.compile(
101 101 b"^((dirstate|narrowspec.dirstate).*|branch$)"
102 102 )
103 103
104 104 # set of (path, vfs-location) tuples. vfs-location is:
105 105 # - 'plain' for vfs relative paths
106 106 # - '' for svfs relative paths
107 107 _cachedfiles = set()
108 108
109 109
110 110 class _basefilecache(scmutil.filecache):
111 111 """All filecache usage on repo are done for logic that should be unfiltered"""
112 112
113 113 def __get__(self, repo, type=None):
114 114 if repo is None:
115 115 return self
116 116 # proxy to unfiltered __dict__ since filtered repo has no entry
117 117 unfi = repo.unfiltered()
118 118 try:
119 119 return unfi.__dict__[self.sname]
120 120 except KeyError:
121 121 pass
122 122 return super(_basefilecache, self).__get__(unfi, type)
123 123
124 124 def set(self, repo, value):
125 125 return super(_basefilecache, self).set(repo.unfiltered(), value)
126 126
127 127
128 128 class repofilecache(_basefilecache):
129 129 """filecache for files in .hg but outside of .hg/store"""
130 130
131 131 def __init__(self, *paths):
132 132 super(repofilecache, self).__init__(*paths)
133 133 for path in paths:
134 134 _cachedfiles.add((path, b'plain'))
135 135
136 136 def join(self, obj, fname):
137 137 return obj.vfs.join(fname)
138 138
139 139
140 140 class storecache(_basefilecache):
141 141 """filecache for files in the store"""
142 142
143 143 def __init__(self, *paths):
144 144 super(storecache, self).__init__(*paths)
145 145 for path in paths:
146 146 _cachedfiles.add((path, b''))
147 147
148 148 def join(self, obj, fname):
149 149 return obj.sjoin(fname)
150 150
151 151
152 152 class changelogcache(storecache):
153 153 """filecache for the changelog"""
154 154
155 155 def __init__(self):
156 156 super(changelogcache, self).__init__()
157 157 _cachedfiles.add((b'00changelog.i', b''))
158 158 _cachedfiles.add((b'00changelog.n', b''))
159 159
160 160 def tracked_paths(self, obj):
161 161 paths = [self.join(obj, b'00changelog.i')]
162 162 if obj.store.opener.options.get(b'persistent-nodemap', False):
163 163 paths.append(self.join(obj, b'00changelog.n'))
164 164 return paths
165 165
166 166
167 167 class manifestlogcache(storecache):
168 168 """filecache for the manifestlog"""
169 169
170 170 def __init__(self):
171 171 super(manifestlogcache, self).__init__()
172 172 _cachedfiles.add((b'00manifest.i', b''))
173 173 _cachedfiles.add((b'00manifest.n', b''))
174 174
175 175 def tracked_paths(self, obj):
176 176 paths = [self.join(obj, b'00manifest.i')]
177 177 if obj.store.opener.options.get(b'persistent-nodemap', False):
178 178 paths.append(self.join(obj, b'00manifest.n'))
179 179 return paths
180 180
181 181
182 182 class mixedrepostorecache(_basefilecache):
183 183 """filecache for a mix files in .hg/store and outside"""
184 184
185 185 def __init__(self, *pathsandlocations):
186 186 # scmutil.filecache only uses the path for passing back into our
187 187 # join(), so we can safely pass a list of paths and locations
188 188 super(mixedrepostorecache, self).__init__(*pathsandlocations)
189 189 _cachedfiles.update(pathsandlocations)
190 190
191 191 def join(self, obj, fnameandlocation):
192 192 fname, location = fnameandlocation
193 193 if location == b'plain':
194 194 return obj.vfs.join(fname)
195 195 else:
196 196 if location != b'':
197 197 raise error.ProgrammingError(
198 198 b'unexpected location: %s' % location
199 199 )
200 200 return obj.sjoin(fname)
201 201
202 202
203 203 def isfilecached(repo, name):
204 204 """check if a repo has already cached "name" filecache-ed property
205 205
206 206 This returns (cachedobj-or-None, iscached) tuple.
207 207 """
208 208 cacheentry = repo.unfiltered()._filecache.get(name, None)
209 209 if not cacheentry:
210 210 return None, False
211 211 return cacheentry.obj, True
212 212
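# Usage sketch for isfilecached() (illustrative; the name is whatever
# attribute the filecache decorator was applied to, e.g. 'changelog'):
#
#   obj, cached = isfilecached(repo, 'changelog')
#   if cached:
#       ...  # reuse ``obj`` without forcing a reload from disk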
213 213
214 214 class unfilteredpropertycache(util.propertycache):
215 215 """propertycache that apply to unfiltered repo only"""
216 216
217 217 def __get__(self, repo, type=None):
218 218 unfi = repo.unfiltered()
219 219 if unfi is repo:
220 220 return super(unfilteredpropertycache, self).__get__(unfi)
221 221 return getattr(unfi, self.name)
222 222
223 223
224 224 class filteredpropertycache(util.propertycache):
225 225 """propertycache that must take filtering in account"""
226 226
227 227 def cachevalue(self, obj, value):
228 228 object.__setattr__(obj, self.name, value)
229 229
230 230
231 231 def hasunfilteredcache(repo, name):
232 232 """check if a repo has an unfilteredpropertycache value for <name>"""
233 233 return name in vars(repo.unfiltered())
234 234
235 235
236 236 def unfilteredmethod(orig):
237 237 """decorate method that always need to be run on unfiltered version"""
238 238
239 239 @functools.wraps(orig)
240 240 def wrapper(repo, *args, **kwargs):
241 241 return orig(repo.unfiltered(), *args, **kwargs)
242 242
243 243 return wrapper
244 244
245 245
246 246 moderncaps = {
247 247 b'lookup',
248 248 b'branchmap',
249 249 b'pushkey',
250 250 b'known',
251 251 b'getbundle',
252 252 b'unbundle',
253 253 }
254 254 legacycaps = moderncaps.union({b'changegroupsubset'})
255 255
256 256
257 257 @interfaceutil.implementer(repository.ipeercommandexecutor)
258 258 class localcommandexecutor:
259 259 def __init__(self, peer):
260 260 self._peer = peer
261 261 self._sent = False
262 262 self._closed = False
263 263
264 264 def __enter__(self):
265 265 return self
266 266
267 267 def __exit__(self, exctype, excvalue, exctb):
268 268 self.close()
269 269
270 270 def callcommand(self, command, args):
271 271 if self._sent:
272 272 raise error.ProgrammingError(
273 273 b'callcommand() cannot be used after sendcommands()'
274 274 )
275 275
276 276 if self._closed:
277 277 raise error.ProgrammingError(
278 278 b'callcommand() cannot be used after close()'
279 279 )
280 280
281 281 # We don't need to support anything fancy. Just call the named
282 282 # method on the peer and return a resolved future.
283 283 fn = getattr(self._peer, pycompat.sysstr(command))
284 284
285 285 f = futures.Future()
286 286
287 287 try:
288 288 result = fn(**pycompat.strkwargs(args))
289 289 except Exception:
290 290 pycompat.future_set_exception_info(f, sys.exc_info()[1:])
291 291 else:
292 292 f.set_result(result)
293 293
294 294 return f
295 295
296 296 def sendcommands(self):
297 297 self._sent = True
298 298
299 299 def close(self):
300 300 self._closed = True
301 301
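# Usage sketch for the executor above (illustrative): callers obtain it from
# a peer and resolve command futures, e.g.
#
#   with peer.commandexecutor() as e:
#       node = e.callcommand(b'lookup', {b'key': b'tip'}).result()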
302 302
303 303 @interfaceutil.implementer(repository.ipeercommands)
304 304 class localpeer(repository.peer):
305 305 '''peer for a local repo; reflects only the most recent API'''
306 306
307 307 def __init__(self, repo, caps=None, path=None, remotehidden=False):
308 308 super(localpeer, self).__init__(
309 309 repo.ui, path=path, remotehidden=remotehidden
310 310 )
311 311
312 312 if caps is None:
313 313 caps = moderncaps.copy()
314 314 if remotehidden:
315 315 self._repo = repo.filtered(b'served.hidden')
316 316 else:
317 317 self._repo = repo.filtered(b'served')
318 318 if repo._wanted_sidedata:
319 319 formatted = bundle2.format_remote_wanted_sidedata(repo)
320 320 caps.add(b'exp-wanted-sidedata=' + formatted)
321 321
322 322 self._caps = repo._restrictcapabilities(caps)
323 323
324 324 # Begin of _basepeer interface.
325 325
326 326 def url(self):
327 327 return self._repo.url()
328 328
329 329 def local(self):
330 330 return self._repo
331 331
332 332 def canpush(self):
333 333 return True
334 334
335 335 def close(self):
336 336 self._repo.close()
337 337
338 338 # End of _basepeer interface.
339 339
340 340 # Begin of _basewirecommands interface.
341 341
342 342 def branchmap(self):
343 343 return self._repo.branchmap()
344 344
345 345 def capabilities(self):
346 346 return self._caps
347 347
348 348 def get_cached_bundle_inline(self, path):
349 349 # not needed with local peer
350 350 raise NotImplementedError
351 351
352 352 def clonebundles(self):
353 353 return bundlecaches.get_manifest(self._repo)
354 354
355 355 def debugwireargs(self, one, two, three=None, four=None, five=None):
356 356 """Used to test argument passing over the wire"""
357 357 return b"%s %s %s %s %s" % (
358 358 one,
359 359 two,
360 360 pycompat.bytestr(three),
361 361 pycompat.bytestr(four),
362 362 pycompat.bytestr(five),
363 363 )
364 364
365 365 def getbundle(
366 366 self,
367 367 source,
368 368 heads=None,
369 369 common=None,
370 370 bundlecaps=None,
371 371 remote_sidedata=None,
372 372 **kwargs
373 373 ):
374 374 chunks = exchange.getbundlechunks(
375 375 self._repo,
376 376 source,
377 377 heads=heads,
378 378 common=common,
379 379 bundlecaps=bundlecaps,
380 380 remote_sidedata=remote_sidedata,
381 381 **kwargs
382 382 )[1]
383 383 cb = util.chunkbuffer(chunks)
384 384
385 385 if exchange.bundle2requested(bundlecaps):
386 386 # When requesting a bundle2, getbundle returns a stream to make the
387 387 # wire level function happier. We need to build a proper object
388 388 # from it in local peer.
389 389 return bundle2.getunbundler(self.ui, cb)
390 390 else:
391 391 return changegroup.getunbundler(b'01', cb, None)
392 392
393 393 def heads(self):
394 394 return self._repo.heads()
395 395
396 396 def known(self, nodes):
397 397 return self._repo.known(nodes)
398 398
399 399 def listkeys(self, namespace):
400 400 return self._repo.listkeys(namespace)
401 401
402 402 def lookup(self, key):
403 403 return self._repo.lookup(key)
404 404
405 405 def pushkey(self, namespace, key, old, new):
406 406 return self._repo.pushkey(namespace, key, old, new)
407 407
408 408 def stream_out(self):
409 409 raise error.Abort(_(b'cannot perform stream clone against local peer'))
410 410
411 411 def unbundle(self, bundle, heads, url):
412 412 """apply a bundle on a repo
413 413
414 414 This function handles the repo locking itself."""
415 415 try:
416 416 try:
417 417 bundle = exchange.readbundle(self.ui, bundle, None)
418 418 ret = exchange.unbundle(self._repo, bundle, heads, b'push', url)
419 419 if hasattr(ret, 'getchunks'):
420 420 # This is a bundle20 object, turn it into an unbundler.
421 421 # This little dance should be dropped eventually when the
422 422 # API is finally improved.
423 423 stream = util.chunkbuffer(ret.getchunks())
424 424 ret = bundle2.getunbundler(self.ui, stream)
425 425 return ret
426 426 except Exception as exc:
427 427 # If the exception contains output salvaged from a bundle2
428 428 # reply, we need to make sure it is printed before continuing
429 429 # to fail. So we build a bundle2 with such output and consume
430 430 # it directly.
431 431 #
432 432 # This is not very elegant but allows a "simple" solution for
433 433 # issue4594
434 434 output = getattr(exc, '_bundle2salvagedoutput', ())
435 435 if output:
436 436 bundler = bundle2.bundle20(self._repo.ui)
437 437 for out in output:
438 438 bundler.addpart(out)
439 439 stream = util.chunkbuffer(bundler.getchunks())
440 440 b = bundle2.getunbundler(self.ui, stream)
441 441 bundle2.processbundle(self._repo, b)
442 442 raise
443 443 except error.PushRaced as exc:
444 444 raise error.ResponseError(
445 445 _(b'push failed:'), stringutil.forcebytestr(exc)
446 446 )
447 447
448 448 # End of _basewirecommands interface.
449 449
450 450 # Begin of peer interface.
451 451
452 452 def commandexecutor(self):
453 453 return localcommandexecutor(self)
454 454
455 455 # End of peer interface.
456 456
457 457
458 458 @interfaceutil.implementer(repository.ipeerlegacycommands)
459 459 class locallegacypeer(localpeer):
460 460 """peer extension which implements legacy methods too; used for tests with
461 461 restricted capabilities"""
462 462
463 463 def __init__(self, repo, path=None, remotehidden=False):
464 464 super(locallegacypeer, self).__init__(
465 465 repo, caps=legacycaps, path=path, remotehidden=remotehidden
466 466 )
467 467
468 468 # Begin of baselegacywirecommands interface.
469 469
470 470 def between(self, pairs):
471 471 return self._repo.between(pairs)
472 472
473 473 def branches(self, nodes):
474 474 return self._repo.branches(nodes)
475 475
476 476 def changegroup(self, nodes, source):
477 477 outgoing = discovery.outgoing(
478 478 self._repo, missingroots=nodes, ancestorsof=self._repo.heads()
479 479 )
480 480 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
481 481
482 482 def changegroupsubset(self, bases, heads, source):
483 483 outgoing = discovery.outgoing(
484 484 self._repo, missingroots=bases, ancestorsof=heads
485 485 )
486 486 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
487 487
488 488 # End of baselegacywirecommands interface.
489 489
490 490
491 491 # Functions receiving (ui, features) that extensions can register to impact
492 492 # the ability to load repositories with custom requirements. Only
493 493 # functions defined in loaded extensions are called.
494 494 #
495 495 # The function receives a set of requirement strings that the repository
496 496 # is capable of opening. Functions will typically add elements to the
497 497 # set to reflect that the extension knows how to handle those requirements.
498 498 featuresetupfuncs = set()
499 499
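# Registration sketch (illustrative; ``exp-myfeature`` is a made-up
# requirement name): an extension adds its function from uisetup() so that
# gathersupportedrequirements() below will call it.
#
#   def featuresetup(ui, features):
#       features.add(b'exp-myfeature')
#
#   def uisetup(ui):
#       localrepo.featuresetupfuncs.add(featuresetup)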
500 500
501 501 def _getsharedvfs(hgvfs, requirements):
502 502 """returns the vfs object pointing to root of shared source
503 503 repo for a shared repository
504 504
505 505 hgvfs is vfs pointing at .hg/ of current repo (shared one)
506 506 requirements is a set of requirements of current repo (shared one)
507 507 """
508 508 # The ``shared`` or ``relshared`` requirements indicate the
509 509 # store lives in the path contained in the ``.hg/sharedpath`` file.
510 510 # This is an absolute path for ``shared`` and relative to
511 511 # ``.hg/`` for ``relshared``.
512 512 sharedpath = hgvfs.read(b'sharedpath').rstrip(b'\n')
513 513 if requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements:
514 514 sharedpath = util.normpath(hgvfs.join(sharedpath))
515 515
516 516 sharedvfs = vfsmod.vfs(sharedpath, realpath=True)
517 517
518 518 if not sharedvfs.exists():
519 519 raise error.RepoError(
520 520 _(b'.hg/sharedpath points to nonexistent directory %s')
521 521 % sharedvfs.base
522 522 )
523 523 return sharedvfs
524 524
525 525
526 526 def _readrequires(vfs, allowmissing):
527 527 """reads the require file present at root of this vfs
528 528 and return a set of requirements
529 529
530 530 If allowmissing is True, we suppress FileNotFoundError if raised"""
531 531 # requires file contains a newline-delimited list of
532 532 # features/capabilities the opener (us) must have in order to use
533 533 # the repository. This file was introduced in Mercurial 0.9.2,
534 534 # which means very old repositories may not have one. We assume
535 535 # a missing file translates to no requirements.
536 536 read = vfs.tryread if allowmissing else vfs.read
537 537 return set(read(b'requires').splitlines())
538 538
539 539
540 540 def makelocalrepository(baseui, path: bytes, intents=None):
541 541 """Create a local repository object.
542 542
543 543 Given arguments needed to construct a local repository, this function
544 544 performs various early repository loading functionality (such as
545 545 reading the ``.hg/requires`` and ``.hg/hgrc`` files), validates that
546 546 the repository can be opened, derives a type suitable for representing
547 547 that repository, and returns an instance of it.
548 548
549 549 The returned object conforms to the ``repository.completelocalrepository``
550 550 interface.
551 551
552 552 The repository type is derived by calling a series of factory functions
553 553 for each aspect/interface of the final repository. These are defined by
554 554 ``REPO_INTERFACES``.
555 555
556 556 Each factory function is called to produce a type implementing a specific
557 557 interface. The cumulative list of returned types will be combined into a
558 558 new type and that type will be instantiated to represent the local
559 559 repository.
560 560
561 561 The factory functions each receive various state that may be consulted
562 562 as part of deriving a type.
563 563
564 564 Extensions should wrap these factory functions to customize repository type
565 565 creation. Note that an extension's wrapped function may be called even if
566 566 that extension is not loaded for the repo being constructed. Extensions
567 567 should check if their ``__name__`` appears in the
568 568 ``extensionmodulenames`` set passed to the factory function and no-op if
569 569 not.
570 570 """
571 571 ui = baseui.copy()
572 572 # Prevent copying repo configuration.
573 573 ui.copy = baseui.copy
574 574
575 575 # Working directory VFS rooted at repository root.
576 576 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
577 577
578 578 # Main VFS for .hg/ directory.
579 579 hgpath = wdirvfs.join(b'.hg')
580 580 hgvfs = vfsmod.vfs(hgpath, cacheaudited=True)
581 581 # Whether this repository is a shared one or not
582 582 shared = False
583 583 # If this repository is shared, vfs pointing to shared repo
584 584 sharedvfs = None
585 585
586 586 # The .hg/ path should exist and should be a directory. All other
587 587 # cases are errors.
588 588 if not hgvfs.isdir():
589 589 try:
590 590 hgvfs.stat()
591 591 except FileNotFoundError:
592 592 pass
593 593 except ValueError as e:
594 594 # Can be raised on Python 3.8 when path is invalid.
595 595 raise error.Abort(
596 596 _(b'invalid path %s: %s') % (path, stringutil.forcebytestr(e))
597 597 )
598 598
599 599 raise error.RepoError(_(b'repository %s not found') % path)
600 600
601 601 requirements = _readrequires(hgvfs, True)
602 602 shared = (
603 603 requirementsmod.SHARED_REQUIREMENT in requirements
604 604 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
605 605 )
606 606 storevfs = None
607 607 if shared:
608 608 # This is a shared repo
609 609 sharedvfs = _getsharedvfs(hgvfs, requirements)
610 610 storevfs = vfsmod.vfs(sharedvfs.join(b'store'))
611 611 else:
612 612 storevfs = vfsmod.vfs(hgvfs.join(b'store'))
613 613
614 614 # if .hg/requires contains the sharesafe requirement, it means
615 615 # there exists a `.hg/store/requires` too and we should read it
616 616 # NOTE: presence of SHARESAFE_REQUIREMENT implies that the store requirement
617 617 # is present. We never write SHARESAFE_REQUIREMENT for a repo if the store
618 618 # is not present; refer to checkrequirementscompat() for that
619 619 #
620 620 # However, if SHARESAFE_REQUIREMENT is not present, it means that the
621 621 # repository was shared the old way. We check the share source .hg/requires
622 622 # for SHARESAFE_REQUIREMENT to detect whether the current repository needs
623 623 # to be reshared
624 624 hint = _(b"see `hg help config.format.use-share-safe` for more information")
625 625 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
626 626 if (
627 627 shared
628 628 and requirementsmod.SHARESAFE_REQUIREMENT
629 629 not in _readrequires(sharedvfs, True)
630 630 ):
631 631 mismatch_warn = ui.configbool(
632 632 b'share', b'safe-mismatch.source-not-safe.warn'
633 633 )
634 634 mismatch_config = ui.config(
635 635 b'share', b'safe-mismatch.source-not-safe'
636 636 )
637 637 mismatch_verbose_upgrade = ui.configbool(
638 638 b'share', b'safe-mismatch.source-not-safe:verbose-upgrade'
639 639 )
640 640 if mismatch_config in (
641 641 b'downgrade-allow',
642 642 b'allow',
643 643 b'downgrade-abort',
644 644 ):
645 645 # prevent cyclic import localrepo -> upgrade -> localrepo
646 646 from . import upgrade
647 647
648 648 upgrade.downgrade_share_to_non_safe(
649 649 ui,
650 650 hgvfs,
651 651 sharedvfs,
652 652 requirements,
653 653 mismatch_config,
654 654 mismatch_warn,
655 655 mismatch_verbose_upgrade,
656 656 )
657 657 elif mismatch_config == b'abort':
658 658 raise error.Abort(
659 659 _(b"share source does not support share-safe requirement"),
660 660 hint=hint,
661 661 )
662 662 else:
663 663 raise error.Abort(
664 664 _(
665 665 b"share-safe mismatch with source.\nUnrecognized"
666 666 b" value '%s' of `share.safe-mismatch.source-not-safe`"
667 667 b" set."
668 668 )
669 669 % mismatch_config,
670 670 hint=hint,
671 671 )
672 672 else:
673 673 requirements |= _readrequires(storevfs, False)
674 674 elif shared:
675 675 sourcerequires = _readrequires(sharedvfs, False)
676 676 if requirementsmod.SHARESAFE_REQUIREMENT in sourcerequires:
677 677 mismatch_config = ui.config(b'share', b'safe-mismatch.source-safe')
678 678 mismatch_warn = ui.configbool(
679 679 b'share', b'safe-mismatch.source-safe.warn'
680 680 )
681 681 mismatch_verbose_upgrade = ui.configbool(
682 682 b'share', b'safe-mismatch.source-safe:verbose-upgrade'
683 683 )
684 684 if mismatch_config in (
685 685 b'upgrade-allow',
686 686 b'allow',
687 687 b'upgrade-abort',
688 688 ):
689 689 # prevent cyclic import localrepo -> upgrade -> localrepo
690 690 from . import upgrade
691 691
692 692 upgrade.upgrade_share_to_safe(
693 693 ui,
694 694 hgvfs,
695 695 storevfs,
696 696 requirements,
697 697 mismatch_config,
698 698 mismatch_warn,
699 699 mismatch_verbose_upgrade,
700 700 )
701 701 elif mismatch_config == b'abort':
702 702 raise error.Abort(
703 703 _(
704 704 b'version mismatch: source uses share-safe'
705 705 b' functionality while the current share does not'
706 706 ),
707 707 hint=hint,
708 708 )
709 709 else:
710 710 raise error.Abort(
711 711 _(
712 712 b"share-safe mismatch with source.\nUnrecognized"
713 713 b" value '%s' of `share.safe-mismatch.source-safe` set."
714 714 )
715 715 % mismatch_config,
716 716 hint=hint,
717 717 )
718 718
719 719 # The .hg/hgrc file may load extensions or contain config options
720 720 # that influence repository construction. Attempt to load it and
721 721 # process any new extensions that it may have pulled in.
722 722 if loadhgrc(ui, wdirvfs, hgvfs, requirements, sharedvfs):
723 723 afterhgrcload(ui, wdirvfs, hgvfs, requirements)
724 724 extensions.loadall(ui)
725 725 extensions.populateui(ui)
726 726
727 727 # Set of module names of extensions loaded for this repository.
728 728 extensionmodulenames = {m.__name__ for n, m in extensions.extensions(ui)}
729 729
730 730 supportedrequirements = gathersupportedrequirements(ui)
731 731
732 732 # We first validate the requirements are known.
733 733 ensurerequirementsrecognized(requirements, supportedrequirements)
734 734
735 735 # Then we validate that the known set is reasonable to use together.
736 736 ensurerequirementscompatible(ui, requirements)
737 737
738 738 # TODO there are unhandled edge cases related to opening repositories with
739 739 # shared storage. If storage is shared, we should also test for requirements
740 740 # compatibility in the pointed-to repo. This entails loading the .hg/hgrc in
741 741 # that repo, as that repo may load extensions needed to open it. This is a
742 742 # bit complicated because we don't want the other hgrc to overwrite settings
743 743 # in this hgrc.
744 744 #
745 745 # This bug is somewhat mitigated by the fact that we copy the .hg/requires
746 746 # file when sharing repos. But if a requirement is added after the share is
747 747 # performed, thereby introducing a new requirement for the opener, we may
748 748 # not see that and could encounter a run-time error interacting with
749 749 # that shared store since it has an unknown-to-us requirement.
750 750
751 751 # At this point, we know we should be capable of opening the repository.
752 752 # Now get on with doing that.
753 753
754 754 features = set()
755 755
756 756 # The "store" part of the repository holds versioned data. How it is
757 757 # accessed is determined by various requirements. If `shared` or
758 758 # `relshared` requirements are present, this indicates the current repository
759 759 # is a share and the store exists in the path mentioned in `.hg/sharedpath`
760 760 if shared:
761 761 storebasepath = sharedvfs.base
762 762 cachepath = sharedvfs.join(b'cache')
763 763 features.add(repository.REPO_FEATURE_SHARED_STORAGE)
764 764 else:
765 765 storebasepath = hgvfs.base
766 766 cachepath = hgvfs.join(b'cache')
767 767 wcachepath = hgvfs.join(b'wcache')
768 768
769 769 # The store has changed over time and the exact layout is dictated by
770 770 # requirements. The store interface abstracts differences across all
771 771 # of them.
772 772 store = makestore(
773 773 requirements,
774 774 storebasepath,
775 775 lambda base: vfsmod.vfs(base, cacheaudited=True),
776 776 )
777 777 hgvfs.createmode = store.createmode
778 778
779 779 storevfs = store.vfs
780 780 storevfs.options = resolvestorevfsoptions(ui, requirements, features)
781 781
782 782 if (
783 783 requirementsmod.REVLOGV2_REQUIREMENT in requirements
784 784 or requirementsmod.CHANGELOGV2_REQUIREMENT in requirements
785 785 ):
786 786 features.add(repository.REPO_FEATURE_SIDE_DATA)
787 787 # the revlogv2 docket introduced race condition that we need to fix
788 788 features.discard(repository.REPO_FEATURE_STREAM_CLONE)
789 789
790 790 # The cache vfs is used to manage cache files.
791 791 cachevfs = vfsmod.vfs(cachepath, cacheaudited=True)
792 792 cachevfs.createmode = store.createmode
793 793 # The cache vfs is used to manage cache files related to the working copy
794 794 wcachevfs = vfsmod.vfs(wcachepath, cacheaudited=True)
795 795 wcachevfs.createmode = store.createmode
796 796
797 797 # Now resolve the type for the repository object. We do this by repeatedly
798 798 # calling a factory function to produce types for specific aspects of the
799 799 # repo's operation. The aggregate returned types are used as base classes
800 800 # for a dynamically-derived type, which will represent our new repository.
801 801
802 802 bases = []
803 803 extrastate = {}
804 804
805 805 for iface, fn in REPO_INTERFACES:
806 806 # We pass all potentially useful state to give extensions tons of
807 807 # flexibility.
808 808 typ = fn()(
809 809 ui=ui,
810 810 intents=intents,
811 811 requirements=requirements,
812 812 features=features,
813 813 wdirvfs=wdirvfs,
814 814 hgvfs=hgvfs,
815 815 store=store,
816 816 storevfs=storevfs,
817 817 storeoptions=storevfs.options,
818 818 cachevfs=cachevfs,
819 819 wcachevfs=wcachevfs,
820 820 extensionmodulenames=extensionmodulenames,
821 821 extrastate=extrastate,
822 822 baseclasses=bases,
823 823 )
824 824
825 825 if not isinstance(typ, type):
826 826 raise error.ProgrammingError(
827 827 b'unable to construct type for %s' % iface
828 828 )
829 829
830 830 bases.append(typ)
831 831
832 832 # type() allows you to use characters in type names that wouldn't be
833 833 # recognized as Python symbols in source code. We abuse that to add
834 834 # rich information about our constructed repo.
835 835 name = pycompat.sysstr(
836 836 b'derivedrepo:%s<%s>' % (wdirvfs.base, b','.join(sorted(requirements)))
837 837 )
838 838
839 839 cls = type(name, tuple(bases), {})
840 840
841 841 return cls(
842 842 baseui=baseui,
843 843 ui=ui,
844 844 origroot=path,
845 845 wdirvfs=wdirvfs,
846 846 hgvfs=hgvfs,
847 847 requirements=requirements,
848 848 supportedrequirements=supportedrequirements,
849 849 sharedpath=storebasepath,
850 850 store=store,
851 851 cachevfs=cachevfs,
852 852 wcachevfs=wcachevfs,
853 853 features=features,
854 854 intents=intents,
855 855 )
856 856
857 857
858 858 def loadhgrc(
859 859 ui,
860 860 wdirvfs: vfsmod.vfs,
861 861 hgvfs: vfsmod.vfs,
862 862 requirements,
863 863 sharedvfs: Optional[vfsmod.vfs] = None,
864 864 ):
865 865 """Load hgrc files/content into a ui instance.
866 866
867 867 This is called during repository opening to load any additional
868 868 config files or settings relevant to the current repository.
869 869
870 870 Returns a bool indicating whether any additional configs were loaded.
871 871
872 872 Extensions should monkeypatch this function to modify how per-repo
873 873 configs are loaded. For example, an extension may wish to pull in
874 874 configs from alternate files or sources.
875 875
876 876 sharedvfs is a vfs object pointing to the source repo if the current one is a
877 877 shared one
878 878 """
879 879 if not rcutil.use_repo_hgrc():
880 880 return False
881 881
882 882 ret = False
883 883 # first load config from the shared source if we have to
884 884 if requirementsmod.SHARESAFE_REQUIREMENT in requirements and sharedvfs:
885 885 try:
886 886 ui.readconfig(sharedvfs.join(b'hgrc'), root=sharedvfs.base)
887 887 ret = True
888 888 except IOError:
889 889 pass
890 890
891 891 try:
892 892 ui.readconfig(hgvfs.join(b'hgrc'), root=wdirvfs.base)
893 893 ret = True
894 894 except IOError:
895 895 pass
896 896
897 897 try:
898 898 ui.readconfig(hgvfs.join(b'hgrc-not-shared'), root=wdirvfs.base)
899 899 ret = True
900 900 except IOError:
901 901 pass
902 902
903 903 return ret
904 904
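# Monkeypatching sketch for the docstring above (illustrative; the extra
# config file name is hypothetical):
#
#   def _loadhgrc(orig, ui, wdirvfs, hgvfs, requirements, *args, **kwargs):
#       loaded = orig(ui, wdirvfs, hgvfs, requirements, *args, **kwargs)
#       try:
#           ui.readconfig(hgvfs.join(b'hgrc-extra'), root=wdirvfs.base)
#           loaded = True
#       except IOError:
#           pass
#       return loaded
#
#   extensions.wrapfunction(localrepo, 'loadhgrc', _loadhgrc)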
905 905
906 906 def afterhgrcload(ui, wdirvfs, hgvfs, requirements):
907 907 """Perform additional actions after .hg/hgrc is loaded.
908 908
909 909 This function is called during repository loading immediately after
910 910 the .hg/hgrc file is loaded and before per-repo extensions are loaded.
911 911
912 912 The function can be used to validate configs, automatically add
913 913 options (including extensions) based on requirements, etc.
914 914 """
915 915
916 916 # Map of requirements to list of extensions to load automatically when
917 917 # requirement is present.
918 918 autoextensions = {
919 919 b'git': [b'git'],
920 920 b'largefiles': [b'largefiles'],
921 921 b'lfs': [b'lfs'],
922 922 }
923 923
924 924 for requirement, names in sorted(autoextensions.items()):
925 925 if requirement not in requirements:
926 926 continue
927 927
928 928 for name in names:
929 929 if not ui.hasconfig(b'extensions', name):
930 930 ui.setconfig(b'extensions', name, b'', source=b'autoload')
931 931
932 932
933 933 def gathersupportedrequirements(ui):
934 934 """Determine the complete set of recognized requirements."""
935 935 # Start with all requirements supported by this file.
936 936 supported = set(localrepository._basesupported)
937 937
938 938 # Execute ``featuresetupfuncs`` entries if they belong to an extension
939 939 # relevant to this ui instance.
940 940 modules = {m.__name__ for n, m in extensions.extensions(ui)}
941 941
942 942 for fn in featuresetupfuncs:
943 943 if fn.__module__ in modules:
944 944 fn(ui, supported)
945 945
946 946 # Add derived requirements from registered compression engines.
947 947 for name in util.compengines:
948 948 engine = util.compengines[name]
949 949 if engine.available() and engine.revlogheader():
950 950 supported.add(b'exp-compression-%s' % name)
951 951 if engine.name() == b'zstd':
952 952 supported.add(requirementsmod.REVLOG_COMPRESSION_ZSTD)
953 953
954 954 return supported
955 955
956 956
957 957 def ensurerequirementsrecognized(requirements, supported):
958 958 """Validate that a set of local requirements is recognized.
959 959
960 960 Receives a set of requirements. Raises an ``error.RepoError`` if there
961 961 exists any requirement in that set that currently loaded code doesn't
962 962 recognize.
963 963
964 964 Returns nothing on success.
965 965 """
966 966 missing = set()
967 967
968 968 for requirement in requirements:
969 969 if requirement in supported:
970 970 continue
971 971
972 972 if not requirement or not requirement[0:1].isalnum():
973 973 raise error.RequirementError(_(b'.hg/requires file is corrupt'))
974 974
975 975 missing.add(requirement)
976 976
977 977 if missing:
978 978 raise error.RequirementError(
979 979 _(b'repository requires features unknown to this Mercurial: %s')
980 980 % b' '.join(sorted(missing)),
981 981 hint=_(
982 982 b'see https://mercurial-scm.org/wiki/MissingRequirement '
983 983 b'for more information'
984 984 ),
985 985 )
986 986
987 987
988 988 def ensurerequirementscompatible(ui, requirements):
989 989 """Validates that a set of recognized requirements is mutually compatible.
990 990
991 991 Some requirements may not be compatible with others or require
992 992 config options that aren't enabled. This function is called during
993 993 repository opening to ensure that the set of requirements needed
994 994 to open a repository is sane and compatible with config options.
995 995
996 996 Extensions can monkeypatch this function to perform additional
997 997 checking.
998 998
999 999 ``error.RepoError`` should be raised on failure.
1000 1000 """
1001 1001 if (
1002 1002 requirementsmod.SPARSE_REQUIREMENT in requirements
1003 1003 and not sparse.enabled
1004 1004 ):
1005 1005 raise error.RepoError(
1006 1006 _(
1007 1007 b'repository is using sparse feature but '
1008 1008 b'sparse is not enabled; enable the '
1009 1009 b'"sparse" extensions to access'
1010 1010 )
1011 1011 )
1012 1012
1013 1013
1014 1014 def makestore(requirements, path, vfstype):
1015 1015 """Construct a storage object for a repository."""
1016 1016 if requirementsmod.STORE_REQUIREMENT in requirements:
1017 1017 if requirementsmod.FNCACHE_REQUIREMENT in requirements:
1018 1018 dotencode = requirementsmod.DOTENCODE_REQUIREMENT in requirements
1019 1019 return storemod.fncachestore(path, vfstype, dotencode)
1020 1020
1021 1021 return storemod.encodedstore(path, vfstype)
1022 1022
1023 1023 return storemod.basicstore(path, vfstype)
1024 1024
1025 1025
1026 1026 def resolvestorevfsoptions(ui, requirements, features):
1027 1027 """Resolve the options to pass to the store vfs opener.
1028 1028
1029 1029 The returned dict is used to influence behavior of the storage layer.
1030 1030 """
1031 1031 options = {}
1032 1032
1033 1033 if requirementsmod.TREEMANIFEST_REQUIREMENT in requirements:
1034 1034 options[b'treemanifest'] = True
1035 1035
1036 1036 # experimental config: format.manifestcachesize
1037 1037 manifestcachesize = ui.configint(b'format', b'manifestcachesize')
1038 1038 if manifestcachesize is not None:
1039 1039 options[b'manifestcachesize'] = manifestcachesize
1040 1040
1041 1041 # In the absence of another requirement superseding a revlog-related
1042 1042 # requirement, we have to assume the repo is using revlog version 0.
1043 1043 # This revlog format is super old and we don't bother trying to parse
1044 1044 # opener options for it because those options wouldn't do anything
1045 1045 # meaningful on such old repos.
1046 1046 if (
1047 1047 requirementsmod.REVLOGV1_REQUIREMENT in requirements
1048 1048 or requirementsmod.REVLOGV2_REQUIREMENT in requirements
1049 1049 ):
1050 1050 options.update(resolverevlogstorevfsoptions(ui, requirements, features))
1051 1051 else: # explicitly mark repo as using revlogv0
1052 1052 options[b'revlogv0'] = True
1053 1053
1054 1054 if requirementsmod.COPIESSDC_REQUIREMENT in requirements:
1055 1055 options[b'copies-storage'] = b'changeset-sidedata'
1056 1056 else:
1057 1057 writecopiesto = ui.config(b'experimental', b'copies.write-to')
1058 1058 copiesextramode = (b'changeset-only', b'compatibility')
1059 1059 if writecopiesto in copiesextramode:
1060 1060 options[b'copies-storage'] = b'extra'
1061 1061
1062 1062 return options
1063 1063
1064 1064
1065 1065 def resolverevlogstorevfsoptions(ui, requirements, features):
1066 1066 """Resolve opener options specific to revlogs."""
1067 1067
1068 1068 options = {}
1069 1069 options[b'flagprocessors'] = {}
1070 1070
1071 feature_config = options[b'feature-config'] = revlog.FeatureConfig()
1072 data_config = options[b'data-config'] = revlog.DataConfig()
1073 delta_config = options[b'delta-config'] = revlog.DeltaConfig()
1074
1071 1075 if requirementsmod.REVLOGV1_REQUIREMENT in requirements:
1072 1076 options[b'revlogv1'] = True
1073 1077 if requirementsmod.REVLOGV2_REQUIREMENT in requirements:
1074 1078 options[b'revlogv2'] = True
1075 1079 if requirementsmod.CHANGELOGV2_REQUIREMENT in requirements:
1076 1080 options[b'changelogv2'] = True
1077 1081 cmp_rank = ui.configbool(b'experimental', b'changelog-v2.compute-rank')
1078 1082 options[b'changelogv2.compute-rank'] = cmp_rank
1079 1083
1080 1084 if requirementsmod.GENERALDELTA_REQUIREMENT in requirements:
1081 1085 options[b'generaldelta'] = True
1082 1086
1083 1087 # experimental config: format.chunkcachesize
1084 1088 chunkcachesize = ui.configint(b'format', b'chunkcachesize')
1085 1089 if chunkcachesize is not None:
1086 1090 options[b'chunkcachesize'] = chunkcachesize
1087 1091
1088 1092 deltabothparents = ui.configbool(
1089 1093 b'storage', b'revlog.optimize-delta-parent-choice'
1090 1094 )
1091 1095 options[b'deltabothparents'] = deltabothparents
1092 1096 dps_cgds = ui.configint(
1093 1097 b'storage',
1094 1098 b'revlog.delta-parent-search.candidate-group-chunk-size',
1095 1099 )
1096 1100 options[b'delta-parent-search.candidate-group-chunk-size'] = dps_cgds
1097 1101 options[b'debug-delta'] = ui.configbool(b'debug', b'revlog.debug-delta')
1098 1102
1099 1103 issue6528 = ui.configbool(b'storage', b'revlog.issue6528.fix-incoming')
1100 1104 options[b'issue6528.fix-incoming'] = issue6528
1101 1105
1102 1106 lazydelta = ui.configbool(b'storage', b'revlog.reuse-external-delta')
1103 1107 lazydeltabase = False
1104 1108 if lazydelta:
1105 1109 lazydeltabase = ui.configbool(
1106 1110 b'storage', b'revlog.reuse-external-delta-parent'
1107 1111 )
1108 1112 if lazydeltabase is None:
1109 1113 lazydeltabase = not scmutil.gddeltaconfig(ui)
1110 1114 options[b'lazydelta'] = lazydelta
1111 1115 options[b'lazydeltabase'] = lazydeltabase
1112 1116
1113 1117 chainspan = ui.configbytes(b'experimental', b'maxdeltachainspan')
1114 1118 if 0 <= chainspan:
1115 1119 options[b'maxdeltachainspan'] = chainspan
1116 1120
1117 1121 mmapindexthreshold = ui.configbytes(b'experimental', b'mmapindexthreshold')
1118 1122 if mmapindexthreshold is not None:
1119 1123 options[b'mmapindexthreshold'] = mmapindexthreshold
1120 1124
1121 1125 withsparseread = ui.configbool(b'experimental', b'sparse-read')
1122 1126 srdensitythres = float(
1123 1127 ui.config(b'experimental', b'sparse-read.density-threshold')
1124 1128 )
1125 1129 srmingapsize = ui.configbytes(b'experimental', b'sparse-read.min-gap-size')
1126 1130 options[b'with-sparse-read'] = withsparseread
1127 1131 options[b'sparse-read-density-threshold'] = srdensitythres
1128 1132 options[b'sparse-read-min-gap-size'] = srmingapsize
1129 1133
1130 1134 sparserevlog = requirementsmod.SPARSEREVLOG_REQUIREMENT in requirements
1131 1135 options[b'sparse-revlog'] = sparserevlog
1132 1136 if sparserevlog:
1133 1137 options[b'generaldelta'] = True
1134 1138
1135 1139 maxchainlen = None
1136 1140 if sparserevlog:
1137 1141 maxchainlen = revlogconst.SPARSE_REVLOG_MAX_CHAIN_LENGTH
1138 1142 # experimental config: format.maxchainlen
1139 1143 maxchainlen = ui.configint(b'format', b'maxchainlen', maxchainlen)
1140 1144 if maxchainlen is not None:
1141 1145 options[b'maxchainlen'] = maxchainlen
1142 1146
1143 1147 for r in requirements:
1144 1148 # we allow multiple compression engine requirements to co-exist because,
1145 1149 # strictly speaking, revlog seems to support mixed compression styles.
1146 1150 #
1147 1151 # The compression used for new entries will be "the last one"
1148 1152 prefix = r.startswith
1149 1153 if prefix(b'revlog-compression-') or prefix(b'exp-compression-'):
1150 1154 options[b'compengine'] = r.split(b'-', 2)[2]
1151 1155
1152 1156 options[b'zlib.level'] = ui.configint(b'storage', b'revlog.zlib.level')
1153 1157 if options[b'zlib.level'] is not None:
1154 1158 if not (0 <= options[b'zlib.level'] <= 9):
1155 1159 msg = _(b'invalid value for `storage.revlog.zlib.level` config: %d')
1156 1160 raise error.Abort(msg % options[b'zlib.level'])
1157 1161 options[b'zstd.level'] = ui.configint(b'storage', b'revlog.zstd.level')
1158 1162 if options[b'zstd.level'] is not None:
1159 1163 if not (0 <= options[b'zstd.level'] <= 22):
1160 1164 msg = _(b'invalid value for `storage.revlog.zstd.level` config: %d')
1161 1165 raise error.Abort(msg % options[b'zstd.level'])
1162 1166
1163 1167 if requirementsmod.NARROW_REQUIREMENT in requirements:
1164 1168 options[b'enableellipsis'] = True
1165 1169
1166 1170 if ui.configbool(b'experimental', b'rust.index'):
1167 1171 options[b'rust.index'] = True
1168 1172 if requirementsmod.NODEMAP_REQUIREMENT in requirements:
1169 1173 slow_path = ui.config(
1170 1174 b'storage', b'revlog.persistent-nodemap.slow-path'
1171 1175 )
1172 1176 if slow_path not in (b'allow', b'warn', b'abort'):
1173 1177 default = ui.config_default(
1174 1178 b'storage', b'revlog.persistent-nodemap.slow-path'
1175 1179 )
1176 1180 msg = _(
1177 1181 b'unknown value for config '
1178 1182 b'"storage.revlog.persistent-nodemap.slow-path": "%s"\n'
1179 1183 )
1180 1184 ui.warn(msg % slow_path)
1181 1185 if not ui.quiet:
1182 1186 ui.warn(_(b'falling back to default value: %s\n') % default)
1183 1187 slow_path = default
1184 1188
1185 1189 msg = _(
1186 1190 b"accessing `persistent-nodemap` repository without associated "
1187 1191 b"fast implementation."
1188 1192 )
1189 1193 hint = _(
1190 1194 b"check `hg help config.format.use-persistent-nodemap` "
1191 1195 b"for details"
1192 1196 )
1193 1197 if not revlog.HAS_FAST_PERSISTENT_NODEMAP:
1194 1198 if slow_path == b'warn':
1195 1199 msg = b"warning: " + msg + b'\n'
1196 1200 ui.warn(msg)
1197 1201 if not ui.quiet:
1198 1202 hint = b'(' + hint + b')\n'
1199 1203 ui.warn(hint)
1200 1204 if slow_path == b'abort':
1201 1205 raise error.Abort(msg, hint=hint)
1202 1206 options[b'persistent-nodemap'] = True
1203 1207 if requirementsmod.DIRSTATE_V2_REQUIREMENT in requirements:
1204 1208 slow_path = ui.config(b'storage', b'dirstate-v2.slow-path')
1205 1209 if slow_path not in (b'allow', b'warn', b'abort'):
1206 1210 default = ui.config_default(b'storage', b'dirstate-v2.slow-path')
1207 1211 msg = _(b'unknown value for config "dirstate-v2.slow-path": "%s"\n')
1208 1212 ui.warn(msg % slow_path)
1209 1213 if not ui.quiet:
1210 1214 ui.warn(_(b'falling back to default value: %s\n') % default)
1211 1215 slow_path = default
1212 1216
1213 1217 msg = _(
1214 1218 b"accessing `dirstate-v2` repository without associated "
1215 1219 b"fast implementation."
1216 1220 )
1217 1221 hint = _(
1218 1222 b"check `hg help config.format.use-dirstate-v2` " b"for details"
1219 1223 )
1220 1224 if not dirstate.HAS_FAST_DIRSTATE_V2:
1221 1225 if slow_path == b'warn':
1222 1226 msg = b"warning: " + msg + b'\n'
1223 1227 ui.warn(msg)
1224 1228 if not ui.quiet:
1225 1229 hint = b'(' + hint + b')\n'
1226 1230 ui.warn(hint)
1227 1231 if slow_path == b'abort':
1228 1232 raise error.Abort(msg, hint=hint)
1229 1233 if ui.configbool(b'storage', b'revlog.persistent-nodemap.mmap'):
1230 1234 options[b'persistent-nodemap.mmap'] = True
1231 1235 if ui.configbool(b'devel', b'persistent-nodemap'):
1232 1236 options[b'devel-force-nodemap'] = True
1233 1237
1234 1238 return options
1235 1239
1236 1240
1237 1241 def makemain(**kwargs):
1238 1242 """Produce a type conforming to ``ilocalrepositorymain``."""
1239 1243 return localrepository
1240 1244
1241 1245
1242 1246 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1243 1247 class revlogfilestorage:
1244 1248 """File storage when using revlogs."""
1245 1249
1246 1250 def file(self, path):
1247 1251 if path.startswith(b'/'):
1248 1252 path = path[1:]
1249 1253
1250 1254 try_split = (
1251 1255 self.currenttransaction() is not None
1252 1256 or txnutil.mayhavepending(self.root)
1253 1257 )
1254 1258
1255 1259 return filelog.filelog(self.svfs, path, try_split=try_split)
1256 1260
1257 1261
1258 1262 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1259 1263 class revlognarrowfilestorage:
1260 1264 """File storage when using revlogs and narrow files."""
1261 1265
1262 1266 def file(self, path):
1263 1267 if path.startswith(b'/'):
1264 1268 path = path[1:]
1265 1269
1266 1270 try_split = (
1267 1271 self.currenttransaction() is not None
1268 1272 or txnutil.mayhavepending(self.root)
1269 1273 )
1270 1274 return filelog.narrowfilelog(
1271 1275 self.svfs, path, self._storenarrowmatch, try_split=try_split
1272 1276 )
1273 1277
1274 1278
1275 1279 def makefilestorage(requirements, features, **kwargs):
1276 1280 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1277 1281 features.add(repository.REPO_FEATURE_REVLOG_FILE_STORAGE)
1278 1282 features.add(repository.REPO_FEATURE_STREAM_CLONE)
1279 1283
1280 1284 if requirementsmod.NARROW_REQUIREMENT in requirements:
1281 1285 return revlognarrowfilestorage
1282 1286 else:
1283 1287 return revlogfilestorage
1284 1288
1285 1289
1286 1290 # List of repository interfaces and factory functions for them. Each
1287 1291 # will be called in order during ``makelocalrepository()`` to iteratively
1288 1292 # derive the final type for a local repository instance. We capture the
1289 1293 # function as a lambda so we don't hold a reference and the module-level
1290 1294 # functions can be wrapped.
1291 1295 REPO_INTERFACES = [
1292 1296 (repository.ilocalrepositorymain, lambda: makemain),
1293 1297 (repository.ilocalrepositoryfilestorage, lambda: makefilestorage),
1294 1298 ]
1295 1299
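# Wrapping sketch (illustrative; ``makecustomfilestorage`` is a placeholder):
# because a wrapper may run for repositories that do not enable the
# extension, it should check ``extensionmodulenames`` as described in the
# makelocalrepository() docstring.
#
#   def wrapfilestorage(orig, requirements, features, **kwargs):
#       cls = orig(requirements, features, **kwargs)
#       if __name__ not in kwargs['extensionmodulenames']:
#           return cls
#       return makecustomfilestorage(cls, requirements, features)
#
#   extensions.wrapfunction(localrepo, 'makefilestorage', wrapfilestorage)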
1296 1300
1297 1301 @interfaceutil.implementer(repository.ilocalrepositorymain)
1298 1302 class localrepository:
1299 1303 """Main class for representing local repositories.
1300 1304
1301 1305 All local repositories are instances of this class.
1302 1306
1303 1307 Constructed on its own, instances of this class are not usable as
1304 1308 repository objects. To obtain a usable repository object, call
1305 1309 ``hg.repository()``, ``localrepo.instance()``, or
1306 1310 ``localrepo.makelocalrepository()``. The latter is the lowest-level.
1307 1311 ``instance()`` adds support for creating new repositories.
1308 1312 ``hg.repository()`` adds more extension integration, including calling
1309 1313 ``reposetup()``. Generally speaking, ``hg.repository()`` should be
1310 1314 used.
1311 1315 """
1312 1316
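    # Construction sketch (illustrative): most callers go through the
    # higher-level helper rather than instantiating this class directly, e.g.
    #
    #   from mercurial import hg, ui as uimod
    #   repo = hg.repository(uimod.ui.load(), b'/path/to/repo')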
1313 1317 _basesupported = {
1314 1318 requirementsmod.ARCHIVED_PHASE_REQUIREMENT,
1315 1319 requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT,
1316 1320 requirementsmod.CHANGELOGV2_REQUIREMENT,
1317 1321 requirementsmod.COPIESSDC_REQUIREMENT,
1318 1322 requirementsmod.DIRSTATE_TRACKED_HINT_V1,
1319 1323 requirementsmod.DIRSTATE_V2_REQUIREMENT,
1320 1324 requirementsmod.DOTENCODE_REQUIREMENT,
1321 1325 requirementsmod.FNCACHE_REQUIREMENT,
1322 1326 requirementsmod.GENERALDELTA_REQUIREMENT,
1323 1327 requirementsmod.INTERNAL_PHASE_REQUIREMENT,
1324 1328 requirementsmod.NODEMAP_REQUIREMENT,
1325 1329 requirementsmod.RELATIVE_SHARED_REQUIREMENT,
1326 1330 requirementsmod.REVLOGV1_REQUIREMENT,
1327 1331 requirementsmod.REVLOGV2_REQUIREMENT,
1328 1332 requirementsmod.SHARED_REQUIREMENT,
1329 1333 requirementsmod.SHARESAFE_REQUIREMENT,
1330 1334 requirementsmod.SPARSE_REQUIREMENT,
1331 1335 requirementsmod.SPARSEREVLOG_REQUIREMENT,
1332 1336 requirementsmod.STORE_REQUIREMENT,
1333 1337 requirementsmod.TREEMANIFEST_REQUIREMENT,
1334 1338 }
1335 1339
1336 1340 # list of prefixes for files which can be written without 'wlock'
1337 1341 # Extensions should extend this list when needed
1338 1342 _wlockfreeprefix = {
1339 1343 # We might consider requiring 'wlock' for the next
1340 1344 # two, but pretty much all the existing code assumes
1341 1345 # wlock is not needed so we keep them excluded for
1342 1346 # now.
1343 1347 b'hgrc',
1344 1348 b'requires',
1345 1349 # XXX cache is a complicated business; someone
1346 1350 # should investigate this in depth at some point
1347 1351 b'cache/',
1348 1352 # XXX bisect was still a bit too messy at the time
1349 1353 # this changeset was introduced. Someone should fix
1350 1354 # the remaining bit and drop this line
1351 1355 b'bisect.state',
1352 1356 }
1353 1357
1354 1358 def __init__(
1355 1359 self,
1356 1360 baseui,
1357 1361 ui,
1358 1362 origroot: bytes,
1359 1363 wdirvfs: vfsmod.vfs,
1360 1364 hgvfs: vfsmod.vfs,
1361 1365 requirements,
1362 1366 supportedrequirements,
1363 1367 sharedpath: bytes,
1364 1368 store,
1365 1369 cachevfs: vfsmod.vfs,
1366 1370 wcachevfs: vfsmod.vfs,
1367 1371 features,
1368 1372 intents=None,
1369 1373 ):
1370 1374 """Create a new local repository instance.
1371 1375
1372 1376 Most callers should use ``hg.repository()``, ``localrepo.instance()``,
1373 1377 or ``localrepo.makelocalrepository()`` for obtaining a new repository
1374 1378 object.
1375 1379
1376 1380 Arguments:
1377 1381
1378 1382 baseui
1379 1383 ``ui.ui`` instance that ``ui`` argument was based off of.
1380 1384
1381 1385 ui
1382 1386 ``ui.ui`` instance for use by the repository.
1383 1387
1384 1388 origroot
1385 1389 ``bytes`` path to working directory root of this repository.
1386 1390
1387 1391 wdirvfs
1388 1392 ``vfs.vfs`` rooted at the working directory.
1389 1393
1390 1394 hgvfs
1391 1395 ``vfs.vfs`` rooted at .hg/
1392 1396
1393 1397 requirements
1394 1398 ``set`` of bytestrings representing repository opening requirements.
1395 1399
1396 1400 supportedrequirements
1397 1401 ``set`` of bytestrings representing repository requirements that we
1398 1402 know how to open. May be a superset of ``requirements``.
1399 1403
1400 1404 sharedpath
1401 1405 ``bytes`` defining the path to the storage base directory. Points to a
1402 1406 ``.hg/`` directory somewhere.
1403 1407
1404 1408 store
1405 1409 ``store.basicstore`` (or derived) instance providing access to
1406 1410 versioned storage.
1407 1411
1408 1412 cachevfs
1409 1413 ``vfs.vfs`` used for cache files.
1410 1414
1411 1415 wcachevfs
1412 1416 ``vfs.vfs`` used for cache files related to the working copy.
1413 1417
1414 1418 features
1415 1419 ``set`` of bytestrings defining features/capabilities of this
1416 1420 instance.
1417 1421
1418 1422 intents
1419 1423 ``set`` of system strings indicating what this repo will be used
1420 1424 for.
1421 1425 """
1422 1426 self.baseui = baseui
1423 1427 self.ui = ui
1424 1428 self.origroot = origroot
1425 1429 # vfs rooted at working directory.
1426 1430 self.wvfs = wdirvfs
1427 1431 self.root = wdirvfs.base
1428 1432 # vfs rooted at .hg/. Used to access most non-store paths.
1429 1433 self.vfs = hgvfs
1430 1434 self.path = hgvfs.base
1431 1435 self.requirements = requirements
1432 1436 self.nodeconstants = sha1nodeconstants
1433 1437 self.nullid = self.nodeconstants.nullid
1434 1438 self.supported = supportedrequirements
1435 1439 self.sharedpath = sharedpath
1436 1440 self.store = store
1437 1441 self.cachevfs = cachevfs
1438 1442 self.wcachevfs = wcachevfs
1439 1443 self.features = features
1440 1444
1441 1445 self.filtername = None
1442 1446
1443 1447 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1444 1448 b'devel', b'check-locks'
1445 1449 ):
1446 1450 self.vfs.audit = self._getvfsward(self.vfs.audit)
1447 1451 # A list of callbacks to shape the phase if no data were found.
1448 1452 # Callbacks are in the form: func(repo, roots) --> processed root.
1449 1453 # This list is to be filled by extensions during repo setup
1450 1454 self._phasedefaults = []
1451 1455
1452 1456 color.setup(self.ui)
1453 1457
1454 1458 self.spath = self.store.path
1455 1459 self.svfs = self.store.vfs
1456 1460 self.sjoin = self.store.join
1457 1461 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1458 1462 b'devel', b'check-locks'
1459 1463 ):
1460 1464 if hasattr(self.svfs, 'vfs'): # this is filtervfs
1461 1465 self.svfs.vfs.audit = self._getsvfsward(self.svfs.vfs.audit)
1462 1466 else: # standard vfs
1463 1467 self.svfs.audit = self._getsvfsward(self.svfs.audit)
1464 1468
1465 1469 self._dirstatevalidatewarned = False
1466 1470
1467 1471 self._branchcaches = branchmap.BranchMapCache()
1468 1472 self._revbranchcache = None
1469 1473 self._filterpats = {}
1470 1474 self._datafilters = {}
1471 1475 self._transref = self._lockref = self._wlockref = None
1472 1476
1473 1477 # A cache for various files under .hg/ that tracks file changes,
1474 1478 # (used by the filecache decorator)
1475 1479 #
1476 1480 # Maps a property name to its util.filecacheentry
1477 1481 self._filecache = {}
1478 1482
1479 1483 # hold sets of revisions to be filtered
1480 1484 # should be cleared when something might have changed the filter value:
1481 1485 # - new changesets,
1482 1486 # - phase change,
1483 1487 # - new obsolescence marker,
1484 1488 # - working directory parent change,
1485 1489 # - bookmark changes
1486 1490 self.filteredrevcache = {}
1487 1491
1488 1492 self._dirstate = None
1489 1493 # post-dirstate-status hooks
1490 1494 self._postdsstatus = []
1491 1495
1492 1496 self._pending_narrow_pats = None
1493 1497 self._pending_narrow_pats_dirstate = None
1494 1498
1495 1499 # generic mapping between names and nodes
1496 1500 self.names = namespaces.namespaces()
1497 1501
1498 1502 # Key to signature value.
1499 1503 self._sparsesignaturecache = {}
1500 1504 # Signature to cached matcher instance.
1501 1505 self._sparsematchercache = {}
1502 1506
1503 1507 self._extrafilterid = repoview.extrafilter(ui)
1504 1508
1505 1509 self.filecopiesmode = None
1506 1510 if requirementsmod.COPIESSDC_REQUIREMENT in self.requirements:
1507 1511 self.filecopiesmode = b'changeset-sidedata'
1508 1512
1509 1513 self._wanted_sidedata = set()
1510 1514 self._sidedata_computers = {}
1511 1515 sidedatamod.set_sidedata_spec_for_repo(self)
1512 1516
1513 1517 def _getvfsward(self, origfunc):
1514 1518 """build a ward for self.vfs"""
1515 1519 rref = weakref.ref(self)
1516 1520
1517 1521 def checkvfs(path, mode=None):
1518 1522 ret = origfunc(path, mode=mode)
1519 1523 repo = rref()
1520 1524 if (
1521 1525 repo is None
1522 1526 or not hasattr(repo, '_wlockref')
1523 1527 or not hasattr(repo, '_lockref')
1524 1528 ):
1525 1529 return
1526 1530 if mode in (None, b'r', b'rb'):
1527 1531 return
1528 1532 if path.startswith(repo.path):
1529 1533 # truncate name relative to the repository (.hg)
1530 1534 path = path[len(repo.path) + 1 :]
1531 1535 if path.startswith(b'cache/'):
1532 1536 msg = b'accessing cache with vfs instead of cachevfs: "%s"'
1533 1537 repo.ui.develwarn(msg % path, stacklevel=3, config=b"cache-vfs")
1534 1538 # path prefixes covered by 'lock'
1535 1539 vfs_path_prefixes = (
1536 1540 b'journal.',
1537 1541 b'undo.',
1538 1542 b'strip-backup/',
1539 1543 b'cache/',
1540 1544 )
1541 1545 if any(path.startswith(prefix) for prefix in vfs_path_prefixes):
1542 1546 if repo._currentlock(repo._lockref) is None:
1543 1547 repo.ui.develwarn(
1544 1548 b'write with no lock: "%s"' % path,
1545 1549 stacklevel=3,
1546 1550 config=b'check-locks',
1547 1551 )
1548 1552 elif repo._currentlock(repo._wlockref) is None:
1549 1553 # rest of vfs files are covered by 'wlock'
1550 1554 #
1551 1555 # exclude special files
1552 1556 for prefix in self._wlockfreeprefix:
1553 1557 if path.startswith(prefix):
1554 1558 return
1555 1559 repo.ui.develwarn(
1556 1560 b'write with no wlock: "%s"' % path,
1557 1561 stacklevel=3,
1558 1562 config=b'check-locks',
1559 1563 )
1560 1564 return ret
1561 1565
1562 1566 return checkvfs
1563 1567
1564 1568 def _getsvfsward(self, origfunc):
1565 1569 """build a ward for self.svfs"""
1566 1570 rref = weakref.ref(self)
1567 1571
1568 1572 def checksvfs(path, mode=None):
1569 1573 ret = origfunc(path, mode=mode)
1570 1574 repo = rref()
1571 1575 if repo is None or not hasattr(repo, '_lockref'):
1572 1576 return
1573 1577 if mode in (None, b'r', b'rb'):
1574 1578 return
1575 1579 if path.startswith(repo.sharedpath):
1576 1580 # truncate name relative to the repository (.hg)
1577 1581 path = path[len(repo.sharedpath) + 1 :]
1578 1582 if repo._currentlock(repo._lockref) is None:
1579 1583 repo.ui.develwarn(
1580 1584 b'write with no lock: "%s"' % path, stacklevel=4
1581 1585 )
1582 1586 return ret
1583 1587
1584 1588 return checksvfs
1585 1589
1586 1590 @property
1587 1591 def vfs_map(self):
1588 1592 return {
1589 1593 b'': self.svfs,
1590 1594 b'plain': self.vfs,
1591 1595 b'store': self.svfs,
1592 1596 }
1593 1597
1594 1598 def close(self):
1595 1599 self._writecaches()
1596 1600
1597 1601 def _writecaches(self):
1598 1602 if self._revbranchcache:
1599 1603 self._revbranchcache.write()
1600 1604
1601 1605 def _restrictcapabilities(self, caps):
1602 1606 if self.ui.configbool(b'experimental', b'bundle2-advertise'):
1603 1607 caps = set(caps)
1604 1608 capsblob = bundle2.encodecaps(
1605 1609 bundle2.getrepocaps(self, role=b'client')
1606 1610 )
1607 1611 caps.add(b'bundle2=' + urlreq.quote(capsblob))
1608 1612 if self.ui.configbool(b'experimental', b'narrow'):
1609 1613 caps.add(wireprototypes.NARROWCAP)
1610 1614 return caps
1611 1615
1612 1616 # Don't cache auditor/nofsauditor, or you'll end up with reference cycle:
1613 1617 # self -> auditor -> self._checknested -> self
1614 1618
1615 1619 @property
1616 1620 def auditor(self):
1617 1621 # This is only used by context.workingctx.match in order to
1618 1622 # detect files in subrepos.
1619 1623 return pathutil.pathauditor(self.root, callback=self._checknested)
1620 1624
1621 1625 @property
1622 1626 def nofsauditor(self):
1623 1627 # This is only used by context.basectx.match in order to detect
1624 1628 # files in subrepos.
1625 1629 return pathutil.pathauditor(
1626 1630 self.root, callback=self._checknested, realfs=False, cached=True
1627 1631 )
1628 1632
1629 1633 def _checknested(self, path):
1630 1634 """Determine if path is a legal nested repository."""
1631 1635 if not path.startswith(self.root):
1632 1636 return False
1633 1637 subpath = path[len(self.root) + 1 :]
1634 1638 normsubpath = util.pconvert(subpath)
1635 1639
1636 1640 # XXX: Checking against the current working copy is wrong in
1637 1641 # the sense that it can reject things like
1638 1642 #
1639 1643 # $ hg cat -r 10 sub/x.txt
1640 1644 #
1641 1645 # if sub/ is no longer a subrepository in the working copy
1642 1646 # parent revision.
1643 1647 #
1644 1648 # However, it can of course also allow things that would have
1645 1649 # been rejected before, such as the above cat command if sub/
1646 1650 # is a subrepository now, but was a normal directory before.
1647 1651 # The old path auditor would have rejected by mistake since it
1648 1652 # panics when it sees sub/.hg/.
1649 1653 #
1650 1654 # All in all, checking against the working copy seems sensible
1651 1655 # since we want to prevent access to nested repositories on
1652 1656 # the filesystem *now*.
1653 1657 ctx = self[None]
1654 1658 parts = util.splitpath(subpath)
1655 1659 while parts:
1656 1660 prefix = b'/'.join(parts)
1657 1661 if prefix in ctx.substate:
1658 1662 if prefix == normsubpath:
1659 1663 return True
1660 1664 else:
1661 1665 sub = ctx.sub(prefix)
1662 1666 return sub.checknested(subpath[len(prefix) + 1 :])
1663 1667 else:
1664 1668 parts.pop()
1665 1669 return False
1666 1670
1667 1671 def peer(self, path=None, remotehidden=False):
1668 1672 return localpeer(
1669 1673 self, path=path, remotehidden=remotehidden
1670 1674 ) # not cached to avoid reference cycle
1671 1675
1672 1676 def unfiltered(self):
1673 1677 """Return unfiltered version of the repository
1674 1678
1675 1679 Intended to be overwritten by filtered repo."""
1676 1680 return self
1677 1681
1678 1682 def filtered(self, name, visibilityexceptions=None):
1679 1683 """Return a filtered version of a repository
1680 1684
1681 1685 The `name` parameter is the identifier of the requested view. This
1682 1686 will return a repoview object set "exactly" to the specified view.
1683 1687
1684 1688 This function does not apply recursive filtering to a repository. For
1685 1689 example calling `repo.filtered("served")` will return a repoview using
1686 1690 the "served" view, regardless of the initial view used by `repo`.
1687 1691
1688 1692 In other words, there is always only one level of `repoview` "filtering".
1689 1693 """
1690 1694 if self._extrafilterid is not None and b'%' not in name:
1691 1695 name = name + b'%' + self._extrafilterid
1692 1696
1693 1697 cls = repoview.newtype(self.unfiltered().__class__)
1694 1698 return cls(self, name, visibilityexceptions)
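
# Usage sketch (illustrative, not part of the upstream file): obtaining
# differently filtered views of a local repository object ``repo``:
#
#     served = repo.filtered(b'served')    # hides secret and hidden csets
#     visible = repo.filtered(b'visible')  # hides only hidden csets
#     unfi = repo.unfiltered()             # no filtering at all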
1695 1699
1696 1700 @mixedrepostorecache(
1697 1701 (b'bookmarks', b'plain'),
1698 1702 (b'bookmarks.current', b'plain'),
1699 1703 (b'bookmarks', b''),
1700 1704 (b'00changelog.i', b''),
1701 1705 )
1702 1706 def _bookmarks(self):
1703 1707 # Since the multiple files involved in the transaction cannot be
1704 1708 # written atomically (with current repository format), there is a race
1705 1709 # condition here.
1706 1710 #
1707 1711 # 1) changelog content A is read
1708 1712 # 2) outside transaction update changelog to content B
1709 1713 # 3) outside transaction update bookmark file referring to content B
1710 1714 # 4) bookmarks file content is read and filtered against changelog-A
1711 1715 #
1712 1716 # When this happens, bookmarks against nodes missing from A are dropped.
1713 1717 #
1714 1718 # Having this happen during a read is not great, but it becomes worse
1715 1719 # when it happens during a write, because the bookmarks pointing to the
1716 1720 # "unknown" nodes will be dropped for good. However, writes happen within
1717 1721 # locks. This locking makes it possible to have a race-free consistent read.
1718 1722 # For this purpose, data read from disk before locking are
1719 1723 # "invalidated" right after the locks are taken. These invalidations are
1720 1724 # "light": the `filecache` mechanism keeps the data in memory and will
1721 1725 # reuse it if the underlying files did not change. Not parsing the
1722 1726 # same data multiple times helps performance.
1723 1727 #
1724 1728 # Unfortunately, in the case described above, the files tracked by the
1725 1729 # bookmarks file cache might not have changed, but the in-memory
1726 1730 # content is still "wrong" because we used an older changelog content
1727 1731 # to process the on-disk data. So after locking, the changelog would be
1728 1732 # refreshed but `_bookmarks` would be preserved.
1729 1733 # Adding `00changelog.i` to the list of tracked files is not
1730 1734 # enough, because at the time we build the content for `_bookmarks` in
1731 1735 # (4), the changelog file has already diverged from the content used
1732 1736 # for loading `changelog` in (1)
1733 1737 #
1734 1738 # To prevent the issue, we force the changelog to be explicitly
1735 1739 # reloaded while computing `_bookmarks`. The data race can still happen
1736 1740 # without the lock (with a narrower window), but it would no longer go
1737 1741 # undetected during the lock time refresh.
1738 1742 #
1739 1743 # The new schedule is as follows:
1740 1744 #
1741 1745 # 1) filecache logic detect that `_bookmarks` needs to be computed
1742 1746 # 2) cachestat for `bookmarks` and `changelog` are captured (for book)
1743 1747 # 3) We force `changelog` filecache to be tested
1744 1748 # 4) cachestat for `changelog` are captured (for changelog)
1745 1749 # 5) `_bookmarks` is computed and cached
1746 1750 #
1747 1751 # The step in (3) ensures we have a changelog at least as recent as the
1748 1752 # cache stat computed in (1). As a result, at locking time:
1749 1753 # * if the changelog did not change since (1) -> we can reuse the data
1750 1754 # * otherwise -> the bookmarks get refreshed.
1751 1755 self._refreshchangelog()
1752 1756 return bookmarks.bmstore(self)
1753 1757
1754 1758 def _refreshchangelog(self):
1755 1759 """make sure the in-memory changelog matches the on-disk one"""
1756 1760 if 'changelog' in vars(self) and self.currenttransaction() is None:
1757 1761 del self.changelog
1758 1762
1759 1763 @property
1760 1764 def _activebookmark(self):
1761 1765 return self._bookmarks.active
1762 1766
1763 1767 # _phasesets depend on the changelog. What we need is to call
1764 1768 # _phasecache.invalidate() if '00changelog.i' was changed, but it
1765 1769 # can't be easily expressed with the filecache mechanism.
1766 1770 @storecache(b'phaseroots', b'00changelog.i')
1767 1771 def _phasecache(self):
1768 1772 return phases.phasecache(self, self._phasedefaults)
1769 1773
1770 1774 @storecache(b'obsstore')
1771 1775 def obsstore(self):
1772 1776 return obsolete.makestore(self.ui, self)
1773 1777
1774 1778 @changelogcache()
1775 1779 def changelog(repo):
1776 1780 # load dirstate before changelog to avoid a race (see issue6303)
1777 1781 repo.dirstate.prefetch_parents()
1778 1782 return repo.store.changelog(
1779 1783 txnutil.mayhavepending(repo.root),
1780 1784 concurrencychecker=revlogchecker.get_checker(repo.ui, b'changelog'),
1781 1785 )
1782 1786
1783 1787 @manifestlogcache()
1784 1788 def manifestlog(self):
1785 1789 return self.store.manifestlog(self, self._storenarrowmatch)
1786 1790
1787 1791 @unfilteredpropertycache
1788 1792 def dirstate(self):
1789 1793 if self._dirstate is None:
1790 1794 self._dirstate = self._makedirstate()
1791 1795 else:
1792 1796 self._dirstate.refresh()
1793 1797 return self._dirstate
1794 1798
1795 1799 def _makedirstate(self):
1796 1800 """Extension point for wrapping the dirstate per-repo."""
1797 1801 sparsematchfn = None
1798 1802 if sparse.use_sparse(self):
1799 1803 sparsematchfn = lambda: sparse.matcher(self)
1800 1804 v2_req = requirementsmod.DIRSTATE_V2_REQUIREMENT
1801 1805 th = requirementsmod.DIRSTATE_TRACKED_HINT_V1
1802 1806 use_dirstate_v2 = v2_req in self.requirements
1803 1807 use_tracked_hint = th in self.requirements
1804 1808
1805 1809 return dirstate.dirstate(
1806 1810 self.vfs,
1807 1811 self.ui,
1808 1812 self.root,
1809 1813 self._dirstatevalidate,
1810 1814 sparsematchfn,
1811 1815 self.nodeconstants,
1812 1816 use_dirstate_v2,
1813 1817 use_tracked_hint=use_tracked_hint,
1814 1818 )
1815 1819
1816 1820 def _dirstatevalidate(self, node):
1817 1821 okay = True
1818 1822 try:
1819 1823 self.changelog.rev(node)
1820 1824 except error.LookupError:
1821 1825 # If the parents are unknown, it might just be because the changelog
1822 1826 # in memory is lagging behind the dirstate in memory. So try to
1823 1827 # refresh the changelog first.
1824 1828 #
1825 1829 # We only do so if we don't hold the lock; if we do hold the lock,
1826 1830 # the invalidation at that time should have taken care of this and
1827 1831 # something is very fishy.
1828 1832 if self.currentlock() is None:
1829 1833 self.invalidate()
1830 1834 try:
1831 1835 self.changelog.rev(node)
1832 1836 except error.LookupError:
1833 1837 okay = False
1834 1838 else:
1835 1839 # XXX we should consider raising an error here.
1836 1840 okay = False
1837 1841 if okay:
1838 1842 return node
1839 1843 else:
1840 1844 if not self._dirstatevalidatewarned:
1841 1845 self._dirstatevalidatewarned = True
1842 1846 self.ui.warn(
1843 1847 _(b"warning: ignoring unknown working parent %s!\n")
1844 1848 % short(node)
1845 1849 )
1846 1850 return self.nullid
1847 1851
1848 1852 @storecache(narrowspec.FILENAME)
1849 1853 def narrowpats(self):
1850 1854 """matcher patterns for this repository's narrowspec
1851 1855
1852 1856 A tuple of (includes, excludes).
1853 1857 """
1854 1858 # the narrow management should probably move into its own object
1855 1859 val = self._pending_narrow_pats
1856 1860 if val is None:
1857 1861 val = narrowspec.load(self)
1858 1862 return val
1859 1863
1860 1864 @storecache(narrowspec.FILENAME)
1861 1865 def _storenarrowmatch(self):
1862 1866 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1863 1867 return matchmod.always()
1864 1868 include, exclude = self.narrowpats
1865 1869 return narrowspec.match(self.root, include=include, exclude=exclude)
1866 1870
1867 1871 @storecache(narrowspec.FILENAME)
1868 1872 def _narrowmatch(self):
1869 1873 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1870 1874 return matchmod.always()
1871 1875 narrowspec.checkworkingcopynarrowspec(self)
1872 1876 include, exclude = self.narrowpats
1873 1877 return narrowspec.match(self.root, include=include, exclude=exclude)
1874 1878
1875 1879 def narrowmatch(self, match=None, includeexact=False):
1876 1880 """matcher corresponding to the repo's narrowspec
1877 1881
1878 1882 If `match` is given, then that will be intersected with the narrow
1879 1883 matcher.
1880 1884
1881 1885 If `includeexact` is True, then any exact matches from `match` will
1882 1886 be included even if they're outside the narrowspec.
1883 1887 """
1884 1888 if match:
1885 1889 if includeexact and not self._narrowmatch.always():
1886 1890 # do not exclude explicitly-specified paths so that they can
1887 1891 # be warned later on
1888 1892 em = matchmod.exact(match.files())
1889 1893 nm = matchmod.unionmatcher([self._narrowmatch, em])
1890 1894 return matchmod.intersectmatchers(match, nm)
1891 1895 return matchmod.intersectmatchers(match, self._narrowmatch)
1892 1896 return self._narrowmatch
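
# Usage sketch (illustrative): intersecting a caller-provided matcher with
# the narrowspec; the pattern used here is made up for the example:
#
#     m = matchmod.match(repo.root, repo.getcwd(), [b'glob:src/**'])
#     m = repo.narrowmatch(m, includeexact=True)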
1893 1897
1894 1898 def setnarrowpats(self, newincludes, newexcludes):
1895 1899 narrowspec.save(self, newincludes, newexcludes)
1896 1900 self.invalidate(clearfilecache=True)
1897 1901
1898 1902 @unfilteredpropertycache
1899 1903 def _quick_access_changeid_null(self):
1900 1904 return {
1901 1905 b'null': (nullrev, self.nodeconstants.nullid),
1902 1906 nullrev: (nullrev, self.nodeconstants.nullid),
1903 1907 self.nullid: (nullrev, self.nullid),
1904 1908 }
1905 1909
1906 1910 @unfilteredpropertycache
1907 1911 def _quick_access_changeid_wc(self):
1908 1912 # also fast path access to the working copy parents
1909 1913 # however, only do it for filters that ensure the wc is visible.
1910 1914 quick = self._quick_access_changeid_null.copy()
1911 1915 cl = self.unfiltered().changelog
1912 1916 for node in self.dirstate.parents():
1913 1917 if node == self.nullid:
1914 1918 continue
1915 1919 rev = cl.index.get_rev(node)
1916 1920 if rev is None:
1917 1921 # unknown working copy parent case:
1918 1922 #
1919 1923 # skip the fast path and let higher code deal with it
1920 1924 continue
1921 1925 pair = (rev, node)
1922 1926 quick[rev] = pair
1923 1927 quick[node] = pair
1924 1928 # also add the parents of the parents
1925 1929 for r in cl.parentrevs(rev):
1926 1930 if r == nullrev:
1927 1931 continue
1928 1932 n = cl.node(r)
1929 1933 pair = (r, n)
1930 1934 quick[r] = pair
1931 1935 quick[n] = pair
1932 1936 p1node = self.dirstate.p1()
1933 1937 if p1node != self.nullid:
1934 1938 quick[b'.'] = quick[p1node]
1935 1939 return quick
1936 1940
1937 1941 @unfilteredmethod
1938 1942 def _quick_access_changeid_invalidate(self):
1939 1943 if '_quick_access_changeid_wc' in vars(self):
1940 1944 del self.__dict__['_quick_access_changeid_wc']
1941 1945
1942 1946 @property
1943 1947 def _quick_access_changeid(self):
1944 1948 """a helper dictionary for __getitem__ calls
1945 1949
1946 1950 This contains a list of symbols we can recognise right away without
1947 1951 further processing.
1948 1952 """
1949 1953 if self.filtername in repoview.filter_has_wc:
1950 1954 return self._quick_access_changeid_wc
1951 1955 return self._quick_access_changeid_null
1952 1956
1953 1957 def __getitem__(self, changeid):
1954 1958 # dealing with special cases
1955 1959 if changeid is None:
1956 1960 return context.workingctx(self)
1957 1961 if isinstance(changeid, context.basectx):
1958 1962 return changeid
1959 1963
1960 1964 # dealing with multiple revisions
1961 1965 if isinstance(changeid, slice):
1962 1966 # wdirrev isn't contiguous so the slice shouldn't include it
1963 1967 return [
1964 1968 self[i]
1965 1969 for i in range(*changeid.indices(len(self)))
1966 1970 if i not in self.changelog.filteredrevs
1967 1971 ]
1968 1972
1969 1973 # dealing with some special values
1970 1974 quick_access = self._quick_access_changeid.get(changeid)
1971 1975 if quick_access is not None:
1972 1976 rev, node = quick_access
1973 1977 return context.changectx(self, rev, node, maybe_filtered=False)
1974 1978 if changeid == b'tip':
1975 1979 node = self.changelog.tip()
1976 1980 rev = self.changelog.rev(node)
1977 1981 return context.changectx(self, rev, node)
1978 1982
1979 1983 # dealing with arbitrary values
1980 1984 try:
1981 1985 if isinstance(changeid, int):
1982 1986 node = self.changelog.node(changeid)
1983 1987 rev = changeid
1984 1988 elif changeid == b'.':
1985 1989 # this is a hack to delay/avoid loading obsmarkers
1986 1990 # when we know that '.' won't be hidden
1987 1991 node = self.dirstate.p1()
1988 1992 rev = self.unfiltered().changelog.rev(node)
1989 1993 elif len(changeid) == self.nodeconstants.nodelen:
1990 1994 try:
1991 1995 node = changeid
1992 1996 rev = self.changelog.rev(changeid)
1993 1997 except error.FilteredLookupError:
1994 1998 changeid = hex(changeid) # for the error message
1995 1999 raise
1996 2000 except LookupError:
1997 2001 # check if it might have come from damaged dirstate
1998 2002 #
1999 2003 # XXX we could avoid the unfiltered if we had a recognizable
2000 2004 # exception for filtered changeset access
2001 2005 if (
2002 2006 self.local()
2003 2007 and changeid in self.unfiltered().dirstate.parents()
2004 2008 ):
2005 2009 msg = _(b"working directory has unknown parent '%s'!")
2006 2010 raise error.Abort(msg % short(changeid))
2007 2011 changeid = hex(changeid) # for the error message
2008 2012 raise
2009 2013
2010 2014 elif len(changeid) == 2 * self.nodeconstants.nodelen:
2011 2015 node = bin(changeid)
2012 2016 rev = self.changelog.rev(node)
2013 2017 else:
2014 2018 raise error.ProgrammingError(
2015 2019 b"unsupported changeid '%s' of type %s"
2016 2020 % (changeid, pycompat.bytestr(type(changeid)))
2017 2021 )
2018 2022
2019 2023 return context.changectx(self, rev, node)
2020 2024
2021 2025 except (error.FilteredIndexError, error.FilteredLookupError):
2022 2026 raise error.FilteredRepoLookupError(
2023 2027 _(b"filtered revision '%s'") % pycompat.bytestr(changeid)
2024 2028 )
2025 2029 except (IndexError, LookupError):
2026 2030 raise error.RepoLookupError(
2027 2031 _(b"unknown revision '%s'") % pycompat.bytestr(changeid)
2028 2032 )
2029 2033 except error.WdirUnsupported:
2030 2034 return context.workingctx(self)
2031 2035
2032 2036 def __contains__(self, changeid):
2033 2037 """True if the given changeid exists"""
2034 2038 try:
2035 2039 self[changeid]
2036 2040 return True
2037 2041 except error.RepoLookupError:
2038 2042 return False
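
# Usage sketch (illustrative): the item protocol above accepts several kinds
# of changeid; ``node`` stands for a known binary changeset node:
#
#     ctx = repo[0]          # by revision number
#     ctx = repo[node]       # by binary node (or 40-char hex bytes)
#     ctx = repo[b'.']       # working directory parent
#     wctx = repo[None]      # working directory context
#     known = node in repo   # __contains__, swallowing RepoLookupError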
2039 2043
2040 2044 def __nonzero__(self):
2041 2045 return True
2042 2046
2043 2047 __bool__ = __nonzero__
2044 2048
2045 2049 def __len__(self):
2046 2050 # no need to pay the cost of repoview.changelog
2047 2051 unfi = self.unfiltered()
2048 2052 return len(unfi.changelog)
2049 2053
2050 2054 def __iter__(self):
2051 2055 return iter(self.changelog)
2052 2056
2053 2057 def revs(self, expr: bytes, *args):
2054 2058 """Find revisions matching a revset.
2055 2059
2056 2060 The revset is specified as a string ``expr`` that may contain
2057 2061 %-formatting to escape certain types. See ``revsetlang.formatspec``.
2058 2062
2059 2063 Revset aliases from the configuration are not expanded. To expand
2060 2064 user aliases, consider calling ``scmutil.revrange()`` or
2061 2065 ``repo.anyrevs([expr], user=True)``.
2062 2066
2063 2067 Returns a smartset.abstractsmartset, which is a list-like interface
2064 2068 that contains integer revisions.
2065 2069 """
2066 2070 tree = revsetlang.spectree(expr, *args)
2067 2071 return revset.makematcher(tree)(self)
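
# Usage sketch (illustrative): %-formatting as described above; ``some_rev``
# and ``some_node`` are placeholders for an integer revision and a binary
# node respectively:
#
#     for r in repo.revs(b"ancestors(%d) and not public()", some_rev):
#         repo.ui.write(b"%d\n" % r)
#     repo.revs(b"children(%n)", some_node)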
2068 2072
2069 2073 def set(self, expr: bytes, *args):
2070 2074 """Find revisions matching a revset and emit changectx instances.
2071 2075
2072 2076 This is a convenience wrapper around ``revs()`` that iterates the
2073 2077 result and is a generator of changectx instances.
2074 2078
2075 2079 Revset aliases from the configuration are not expanded. To expand
2076 2080 user aliases, consider calling ``scmutil.revrange()``.
2077 2081 """
2078 2082 for r in self.revs(expr, *args):
2079 2083 yield self[r]
2080 2084
2081 2085 def anyrevs(self, specs: bytes, user=False, localalias=None):
2082 2086 """Find revisions matching one of the given revsets.
2083 2087
2084 2088 Revset aliases from the configuration are not expanded by default. To
2085 2089 expand user aliases, specify ``user=True``. To provide some local
2086 2090 definitions overriding user aliases, set ``localalias`` to
2087 2091 ``{name: definitionstring}``.
2088 2092 """
2089 2093 if specs == [b'null']:
2090 2094 return revset.baseset([nullrev])
2091 2095 if specs == [b'.']:
2092 2096 quick_data = self._quick_access_changeid.get(b'.')
2093 2097 if quick_data is not None:
2094 2098 return revset.baseset([quick_data[0]])
2095 2099 if user:
2096 2100 m = revset.matchany(
2097 2101 self.ui,
2098 2102 specs,
2099 2103 lookup=revset.lookupfn(self),
2100 2104 localalias=localalias,
2101 2105 )
2102 2106 else:
2103 2107 m = revset.matchany(None, specs, localalias=localalias)
2104 2108 return m(self)
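
# Usage sketch (illustrative): expanding user aliases while overriding one of
# them locally; the alias name and definition are made up:
#
#     revs = repo.anyrevs(
#         [b'not public() and interesting'],
#         user=True,
#         localalias={b'interesting': b'head() and not closed()'},
#     )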
2105 2109
2106 2110 def url(self) -> bytes:
2107 2111 return b'file:' + self.root
2108 2112
2109 2113 def hook(self, name, throw=False, **args):
2110 2114 """Call a hook, passing this repo instance.
2111 2115
2112 2116 This is a convenience method to aid invoking hooks. Extensions likely
2113 2117 won't call this unless they have registered a custom hook or are
2114 2118 replacing code that is expected to call a hook.
2115 2119 """
2116 2120 return hook.hook(self.ui, self, name, throw, **args)
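
# Usage sketch (illustrative): firing a custom hook from an extension; the
# hook name and keyword arguments are hypothetical:
#
#     repo.hook(b'myext-post-sync', throw=False, source=b'pull')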
2117 2121
2118 2122 @filteredpropertycache
2119 2123 def _tagscache(self):
2120 2124 """Returns a tagscache object that contains various tag-related
2121 2125 caches."""
2122 2126
2123 2127 # This simplifies its cache management by having one decorated
2124 2128 # function (this one) and the rest simply fetch things from it.
2125 2129 class tagscache:
2126 2130 def __init__(self):
2127 2131 # These two define the set of tags for this repository. tags
2128 2132 # maps tag name to node; tagtypes maps tag name to 'global' or
2129 2133 # 'local'. (Global tags are defined by .hgtags across all
2130 2134 # heads, and local tags are defined in .hg/localtags.)
2131 2135 # They constitute the in-memory cache of tags.
2132 2136 self.tags = self.tagtypes = None
2133 2137
2134 2138 self.nodetagscache = self.tagslist = None
2135 2139
2136 2140 cache = tagscache()
2137 2141 cache.tags, cache.tagtypes = self._findtags()
2138 2142
2139 2143 return cache
2140 2144
2141 2145 def tags(self):
2142 2146 '''return a mapping of tag to node'''
2143 2147 t = {}
2144 2148 if self.changelog.filteredrevs:
2145 2149 tags, tt = self._findtags()
2146 2150 else:
2147 2151 tags = self._tagscache.tags
2148 2152 rev = self.changelog.rev
2149 2153 for k, v in tags.items():
2150 2154 try:
2151 2155 # ignore tags to unknown nodes
2152 2156 rev(v)
2153 2157 t[k] = v
2154 2158 except (error.LookupError, ValueError):
2155 2159 pass
2156 2160 return t
2157 2161
2158 2162 def _findtags(self):
2159 2163 """Do the hard work of finding tags. Return a pair of dicts
2160 2164 (tags, tagtypes) where tags maps tag name to node, and tagtypes
2161 2165 maps tag name to a string like \'global\' or \'local\'.
2162 2166 Subclasses or extensions are free to add their own tags, but
2163 2167 should be aware that the returned dicts will be retained for the
2164 2168 duration of the localrepo object."""
2165 2169
2166 2170 # XXX what tagtype should subclasses/extensions use? Currently
2167 2171 # mq and bookmarks add tags, but do not set the tagtype at all.
2168 2172 # Should each extension invent its own tag type? Should there
2169 2173 # be one tagtype for all such "virtual" tags? Or is the status
2170 2174 # quo fine?
2171 2175
2172 2176 # map tag name to (node, hist)
2173 2177 alltags = tagsmod.findglobaltags(self.ui, self)
2174 2178 # map tag name to tag type
2175 2179 tagtypes = {tag: b'global' for tag in alltags}
2176 2180
2177 2181 tagsmod.readlocaltags(self.ui, self, alltags, tagtypes)
2178 2182
2179 2183 # Build the return dicts. Have to re-encode tag names because
2180 2184 # the tags module always uses UTF-8 (in order not to lose info
2181 2185 # writing to the cache), but the rest of Mercurial wants them in
2182 2186 # local encoding.
2183 2187 tags = {}
2184 2188 for name, (node, hist) in alltags.items():
2185 2189 if node != self.nullid:
2186 2190 tags[encoding.tolocal(name)] = node
2187 2191 tags[b'tip'] = self.changelog.tip()
2188 2192 tagtypes = {
2189 2193 encoding.tolocal(name): value for (name, value) in tagtypes.items()
2190 2194 }
2191 2195 return (tags, tagtypes)
2192 2196
2193 2197 def tagtype(self, tagname):
2194 2198 """
2195 2199 return the type of the given tag. result can be:
2196 2200
2197 2201 'local' : a local tag
2198 2202 'global' : a global tag
2199 2203 None : tag does not exist
2200 2204 """
2201 2205
2202 2206 return self._tagscache.tagtypes.get(tagname)
2203 2207
2204 2208 def tagslist(self):
2205 2209 '''return a list of tags ordered by revision'''
2206 2210 if not self._tagscache.tagslist:
2207 2211 l = []
2208 2212 for t, n in self.tags().items():
2209 2213 l.append((self.changelog.rev(n), t, n))
2210 2214 self._tagscache.tagslist = [(t, n) for r, t, n in sorted(l)]
2211 2215
2212 2216 return self._tagscache.tagslist
2213 2217
2214 2218 def nodetags(self, node):
2215 2219 '''return the tags associated with a node'''
2216 2220 if not self._tagscache.nodetagscache:
2217 2221 nodetagscache = {}
2218 2222 for t, n in self._tagscache.tags.items():
2219 2223 nodetagscache.setdefault(n, []).append(t)
2220 2224 for tags in nodetagscache.values():
2221 2225 tags.sort()
2222 2226 self._tagscache.nodetagscache = nodetagscache
2223 2227 return self._tagscache.nodetagscache.get(node, [])
2224 2228
2225 2229 def nodebookmarks(self, node):
2226 2230 """return the list of bookmarks pointing to the specified node"""
2227 2231 return self._bookmarks.names(node)
2228 2232
2229 2233 def branchmap(self):
2230 2234 """returns a dictionary {branch: [branchheads]} with branchheads
2231 2235 ordered by increasing revision number"""
2232 2236 return self._branchcaches[self]
2233 2237
2234 2238 @unfilteredmethod
2235 2239 def revbranchcache(self):
2236 2240 if not self._revbranchcache:
2237 2241 self._revbranchcache = branchmap.revbranchcache(self.unfiltered())
2238 2242 return self._revbranchcache
2239 2243
2240 2244 def register_changeset(self, rev, changelogrevision):
2241 2245 self.revbranchcache().setdata(rev, changelogrevision)
2242 2246
2243 2247 def branchtip(self, branch, ignoremissing=False):
2244 2248 """return the tip node for a given branch
2245 2249
2246 2250 If ignoremissing is True, then this method will not raise an error.
2247 2251 This is helpful for callers that only expect None for a missing branch
2248 2252 (e.g. namespace).
2249 2253
2250 2254 """
2251 2255 try:
2252 2256 return self.branchmap().branchtip(branch)
2253 2257 except KeyError:
2254 2258 if not ignoremissing:
2255 2259 raise error.RepoLookupError(_(b"unknown branch '%s'") % branch)
2256 2260 else:
2257 2261 pass
2258 2262
2259 2263 def lookup(self, key):
2260 2264 node = scmutil.revsymbol(self, key).node()
2261 2265 if node is None:
2262 2266 raise error.RepoLookupError(_(b"unknown revision '%s'") % key)
2263 2267 return node
2264 2268
2265 2269 def lookupbranch(self, key):
2266 2270 if self.branchmap().hasbranch(key):
2267 2271 return key
2268 2272
2269 2273 return scmutil.revsymbol(self, key).branch()
2270 2274
2271 2275 def known(self, nodes):
2272 2276 cl = self.changelog
2273 2277 get_rev = cl.index.get_rev
2274 2278 filtered = cl.filteredrevs
2275 2279 result = []
2276 2280 for n in nodes:
2277 2281 r = get_rev(n)
2278 2282 resp = not (r is None or r in filtered)
2279 2283 result.append(resp)
2280 2284 return result
2281 2285
2282 2286 def local(self):
2283 2287 return self
2284 2288
2285 2289 def publishing(self):
2286 2290 # it's safe (and desirable) to trust the publish flag unconditionally
2287 2291 # so that we don't finalize changes shared between users via ssh or nfs
2288 2292 return self.ui.configbool(b'phases', b'publish', untrusted=True)
2289 2293
2290 2294 def cancopy(self):
2291 2295 # so statichttprepo's override of local() works
2292 2296 if not self.local():
2293 2297 return False
2294 2298 if not self.publishing():
2295 2299 return True
2296 2300 # if publishing we can't copy if there is filtered content
2297 2301 return not self.filtered(b'visible').changelog.filteredrevs
2298 2302
2299 2303 def shared(self):
2300 2304 '''the type of shared repository (None if not shared)'''
2301 2305 if self.sharedpath != self.path:
2302 2306 return b'store'
2303 2307 return None
2304 2308
2305 2309 def wjoin(self, f: bytes, *insidef: bytes) -> bytes:
2306 2310 return self.vfs.reljoin(self.root, f, *insidef)
2307 2311
2308 2312 def setparents(self, p1, p2=None):
2309 2313 if p2 is None:
2310 2314 p2 = self.nullid
2311 2315 self[None].setparents(p1, p2)
2312 2316 self._quick_access_changeid_invalidate()
2313 2317
2314 2318 def filectx(self, path: bytes, changeid=None, fileid=None, changectx=None):
2315 2319 """changeid must be a changeset revision, if specified.
2316 2320 fileid can be a file revision or node."""
2317 2321 return context.filectx(
2318 2322 self, path, changeid, fileid, changectx=changectx
2319 2323 )
2320 2324
2321 2325 def getcwd(self) -> bytes:
2322 2326 return self.dirstate.getcwd()
2323 2327
2324 2328 def pathto(self, f: bytes, cwd: Optional[bytes] = None) -> bytes:
2325 2329 return self.dirstate.pathto(f, cwd)
2326 2330
2327 2331 def _loadfilter(self, filter):
2328 2332 if filter not in self._filterpats:
2329 2333 l = []
2330 2334 for pat, cmd in self.ui.configitems(filter):
2331 2335 if cmd == b'!':
2332 2336 continue
2333 2337 mf = matchmod.match(self.root, b'', [pat])
2334 2338 fn = None
2335 2339 params = cmd
2336 2340 for name, filterfn in self._datafilters.items():
2337 2341 if cmd.startswith(name):
2338 2342 fn = filterfn
2339 2343 params = cmd[len(name) :].lstrip()
2340 2344 break
2341 2345 if not fn:
2342 2346 fn = lambda s, c, **kwargs: procutil.filter(s, c)
2343 2347 fn.__name__ = 'commandfilter'
2344 2348 # Wrap old filters not supporting keyword arguments
2345 2349 if not pycompat.getargspec(fn)[2]:
2346 2350 oldfn = fn
2347 2351 fn = lambda s, c, oldfn=oldfn, **kwargs: oldfn(s, c)
2348 2352 fn.__name__ = 'compat-' + oldfn.__name__
2349 2353 l.append((mf, fn, params))
2350 2354 self._filterpats[filter] = l
2351 2355 return self._filterpats[filter]
2352 2356
2353 2357 def _filter(self, filterpats, filename, data):
2354 2358 for mf, fn, cmd in filterpats:
2355 2359 if mf(filename):
2356 2360 self.ui.debug(
2357 2361 b"filtering %s through %s\n"
2358 2362 % (filename, cmd or pycompat.sysbytes(fn.__name__))
2359 2363 )
2360 2364 data = fn(data, cmd, ui=self.ui, repo=self, filename=filename)
2361 2365 break
2362 2366
2363 2367 return data
2364 2368
2365 2369 @unfilteredpropertycache
2366 2370 def _encodefilterpats(self):
2367 2371 return self._loadfilter(b'encode')
2368 2372
2369 2373 @unfilteredpropertycache
2370 2374 def _decodefilterpats(self):
2371 2375 return self._loadfilter(b'decode')
2372 2376
2373 2377 def adddatafilter(self, name, filter):
2374 2378 self._datafilters[name] = filter
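
# Usage sketch (illustrative): how an extension might register a data filter
# that [encode]/[decode] configuration can then reference by name; the filter
# name and behavior are made up:
#
#     def lf_normalize(data, params, ui=None, repo=None, filename=None, **kw):
#         return data.replace(b'\r\n', b'\n')
#
#     def reposetup(ui, repo):
#         if repo.local():
#             repo.adddatafilter(b'lfnormalize:', lf_normalize)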
2375 2379
2376 2380 def wread(self, filename: bytes) -> bytes:
2377 2381 if self.wvfs.islink(filename):
2378 2382 data = self.wvfs.readlink(filename)
2379 2383 else:
2380 2384 data = self.wvfs.read(filename)
2381 2385 return self._filter(self._encodefilterpats, filename, data)
2382 2386
2383 2387 def wwrite(
2384 2388 self,
2385 2389 filename: bytes,
2386 2390 data: bytes,
2387 2391 flags: bytes,
2388 2392 backgroundclose=False,
2389 2393 **kwargs
2390 2394 ) -> int:
2391 2395 """write ``data`` into ``filename`` in the working directory
2392 2396
2393 2397 This returns length of written (maybe decoded) data.
2394 2398 """
2395 2399 data = self._filter(self._decodefilterpats, filename, data)
2396 2400 if b'l' in flags:
2397 2401 self.wvfs.symlink(data, filename)
2398 2402 else:
2399 2403 self.wvfs.write(
2400 2404 filename, data, backgroundclose=backgroundclose, **kwargs
2401 2405 )
2402 2406 if b'x' in flags:
2403 2407 self.wvfs.setflags(filename, False, True)
2404 2408 else:
2405 2409 self.wvfs.setflags(filename, False, False)
2406 2410 return len(data)
2407 2411
2408 2412 def wwritedata(self, filename: bytes, data: bytes) -> bytes:
2409 2413 return self._filter(self._decodefilterpats, filename, data)
2410 2414
2411 2415 def currenttransaction(self):
2412 2416 """return the current transaction or None if none exists"""
2413 2417 if self._transref:
2414 2418 tr = self._transref()
2415 2419 else:
2416 2420 tr = None
2417 2421
2418 2422 if tr and tr.running():
2419 2423 return tr
2420 2424 return None
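
# Usage sketch (illustrative): callers typically open a transaction under the
# relevant locks and rely on the nesting handled by ``transaction()`` below:
#
#     with repo.wlock(), repo.lock(), repo.transaction(b'my-operation') as tr:
#         ...  # writes are covered by ``tr`` and rolled back on error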
2421 2425
2422 2426 def transaction(self, desc, report=None):
2423 2427 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
2424 2428 b'devel', b'check-locks'
2425 2429 ):
2426 2430 if self._currentlock(self._lockref) is None:
2427 2431 raise error.ProgrammingError(b'transaction requires locking')
2428 2432 tr = self.currenttransaction()
2429 2433 if tr is not None:
2430 2434 return tr.nest(name=desc)
2431 2435
2432 2436 # abort here if the journal already exists
2433 2437 if self.svfs.exists(b"journal"):
2434 2438 raise error.RepoError(
2435 2439 _(b"abandoned transaction found"),
2436 2440 hint=_(b"run 'hg recover' to clean up transaction"),
2437 2441 )
2438 2442
2439 2443 # At that point your dirstate should be clean:
2440 2444 #
2441 2445 # - If you don't have the wlock, why would you still have a dirty
2442 2446 # dirstate?
2443 2447 #
2444 2448 # - If you hold the wlock, you should not be opening a transaction in
2445 2449 # the middle of a `dirstate.changing_*` block. The transaction needs to
2446 2450 # be opened before that and wrap the change context.
2447 2451 #
2448 2452 # - If you are not within a `dirstate.changing_*` context, why is our
2449 2453 # dirstate dirty?
2450 2454 if self.dirstate._dirty:
2451 2455 m = "cannot open a transaction with a dirty dirstate"
2452 2456 raise error.ProgrammingError(m)
2453 2457
2454 2458 idbase = b"%.40f#%f" % (random.random(), time.time())
2455 2459 ha = hex(hashutil.sha1(idbase).digest())
2456 2460 txnid = b'TXN:' + ha
2457 2461 self.hook(b'pretxnopen', throw=True, txnname=desc, txnid=txnid)
2458 2462
2459 2463 self._writejournal(desc)
2460 2464 if report:
2461 2465 rp = report
2462 2466 else:
2463 2467 rp = self.ui.warn
2464 2468 vfsmap = self.vfs_map
2465 2469 # we must avoid a cyclic reference between repo and transaction.
2466 2470 reporef = weakref.ref(self)
2467 2471 # Code to track tag movement
2468 2472 #
2469 2473 # Since tags are all handled as file content, it is actually quite hard
2470 2474 # to track these movements from a code perspective. So we fall back to
2471 2475 # tracking at the repository level. One could envision tracking changes
2472 2476 # to the '.hgtags' file through changegroup application, but that fails to
2473 2477 # cope with cases where a transaction exposes new heads without a changegroup
2474 2478 # being involved (e.g. phase movement).
2475 2479 #
2476 2480 # For now, we gate the feature behind a flag since it likely comes
2477 2481 # with performance impacts. The current code runs more often than needed
2478 2482 # and does not use caches as much as it could. The current focus is on
2479 2483 # the behavior of the feature, so we disable it by default. The flag
2480 2484 # will be removed when we are happy with the performance impact.
2481 2485 #
2482 2486 # Once this feature is no longer experimental move the following
2483 2487 # documentation to the appropriate help section:
2484 2488 #
2485 2489 # The ``HG_TAG_MOVED`` variable will be set if the transaction touched
2486 2490 # tags (new or changed or deleted tags). In addition the details of
2487 2491 # these changes are made available in a file at:
2488 2492 # ``REPOROOT/.hg/changes/tags.changes``.
2489 2493 # Make sure you check for HG_TAG_MOVED before reading that file as it
2490 2494 # might exist from a previous transaction even if no tags were touched
2491 2495 # in this one. Changes are recorded in a line-based format::
2492 2496 #
2493 2497 # <action> <hex-node> <tag-name>\n
2494 2498 #
2495 2499 # Actions are defined as follows:
2496 2500 # "-R": tag is removed,
2497 2501 # "+A": tag is added,
2498 2502 # "-M": tag is moved (old value),
2499 2503 # "+M": tag is moved (new value),
2500 2504 tracktags = lambda x: None
2501 2505 # experimental config: experimental.hook-track-tags
2502 2506 shouldtracktags = self.ui.configbool(
2503 2507 b'experimental', b'hook-track-tags'
2504 2508 )
2505 2509 if desc != b'strip' and shouldtracktags:
2506 2510 oldheads = self.changelog.headrevs()
2507 2511
2508 2512 def tracktags(tr2):
2509 2513 repo = reporef()
2510 2514 assert repo is not None # help pytype
2511 2515 oldfnodes = tagsmod.fnoderevs(repo.ui, repo, oldheads)
2512 2516 newheads = repo.changelog.headrevs()
2513 2517 newfnodes = tagsmod.fnoderevs(repo.ui, repo, newheads)
2514 2518 # note: we compare lists here.
2515 2519 # As we do it only once, building sets would not be cheaper
2516 2520 changes = tagsmod.difftags(repo.ui, repo, oldfnodes, newfnodes)
2517 2521 if changes:
2518 2522 tr2.hookargs[b'tag_moved'] = b'1'
2519 2523 with repo.vfs(
2520 2524 b'changes/tags.changes', b'w', atomictemp=True
2521 2525 ) as changesfile:
2522 2526 # note: we do not register the file with the transaction
2523 2527 # because we need it to still exist when the transaction
2524 2528 # is closed (for txnclose hooks)
2525 2529 tagsmod.writediff(changesfile, changes)
2526 2530
2527 2531 def validate(tr2):
2528 2532 """will run pre-closing hooks"""
2529 2533 # XXX the transaction API is a bit lacking here so we take a hacky
2530 2534 # path for now
2531 2535 #
2532 2536 # We cannot add this as a "pending" hook since the 'tr.hookargs'
2533 2537 # dict is copied before these run. In addition, we need the data
2534 2538 # available to in-memory hooks too.
2535 2539 #
2536 2540 # Moreover, we also need to make sure this runs before txnclose
2537 2541 # hooks and there is no "pending" mechanism that would execute
2538 2542 # logic only if hooks are about to run.
2539 2543 #
2540 2544 # Fixing this limitation of the transaction is also needed to track
2541 2545 # other families of changes (bookmarks, phases, obsolescence).
2542 2546 #
2543 2547 # This will have to be fixed before we remove the experimental
2544 2548 # gating.
2545 2549 tracktags(tr2)
2546 2550 repo = reporef()
2547 2551 assert repo is not None # help pytype
2548 2552
2549 2553 singleheadopt = (b'experimental', b'single-head-per-branch')
2550 2554 singlehead = repo.ui.configbool(*singleheadopt)
2551 2555 if singlehead:
2552 2556 singleheadsub = repo.ui.configsuboptions(*singleheadopt)[1]
2553 2557 accountclosed = singleheadsub.get(
2554 2558 b"account-closed-heads", False
2555 2559 )
2556 2560 if singleheadsub.get(b"public-changes-only", False):
2557 2561 filtername = b"immutable"
2558 2562 else:
2559 2563 filtername = b"visible"
2560 2564 scmutil.enforcesinglehead(
2561 2565 repo, tr2, desc, accountclosed, filtername
2562 2566 )
2563 2567 if hook.hashook(repo.ui, b'pretxnclose-bookmark'):
2564 2568 for name, (old, new) in sorted(
2565 2569 tr.changes[b'bookmarks'].items()
2566 2570 ):
2567 2571 args = tr.hookargs.copy()
2568 2572 args.update(bookmarks.preparehookargs(name, old, new))
2569 2573 repo.hook(
2570 2574 b'pretxnclose-bookmark',
2571 2575 throw=True,
2572 2576 **pycompat.strkwargs(args)
2573 2577 )
2574 2578 if hook.hashook(repo.ui, b'pretxnclose-phase'):
2575 2579 cl = repo.unfiltered().changelog
2576 2580 for revs, (old, new) in tr.changes[b'phases']:
2577 2581 for rev in revs:
2578 2582 args = tr.hookargs.copy()
2579 2583 node = hex(cl.node(rev))
2580 2584 args.update(phases.preparehookargs(node, old, new))
2581 2585 repo.hook(
2582 2586 b'pretxnclose-phase',
2583 2587 throw=True,
2584 2588 **pycompat.strkwargs(args)
2585 2589 )
2586 2590
2587 2591 repo.hook(
2588 2592 b'pretxnclose', throw=True, **pycompat.strkwargs(tr.hookargs)
2589 2593 )
2590 2594
2591 2595 def releasefn(tr, success):
2592 2596 repo = reporef()
2593 2597 if repo is None:
2594 2598 # If the repo has been GC'd (and this release function is being
2595 2599 # called from transaction.__del__), there's not much we can do,
2596 2600 # so just leave the unfinished transaction there and let the
2597 2601 # user run `hg recover`.
2598 2602 return
2599 2603 if success:
2600 2604 # this should be explicitly invoked here, because
2601 2605 # in-memory changes aren't written out at closing
2602 2606 # transaction, if tr.addfilegenerator (via
2603 2607 # dirstate.write or so) isn't invoked while
2604 2608 # transaction running
2605 2609 repo.dirstate.write(None)
2606 2610 else:
2607 2611 # discard all changes (including ones already written
2608 2612 # out) in this transaction
2609 2613 repo.invalidate(clearfilecache=True)
2610 2614
2611 2615 tr = transaction.transaction(
2612 2616 rp,
2613 2617 self.svfs,
2614 2618 vfsmap,
2615 2619 b"journal",
2616 2620 b"undo",
2617 2621 lambda: None,
2618 2622 self.store.createmode,
2619 2623 validator=validate,
2620 2624 releasefn=releasefn,
2621 2625 checkambigfiles=_cachedfiles,
2622 2626 name=desc,
2623 2627 )
2624 2628 for vfs_id, path in self._journalfiles():
2625 2629 tr.add_journal(vfs_id, path)
2626 2630 tr.changes[b'origrepolen'] = len(self)
2627 2631 tr.changes[b'obsmarkers'] = set()
2628 2632 tr.changes[b'phases'] = []
2629 2633 tr.changes[b'bookmarks'] = {}
2630 2634
2631 2635 tr.hookargs[b'txnid'] = txnid
2632 2636 tr.hookargs[b'txnname'] = desc
2633 2637 tr.hookargs[b'changes'] = tr.changes
2634 2638 # note: writing the fncache only during finalize means that the file is
2635 2639 # outdated when running hooks. As fncache is used for streaming clone,
2636 2640 # this is not expected to break anything that happens during the hooks.
2637 2641 tr.addfinalize(b'flush-fncache', self.store.write)
2638 2642
2639 2643 def txnclosehook(tr2):
2640 2644 """To be run if the transaction is successful; will schedule a hook run"""
2641 2645 # Don't reference tr2 in hook() so we don't hold a reference.
2642 2646 # This reduces memory consumption when there are multiple
2643 2647 # transactions per lock. This can likely go away if issue5045
2644 2648 # fixes the function accumulation.
2645 2649 hookargs = tr2.hookargs
2646 2650
2647 2651 def hookfunc(unused_success):
2648 2652 repo = reporef()
2649 2653 assert repo is not None # help pytype
2650 2654
2651 2655 if hook.hashook(repo.ui, b'txnclose-bookmark'):
2652 2656 bmchanges = sorted(tr.changes[b'bookmarks'].items())
2653 2657 for name, (old, new) in bmchanges:
2654 2658 args = tr.hookargs.copy()
2655 2659 args.update(bookmarks.preparehookargs(name, old, new))
2656 2660 repo.hook(
2657 2661 b'txnclose-bookmark',
2658 2662 throw=False,
2659 2663 **pycompat.strkwargs(args)
2660 2664 )
2661 2665
2662 2666 if hook.hashook(repo.ui, b'txnclose-phase'):
2663 2667 cl = repo.unfiltered().changelog
2664 2668 phasemv = sorted(
2665 2669 tr.changes[b'phases'], key=lambda r: r[0][0]
2666 2670 )
2667 2671 for revs, (old, new) in phasemv:
2668 2672 for rev in revs:
2669 2673 args = tr.hookargs.copy()
2670 2674 node = hex(cl.node(rev))
2671 2675 args.update(phases.preparehookargs(node, old, new))
2672 2676 repo.hook(
2673 2677 b'txnclose-phase',
2674 2678 throw=False,
2675 2679 **pycompat.strkwargs(args)
2676 2680 )
2677 2681
2678 2682 repo.hook(
2679 2683 b'txnclose', throw=False, **pycompat.strkwargs(hookargs)
2680 2684 )
2681 2685
2682 2686 repo = reporef()
2683 2687 assert repo is not None # help pytype
2684 2688 repo._afterlock(hookfunc)
2685 2689
2686 2690 tr.addfinalize(b'txnclose-hook', txnclosehook)
2687 2691 # Include a leading "-" to make it happen before the transaction summary
2688 2692 # reports registered via scmutil.registersummarycallback() whose names
2689 2693 # are 00-txnreport etc. That way, the caches will be warm when the
2690 2694 # callbacks run.
2691 2695 tr.addpostclose(b'-warm-cache', self._buildcacheupdater(tr))
2692 2696
2693 2697 def txnaborthook(tr2):
2694 2698 """To be run if the transaction is aborted"""
2695 2699 repo = reporef()
2696 2700 assert repo is not None # help pytype
2697 2701 repo.hook(
2698 2702 b'txnabort', throw=False, **pycompat.strkwargs(tr2.hookargs)
2699 2703 )
2700 2704
2701 2705 tr.addabort(b'txnabort-hook', txnaborthook)
2702 2706 # avoid eager cache invalidation. in-memory data should be identical
2703 2707 # to stored data if transaction has no error.
2704 2708 tr.addpostclose(b'refresh-filecachestats', self._refreshfilecachestats)
2705 2709 self._transref = weakref.ref(tr)
2706 2710 scmutil.registersummarycallback(self, tr, desc)
2707 2711 # This only exists to deal with rollback's need to have viable
2708 2712 # parents at the end of the operation. So back up viable parents at the
2709 2713 # time of this operation.
2710 2714 #
2711 2715 # We only do it when the `wlock` is taken, otherwise others might be
2712 2716 # altering the dirstate under us.
2713 2717 #
2714 2718 # This is really not a great way to do this (first, because we cannot
2715 2719 # always do it). More viable alternatives exist:
2716 2720 #
2717 2721 # - backing up only the working copy parents in a dedicated file and doing
2718 2722 # a clean "keep-update" to them on `hg rollback`.
2719 2723 #
2720 2724 # - slightly changing the behavior and applying a logic similar to "hg
2721 2725 # strip" to pick a working copy destination on `hg rollback`
2722 2726 if self.currentwlock() is not None:
2723 2727 ds = self.dirstate
2724 2728 if not self.vfs.exists(b'branch'):
2725 2729 # force a file to be written if none exists
2726 2730 ds.setbranch(b'default', None)
2727 2731
2728 2732 def backup_dirstate(tr):
2729 2733 for f in ds.all_file_names():
2730 2734 # hardlink backup is okay because `dirstate` is always
2731 2735 # atomically written and possible data files are append-only
2732 2736 # and resistant to trailing data.
2733 2737 tr.addbackup(f, hardlink=True, location=b'plain')
2734 2738
2735 2739 tr.addvalidator(b'dirstate-backup', backup_dirstate)
2736 2740 return tr
2737 2741
2738 2742 def _journalfiles(self):
2739 2743 return (
2740 2744 (self.svfs, b'journal'),
2741 2745 (self.vfs, b'journal.desc'),
2742 2746 )
2743 2747
2744 2748 def undofiles(self):
2745 2749 return [(vfs, undoname(x)) for vfs, x in self._journalfiles()]
2746 2750
2747 2751 @unfilteredmethod
2748 2752 def _writejournal(self, desc):
2749 2753 self.vfs.write(b"journal.desc", b"%d\n%s\n" % (len(self), desc))
2750 2754
2751 2755 def recover(self):
2752 2756 with self.lock():
2753 2757 if self.svfs.exists(b"journal"):
2754 2758 self.ui.status(_(b"rolling back interrupted transaction\n"))
2755 2759 vfsmap = self.vfs_map
2756 2760 transaction.rollback(
2757 2761 self.svfs,
2758 2762 vfsmap,
2759 2763 b"journal",
2760 2764 self.ui.warn,
2761 2765 checkambigfiles=_cachedfiles,
2762 2766 )
2763 2767 self.invalidate()
2764 2768 return True
2765 2769 else:
2766 2770 self.ui.warn(_(b"no interrupted transaction available\n"))
2767 2771 return False
2768 2772
2769 2773 def rollback(self, dryrun=False, force=False):
2770 2774 wlock = lock = None
2771 2775 try:
2772 2776 wlock = self.wlock()
2773 2777 lock = self.lock()
2774 2778 if self.svfs.exists(b"undo"):
2775 2779 return self._rollback(dryrun, force)
2776 2780 else:
2777 2781 self.ui.warn(_(b"no rollback information available\n"))
2778 2782 return 1
2779 2783 finally:
2780 2784 release(lock, wlock)
2781 2785
2782 2786 @unfilteredmethod # Until we get smarter cache management
2783 2787 def _rollback(self, dryrun, force):
2784 2788 ui = self.ui
2785 2789
2786 2790 parents = self.dirstate.parents()
2787 2791 try:
2788 2792 args = self.vfs.read(b'undo.desc').splitlines()
2789 2793 (oldlen, desc, detail) = (int(args[0]), args[1], None)
2790 2794 if len(args) >= 3:
2791 2795 detail = args[2]
2792 2796 oldtip = oldlen - 1
2793 2797
2794 2798 if detail and ui.verbose:
2795 2799 msg = _(
2796 2800 b'repository tip rolled back to revision %d'
2797 2801 b' (undo %s: %s)\n'
2798 2802 ) % (oldtip, desc, detail)
2799 2803 else:
2800 2804 msg = _(
2801 2805 b'repository tip rolled back to revision %d (undo %s)\n'
2802 2806 ) % (oldtip, desc)
2803 2807 parentgone = any(self[p].rev() > oldtip for p in parents)
2804 2808 except IOError:
2805 2809 msg = _(b'rolling back unknown transaction\n')
2806 2810 desc = None
2807 2811 parentgone = True
2808 2812
2809 2813 if not force and self[b'.'] != self[b'tip'] and desc == b'commit':
2810 2814 raise error.Abort(
2811 2815 _(
2812 2816 b'rollback of last commit while not checked out '
2813 2817 b'may lose data'
2814 2818 ),
2815 2819 hint=_(b'use -f to force'),
2816 2820 )
2817 2821
2818 2822 ui.status(msg)
2819 2823 if dryrun:
2820 2824 return 0
2821 2825
2822 2826 self.destroying()
2823 2827 vfsmap = self.vfs_map
2824 2828 skip_journal_pattern = None
2825 2829 if not parentgone:
2826 2830 skip_journal_pattern = RE_SKIP_DIRSTATE_ROLLBACK
2827 2831 transaction.rollback(
2828 2832 self.svfs,
2829 2833 vfsmap,
2830 2834 b'undo',
2831 2835 ui.warn,
2832 2836 checkambigfiles=_cachedfiles,
2833 2837 skip_journal_pattern=skip_journal_pattern,
2834 2838 )
2835 2839 self.invalidate()
2836 2840 self.dirstate.invalidate()
2837 2841
2838 2842 if parentgone:
2839 2843 # replace this with some explicit parent update in the future.
2840 2844 has_node = self.changelog.index.has_node
2841 2845 if not all(has_node(p) for p in self.dirstate._pl):
2842 2846 # There was no dirstate to back up initially, we need to drop
2843 2847 # the existing one.
2844 2848 with self.dirstate.changing_parents(self):
2845 2849 self.dirstate.setparents(self.nullid)
2846 2850 self.dirstate.clear()
2847 2851
2848 2852 parents = tuple([p.rev() for p in self[None].parents()])
2849 2853 if len(parents) > 1:
2850 2854 ui.status(
2851 2855 _(
2852 2856 b'working directory now based on '
2853 2857 b'revisions %d and %d\n'
2854 2858 )
2855 2859 % parents
2856 2860 )
2857 2861 else:
2858 2862 ui.status(
2859 2863 _(b'working directory now based on revision %d\n') % parents
2860 2864 )
2861 2865 mergestatemod.mergestate.clean(self)
2862 2866
2863 2867 # TODO: if we know which new heads may result from this rollback, pass
2864 2868 # them to destroy(), which will prevent the branchhead cache from being
2865 2869 # invalidated.
2866 2870 self.destroyed()
2867 2871 return 0
2868 2872
2869 2873 def _buildcacheupdater(self, newtransaction):
2870 2874 """called during a transaction to build the callback that updates caches
2871 2875
2872 2876 Lives on the repository to help extensions that might want to augment
2873 2877 this logic. For this purpose, the created transaction is passed to the
2874 2878 method.
2875 2879 """
2876 2880 # we must avoid a cyclic reference between repo and transaction.
2877 2881 reporef = weakref.ref(self)
2878 2882
2879 2883 def updater(tr):
2880 2884 repo = reporef()
2881 2885 assert repo is not None # help pytype
2882 2886 repo.updatecaches(tr)
2883 2887
2884 2888 return updater
2885 2889
2886 2890 @unfilteredmethod
2887 2891 def updatecaches(self, tr=None, full=False, caches=None):
2888 2892 """warm appropriate caches
2889 2893
2890 2894 If this function is called after a transaction closed, the transaction
2891 2895 will be available in the 'tr' argument. This can be used to selectively
2892 2896 update caches relevant to the changes in that transaction.
2893 2897
2894 2898 If 'full' is set, make sure all caches the function knows about have
2895 2899 up-to-date data. Even the ones usually loaded more lazily.
2896 2900
2897 2901 The `full` argument can take a special "post-clone" value. In this case
2898 2902 the cache warming is done after a clone, and some of the slower caches might
2899 2903 be skipped, namely the `.fnodetags` one. This argument is 5.8-specific,
2900 2904 as we plan for a cleaner way to deal with this in 5.9.
2901 2905 """
2902 2906 if tr is not None and tr.hookargs.get(b'source') == b'strip':
2903 2907 # During strip, many caches are invalid but
2904 2908 # a later call to `destroyed` will refresh them.
2905 2909 return
2906 2910
2907 2911 unfi = self.unfiltered()
2908 2912
2909 2913 if full:
2910 2914 msg = (
2911 2915 "`full` argument for `repo.updatecaches` is deprecated\n"
2912 2916 "(use `caches=repository.CACHE_ALL` instead)"
2913 2917 )
2914 2918 self.ui.deprecwarn(msg, b"5.9")
2915 2919 caches = repository.CACHES_ALL
2916 2920 if full == b"post-clone":
2917 2921 caches = repository.CACHES_POST_CLONE
2918 2922 caches = repository.CACHES_ALL
2919 2923 elif caches is None:
2920 2924 caches = repository.CACHES_DEFAULT
2921 2925
2922 2926 if repository.CACHE_BRANCHMAP_SERVED in caches:
2923 2927 if tr is None or tr.changes[b'origrepolen'] < len(self):
2924 2928 # accessing the 'served' branchmap should refresh all the others,
2925 2929 self.ui.debug(b'updating the branch cache\n')
2926 2930 self.filtered(b'served').branchmap()
2927 2931 self.filtered(b'served.hidden').branchmap()
2928 2932 # flush all possibly delayed writes.
2929 2933 self._branchcaches.write_delayed(self)
2930 2934
2931 2935 if repository.CACHE_CHANGELOG_CACHE in caches:
2932 2936 self.changelog.update_caches(transaction=tr)
2933 2937
2934 2938 if repository.CACHE_MANIFESTLOG_CACHE in caches:
2935 2939 self.manifestlog.update_caches(transaction=tr)
2936 2940 for entry in self.store.walk():
2937 2941 if not entry.is_revlog:
2938 2942 continue
2939 2943 if not entry.is_manifestlog:
2940 2944 continue
2941 2945 manifestrevlog = entry.get_revlog_instance(self).get_revlog()
2942 2946 if manifestrevlog is not None:
2943 2947 manifestrevlog.update_caches(transaction=tr)
2944 2948
2945 2949 if repository.CACHE_REV_BRANCH in caches:
2946 2950 rbc = unfi.revbranchcache()
2947 2951 for r in unfi.changelog:
2948 2952 rbc.branchinfo(r)
2949 2953 rbc.write()
2950 2954
2951 2955 if repository.CACHE_FULL_MANIFEST in caches:
2952 2956 # ensure the working copy parents are in the manifestfulltextcache
2953 2957 for ctx in self[b'.'].parents():
2954 2958 ctx.manifest() # accessing the manifest is enough
2955 2959
2956 2960 if repository.CACHE_FILE_NODE_TAGS in caches:
2957 2961 # accessing fnode cache warms the cache
2958 2962 tagsmod.fnoderevs(self.ui, unfi, unfi.changelog.revs())
2959 2963
2960 2964 if repository.CACHE_TAGS_DEFAULT in caches:
2961 2965 # accessing tags warms the cache
2962 2966 self.tags()
2963 2967 if repository.CACHE_TAGS_SERVED in caches:
2964 2968 self.filtered(b'served').tags()
2965 2969
2966 2970 if repository.CACHE_BRANCHMAP_ALL in caches:
2967 2971 # The CACHE_BRANCHMAP_ALL updates lazily-loaded caches immediately,
2968 2972 # so we're forcing a write to cause these caches to be warmed up
2969 2973 # even if they haven't explicitly been requested yet (if they've
2970 2974 # never been used by hg, they won't ever have been written, even if
2971 2975 # they're a subset of another kind of cache that *has* been used).
2972 2976 for filt in repoview.filtertable.keys():
2973 2977 filtered = self.filtered(filt)
2974 2978 filtered.branchmap().write(filtered)
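
# Usage sketch (illustrative): explicitly warming every cache after an
# operation, similar to what ``hg debugupdatecaches`` does:
#
#     with repo.wlock(), repo.lock():
#         repo.updatecaches(caches=repository.CACHES_ALL)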
2975 2979
2976 2980 def invalidatecaches(self):
2977 2981 if '_tagscache' in vars(self):
2978 2982 # can't use delattr on proxy
2979 2983 del self.__dict__['_tagscache']
2980 2984
2981 2985 self._branchcaches.clear()
2982 2986 self.invalidatevolatilesets()
2983 2987 self._sparsesignaturecache.clear()
2984 2988
2985 2989 def invalidatevolatilesets(self):
2986 2990 self.filteredrevcache.clear()
2987 2991 obsolete.clearobscaches(self)
2988 2992 self._quick_access_changeid_invalidate()
2989 2993
2990 2994 def invalidatedirstate(self):
2991 2995 """Invalidates the dirstate, causing the next call to dirstate
2992 2996 to check if it was modified since the last time it was read,
2993 2997 rereading it if it has.
2994 2998
2995 2999 This is different from dirstate.invalidate() in that it doesn't always
2996 3000 reread the dirstate. Use dirstate.invalidate() if you want to
2997 3001 explicitly read the dirstate again (i.e. restoring it to a previous
2998 3002 known good state)."""
2999 3003 unfi = self.unfiltered()
3000 3004 if 'dirstate' in unfi.__dict__:
3001 3005 assert not self.dirstate.is_changing_any
3002 3006 del unfi.__dict__['dirstate']
3003 3007
3004 3008 def invalidate(self, clearfilecache=False):
3005 3009 """Invalidates both store and non-store parts other than dirstate
3006 3010
3007 3011 If a transaction is running, invalidation of store is omitted,
3008 3012 because discarding in-memory changes might cause inconsistency
3009 3013         (e.g. an incomplete fncache causes unintentional failure, but
3010 3014         a redundant one doesn't).
3011 3015 """
3012 3016 unfiltered = self.unfiltered() # all file caches are stored unfiltered
3013 3017 for k in list(self._filecache.keys()):
3014 3018 if (
3015 3019 k == b'changelog'
3016 3020 and self.currenttransaction()
3017 3021 and self.changelog._delayed
3018 3022 ):
3019 3023 # The changelog object may store unwritten revisions. We don't
3020 3024 # want to lose them.
3021 3025 # TODO: Solve the problem instead of working around it.
3022 3026 continue
3023 3027
3024 3028 if clearfilecache:
3025 3029 del self._filecache[k]
3026 3030 try:
3027 3031 # XXX ideally, the key would be a unicode string to match the
3028 3032             # fact it refers to an attribute name. However changing this was
3029 3033             # a bit of scope creep compared to the series cleaning up
3030 3034             # del/set/getattr so we kept things simple here.
3031 3035 delattr(unfiltered, pycompat.sysstr(k))
3032 3036 except AttributeError:
3033 3037 pass
3034 3038 self.invalidatecaches()
3035 3039 if not self.currenttransaction():
3036 3040 # TODO: Changing contents of store outside transaction
3037 3041 # causes inconsistency. We should make in-memory store
3038 3042 # changes detectable, and abort if changed.
3039 3043 self.store.invalidatecaches()
3040 3044
3041 3045 def invalidateall(self):
3042 3046 """Fully invalidates both store and non-store parts, causing the
3043 3047 subsequent operation to reread any outside changes."""
3044 3048 # extension should hook this to invalidate its caches
3045 3049 self.invalidate()
3046 3050 self.invalidatedirstate()
3047 3051
3048 3052 @unfilteredmethod
3049 3053 def _refreshfilecachestats(self, tr):
3050 3054 """Reload stats of cached files so that they are flagged as valid"""
3051 3055 for k, ce in self._filecache.items():
3052 3056 k = pycompat.sysstr(k)
3053 3057 if k == 'dirstate' or k not in self.__dict__:
3054 3058 continue
3055 3059 ce.refresh()
3056 3060
3057 3061 def _lock(
3058 3062 self,
3059 3063 vfs,
3060 3064 lockname,
3061 3065 wait,
3062 3066 releasefn,
3063 3067 acquirefn,
3064 3068 desc,
3065 3069 ):
3066 3070 timeout = 0
3067 3071 warntimeout = 0
3068 3072 if wait:
3069 3073 timeout = self.ui.configint(b"ui", b"timeout")
3070 3074 warntimeout = self.ui.configint(b"ui", b"timeout.warn")
3071 3075 # internal config: ui.signal-safe-lock
3072 3076 signalsafe = self.ui.configbool(b'ui', b'signal-safe-lock')
3073 3077
3074 3078 l = lockmod.trylock(
3075 3079 self.ui,
3076 3080 vfs,
3077 3081 lockname,
3078 3082 timeout,
3079 3083 warntimeout,
3080 3084 releasefn=releasefn,
3081 3085 acquirefn=acquirefn,
3082 3086 desc=desc,
3083 3087 signalsafe=signalsafe,
3084 3088 )
3085 3089 return l
3086 3090
3087 3091 def _afterlock(self, callback):
3088 3092 """add a callback to be run when the repository is fully unlocked
3089 3093
3090 3094 The callback will be executed when the outermost lock is released
3091 3095 (with wlock being higher level than 'lock')."""
3092 3096 for ref in (self._wlockref, self._lockref):
3093 3097 l = ref and ref()
3094 3098 if l and l.held:
3095 3099 l.postrelease.append(callback)
3096 3100 break
3097 3101             else: # no lock has been found.
3098 3102 callback(True)
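# Hedged sketch (not part of the diff) of the callback contract described
# above: the callable receives a single boolean once the outermost lock is
# released. The callback body and the surrounding locking are illustrative.
def _notify(success):
    # `success` tells whether the locked section completed without error
    if success:
        pass  # e.g. flush some extension-level cache here

with repo.lock():
    repo._afterlock(_notify)
    # ... locked work ...
# _notify(True) runs here, once the lock is fully released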
3099 3103
3100 3104 def lock(self, wait=True):
3101 3105 """Lock the repository store (.hg/store) and return a weak reference
3102 3106 to the lock. Use this before modifying the store (e.g. committing or
3103 3107         stripping). If you are opening a transaction, get a lock as well.
3104 3108
3105 3109         If both 'lock' and 'wlock' must be acquired, ensure you always acquire
3106 3110 'wlock' first to avoid a dead-lock hazard."""
3107 3111 l = self._currentlock(self._lockref)
3108 3112 if l is not None:
3109 3113 l.lock()
3110 3114 return l
3111 3115
3112 3116 l = self._lock(
3113 3117 vfs=self.svfs,
3114 3118 lockname=b"lock",
3115 3119 wait=wait,
3116 3120 releasefn=None,
3117 3121 acquirefn=self.invalidate,
3118 3122 desc=_(b'repository %s') % self.origroot,
3119 3123 )
3120 3124 self._lockref = weakref.ref(l)
3121 3125 return l
3122 3126
3123 3127 def wlock(self, wait=True):
3124 3128 """Lock the non-store parts of the repository (everything under
3125 3129 .hg except .hg/store) and return a weak reference to the lock.
3126 3130
3127 3131 Use this before modifying files in .hg.
3128 3132
3129 3133         If both 'lock' and 'wlock' must be acquired, ensure you always acquire
3130 3134 'wlock' first to avoid a dead-lock hazard."""
3131 3135 l = self._wlockref() if self._wlockref else None
3132 3136 if l is not None and l.held:
3133 3137 l.lock()
3134 3138 return l
3135 3139
3136 3140 # We do not need to check for non-waiting lock acquisition. Such
3137 3141         # acquisition would not cause a dead-lock, as it would just fail.
3138 3142 if wait and (
3139 3143 self.ui.configbool(b'devel', b'all-warnings')
3140 3144 or self.ui.configbool(b'devel', b'check-locks')
3141 3145 ):
3142 3146 if self._currentlock(self._lockref) is not None:
3143 3147 self.ui.develwarn(b'"wlock" acquired after "lock"')
3144 3148
3145 3149 def unlock():
3146 3150 if self.dirstate.is_changing_any:
3147 3151 msg = b"wlock release in the middle of a changing parents"
3148 3152 self.ui.develwarn(msg)
3149 3153 self.dirstate.invalidate()
3150 3154 else:
3151 3155 if self.dirstate._dirty:
3152 3156 msg = b"dirty dirstate on wlock release"
3153 3157 self.ui.develwarn(msg)
3154 3158 self.dirstate.write(None)
3155 3159
3156 3160 unfi = self.unfiltered()
3157 3161 if 'dirstate' in unfi.__dict__:
3158 3162 del unfi.__dict__['dirstate']
3159 3163
3160 3164 l = self._lock(
3161 3165 self.vfs,
3162 3166 b"wlock",
3163 3167 wait,
3164 3168 unlock,
3165 3169 self.invalidatedirstate,
3166 3170 _(b'working directory of %s') % self.origroot,
3167 3171 )
3168 3172 self._wlockref = weakref.ref(l)
3169 3173 return l
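# Hedged sketch (not part of the diff) of the acquisition order both
# docstrings above insist on: 'wlock' before 'lock'. The commit() method
# further down uses exactly this pattern; the body here is illustrative.
with repo.wlock(), repo.lock():
    # .hg (working copy metadata) and .hg/store are both protected here;
    # taking the locks in the opposite order risks the dead-lock hazard the
    # docstrings warn about (and trips the devel warning above).
    pass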
3170 3174
3171 3175 def _currentlock(self, lockref):
3172 3176 """Returns the lock if it's held, or None if it's not."""
3173 3177 if lockref is None:
3174 3178 return None
3175 3179 l = lockref()
3176 3180 if l is None or not l.held:
3177 3181 return None
3178 3182 return l
3179 3183
3180 3184 def currentwlock(self):
3181 3185 """Returns the wlock if it's held, or None if it's not."""
3182 3186 return self._currentlock(self._wlockref)
3183 3187
3184 3188 def currentlock(self):
3185 3189 """Returns the lock if it's held, or None if it's not."""
3186 3190 return self._currentlock(self._lockref)
3187 3191
3188 3192 def checkcommitpatterns(self, wctx, match, status, fail):
3189 3193 """check for commit arguments that aren't committable"""
3190 3194 if match.isexact() or match.prefix():
3191 3195 matched = set(status.modified + status.added + status.removed)
3192 3196
3193 3197 for f in match.files():
3194 3198 f = self.dirstate.normalize(f)
3195 3199 if f == b'.' or f in matched or f in wctx.substate:
3196 3200 continue
3197 3201 if f in status.deleted:
3198 3202 fail(f, _(b'file not found!'))
3199 3203 # Is it a directory that exists or used to exist?
3200 3204 if self.wvfs.isdir(f) or wctx.p1().hasdir(f):
3201 3205 d = f + b'/'
3202 3206 for mf in matched:
3203 3207 if mf.startswith(d):
3204 3208 break
3205 3209 else:
3206 3210 fail(f, _(b"no match under directory!"))
3207 3211 elif f not in self.dirstate:
3208 3212 fail(f, _(b"file not tracked!"))
3209 3213
3210 3214 @unfilteredmethod
3211 3215 def commit(
3212 3216 self,
3213 3217 text=b"",
3214 3218 user=None,
3215 3219 date=None,
3216 3220 match=None,
3217 3221 force=False,
3218 3222 editor=None,
3219 3223 extra=None,
3220 3224 ):
3221 3225 """Add a new revision to current repository.
3222 3226
3223 3227 Revision information is gathered from the working directory,
3224 3228 match can be used to filter the committed files. If editor is
3225 3229 supplied, it is called to get a commit message.
3226 3230 """
3227 3231 if extra is None:
3228 3232 extra = {}
3229 3233
3230 3234 def fail(f, msg):
3231 3235 raise error.InputError(b'%s: %s' % (f, msg))
3232 3236
3233 3237 if not match:
3234 3238 match = matchmod.always()
3235 3239
3236 3240 if not force:
3237 3241 match.bad = fail
3238 3242
3239 3243 # lock() for recent changelog (see issue4368)
3240 3244 with self.wlock(), self.lock():
3241 3245 wctx = self[None]
3242 3246 merge = len(wctx.parents()) > 1
3243 3247
3244 3248 if not force and merge and not match.always():
3245 3249 raise error.Abort(
3246 3250 _(
3247 3251 b'cannot partially commit a merge '
3248 3252 b'(do not specify files or patterns)'
3249 3253 )
3250 3254 )
3251 3255
3252 3256 status = self.status(match=match, clean=force)
3253 3257 if force:
3254 3258 status.modified.extend(
3255 3259 status.clean
3256 3260 ) # mq may commit clean files
3257 3261
3258 3262 # check subrepos
3259 3263 subs, commitsubs, newstate = subrepoutil.precommit(
3260 3264 self.ui, wctx, status, match, force=force
3261 3265 )
3262 3266
3263 3267 # make sure all explicit patterns are matched
3264 3268 if not force:
3265 3269 self.checkcommitpatterns(wctx, match, status, fail)
3266 3270
3267 3271 cctx = context.workingcommitctx(
3268 3272 self, status, text, user, date, extra
3269 3273 )
3270 3274
3271 3275 ms = mergestatemod.mergestate.read(self)
3272 3276 mergeutil.checkunresolved(ms)
3273 3277
3274 3278 # internal config: ui.allowemptycommit
3275 3279 if cctx.isempty() and not self.ui.configbool(
3276 3280 b'ui', b'allowemptycommit'
3277 3281 ):
3278 3282 self.ui.debug(b'nothing to commit, clearing merge state\n')
3279 3283 ms.reset()
3280 3284 return None
3281 3285
3282 3286 if merge and cctx.deleted():
3283 3287 raise error.Abort(_(b"cannot commit merge with missing files"))
3284 3288
3285 3289 if editor:
3286 3290 cctx._text = editor(self, cctx, subs)
3287 3291 edited = text != cctx._text
3288 3292
3289 3293 # Save commit message in case this transaction gets rolled back
3290 3294 # (e.g. by a pretxncommit hook). Leave the content alone on
3291 3295 # the assumption that the user will use the same editor again.
3292 3296 msg_path = self.savecommitmessage(cctx._text)
3293 3297
3294 3298 # commit subs and write new state
3295 3299 if subs:
3296 3300 uipathfn = scmutil.getuipathfn(self)
3297 3301 for s in sorted(commitsubs):
3298 3302 sub = wctx.sub(s)
3299 3303 self.ui.status(
3300 3304 _(b'committing subrepository %s\n')
3301 3305 % uipathfn(subrepoutil.subrelpath(sub))
3302 3306 )
3303 3307 sr = sub.commit(cctx._text, user, date)
3304 3308 newstate[s] = (newstate[s][0], sr)
3305 3309 subrepoutil.writestate(self, newstate)
3306 3310
3307 3311 p1, p2 = self.dirstate.parents()
3308 3312 hookp1, hookp2 = hex(p1), (p2 != self.nullid and hex(p2) or b'')
3309 3313 try:
3310 3314 self.hook(
3311 3315 b"precommit", throw=True, parent1=hookp1, parent2=hookp2
3312 3316 )
3313 3317 with self.transaction(b'commit'):
3314 3318 ret = self.commitctx(cctx, True)
3315 3319 # update bookmarks, dirstate and mergestate
3316 3320 bookmarks.update(self, [p1, p2], ret)
3317 3321 cctx.markcommitted(ret)
3318 3322 ms.reset()
3319 3323 except: # re-raises
3320 3324 if edited:
3321 3325 self.ui.write(
3322 3326 _(b'note: commit message saved in %s\n') % msg_path
3323 3327 )
3324 3328 self.ui.write(
3325 3329 _(
3326 3330 b"note: use 'hg commit --logfile "
3327 3331 b"%s --edit' to reuse it\n"
3328 3332 )
3329 3333 % msg_path
3330 3334 )
3331 3335 raise
3332 3336
3333 3337 def commithook(unused_success):
3334 3338             # hack for commands that use a temporary commit (eg: histedit)
3335 3339             # the temporary commit got stripped before the hook release
3336 3340 if self.changelog.hasnode(ret):
3337 3341 self.hook(
3338 3342 b"commit", node=hex(ret), parent1=hookp1, parent2=hookp2
3339 3343 )
3340 3344
3341 3345 self._afterlock(commithook)
3342 3346 return ret
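# Hedged usage sketch (not part of the diff) for the commit() entry point
# above; the message and user values are illustrative.
node = repo.commit(
    text=b'example: record pending working copy changes',
    user=b'Example User <user@example.com>',
)
if node is None:
    # nothing to commit: see the ui.allowemptycommit check above
    pass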
3343 3347
3344 3348 @unfilteredmethod
3345 3349 def commitctx(self, ctx, error=False, origctx=None):
3346 3350 return commit.commitctx(self, ctx, error=error, origctx=origctx)
3347 3351
3348 3352 @unfilteredmethod
3349 3353 def destroying(self):
3350 3354 """Inform the repository that nodes are about to be destroyed.
3351 3355 Intended for use by strip and rollback, so there's a common
3352 3356 place for anything that has to be done before destroying history.
3353 3357
3354 3358 This is mostly useful for saving state that is in memory and waiting
3355 3359 to be flushed when the current lock is released. Because a call to
3356 3360 destroyed is imminent, the repo will be invalidated causing those
3357 3361 changes to stay in memory (waiting for the next unlock), or vanish
3358 3362 completely.
3359 3363 """
3360 3364 # When using the same lock to commit and strip, the phasecache is left
3361 3365 # dirty after committing. Then when we strip, the repo is invalidated,
3362 3366 # causing those changes to disappear.
3363 3367 if '_phasecache' in vars(self):
3364 3368 self._phasecache.write()
3365 3369
3366 3370 @unfilteredmethod
3367 3371 def destroyed(self):
3368 3372 """Inform the repository that nodes have been destroyed.
3369 3373 Intended for use by strip and rollback, so there's a common
3370 3374 place for anything that has to be done after destroying history.
3371 3375 """
3372 3376 # When one tries to:
3373 3377 # 1) destroy nodes thus calling this method (e.g. strip)
3374 3378 # 2) use phasecache somewhere (e.g. commit)
3375 3379 #
3376 3380 # then 2) will fail because the phasecache contains nodes that were
3377 3381 # removed. We can either remove phasecache from the filecache,
3378 3382 # causing it to reload next time it is accessed, or simply filter
3379 3383 # the removed nodes now and write the updated cache.
3380 3384 self._phasecache.filterunknown(self)
3381 3385 self._phasecache.write()
3382 3386
3383 3387 # refresh all repository caches
3384 3388 self.updatecaches()
3385 3389
3386 3390 # Ensure the persistent tag cache is updated. Doing it now
3387 3391 # means that the tag cache only has to worry about destroyed
3388 3392 # heads immediately after a strip/rollback. That in turn
3389 3393 # guarantees that "cachetip == currenttip" (comparing both rev
3390 3394 # and node) always means no nodes have been added or destroyed.
3391 3395
3392 3396 # XXX this is suboptimal when qrefresh'ing: we strip the current
3393 3397 # head, refresh the tag cache, then immediately add a new head.
3394 3398 # But I think doing it this way is necessary for the "instant
3395 3399 # tag cache retrieval" case to work.
3396 3400 self.invalidate()
3397 3401
3398 3402 def status(
3399 3403 self,
3400 3404 node1=b'.',
3401 3405 node2=None,
3402 3406 match=None,
3403 3407 ignored=False,
3404 3408 clean=False,
3405 3409 unknown=False,
3406 3410 listsubrepos=False,
3407 3411 ):
3408 3412 '''a convenience method that calls node1.status(node2)'''
3409 3413 return self[node1].status(
3410 3414 node2, match, ignored, clean, unknown, listsubrepos
3411 3415 )
3412 3416
3413 3417 def addpostdsstatus(self, ps):
3414 3418 """Add a callback to run within the wlock, at the point at which status
3415 3419 fixups happen.
3416 3420
3417 3421 On status completion, callback(wctx, status) will be called with the
3418 3422 wlock held, unless the dirstate has changed from underneath or the wlock
3419 3423 couldn't be grabbed.
3420 3424
3421 3425 Callbacks should not capture and use a cached copy of the dirstate --
3422 3426         it might change in the meantime. Instead, they should access the
3423 3427 dirstate via wctx.repo().dirstate.
3424 3428
3425 3429 This list is emptied out after each status run -- extensions should
3426 3430         make sure they add to this list each time dirstate.status is called.
3427 3431 Extensions should also make sure they don't call this for statuses
3428 3432 that don't involve the dirstate.
3429 3433 """
3430 3434
3431 3435 # The list is located here for uniqueness reasons -- it is actually
3432 3436 # managed by the workingctx, but that isn't unique per-repo.
3433 3437 self._postdsstatus.append(ps)
3434 3438
3435 3439 def postdsstatus(self):
3436 3440 """Used by workingctx to get the list of post-dirstate-status hooks."""
3437 3441 return self._postdsstatus
3438 3442
3439 3443 def clearpostdsstatus(self):
3440 3444 """Used by workingctx to clear post-dirstate-status hooks."""
3441 3445 del self._postdsstatus[:]
3442 3446
3443 3447 def heads(self, start=None):
3444 3448 if start is None:
3445 3449 cl = self.changelog
3446 3450 headrevs = reversed(cl.headrevs())
3447 3451 return [cl.node(rev) for rev in headrevs]
3448 3452
3449 3453 heads = self.changelog.heads(start)
3450 3454 # sort the output in rev descending order
3451 3455 return sorted(heads, key=self.changelog.rev, reverse=True)
3452 3456
3453 3457 def branchheads(self, branch=None, start=None, closed=False):
3454 3458 """return a (possibly filtered) list of heads for the given branch
3455 3459
3456 3460 Heads are returned in topological order, from newest to oldest.
3457 3461 If branch is None, use the dirstate branch.
3458 3462 If start is not None, return only heads reachable from start.
3459 3463 If closed is True, return heads that are marked as closed as well.
3460 3464 """
3461 3465 if branch is None:
3462 3466 branch = self[None].branch()
3463 3467 branches = self.branchmap()
3464 3468 if not branches.hasbranch(branch):
3465 3469 return []
3466 3470 # the cache returns heads ordered lowest to highest
3467 3471 bheads = list(reversed(branches.branchheads(branch, closed=closed)))
3468 3472 if start is not None:
3469 3473 # filter out the heads that cannot be reached from startrev
3470 3474 fbheads = set(self.changelog.nodesbetween([start], bheads)[2])
3471 3475 bheads = [h for h in bheads if h in fbheads]
3472 3476 return bheads
3473 3477
3474 3478 def branches(self, nodes):
3475 3479 if not nodes:
3476 3480 nodes = [self.changelog.tip()]
3477 3481 b = []
3478 3482 for n in nodes:
3479 3483 t = n
3480 3484 while True:
3481 3485 p = self.changelog.parents(n)
3482 3486 if p[1] != self.nullid or p[0] == self.nullid:
3483 3487 b.append((t, n, p[0], p[1]))
3484 3488 break
3485 3489 n = p[0]
3486 3490 return b
3487 3491
3488 3492 def between(self, pairs):
3489 3493 r = []
3490 3494
3491 3495 for top, bottom in pairs:
3492 3496 n, l, i = top, [], 0
3493 3497 f = 1
3494 3498
3495 3499 while n != bottom and n != self.nullid:
3496 3500 p = self.changelog.parents(n)[0]
3497 3501 if i == f:
3498 3502 l.append(n)
3499 3503 f = f * 2
3500 3504 n = p
3501 3505 i += 1
3502 3506
3503 3507 r.append(l)
3504 3508
3505 3509 return r
3506 3510
3507 3511 def checkpush(self, pushop):
3508 3512 """Extensions can override this function if additional checks have
3509 3513 to be performed before pushing, or call it if they override push
3510 3514 command.
3511 3515 """
3512 3516
3513 3517 @unfilteredpropertycache
3514 3518 def prepushoutgoinghooks(self):
3515 3519         """Return a util.hooks object whose hooks are called with a pushop
3516 3520         (exposing repo, remote and outgoing) before pushing changesets.
3517 3521 """
3518 3522 return util.hooks()
3519 3523
3520 3524 def pushkey(self, namespace, key, old, new):
3521 3525 try:
3522 3526 tr = self.currenttransaction()
3523 3527 hookargs = {}
3524 3528 if tr is not None:
3525 3529 hookargs.update(tr.hookargs)
3526 3530 hookargs = pycompat.strkwargs(hookargs)
3527 3531 hookargs['namespace'] = namespace
3528 3532 hookargs['key'] = key
3529 3533 hookargs['old'] = old
3530 3534 hookargs['new'] = new
3531 3535 self.hook(b'prepushkey', throw=True, **hookargs)
3532 3536 except error.HookAbort as exc:
3533 3537 self.ui.write_err(_(b"pushkey-abort: %s\n") % exc)
3534 3538 if exc.hint:
3535 3539 self.ui.write_err(_(b"(%s)\n") % exc.hint)
3536 3540 return False
3537 3541 self.ui.debug(b'pushing key for "%s:%s"\n' % (namespace, key))
3538 3542 ret = pushkey.push(self, namespace, key, old, new)
3539 3543
3540 3544 def runhook(unused_success):
3541 3545 self.hook(
3542 3546 b'pushkey',
3543 3547 namespace=namespace,
3544 3548 key=key,
3545 3549 old=old,
3546 3550 new=new,
3547 3551 ret=ret,
3548 3552 )
3549 3553
3550 3554 self._afterlock(runhook)
3551 3555 return ret
3552 3556
3553 3557 def listkeys(self, namespace):
3554 3558 self.hook(b'prelistkeys', throw=True, namespace=namespace)
3555 3559 self.ui.debug(b'listing keys for "%s"\n' % namespace)
3556 3560 values = pushkey.list(self, namespace)
3557 3561 self.hook(b'listkeys', namespace=namespace, values=values)
3558 3562 return values
3559 3563
3560 3564 def debugwireargs(self, one, two, three=None, four=None, five=None):
3561 3565 '''used to test argument passing over the wire'''
3562 3566 return b"%s %s %s %s %s" % (
3563 3567 one,
3564 3568 two,
3565 3569 pycompat.bytestr(three),
3566 3570 pycompat.bytestr(four),
3567 3571 pycompat.bytestr(five),
3568 3572 )
3569 3573
3570 3574 def savecommitmessage(self, text):
3571 3575 fp = self.vfs(b'last-message.txt', b'wb')
3572 3576 try:
3573 3577 fp.write(text)
3574 3578 finally:
3575 3579 fp.close()
3576 3580 return self.pathto(fp.name[len(self.root) + 1 :])
3577 3581
3578 3582 def register_wanted_sidedata(self, category):
3579 3583 if repository.REPO_FEATURE_SIDE_DATA not in self.features:
3580 3584 # Only revlogv2 repos can want sidedata.
3581 3585 return
3582 3586 self._wanted_sidedata.add(pycompat.bytestr(category))
3583 3587
3584 3588 def register_sidedata_computer(
3585 3589 self, kind, category, keys, computer, flags, replace=False
3586 3590 ):
3587 3591 if kind not in revlogconst.ALL_KINDS:
3588 3592 msg = _(b"unexpected revlog kind '%s'.")
3589 3593 raise error.ProgrammingError(msg % kind)
3590 3594 category = pycompat.bytestr(category)
3591 3595 already_registered = category in self._sidedata_computers.get(kind, [])
3592 3596 if already_registered and not replace:
3593 3597 msg = _(
3594 3598 b"cannot register a sidedata computer twice for category '%s'."
3595 3599 )
3596 3600 raise error.ProgrammingError(msg % category)
3597 3601 if replace and not already_registered:
3598 3602 msg = _(
3599 3603 b"cannot replace a sidedata computer that isn't registered "
3600 3604 b"for category '%s'."
3601 3605 )
3602 3606 raise error.ProgrammingError(msg % category)
3603 3607 self._sidedata_computers.setdefault(kind, {})
3604 3608 self._sidedata_computers[kind][category] = (keys, computer, flags)
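# Hedged sketch (not part of the diff) of the registration call above. The
# category name, keys and flag value are illustrative, and the computer
# signature is an assumption, not something this file defines.
from mercurial.revlogutils import constants as revlogconst

def compute_example_sidedata(repo, revlog_obj, rev, existing_sidedata):
    # assumed contract: return (sidedata mapping, (flags_to_add, flags_to_remove))
    return {}, (0, 0)

repo.register_sidedata_computer(
    revlogconst.KIND_CHANGELOG,
    b'exp-example-category',
    keys=(b'exp-example-key',),
    computer=compute_example_sidedata,
    flags=0,
)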
3605 3609
3606 3610
3607 3611 def undoname(fn: bytes) -> bytes:
3608 3612 base, name = os.path.split(fn)
3609 3613 assert name.startswith(b'journal')
3610 3614 return os.path.join(base, name.replace(b'journal', b'undo', 1))
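# For illustration (not part of the diff): the mapping this helper performs,
# with a hypothetical store path. The name must start with b'journal', per
# the assert above.
undoname(b'/repo/.hg/store/journal.phaseroots')
# -> b'/repo/.hg/store/undo.phaseroots'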
3611 3615
3612 3616
3613 3617 def instance(ui, path: bytes, create, intents=None, createopts=None):
3614 3618 # prevent cyclic import localrepo -> upgrade -> localrepo
3615 3619 from . import upgrade
3616 3620
3617 3621 localpath = urlutil.urllocalpath(path)
3618 3622 if create:
3619 3623 createrepository(ui, localpath, createopts=createopts)
3620 3624
3621 3625 def repo_maker():
3622 3626 return makelocalrepository(ui, localpath, intents=intents)
3623 3627
3624 3628 repo = repo_maker()
3625 3629 repo = upgrade.may_auto_upgrade(repo, repo_maker)
3626 3630 return repo
3627 3631
3628 3632
3629 3633 def islocal(path: bytes) -> bool:
3630 3634 return True
3631 3635
3632 3636
3633 3637 def defaultcreateopts(ui, createopts=None):
3634 3638 """Populate the default creation options for a repository.
3635 3639
3636 3640 A dictionary of explicitly requested creation options can be passed
3637 3641 in. Missing keys will be populated.
3638 3642 """
3639 3643 createopts = dict(createopts or {})
3640 3644
3641 3645 if b'backend' not in createopts:
3642 3646 # experimental config: storage.new-repo-backend
3643 3647 createopts[b'backend'] = ui.config(b'storage', b'new-repo-backend')
3644 3648
3645 3649 return createopts
3646 3650
3647 3651
3648 3652 def clone_requirements(ui, createopts, srcrepo):
3649 3653 """clone the requirements of a local repo for a local clone
3650 3654
3651 3655 The store requirements are unchanged while the working copy requirements
3652 3656     depend on the configuration.
3653 3657 """
3654 3658 target_requirements = set()
3655 3659 if not srcrepo.requirements:
3656 3660 # this is a legacy revlog "v0" repository, we cannot do anything fancy
3657 3661 # with it.
3658 3662 return target_requirements
3659 3663 createopts = defaultcreateopts(ui, createopts=createopts)
3660 3664 for r in newreporequirements(ui, createopts):
3661 3665 if r in requirementsmod.WORKING_DIR_REQUIREMENTS:
3662 3666 target_requirements.add(r)
3663 3667
3664 3668 for r in srcrepo.requirements:
3665 3669 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS:
3666 3670 target_requirements.add(r)
3667 3671 return target_requirements
3668 3672
3669 3673
3670 3674 def newreporequirements(ui, createopts):
3671 3675 """Determine the set of requirements for a new local repository.
3672 3676
3673 3677 Extensions can wrap this function to specify custom requirements for
3674 3678 new repositories.
3675 3679 """
3676 3680
3677 3681 if b'backend' not in createopts:
3678 3682 raise error.ProgrammingError(
3679 3683 b'backend key not present in createopts; '
3680 3684 b'was defaultcreateopts() called?'
3681 3685 )
3682 3686
3683 3687 if createopts[b'backend'] != b'revlogv1':
3684 3688 raise error.Abort(
3685 3689 _(
3686 3690 b'unable to determine repository requirements for '
3687 3691 b'storage backend: %s'
3688 3692 )
3689 3693 % createopts[b'backend']
3690 3694 )
3691 3695
3692 3696 requirements = {requirementsmod.REVLOGV1_REQUIREMENT}
3693 3697 if ui.configbool(b'format', b'usestore'):
3694 3698 requirements.add(requirementsmod.STORE_REQUIREMENT)
3695 3699 if ui.configbool(b'format', b'usefncache'):
3696 3700 requirements.add(requirementsmod.FNCACHE_REQUIREMENT)
3697 3701 if ui.configbool(b'format', b'dotencode'):
3698 3702 requirements.add(requirementsmod.DOTENCODE_REQUIREMENT)
3699 3703
3700 3704 compengines = ui.configlist(b'format', b'revlog-compression')
3701 3705 for compengine in compengines:
3702 3706 if compengine in util.compengines:
3703 3707 engine = util.compengines[compengine]
3704 3708 if engine.available() and engine.revlogheader():
3705 3709 break
3706 3710 else:
3707 3711 raise error.Abort(
3708 3712 _(
3709 3713 b'compression engines %s defined by '
3710 3714 b'format.revlog-compression not available'
3711 3715 )
3712 3716 % b', '.join(b'"%s"' % e for e in compengines),
3713 3717 hint=_(
3714 3718 b'run "hg debuginstall" to list available '
3715 3719 b'compression engines'
3716 3720 ),
3717 3721 )
3718 3722
3719 3723 # zlib is the historical default and doesn't need an explicit requirement.
3720 3724 if compengine == b'zstd':
3721 3725 requirements.add(b'revlog-compression-zstd')
3722 3726 elif compengine != b'zlib':
3723 3727 requirements.add(b'exp-compression-%s' % compengine)
3724 3728
3725 3729 if scmutil.gdinitconfig(ui):
3726 3730 requirements.add(requirementsmod.GENERALDELTA_REQUIREMENT)
3727 3731 if ui.configbool(b'format', b'sparse-revlog'):
3728 3732 requirements.add(requirementsmod.SPARSEREVLOG_REQUIREMENT)
3729 3733
3730 3734 # experimental config: format.use-dirstate-v2
3731 3735 # Keep this logic in sync with `has_dirstate_v2()` in `tests/hghave.py`
3732 3736 if ui.configbool(b'format', b'use-dirstate-v2'):
3733 3737 requirements.add(requirementsmod.DIRSTATE_V2_REQUIREMENT)
3734 3738
3735 3739 # experimental config: format.exp-use-copies-side-data-changeset
3736 3740 if ui.configbool(b'format', b'exp-use-copies-side-data-changeset'):
3737 3741 requirements.add(requirementsmod.CHANGELOGV2_REQUIREMENT)
3738 3742 requirements.add(requirementsmod.COPIESSDC_REQUIREMENT)
3739 3743 if ui.configbool(b'experimental', b'treemanifest'):
3740 3744 requirements.add(requirementsmod.TREEMANIFEST_REQUIREMENT)
3741 3745
3742 3746 changelogv2 = ui.config(b'format', b'exp-use-changelog-v2')
3743 3747 if changelogv2 == b'enable-unstable-format-and-corrupt-my-data':
3744 3748 requirements.add(requirementsmod.CHANGELOGV2_REQUIREMENT)
3745 3749
3746 3750 revlogv2 = ui.config(b'experimental', b'revlogv2')
3747 3751 if revlogv2 == b'enable-unstable-format-and-corrupt-my-data':
3748 3752 requirements.discard(requirementsmod.REVLOGV1_REQUIREMENT)
3749 3753 requirements.add(requirementsmod.REVLOGV2_REQUIREMENT)
3750 3754 # experimental config: format.internal-phase
3751 3755 if ui.configbool(b'format', b'use-internal-phase'):
3752 3756 requirements.add(requirementsmod.INTERNAL_PHASE_REQUIREMENT)
3753 3757
3754 3758 # experimental config: format.exp-archived-phase
3755 3759 if ui.configbool(b'format', b'exp-archived-phase'):
3756 3760 requirements.add(requirementsmod.ARCHIVED_PHASE_REQUIREMENT)
3757 3761
3758 3762 if createopts.get(b'narrowfiles'):
3759 3763 requirements.add(requirementsmod.NARROW_REQUIREMENT)
3760 3764
3761 3765 if createopts.get(b'lfs'):
3762 3766 requirements.add(b'lfs')
3763 3767
3764 3768 if ui.configbool(b'format', b'bookmarks-in-store'):
3765 3769 requirements.add(requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT)
3766 3770
3767 3771 # The feature is disabled unless a fast implementation is available.
3768 3772 persistent_nodemap_default = policy.importrust('revlog') is not None
3769 3773 if ui.configbool(
3770 3774 b'format', b'use-persistent-nodemap', persistent_nodemap_default
3771 3775 ):
3772 3776 requirements.add(requirementsmod.NODEMAP_REQUIREMENT)
3773 3777
3774 3778 # if share-safe is enabled, let's create the new repository with the new
3775 3779 # requirement
3776 3780 if ui.configbool(b'format', b'use-share-safe'):
3777 3781 requirements.add(requirementsmod.SHARESAFE_REQUIREMENT)
3778 3782
3779 3783     # if we are creating a share-repo¹ we have to handle requirements
3780 3784     # differently.
3781 3785 #
3782 3786 # [1] (i.e. reusing the store from another repository, just having a
3783 3787 # working copy)
3784 3788 if b'sharedrepo' in createopts:
3785 3789 source_requirements = set(createopts[b'sharedrepo'].requirements)
3786 3790
3787 3791 if requirementsmod.SHARESAFE_REQUIREMENT not in source_requirements:
3788 3792 # share to an old school repository, we have to copy the
3789 3793 # requirements and hope for the best.
3790 3794 requirements = source_requirements
3791 3795 else:
3792 3796             # We have control over the working copy only, so "copy" the non
3793 3797 # working copy part over, ignoring previous logic.
3794 3798 to_drop = set()
3795 3799 for req in requirements:
3796 3800 if req in requirementsmod.WORKING_DIR_REQUIREMENTS:
3797 3801 continue
3798 3802 if req in source_requirements:
3799 3803 continue
3800 3804 to_drop.add(req)
3801 3805 requirements -= to_drop
3802 3806 requirements |= source_requirements
3803 3807
3804 3808 if createopts.get(b'sharedrelative'):
3805 3809 requirements.add(requirementsmod.RELATIVE_SHARED_REQUIREMENT)
3806 3810 else:
3807 3811 requirements.add(requirementsmod.SHARED_REQUIREMENT)
3808 3812
3809 3813 if ui.configbool(b'format', b'use-dirstate-tracked-hint'):
3810 3814 version = ui.configint(b'format', b'use-dirstate-tracked-hint.version')
3811 3815 msg = _(b"ignoring unknown tracked key version: %d\n")
3812 3816 hint = _(
3813 3817 b"see `hg help config.format.use-dirstate-tracked-hint-version"
3814 3818 )
3815 3819 if version != 1:
3816 3820 ui.warn(msg % version, hint=hint)
3817 3821 else:
3818 3822 requirements.add(requirementsmod.DIRSTATE_TRACKED_HINT_V1)
3819 3823
3820 3824 return requirements
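# Hedged sketch (not part of the diff): computing the requirement set a brand
# new repository would get under the current configuration. The ui object is
# assumed to come from the caller.
from mercurial import localrepo, requirements as requirementsmod

opts = localrepo.defaultcreateopts(ui)
reqs = localrepo.newreporequirements(ui, createopts=opts)
# under stock configuration this set includes REVLOGV1_REQUIREMENT plus the
# store/fncache/dotencode markers, and the nodemap marker when Rust is present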
3821 3825
3822 3826
3823 3827 def checkrequirementscompat(ui, requirements):
3824 3828 """Checks compatibility of repository requirements enabled and disabled.
3825 3829
3826 3830     Returns a set of requirements which need to be dropped because dependent
3827 3831     requirements are not enabled. Also warns users about it."""
3828 3832
3829 3833 dropped = set()
3830 3834
3831 3835 if requirementsmod.STORE_REQUIREMENT not in requirements:
3832 3836 if requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT in requirements:
3833 3837 ui.warn(
3834 3838 _(
3835 3839 b'ignoring enabled \'format.bookmarks-in-store\' config '
3836 3840                 b'because it is incompatible with disabled '
3837 3841 b'\'format.usestore\' config\n'
3838 3842 )
3839 3843 )
3840 3844 dropped.add(requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT)
3841 3845
3842 3846 if (
3843 3847 requirementsmod.SHARED_REQUIREMENT in requirements
3844 3848 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
3845 3849 ):
3846 3850 raise error.Abort(
3847 3851 _(
3848 3852 b"cannot create shared repository as source was created"
3849 3853 b" with 'format.usestore' config disabled"
3850 3854 )
3851 3855 )
3852 3856
3853 3857 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
3854 3858 if ui.hasconfig(b'format', b'use-share-safe'):
3855 3859 msg = _(
3856 3860 b"ignoring enabled 'format.use-share-safe' config because "
3857 3861 b"it is incompatible with disabled 'format.usestore'"
3858 3862 b" config\n"
3859 3863 )
3860 3864 ui.warn(msg)
3861 3865 dropped.add(requirementsmod.SHARESAFE_REQUIREMENT)
3862 3866
3863 3867 return dropped
3864 3868
3865 3869
3866 3870 def filterknowncreateopts(ui, createopts):
3867 3871 """Filters a dict of repo creation options against options that are known.
3868 3872
3869 3873 Receives a dict of repo creation options and returns a dict of those
3870 3874 options that we don't know how to handle.
3871 3875
3872 3876 This function is called as part of repository creation. If the
3873 3877 returned dict contains any items, repository creation will not
3874 3878 be allowed, as it means there was a request to create a repository
3875 3879 with options not recognized by loaded code.
3876 3880
3877 3881 Extensions can wrap this function to filter out creation options
3878 3882 they know how to handle.
3879 3883 """
3880 3884 known = {
3881 3885 b'backend',
3882 3886 b'lfs',
3883 3887 b'narrowfiles',
3884 3888 b'sharedrepo',
3885 3889 b'sharedrelative',
3886 3890 b'shareditems',
3887 3891 b'shallowfilestore',
3888 3892 }
3889 3893
3890 3894 return {k: v for k, v in createopts.items() if k not in known}
3891 3895
3892 3896
3893 3897 def createrepository(ui, path: bytes, createopts=None, requirements=None):
3894 3898 """Create a new repository in a vfs.
3895 3899
3896 3900 ``path`` path to the new repo's working directory.
3897 3901 ``createopts`` options for the new repository.
3898 3902     ``requirements`` predefined set of requirements.
3899 3903 (incompatible with ``createopts``)
3900 3904
3901 3905 The following keys for ``createopts`` are recognized:
3902 3906
3903 3907 backend
3904 3908 The storage backend to use.
3905 3909 lfs
3906 3910 Repository will be created with ``lfs`` requirement. The lfs extension
3907 3911 will automatically be loaded when the repository is accessed.
3908 3912 narrowfiles
3909 3913 Set up repository to support narrow file storage.
3910 3914 sharedrepo
3911 3915 Repository object from which storage should be shared.
3912 3916 sharedrelative
3913 3917 Boolean indicating if the path to the shared repo should be
3914 3918 stored as relative. By default, the pointer to the "parent" repo
3915 3919 is stored as an absolute path.
3916 3920 shareditems
3917 3921 Set of items to share to the new repository (in addition to storage).
3918 3922 shallowfilestore
3919 3923 Indicates that storage for files should be shallow (not all ancestor
3920 3924 revisions are known).
3921 3925 """
3922 3926
3923 3927 if requirements is not None:
3924 3928 if createopts is not None:
3925 3929 msg = b'cannot specify both createopts and requirements'
3926 3930 raise error.ProgrammingError(msg)
3927 3931 createopts = {}
3928 3932 else:
3929 3933 createopts = defaultcreateopts(ui, createopts=createopts)
3930 3934
3931 3935 unknownopts = filterknowncreateopts(ui, createopts)
3932 3936
3933 3937 if not isinstance(unknownopts, dict):
3934 3938 raise error.ProgrammingError(
3935 3939 b'filterknowncreateopts() did not return a dict'
3936 3940 )
3937 3941
3938 3942 if unknownopts:
3939 3943 raise error.Abort(
3940 3944 _(
3941 3945 b'unable to create repository because of unknown '
3942 3946 b'creation option: %s'
3943 3947 )
3944 3948 % b', '.join(sorted(unknownopts)),
3945 3949 hint=_(b'is a required extension not loaded?'),
3946 3950 )
3947 3951
3948 3952 requirements = newreporequirements(ui, createopts=createopts)
3949 3953 requirements -= checkrequirementscompat(ui, requirements)
3950 3954
3951 3955 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
3952 3956
3953 3957 hgvfs = vfsmod.vfs(wdirvfs.join(b'.hg'))
3954 3958 if hgvfs.exists():
3955 3959 raise error.RepoError(_(b'repository %s already exists') % path)
3956 3960
3957 3961 if b'sharedrepo' in createopts:
3958 3962 sharedpath = createopts[b'sharedrepo'].sharedpath
3959 3963
3960 3964 if createopts.get(b'sharedrelative'):
3961 3965 try:
3962 3966 sharedpath = os.path.relpath(sharedpath, hgvfs.base)
3963 3967 sharedpath = util.pconvert(sharedpath)
3964 3968 except (IOError, ValueError) as e:
3965 3969 # ValueError is raised on Windows if the drive letters differ
3966 3970 # on each path.
3967 3971 raise error.Abort(
3968 3972 _(b'cannot calculate relative path'),
3969 3973 hint=stringutil.forcebytestr(e),
3970 3974 )
3971 3975
3972 3976 if not wdirvfs.exists():
3973 3977 wdirvfs.makedirs()
3974 3978
3975 3979 hgvfs.makedir(notindexed=True)
3976 3980 if b'sharedrepo' not in createopts:
3977 3981 hgvfs.mkdir(b'cache')
3978 3982 hgvfs.mkdir(b'wcache')
3979 3983
3980 3984 has_store = requirementsmod.STORE_REQUIREMENT in requirements
3981 3985 if has_store and b'sharedrepo' not in createopts:
3982 3986 hgvfs.mkdir(b'store')
3983 3987
3984 3988 # We create an invalid changelog outside the store so very old
3985 3989 # Mercurial versions (which didn't know about the requirements
3986 3990 # file) encounter an error on reading the changelog. This
3987 3991 # effectively locks out old clients and prevents them from
3988 3992 # mucking with a repo in an unknown format.
3989 3993 #
3990 3994 # The revlog header has version 65535, which won't be recognized by
3991 3995 # such old clients.
3992 3996 hgvfs.append(
3993 3997 b'00changelog.i',
3994 3998 b'\0\0\xFF\xFF dummy changelog to prevent using the old repo '
3995 3999 b'layout',
3996 4000 )
3997 4001
3998 4002 # Filter the requirements into working copy and store ones
3999 4003 wcreq, storereq = scmutil.filterrequirements(requirements)
4000 4004 # write working copy ones
4001 4005 scmutil.writerequires(hgvfs, wcreq)
4002 4006 # If there are store requirements and the current repository
4003 4007 # is not a shared one, write stored requirements
4004 4008 # For new shared repository, we don't need to write the store
4005 4009 # requirements as they are already present in store requires
4006 4010 if storereq and b'sharedrepo' not in createopts:
4007 4011 storevfs = vfsmod.vfs(hgvfs.join(b'store'), cacheaudited=True)
4008 4012 scmutil.writerequires(storevfs, storereq)
4009 4013
4010 4014 # Write out file telling readers where to find the shared store.
4011 4015 if b'sharedrepo' in createopts:
4012 4016 hgvfs.write(b'sharedpath', sharedpath)
4013 4017
4014 4018 if createopts.get(b'shareditems'):
4015 4019 shared = b'\n'.join(sorted(createopts[b'shareditems'])) + b'\n'
4016 4020 hgvfs.write(b'shared', shared)
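# Hedged usage sketch (not part of the diff): creating a fresh repository with
# the lfs createopt documented above. The path is illustrative.
from mercurial import localrepo, ui as uimod

myui = uimod.ui.load()
localrepo.createrepository(myui, b'/tmp/example-repo', createopts={b'lfs': True})
# the new repository can then be opened through instance()/makelocalrepository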
4017 4021
4018 4022
4019 4023 def poisonrepository(repo):
4020 4024 """Poison a repository instance so it can no longer be used."""
4021 4025 # Perform any cleanup on the instance.
4022 4026 repo.close()
4023 4027
4024 4028 # Our strategy is to replace the type of the object with one that
4025 4029 # has all attribute lookups result in error.
4026 4030 #
4027 4031 # But we have to allow the close() method because some constructors
4028 4032 # of repos call close() on repo references.
4029 4033 class poisonedrepository:
4030 4034 def __getattribute__(self, item):
4031 4035 if item == 'close':
4032 4036 return object.__getattribute__(self, item)
4033 4037
4034 4038 raise error.ProgrammingError(
4035 4039 b'repo instances should not be used after unshare'
4036 4040 )
4037 4041
4038 4042 def close(self):
4039 4043 pass
4040 4044
4041 4045 # We may have a repoview, which intercepts __setattr__. So be sure
4042 4046 # we operate at the lowest level possible.
4043 4047 object.__setattr__(repo, '__class__', poisonedrepository)
@@ -1,3740 +1,3747 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes constraints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265     # normally re-sort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267     # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281     """Hold configuration values about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285     # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305     # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311     """Hold configuration values about how new deltas are computed
312 312
313 313     Some attributes are duplicated from DataConfig to help keep each object
314 314     self-contained.
315 315 """
316 316
317 317     # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328     # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330     # Test delta base candidate groups by chunks of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class revlog:
341 341 """
342 342 the underlying revision storage object
343 343
344 344 A revlog consists of two parts, an index and the revision data.
345 345
346 346 The index is a file with a fixed record size containing
347 347 information on each revision, including its nodeid (hash), the
348 348 nodeids of its parents, the position and offset of its data within
349 349 the data file, and the revision it's based on. Finally, each entry
350 350 contains a linkrev entry that can serve as a pointer to external
351 351 data.
352 352
353 353 The revision data itself is a linear collection of data chunks.
354 354 Each chunk represents a revision and is usually represented as a
355 355 delta against the previous chunk. To bound lookup time, runs of
356 356 deltas are limited to about 2 times the length of the original
357 357 version data. This makes retrieval of a version proportional to
358 358 its size, or O(1) relative to the number of revisions.
359 359
360 360 Both pieces of the revlog are written to in an append-only
361 361 fashion, which means we never need to rewrite a file to insert or
362 362 remove data, and can use some simple techniques to avoid the need
363 363 for locking while reading.
364 364
365 365 If checkambig, indexfile is opened with checkambig=True at
366 366 writing, to avoid file stat ambiguity.
367 367
368 368 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 369 index will be mmapped rather than read if it is larger than the
370 370 configured threshold.
371 371
372 372 If censorable is True, the revlog can have censored revisions.
373 373
374 374 If `upperboundcomp` is not None, this is the expected maximal gain from
375 375 compression for the data content.
376 376
377 377 `concurrencychecker` is an optional function that receives 3 arguments: a
378 378 file handle, a filename, and an expected position. It should check whether
379 379 the current position in the file handle is valid, and log/warn/fail (by
380 380 raising).
381 381
382 382     See mercurial/revlogutils/constants.py for details about the content of an
383 383 index entry.
384 384 """
385 385
386 386 _flagserrorclass = error.RevlogError
387 387
388 388 @staticmethod
389 389 def is_inline_index(header_bytes):
390 390 """Determine if a revlog is inline from the initial bytes of the index"""
391 391 header = INDEX_HEADER.unpack(header_bytes)[0]
392 392
393 393 _format_flags = header & ~0xFFFF
394 394 _format_version = header & 0xFFFF
395 395
396 396 features = FEATURES_BY_VERSION[_format_version]
397 397 return features[b'inline'](_format_flags)
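# Hedged sketch (not part of the diff): feeding this helper the first four
# bytes of an index file (INDEX_HEADER is a 4-byte big-endian struct); the
# path is illustrative.
from mercurial.revlog import revlog

with open('/path/to/repo/.hg/store/00changelog.i', 'rb') as fh:
    header_bytes = fh.read(4)
inline = revlog.is_inline_index(header_bytes)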
398 398
399 399 def __init__(
400 400 self,
401 401 opener,
402 402 target,
403 403 radix,
404 404 postfix=None, # only exist for `tmpcensored` now
405 405 checkambig=False,
406 406 mmaplargeindex=False,
407 407 censorable=False,
408 408 upperboundcomp=None,
409 409 persistentnodemap=False,
410 410 concurrencychecker=None,
411 411 trypending=False,
412 412 try_split=False,
413 413 canonical_parent_order=True,
414 414 ):
415 415 """
416 416 create a revlog object
417 417
418 418 opener is a function that abstracts the file opening operation
419 419 and can be used to implement COW semantics or the like.
420 420
421 421         `target`: a (KIND, ID) tuple that identifies the content stored in
422 422         this revlog. It helps the rest of the code understand what the revlog
423 423         is about without having to resort to heuristics and index filename
424 424         analysis. Note that this must reliably be set by normal code, but
425 425         test, debug, or performance measurement code might not set it to an
426 426         accurate value.
427 427 """
428 428 self.upperboundcomp = upperboundcomp
429 429
430 430 self.radix = radix
431 431
432 432 self._docket_file = None
433 433 self._indexfile = None
434 434 self._datafile = None
435 435 self._sidedatafile = None
436 436 self._nodemap_file = None
437 437 self.postfix = postfix
438 438 self._trypending = trypending
439 439 self._try_split = try_split
440 440 self.opener = opener
441 441 if persistentnodemap:
442 442 self._nodemap_file = nodemaputil.get_nodemap_file(self)
443 443
444 444 assert target[0] in ALL_KINDS
445 445 assert len(target) == 2
446 446 self.target = target
447 self.feature_config = FeatureConfig(
448 censorable=censorable,
449 canonical_parent_order=canonical_parent_order,
450 )
451 self.data_config = DataConfig(
452 check_ambig=checkambig,
453 mmap_large_index=mmaplargeindex,
454 )
455 self.delta_config = DeltaConfig()
447 if b'feature-config' in self.opener.options:
448 self.feature_config = self.opener.options[b'feature-config'].copy()
449 else:
450 self.feature_config = FeatureConfig()
451 self.feature_config.censorable = censorable
452 self.feature_config.canonical_parent_order = canonical_parent_order
453 if b'data-config' in self.opener.options:
454 self.data_config = self.opener.options[b'data-config'].copy()
455 else:
456 self.data_config = DataConfig()
457 self.data_config.check_ambig = checkambig
458 self.data_config.mmap_large_index = mmaplargeindex
459 if b'delta-config' in self.opener.options:
460 self.delta_config = self.opener.options[b'delta-config'].copy()
461 else:
462 self.delta_config = DeltaConfig()
456 463
457 464 # 3-tuple of (node, rev, text) for a raw revision.
458 465 self._revisioncache = None
459 466 # Maps rev to chain base rev.
460 467 self._chainbasecache = util.lrucachedict(100)
461 468 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
462 469 self._chunkcache = (0, b'')
463 470
464 471 self.index = None
465 472 self._docket = None
466 473 self._nodemap_docket = None
467 474 # Mapping of partial identifiers to full nodes.
468 475 self._pcache = {}
469 476
470 477 # other optional features
471 478
472 479 # Make copy of flag processors so each revlog instance can support
473 480 # custom flags.
474 481 self._flagprocessors = dict(flagutil.flagprocessors)
475 482
476 483 # 3-tuple of file handles being used for active writing.
477 484 self._writinghandles = None
478 485 # prevent nesting of addgroup
479 486 self._adding_group = None
480 487
481 488 self._loadindex()
482 489
483 490 self._concurrencychecker = concurrencychecker
484 491
485 492 @property
486 493 def _generaldelta(self):
487 494 """temporary compatibility proxy"""
488 495 return self.delta_config.general_delta
489 496
490 497 @property
491 498 def _checkambig(self):
492 499 """temporary compatibility proxy"""
493 500 return self.data_config.check_ambig
494 501
495 502 @property
496 503 def _mmaplargeindex(self):
497 504 """temporary compatibility proxy"""
498 505 return self.data_config.mmap_large_index
499 506
500 507 @property
501 508 def _censorable(self):
502 509 """temporary compatibility proxy"""
503 510 return self.feature_config.censorable
504 511
505 512 @property
506 513 def _chunkcachesize(self):
507 514 """temporary compatibility proxy"""
508 515 return self.data_config.chunk_cache_size
509 516
510 517 @property
511 518 def _maxchainlen(self):
512 519 """temporary compatibility proxy"""
513 520 return self.delta_config.max_chain_len
514 521
515 522 @property
516 523 def _deltabothparents(self):
517 524 """temporary compatibility proxy"""
518 525 return self.delta_config.delta_both_parents
519 526
520 527 @property
521 528 def _candidate_group_chunk_size(self):
522 529 """temporary compatibility proxy"""
523 530 return self.delta_config.candidate_group_chunk_size
524 531
525 532 @property
526 533 def _debug_delta(self):
527 534 """temporary compatibility proxy"""
528 535 return self.delta_config.debug_delta
529 536
530 537 @property
531 538 def _compengine(self):
532 539 """temporary compatibility proxy"""
533 540 return self.feature_config.compression_engine
534 541
535 542 @property
536 543 def _compengineopts(self):
537 544 """temporary compatibility proxy"""
538 545 return self.feature_config.compression_engine_options
539 546
540 547 @property
541 548 def _maxdeltachainspan(self):
542 549 """temporary compatibility proxy"""
543 550 return self.delta_config.max_deltachain_span
544 551
545 552 @property
546 553 def _withsparseread(self):
547 554 """temporary compatibility proxy"""
548 555 return self.data_config.with_sparse_read
549 556
550 557 @property
551 558 def _sparserevlog(self):
552 559 """temporary compatibility proxy"""
553 560 return self.delta_config.sparse_revlog
554 561
555 562 @property
556 563 def hassidedata(self):
557 564 """temporary compatibility proxy"""
558 565 return self.feature_config.has_side_data
559 566
560 567 @property
561 568 def _srdensitythreshold(self):
562 569 """temporary compatibility proxy"""
563 570 return self.data_config.sr_density_threshold
564 571
565 572 @property
566 573 def _srmingapsize(self):
567 574 """temporary compatibility proxy"""
568 575 return self.data_config.sr_min_gap_size
569 576
570 577 @property
571 578 def _compute_rank(self):
572 579 """temporary compatibility proxy"""
573 580 return self.feature_config.compute_rank
574 581
575 582 @property
576 583 def canonical_parent_order(self):
577 584 """temporary compatibility proxy"""
578 585 return self.feature_config.canonical_parent_order
579 586
580 587 @property
581 588 def _lazydelta(self):
582 589 """temporary compatibility proxy"""
583 590 return self.delta_config.lazy_delta
584 591
585 592 @property
586 593 def _lazydeltabase(self):
587 594 """temporary compatibility proxy"""
588 595 return self.delta_config.lazy_delta_base
589 596
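# Editor's note: a condensed, self-contained sketch of the "temporary
# compatibility proxy" pattern used by the properties above: legacy attribute
# names keep working while the state actually lives on the new config objects.
# The classes below are simplified stand-ins, not the real revlog types.
class SketchDeltaConfig:
    general_delta = True

class SketchRevlog:
    def __init__(self):
        self.delta_config = SketchDeltaConfig()

    @property
    def _generaldelta(self):
        """temporary compatibility proxy"""
        return self.delta_config.general_delta

rl = SketchRevlog()
assert rl._generaldelta == rl.delta_config.general_delta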
590 597 def _init_opts(self):
591 598 """process options (from above/config) to setup associated default revlog mode
592 599
593 600 These values might be affected when actually reading on disk information.
594 601
595 602 The relevant values are returned for use in _loadindex().
596 603
597 604 * newversionflags:
598 605 version header to use if we need to create a new revlog
599 606
600 607 * mmapindexthreshold:
601 608 minimal index size at which to start using mmap
602 609
603 610 * force_nodemap:
604 611 force the usage of a "development" version of the nodemap code
605 612 """
606 613 mmapindexthreshold = None
607 614 opts = self.opener.options
608 615
609 616 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
610 617 new_header = CHANGELOGV2
611 618 compute_rank = opts.get(b'changelogv2.compute-rank', True)
612 619 self.feature_config.compute_rank = compute_rank
613 620 elif b'revlogv2' in opts:
614 621 new_header = REVLOGV2
615 622 elif b'revlogv1' in opts:
616 623 new_header = REVLOGV1 | FLAG_INLINE_DATA
617 624 if b'generaldelta' in opts:
618 625 new_header |= FLAG_GENERALDELTA
619 626 elif b'revlogv0' in self.opener.options:
620 627 new_header = REVLOGV0
621 628 else:
622 629 new_header = REVLOG_DEFAULT_VERSION
623 630
624 631 if b'chunkcachesize' in opts:
625 632 self.data_config.chunk_cache_size = opts[b'chunkcachesize']
626 633 if b'maxchainlen' in opts:
627 634 self.delta_config.max_chain_len = opts[b'maxchainlen']
628 635 if b'deltabothparents' in opts:
629 636 self.delta_config.delta_both_parents = opts[b'deltabothparents']
630 637 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
631 638 if dps_cgds:
632 639 self.delta_config.candidate_group_chunk_size = dps_cgds
633 640 if b'lazydelta' in opts:
634 641 self.delta_config.lazy_delta = bool(opts[b'lazydelta'])
635 642 if self._lazydelta and b'lazydeltabase' in opts:
636 643 self.delta_config.lazy_delta_base = opts[b'lazydeltabase']
637 644 if b'debug-delta' in opts:
638 645 self.delta_config.debug_delta = opts[b'debug-delta']
639 646 if b'compengine' in opts:
640 647 self.feature_config.compression_engine = opts[b'compengine']
641 648 comp_engine_opts = self.feature_config.compression_engine_options
642 649 if b'zlib.level' in opts:
643 650 comp_engine_opts[b'zlib.level'] = opts[b'zlib.level']
644 651 if b'zstd.level' in opts:
645 652 comp_engine_opts[b'zstd.level'] = opts[b'zstd.level']
646 653 if b'maxdeltachainspan' in opts:
647 654 self.delta_config.max_deltachain_span = opts[b'maxdeltachainspan']
648 655 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
649 656 mmapindexthreshold = opts[b'mmapindexthreshold']
650 657 self.data_config.mmap_index_threshold = mmapindexthreshold
651 658 if b'sparse-revlog' in opts:
652 659 self.delta_config.sparse_revlog = bool(opts[b'sparse-revlog'])
653 660 if self.delta_config.sparse_revlog:
654 661 # sparse-revlog forces sparse-read
655 662 self.data_config.with_sparse_read = True
656 663 elif b'with-sparse-read' in opts:
657 664 self.data_config.with_sparse_read = bool(opts[b'with-sparse-read'])
658 665 if b'sparse-read-density-threshold' in opts:
659 666 self.data_config.sr_density_threshold = opts[
660 667 b'sparse-read-density-threshold'
661 668 ]
662 669 if b'sparse-read-min-gap-size' in opts:
663 670 self.data_config.sr_min_gap_size = opts[b'sparse-read-min-gap-size']
664 671 if opts.get(b'enableellipsis'):
665 672 self.feature_config.enable_ellipsis = True
666 673 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
667 674
668 675 # revlog v0 doesn't have flag processors
669 676 for flag, processor in opts.get(b'flagprocessors', {}).items():
670 677 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
671 678
672 679 chunk_cache_size = self.data_config.chunk_cache_size
673 680 if chunk_cache_size <= 0:
674 681 raise error.RevlogError(
675 682 _(b'revlog chunk cache size %r is not greater than 0')
676 683 % chunk_cache_size
677 684 )
678 685 elif chunk_cache_size & (chunk_cache_size - 1):
679 686 raise error.RevlogError(
680 687 _(b'revlog chunk cache size %r is not a power of 2')
681 688 % chunk_cache_size
682 689 )
683 690 force_nodemap = opts.get(b'devel-force-nodemap', False)
684 691 return new_header, mmapindexthreshold, force_nodemap
685 692
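# Editor's note: a small standalone illustration of the chunk-cache-size
# validation performed at the end of _init_opts.  A positive integer n is a
# power of two exactly when n & (n - 1) == 0, because a power of two has a
# single bit set and subtracting one flips that bit and sets every lower bit.
def sketch_valid_chunk_cache_size(n):
    return n > 0 and (n & (n - 1)) == 0

assert sketch_valid_chunk_cache_size(65536)
assert not sketch_valid_chunk_cache_size(0)
assert not sketch_valid_chunk_cache_size(48 * 1024)  # 49152: two bits set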
686 693 def _get_data(self, filepath, mmap_threshold, size=None):
687 694 """return a file content with or without mmap
688 695
689 696 If the file is missing return the empty string"""
690 697 try:
691 698 with self.opener(filepath) as fp:
692 699 if mmap_threshold is not None:
693 700 file_size = self.opener.fstat(fp).st_size
694 701 if file_size >= mmap_threshold:
695 702 if size is not None:
696 703 # avoid a potential mmap crash
697 704 size = min(file_size, size)
698 705 # TODO: we should .close() the mmap to release resources
699 706 # without relying on Python GC
700 707 if size is None:
701 708 return util.buffer(util.mmapread(fp))
702 709 else:
703 710 return util.buffer(util.mmapread(fp, size))
704 711 if size is None:
705 712 return fp.read()
706 713 else:
707 714 return fp.read(size)
708 715 except FileNotFoundError:
709 716 return b''
710 717
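# Editor's note: a generic, standalone sketch of the "mmap above a size
# threshold, plain read below it" trade-off that _get_data makes, using only
# the standard library instead of mercurial's opener/util helpers.
import mmap
import os

def sketch_read_maybe_mmap(path, mmap_threshold=None):
    try:
        with open(path, 'rb') as fp:
            file_size = os.fstat(fp.fileno()).st_size
            if mmap_threshold is not None and 0 < mmap_threshold <= file_size:
                # memory-map large files; the mapping stays usable after the
                # file object itself is closed
                return mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
            return fp.read()
    except FileNotFoundError:
        return b''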
711 718 def get_streams(self, max_linkrev, force_inline=False):
712 719 """return a list of streams that represent this revlog
713 720
714 721 This is used by stream-clone to do bytes to bytes copies of a repository.
715 722
716 723 This streams data for all revisions that refer to a changelog revision up
717 724 to `max_linkrev`.
718 725
719 726 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
720 727
721 728 It returns a list of three-tuples:
722 729
723 730 [
724 731 (filename, bytes_stream, stream_size),
725 732 …
726 733 ]
727 734 """
728 735 n = len(self)
729 736 index = self.index
730 737 while n > 0:
731 738 linkrev = index[n - 1][4]
732 739 if linkrev < max_linkrev:
733 740 break
734 741 # note: this loop will rarely go through multiple iterations, since
735 742 # it only traverses commits created during the current streaming
736 743 # pull operation.
737 744 #
738 745 # If this becomes a problem, using a binary search should cap the
739 746 # runtime of this.
740 747 n = n - 1
741 748 if n == 0:
742 749 # no data to send
743 750 return []
744 751 index_size = n * index.entry_size
745 752 data_size = self.end(n - 1)
746 753
747 754 # XXX we might have been split (or stripped) since the object
748 755 # initialization. We need to close this race too, e.g. by having a way
749 756 # to pre-open the files we feed to the revlog and never closing them
750 757 # before we are done streaming.
751 758
752 759 if self._inline:
753 760
754 761 def get_stream():
755 762 with self._indexfp() as fp:
756 763 yield None
757 764 size = index_size + data_size
758 765 if size <= 65536:
759 766 yield fp.read(size)
760 767 else:
761 768 yield from util.filechunkiter(fp, limit=size)
762 769
763 770 inline_stream = get_stream()
764 771 next(inline_stream)
765 772 return [
766 773 (self._indexfile, inline_stream, index_size + data_size),
767 774 ]
768 775 elif force_inline:
769 776
770 777 def get_stream():
771 778 with self.reading():
772 779 yield None
773 780
774 781 for rev in range(n):
775 782 idx = self.index.entry_binary(rev)
776 783 if rev == 0 and self._docket is None:
777 784 # re-inject the inline flag
778 785 header = self._format_flags
779 786 header |= self._format_version
780 787 header |= FLAG_INLINE_DATA
781 788 header = self.index.pack_header(header)
782 789 idx = header + idx
783 790 yield idx
784 791 yield self._getsegmentforrevs(rev, rev)[1]
785 792
786 793 inline_stream = get_stream()
787 794 next(inline_stream)
788 795 return [
789 796 (self._indexfile, inline_stream, index_size + data_size),
790 797 ]
791 798 else:
792 799
793 800 def get_index_stream():
794 801 with self._indexfp() as fp:
795 802 yield None
796 803 if index_size <= 65536:
797 804 yield fp.read(index_size)
798 805 else:
799 806 yield from util.filechunkiter(fp, limit=index_size)
800 807
801 808 def get_data_stream():
802 809 with self._datafp() as fp:
803 810 yield None
804 811 if data_size <= 65536:
805 812 yield fp.read(data_size)
806 813 else:
807 814 yield from util.filechunkiter(fp, limit=data_size)
808 815
809 816 index_stream = get_index_stream()
810 817 next(index_stream)
811 818 data_stream = get_data_stream()
812 819 next(data_stream)
813 820 return [
814 821 (self._datafile, data_stream, data_size),
815 822 (self._indexfile, index_stream, index_size),
816 823 ]
817 824
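# Editor's note: an illustrative sketch of how a stream-clone style consumer
# could use the (filename, bytes_stream, stream_size) triples returned above:
# drain each generator and copy it byte for byte into a destination directory.
# `dest_dir` and the error handling are assumptions of this sketch, not part
# of mercurial's API.
import os

def sketch_copy_streams(streams, dest_dir):
    for filename, bytes_stream, expected_size in streams:
        target = os.path.join(dest_dir, os.fsdecode(filename))
        os.makedirs(os.path.dirname(target), exist_ok=True)
        written = 0
        with open(target, 'wb') as out:
            for chunk in bytes_stream:
                out.write(chunk)
                written += len(chunk)
        if written != expected_size:
            raise ValueError('short stream for %r: %d != %d'
                             % (filename, written, expected_size))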
818 825 def _loadindex(self, docket=None):
819 826
820 827 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
821 828
822 829 if self.postfix is not None:
823 830 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
824 831 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
825 832 entry_point = b'%s.i.a' % self.radix
826 833 elif self._try_split and self.opener.exists(self._split_index_file):
827 834 entry_point = self._split_index_file
828 835 else:
829 836 entry_point = b'%s.i' % self.radix
830 837
831 838 if docket is not None:
832 839 self._docket = docket
833 840 self._docket_file = entry_point
834 841 else:
835 842 self._initempty = True
836 843 entry_data = self._get_data(entry_point, mmapindexthreshold)
837 844 if len(entry_data) > 0:
838 845 header = INDEX_HEADER.unpack(entry_data[:4])[0]
839 846 self._initempty = False
840 847 else:
841 848 header = new_header
842 849
843 850 self._format_flags = header & ~0xFFFF
844 851 self._format_version = header & 0xFFFF
845 852
846 853 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
847 854 if supported_flags is None:
848 855 msg = _(b'unknown version (%d) in revlog %s')
849 856 msg %= (self._format_version, self.display_id)
850 857 raise error.RevlogError(msg)
851 858 elif self._format_flags & ~supported_flags:
852 859 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
853 860 display_flag = self._format_flags >> 16
854 861 msg %= (display_flag, self._format_version, self.display_id)
855 862 raise error.RevlogError(msg)
856 863
857 864 features = FEATURES_BY_VERSION[self._format_version]
858 865 self._inline = features[b'inline'](self._format_flags)
859 866 self.delta_config.general_delta = features[b'generaldelta'](
860 867 self._format_flags
861 868 )
862 869 self.feature_config.has_side_data = features[b'sidedata']
863 870
864 871 if not features[b'docket']:
865 872 self._indexfile = entry_point
866 873 index_data = entry_data
867 874 else:
868 875 self._docket_file = entry_point
869 876 if self._initempty:
870 877 self._docket = docketutil.default_docket(self, header)
871 878 else:
872 879 self._docket = docketutil.parse_docket(
873 880 self, entry_data, use_pending=self._trypending
874 881 )
875 882
876 883 if self._docket is not None:
877 884 self._indexfile = self._docket.index_filepath()
878 885 index_data = b''
879 886 index_size = self._docket.index_end
880 887 if index_size > 0:
881 888 index_data = self._get_data(
882 889 self._indexfile, mmapindexthreshold, size=index_size
883 890 )
884 891 if len(index_data) < index_size:
885 892 msg = _(b'too few index data for %s: got %d, expected %d')
886 893 msg %= (self.display_id, len(index_data), index_size)
887 894 raise error.RevlogError(msg)
888 895
889 896 self._inline = False
890 897 # generaldelta implied by version 2 revlogs.
891 898 self.delta_config.general_delta = True
892 899 # the logic for persistent nodemap will be dealt with within the
893 900 # main docket, so disable it for now.
894 901 self._nodemap_file = None
895 902
896 903 if self._docket is not None:
897 904 self._datafile = self._docket.data_filepath()
898 905 self._sidedatafile = self._docket.sidedata_filepath()
899 906 elif self.postfix is None:
900 907 self._datafile = b'%s.d' % self.radix
901 908 else:
902 909 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
903 910
904 911 self.nodeconstants = sha1nodeconstants
905 912 self.nullid = self.nodeconstants.nullid
906 913
907 914 # sparse-revlog can't be on without general-delta (issue6056)
908 915 if not self._generaldelta:
909 916 self.delta_config.sparse_revlog = False
910 917
911 918 self._storedeltachains = True
912 919
913 920 devel_nodemap = (
914 921 self._nodemap_file
915 922 and force_nodemap
916 923 and parse_index_v1_nodemap is not None
917 924 )
918 925
919 926 use_rust_index = False
920 927 if rustrevlog is not None:
921 928 if self._nodemap_file is not None:
922 929 use_rust_index = True
923 930 else:
924 931 use_rust_index = self.opener.options.get(b'rust.index')
925 932
926 933 self._parse_index = parse_index_v1
927 934 if self._format_version == REVLOGV0:
928 935 self._parse_index = revlogv0.parse_index_v0
929 936 elif self._format_version == REVLOGV2:
930 937 self._parse_index = parse_index_v2
931 938 elif self._format_version == CHANGELOGV2:
932 939 self._parse_index = parse_index_cl_v2
933 940 elif devel_nodemap:
934 941 self._parse_index = parse_index_v1_nodemap
935 942 elif use_rust_index:
936 943 self._parse_index = parse_index_v1_mixed
937 944 try:
938 945 d = self._parse_index(index_data, self._inline)
939 946 index, chunkcache = d
940 947 use_nodemap = (
941 948 not self._inline
942 949 and self._nodemap_file is not None
943 950 and hasattr(index, 'update_nodemap_data')
944 951 )
945 952 if use_nodemap:
946 953 nodemap_data = nodemaputil.persisted_data(self)
947 954 if nodemap_data is not None:
948 955 docket = nodemap_data[0]
949 956 if (
950 957 len(d[0]) > docket.tip_rev
951 958 and d[0][docket.tip_rev][7] == docket.tip_node
952 959 ):
953 960 # no changelog tampering
954 961 self._nodemap_docket = docket
955 962 index.update_nodemap_data(*nodemap_data)
956 963 except (ValueError, IndexError):
957 964 raise error.RevlogError(
958 965 _(b"index %s is corrupted") % self.display_id
959 966 )
960 967 self.index = index
961 968 self._segmentfile = randomaccessfile.randomaccessfile(
962 969 self.opener,
963 970 (self._indexfile if self._inline else self._datafile),
964 971 self._chunkcachesize,
965 972 chunkcache,
966 973 )
967 974 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
968 975 self.opener,
969 976 self._sidedatafile,
970 977 self._chunkcachesize,
971 978 )
972 979 # revnum -> (chain-length, sum-delta-length)
973 980 self._chaininfocache = util.lrucachedict(500)
974 981 # revlog header -> revlog compressor
975 982 self._decompressors = {}
976 983
977 984 def get_revlog(self):
978 985 """simple function to mirror API of other not-really-revlog API"""
979 986 return self
980 987
981 988 @util.propertycache
982 989 def revlog_kind(self):
983 990 return self.target[0]
984 991
985 992 @util.propertycache
986 993 def display_id(self):
987 994 """The public facing "ID" of the revlog that we use in message"""
988 995 if self.revlog_kind == KIND_FILELOG:
989 996 # Reference the file without the "data/" prefix, so it is familiar
990 997 # to the user.
991 998 return self.target[1]
992 999 else:
993 1000 return self.radix
994 1001
995 1002 def _get_decompressor(self, t):
996 1003 try:
997 1004 compressor = self._decompressors[t]
998 1005 except KeyError:
999 1006 try:
1000 1007 engine = util.compengines.forrevlogheader(t)
1001 1008 compressor = engine.revlogcompressor(self._compengineopts)
1002 1009 self._decompressors[t] = compressor
1003 1010 except KeyError:
1004 1011 raise error.RevlogError(
1005 1012 _(b'unknown compression type %s') % binascii.hexlify(t)
1006 1013 )
1007 1014 return compressor
1008 1015
1009 1016 @util.propertycache
1010 1017 def _compressor(self):
1011 1018 engine = util.compengines[self._compengine]
1012 1019 return engine.revlogcompressor(self._compengineopts)
1013 1020
1014 1021 @util.propertycache
1015 1022 def _decompressor(self):
1016 1023 """the default decompressor"""
1017 1024 if self._docket is None:
1018 1025 return None
1019 1026 t = self._docket.default_compression_header
1020 1027 c = self._get_decompressor(t)
1021 1028 return c.decompress
1022 1029
1023 1030 def _indexfp(self):
1024 1031 """file object for the revlog's index file"""
1025 1032 return self.opener(self._indexfile, mode=b"r")
1026 1033
1027 1034 def __index_write_fp(self):
1028 1035 # You should not use this directly; use `_writing` instead
1029 1036 try:
1030 1037 f = self.opener(
1031 1038 self._indexfile, mode=b"r+", checkambig=self._checkambig
1032 1039 )
1033 1040 if self._docket is None:
1034 1041 f.seek(0, os.SEEK_END)
1035 1042 else:
1036 1043 f.seek(self._docket.index_end, os.SEEK_SET)
1037 1044 return f
1038 1045 except FileNotFoundError:
1039 1046 return self.opener(
1040 1047 self._indexfile, mode=b"w+", checkambig=self._checkambig
1041 1048 )
1042 1049
1043 1050 def __index_new_fp(self):
1044 1051 # You should not use this unless you are upgrading from inline revlog
1045 1052 return self.opener(
1046 1053 self._indexfile,
1047 1054 mode=b"w",
1048 1055 checkambig=self._checkambig,
1049 1056 atomictemp=True,
1050 1057 )
1051 1058
1052 1059 def _datafp(self, mode=b'r'):
1053 1060 """file object for the revlog's data file"""
1054 1061 return self.opener(self._datafile, mode=mode)
1055 1062
1056 1063 @contextlib.contextmanager
1057 1064 def _sidedatareadfp(self):
1058 1065 """file object suitable to read sidedata"""
1059 1066 if self._writinghandles:
1060 1067 yield self._writinghandles[2]
1061 1068 else:
1062 1069 with self.opener(self._sidedatafile) as fp:
1063 1070 yield fp
1064 1071
1065 1072 def tiprev(self):
1066 1073 return len(self.index) - 1
1067 1074
1068 1075 def tip(self):
1069 1076 return self.node(self.tiprev())
1070 1077
1071 1078 def __contains__(self, rev):
1072 1079 return 0 <= rev < len(self)
1073 1080
1074 1081 def __len__(self):
1075 1082 return len(self.index)
1076 1083
1077 1084 def __iter__(self):
1078 1085 return iter(range(len(self)))
1079 1086
1080 1087 def revs(self, start=0, stop=None):
1081 1088 """iterate over all rev in this revlog (from start to stop)"""
1082 1089 return storageutil.iterrevs(len(self), start=start, stop=stop)
1083 1090
1084 1091 def hasnode(self, node):
1085 1092 try:
1086 1093 self.rev(node)
1087 1094 return True
1088 1095 except KeyError:
1089 1096 return False
1090 1097
1091 1098 def _candelta(self, baserev, rev):
1092 1099 """whether two revisions (baserev, rev) can be delta-ed or not"""
1093 1100 # Disable delta if either rev requires a content-changing flag
1094 1101 # processor (ex. LFS). This is because such flag processor can alter
1095 1102 # the rawtext content that the delta will be based on, and two clients
1096 1103 # could have the same revlog node with different flags (i.e. different
1097 1104 # rawtext contents) and the delta could be incompatible.
1098 1105 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1099 1106 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1100 1107 ):
1101 1108 return False
1102 1109 return True
1103 1110
1104 1111 def update_caches(self, transaction):
1105 1112 """update on disk cache
1106 1113
1107 1114 If a transaction is passed, the update may be delayed to transaction
1108 1115 commit."""
1109 1116 if self._nodemap_file is not None:
1110 1117 if transaction is None:
1111 1118 nodemaputil.update_persistent_nodemap(self)
1112 1119 else:
1113 1120 nodemaputil.setup_persistent_nodemap(transaction, self)
1114 1121
1115 1122 def clearcaches(self):
1116 1123 """Clear in-memory caches"""
1117 1124 self._revisioncache = None
1118 1125 self._chainbasecache.clear()
1119 1126 self._segmentfile.clear_cache()
1120 1127 self._segmentfile_sidedata.clear_cache()
1121 1128 self._pcache = {}
1122 1129 self._nodemap_docket = None
1123 1130 self.index.clearcaches()
1124 1131 # The python code is the one responsible for validating the docket, so
1125 1132 # we end up having to refresh it here.
1126 1133 use_nodemap = (
1127 1134 not self._inline
1128 1135 and self._nodemap_file is not None
1129 1136 and hasattr(self.index, 'update_nodemap_data')
1130 1137 )
1131 1138 if use_nodemap:
1132 1139 nodemap_data = nodemaputil.persisted_data(self)
1133 1140 if nodemap_data is not None:
1134 1141 self._nodemap_docket = nodemap_data[0]
1135 1142 self.index.update_nodemap_data(*nodemap_data)
1136 1143
1137 1144 def rev(self, node):
1138 1145 """return the revision number associated with a <nodeid>"""
1139 1146 try:
1140 1147 return self.index.rev(node)
1141 1148 except TypeError:
1142 1149 raise
1143 1150 except error.RevlogError:
1144 1151 # parsers.c radix tree lookup failed
1145 1152 if (
1146 1153 node == self.nodeconstants.wdirid
1147 1154 or node in self.nodeconstants.wdirfilenodeids
1148 1155 ):
1149 1156 raise error.WdirUnsupported
1150 1157 raise error.LookupError(node, self.display_id, _(b'no node'))
1151 1158
1152 1159 # Accessors for index entries.
1153 1160
1154 1161 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1155 1162 # are flags.
1156 1163 def start(self, rev):
1157 1164 return int(self.index[rev][0] >> 16)
1158 1165
1159 1166 def sidedata_cut_off(self, rev):
1160 1167 sd_cut_off = self.index[rev][8]
1161 1168 if sd_cut_off != 0:
1162 1169 return sd_cut_off
1163 1170 # This is some annoying dance, because entries without sidedata
1164 1171 # currently use 0 as their offset (instead of previous-offset +
1165 1172 # previous-size).
1166 1173 #
1167 1174 # We should reconsider this sidedata → 0 sidedata_offset policy.
1168 1175 # In the meantime, we need this.
1169 1176 while 0 <= rev:
1170 1177 e = self.index[rev]
1171 1178 if e[9] != 0:
1172 1179 return e[8] + e[9]
1173 1180 rev -= 1
1174 1181 return 0
1175 1182
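# Editor's note: a toy version of the backward scan above.  Each index entry
# is modelled as a (sidedata_offset, sidedata_length) pair; entries without
# sidedata store offset 0, so the cut-off falls back to the end of the nearest
# earlier revision that actually has sidedata.  The entries are invented.
def sketch_sidedata_cut_off(rev, entries):
    offset, length = entries[rev]
    if offset != 0:
        return offset
    while rev >= 0:
        offset, length = entries[rev]
        if length != 0:
            return offset + length
        rev -= 1
    return 0

entries = [(0, 0), (0, 10), (0, 0), (20, 5)]
assert sketch_sidedata_cut_off(2, entries) == 10  # end of rev 1's sidedata
assert sketch_sidedata_cut_off(3, entries) == 20  # rev 3 has its own offset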
1176 1183 def flags(self, rev):
1177 1184 return self.index[rev][0] & 0xFFFF
1178 1185
1179 1186 def length(self, rev):
1180 1187 return self.index[rev][1]
1181 1188
1182 1189 def sidedata_length(self, rev):
1183 1190 if not self.hassidedata:
1184 1191 return 0
1185 1192 return self.index[rev][9]
1186 1193
1187 1194 def rawsize(self, rev):
1188 1195 """return the length of the uncompressed text for a given revision"""
1189 1196 l = self.index[rev][2]
1190 1197 if l >= 0:
1191 1198 return l
1192 1199
1193 1200 t = self.rawdata(rev)
1194 1201 return len(t)
1195 1202
1196 1203 def size(self, rev):
1197 1204 """length of non-raw text (processed by a "read" flag processor)"""
1198 1205 # fast path: if no "read" flag processor could change the content,
1199 1206 # size is rawsize. note: ELLIPSIS is known to not change the content.
1200 1207 flags = self.flags(rev)
1201 1208 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1202 1209 return self.rawsize(rev)
1203 1210
1204 1211 return len(self.revision(rev))
1205 1212
1206 1213 def fast_rank(self, rev):
1207 1214 """Return the rank of a revision if already known, or None otherwise.
1208 1215
1209 1216 The rank of a revision is the size of the sub-graph it defines as a
1210 1217 head. Equivalently, the rank of a revision `r` is the size of the set
1211 1218 `ancestors(r)`, `r` included.
1212 1219
1213 1220 This method returns the rank retrieved from the revlog in constant
1214 1221 time. It makes no attempt at computing unknown values for versions of
1215 1222 the revlog which do not persist the rank.
1216 1223 """
1217 1224 rank = self.index[rev][ENTRY_RANK]
1218 1225 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1219 1226 return None
1220 1227 if rev == nullrev:
1221 1228 return 0 # convention
1222 1229 return rank
1223 1230
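# Editor's note: a naive, standalone reference for what the persisted "rank"
# means: the number of revisions in `ancestors(r)`, r included.  The toy
# parent map below is invented; real callers would use parentrevs(), and only
# changelog-v2 actually stores the value that fast_rank returns.
SKETCH_NULLREV = -1

def sketch_rank(rev, parentrevs):
    seen = set()
    stack = [rev]
    while stack:
        r = stack.pop()
        if r == SKETCH_NULLREV or r in seen:
            continue
        seen.add(r)
        stack.extend(parentrevs(r))
    return len(seen)

# toy DAG: 0 <- 1 <- 2, and 3 merges 2 and 0
parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, 0)}
assert sketch_rank(3, parents.__getitem__) == 4  # {0, 1, 2, 3}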
1224 1231 def chainbase(self, rev):
1225 1232 base = self._chainbasecache.get(rev)
1226 1233 if base is not None:
1227 1234 return base
1228 1235
1229 1236 index = self.index
1230 1237 iterrev = rev
1231 1238 base = index[iterrev][3]
1232 1239 while base != iterrev:
1233 1240 iterrev = base
1234 1241 base = index[iterrev][3]
1235 1242
1236 1243 self._chainbasecache[rev] = base
1237 1244 return base
1238 1245
1239 1246 def linkrev(self, rev):
1240 1247 return self.index[rev][4]
1241 1248
1242 1249 def parentrevs(self, rev):
1243 1250 try:
1244 1251 entry = self.index[rev]
1245 1252 except IndexError:
1246 1253 if rev == wdirrev:
1247 1254 raise error.WdirUnsupported
1248 1255 raise
1249 1256
1250 1257 if self.canonical_parent_order and entry[5] == nullrev:
1251 1258 return entry[6], entry[5]
1252 1259 else:
1253 1260 return entry[5], entry[6]
1254 1261
1255 1262 # fast parentrevs(rev) where rev isn't filtered
1256 1263 _uncheckedparentrevs = parentrevs
1257 1264
1258 1265 def node(self, rev):
1259 1266 try:
1260 1267 return self.index[rev][7]
1261 1268 except IndexError:
1262 1269 if rev == wdirrev:
1263 1270 raise error.WdirUnsupported
1264 1271 raise
1265 1272
1266 1273 # Derived from index values.
1267 1274
1268 1275 def end(self, rev):
1269 1276 return self.start(rev) + self.length(rev)
1270 1277
1271 1278 def parents(self, node):
1272 1279 i = self.index
1273 1280 d = i[self.rev(node)]
1274 1281 # inline node() to avoid function call overhead
1275 1282 if self.canonical_parent_order and d[5] == self.nullid:
1276 1283 return i[d[6]][7], i[d[5]][7]
1277 1284 else:
1278 1285 return i[d[5]][7], i[d[6]][7]
1279 1286
1280 1287 def chainlen(self, rev):
1281 1288 return self._chaininfo(rev)[0]
1282 1289
1283 1290 def _chaininfo(self, rev):
1284 1291 chaininfocache = self._chaininfocache
1285 1292 if rev in chaininfocache:
1286 1293 return chaininfocache[rev]
1287 1294 index = self.index
1288 1295 generaldelta = self._generaldelta
1289 1296 iterrev = rev
1290 1297 e = index[iterrev]
1291 1298 clen = 0
1292 1299 compresseddeltalen = 0
1293 1300 while iterrev != e[3]:
1294 1301 clen += 1
1295 1302 compresseddeltalen += e[1]
1296 1303 if generaldelta:
1297 1304 iterrev = e[3]
1298 1305 else:
1299 1306 iterrev -= 1
1300 1307 if iterrev in chaininfocache:
1301 1308 t = chaininfocache[iterrev]
1302 1309 clen += t[0]
1303 1310 compresseddeltalen += t[1]
1304 1311 break
1305 1312 e = index[iterrev]
1306 1313 else:
1307 1314 # Add text length of base since decompressing that also takes
1308 1315 # work. For cache hits the length is already included.
1309 1316 compresseddeltalen += e[1]
1310 1317 r = (clen, compresseddeltalen)
1311 1318 chaininfocache[rev] = r
1312 1319 return r
1313 1320
1314 1321 def _deltachain(self, rev, stoprev=None):
1315 1322 """Obtain the delta chain for a revision.
1316 1323
1317 1324 ``stoprev`` specifies a revision to stop at. If not specified, we
1318 1325 stop at the base of the chain.
1319 1326
1320 1327 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1321 1328 revs in ascending order and ``stopped`` is a bool indicating whether
1322 1329 ``stoprev`` was hit.
1323 1330 """
1324 1331 # Try C implementation.
1325 1332 try:
1326 1333 return self.index.deltachain(rev, stoprev, self._generaldelta)
1327 1334 except AttributeError:
1328 1335 pass
1329 1336
1330 1337 chain = []
1331 1338
1332 1339 # Alias to prevent attribute lookup in tight loop.
1333 1340 index = self.index
1334 1341 generaldelta = self._generaldelta
1335 1342
1336 1343 iterrev = rev
1337 1344 e = index[iterrev]
1338 1345 while iterrev != e[3] and iterrev != stoprev:
1339 1346 chain.append(iterrev)
1340 1347 if generaldelta:
1341 1348 iterrev = e[3]
1342 1349 else:
1343 1350 iterrev -= 1
1344 1351 e = index[iterrev]
1345 1352
1346 1353 if iterrev == stoprev:
1347 1354 stopped = True
1348 1355 else:
1349 1356 chain.append(iterrev)
1350 1357 stopped = False
1351 1358
1352 1359 chain.reverse()
1353 1360 return chain, stopped
1354 1361
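# Editor's note: a toy, standalone walk of a delta chain mirroring the pure
# python fallback above.  `bases` maps rev -> delta base (base == rev marks a
# full snapshot); with general delta the chain follows the stored bases, while
# older revlogs always delta against rev - 1.  The data is invented.
def sketch_deltachain(rev, bases, generaldelta=True):
    chain = []
    while True:
        chain.append(rev)
        base = bases[rev]
        if base == rev:  # reached a full snapshot
            break
        rev = base if generaldelta else rev - 1
    chain.reverse()
    return chain

bases = {0: 0, 1: 0, 2: 1, 3: 1}
assert sketch_deltachain(3, bases) == [0, 1, 3]
assert sketch_deltachain(3, bases, generaldelta=False) == [0, 1, 2, 3]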
1355 1362 def ancestors(self, revs, stoprev=0, inclusive=False):
1356 1363 """Generate the ancestors of 'revs' in reverse revision order.
1357 1364 Does not generate revs lower than stoprev.
1358 1365
1359 1366 See the documentation for ancestor.lazyancestors for more details."""
1360 1367
1361 1368 # first, make sure start revisions aren't filtered
1362 1369 revs = list(revs)
1363 1370 checkrev = self.node
1364 1371 for r in revs:
1365 1372 checkrev(r)
1366 1373 # and we're sure ancestors aren't filtered as well
1367 1374
1368 1375 if rustancestor is not None and self.index.rust_ext_compat:
1369 1376 lazyancestors = rustancestor.LazyAncestors
1370 1377 arg = self.index
1371 1378 else:
1372 1379 lazyancestors = ancestor.lazyancestors
1373 1380 arg = self._uncheckedparentrevs
1374 1381 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1375 1382
1376 1383 def descendants(self, revs):
1377 1384 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1378 1385
1379 1386 def findcommonmissing(self, common=None, heads=None):
1380 1387 """Return a tuple of the ancestors of common and the ancestors of heads
1381 1388 that are not ancestors of common. In revset terminology, we return the
1382 1389 tuple:
1383 1390
1384 1391 ::common, (::heads) - (::common)
1385 1392
1386 1393 The list is sorted by revision number, meaning it is
1387 1394 topologically sorted.
1388 1395
1389 1396 'heads' and 'common' are both lists of node IDs. If heads is
1390 1397 not supplied, uses all of the revlog's heads. If common is not
1391 1398 supplied, uses nullid."""
1392 1399 if common is None:
1393 1400 common = [self.nullid]
1394 1401 if heads is None:
1395 1402 heads = self.heads()
1396 1403
1397 1404 common = [self.rev(n) for n in common]
1398 1405 heads = [self.rev(n) for n in heads]
1399 1406
1400 1407 # we want the ancestors, but inclusive
1401 1408 class lazyset:
1402 1409 def __init__(self, lazyvalues):
1403 1410 self.addedvalues = set()
1404 1411 self.lazyvalues = lazyvalues
1405 1412
1406 1413 def __contains__(self, value):
1407 1414 return value in self.addedvalues or value in self.lazyvalues
1408 1415
1409 1416 def __iter__(self):
1410 1417 added = self.addedvalues
1411 1418 for r in added:
1412 1419 yield r
1413 1420 for r in self.lazyvalues:
1414 1421 if not r in added:
1415 1422 yield r
1416 1423
1417 1424 def add(self, value):
1418 1425 self.addedvalues.add(value)
1419 1426
1420 1427 def update(self, values):
1421 1428 self.addedvalues.update(values)
1422 1429
1423 1430 has = lazyset(self.ancestors(common))
1424 1431 has.add(nullrev)
1425 1432 has.update(common)
1426 1433
1427 1434 # take all ancestors from heads that aren't in has
1428 1435 missing = set()
1429 1436 visit = collections.deque(r for r in heads if r not in has)
1430 1437 while visit:
1431 1438 r = visit.popleft()
1432 1439 if r in missing:
1433 1440 continue
1434 1441 else:
1435 1442 missing.add(r)
1436 1443 for p in self.parentrevs(r):
1437 1444 if p not in has:
1438 1445 visit.append(p)
1439 1446 missing = list(missing)
1440 1447 missing.sort()
1441 1448 return has, [self.node(miss) for miss in missing]
1442 1449
1443 1450 def incrementalmissingrevs(self, common=None):
1444 1451 """Return an object that can be used to incrementally compute the
1445 1452 revision numbers of the ancestors of arbitrary sets that are not
1446 1453 ancestors of common. This is an ancestor.incrementalmissingancestors
1447 1454 object.
1448 1455
1449 1456 'common' is a list of revision numbers. If common is not supplied, uses
1450 1457 nullrev.
1451 1458 """
1452 1459 if common is None:
1453 1460 common = [nullrev]
1454 1461
1455 1462 if rustancestor is not None and self.index.rust_ext_compat:
1456 1463 return rustancestor.MissingAncestors(self.index, common)
1457 1464 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1458 1465
1459 1466 def findmissingrevs(self, common=None, heads=None):
1460 1467 """Return the revision numbers of the ancestors of heads that
1461 1468 are not ancestors of common.
1462 1469
1463 1470 More specifically, return a list of revision numbers corresponding to
1464 1471 nodes N such that every N satisfies the following constraints:
1465 1472
1466 1473 1. N is an ancestor of some node in 'heads'
1467 1474 2. N is not an ancestor of any node in 'common'
1468 1475
1469 1476 The list is sorted by revision number, meaning it is
1470 1477 topologically sorted.
1471 1478
1472 1479 'heads' and 'common' are both lists of revision numbers. If heads is
1473 1480 not supplied, uses all of the revlog's heads. If common is not
1474 1481 supplied, uses nullid."""
1475 1482 if common is None:
1476 1483 common = [nullrev]
1477 1484 if heads is None:
1478 1485 heads = self.headrevs()
1479 1486
1480 1487 inc = self.incrementalmissingrevs(common=common)
1481 1488 return inc.missingancestors(heads)
1482 1489
1483 1490 def findmissing(self, common=None, heads=None):
1484 1491 """Return the ancestors of heads that are not ancestors of common.
1485 1492
1486 1493 More specifically, return a list of nodes N such that every N
1487 1494 satisfies the following constraints:
1488 1495
1489 1496 1. N is an ancestor of some node in 'heads'
1490 1497 2. N is not an ancestor of any node in 'common'
1491 1498
1492 1499 The list is sorted by revision number, meaning it is
1493 1500 topologically sorted.
1494 1501
1495 1502 'heads' and 'common' are both lists of node IDs. If heads is
1496 1503 not supplied, uses all of the revlog's heads. If common is not
1497 1504 supplied, uses nullid."""
1498 1505 if common is None:
1499 1506 common = [self.nullid]
1500 1507 if heads is None:
1501 1508 heads = self.heads()
1502 1509
1503 1510 common = [self.rev(n) for n in common]
1504 1511 heads = [self.rev(n) for n in heads]
1505 1512
1506 1513 inc = self.incrementalmissingrevs(common=common)
1507 1514 return [self.node(r) for r in inc.missingancestors(heads)]
1508 1515
1509 1516 def nodesbetween(self, roots=None, heads=None):
1510 1517 """Return a topological path from 'roots' to 'heads'.
1511 1518
1512 1519 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1513 1520 topologically sorted list of all nodes N that satisfy both of
1514 1521 these constraints:
1515 1522
1516 1523 1. N is a descendant of some node in 'roots'
1517 1524 2. N is an ancestor of some node in 'heads'
1518 1525
1519 1526 Every node is considered to be both a descendant and an ancestor
1520 1527 of itself, so every reachable node in 'roots' and 'heads' will be
1521 1528 included in 'nodes'.
1522 1529
1523 1530 'outroots' is the list of reachable nodes in 'roots', i.e., the
1524 1531 subset of 'roots' that is returned in 'nodes'. Likewise,
1525 1532 'outheads' is the subset of 'heads' that is also in 'nodes'.
1526 1533
1527 1534 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1528 1535 unspecified, uses nullid as the only root. If 'heads' is
1529 1536 unspecified, uses list of all of the revlog's heads."""
1530 1537 nonodes = ([], [], [])
1531 1538 if roots is not None:
1532 1539 roots = list(roots)
1533 1540 if not roots:
1534 1541 return nonodes
1535 1542 lowestrev = min([self.rev(n) for n in roots])
1536 1543 else:
1537 1544 roots = [self.nullid] # Everybody's a descendant of nullid
1538 1545 lowestrev = nullrev
1539 1546 if (lowestrev == nullrev) and (heads is None):
1540 1547 # We want _all_ the nodes!
1541 1548 return (
1542 1549 [self.node(r) for r in self],
1543 1550 [self.nullid],
1544 1551 list(self.heads()),
1545 1552 )
1546 1553 if heads is None:
1547 1554 # All nodes are ancestors, so the latest ancestor is the last
1548 1555 # node.
1549 1556 highestrev = len(self) - 1
1550 1557 # Set ancestors to None to signal that every node is an ancestor.
1551 1558 ancestors = None
1552 1559 # Set heads to an empty dictionary for later discovery of heads
1553 1560 heads = {}
1554 1561 else:
1555 1562 heads = list(heads)
1556 1563 if not heads:
1557 1564 return nonodes
1558 1565 ancestors = set()
1559 1566 # Turn heads into a dictionary so we can remove 'fake' heads.
1560 1567 # Also, later we will be using it to filter out the heads we can't
1561 1568 # find from roots.
1562 1569 heads = dict.fromkeys(heads, False)
1563 1570 # Start at the top and keep marking parents until we're done.
1564 1571 nodestotag = set(heads)
1565 1572 # Remember where the top was so we can use it as a limit later.
1566 1573 highestrev = max([self.rev(n) for n in nodestotag])
1567 1574 while nodestotag:
1568 1575 # grab a node to tag
1569 1576 n = nodestotag.pop()
1570 1577 # Never tag nullid
1571 1578 if n == self.nullid:
1572 1579 continue
1573 1580 # A node's revision number represents its place in a
1574 1581 # topologically sorted list of nodes.
1575 1582 r = self.rev(n)
1576 1583 if r >= lowestrev:
1577 1584 if n not in ancestors:
1578 1585 # If we are possibly a descendant of one of the roots
1579 1586 # and we haven't already been marked as an ancestor
1580 1587 ancestors.add(n) # Mark as ancestor
1581 1588 # Add non-nullid parents to list of nodes to tag.
1582 1589 nodestotag.update(
1583 1590 [p for p in self.parents(n) if p != self.nullid]
1584 1591 )
1585 1592 elif n in heads: # We've seen it before, is it a fake head?
1586 1593 # So it is, real heads should not be the ancestors of
1587 1594 # any other heads.
1588 1595 heads.pop(n)
1589 1596 if not ancestors:
1590 1597 return nonodes
1591 1598 # Now that we have our set of ancestors, we want to remove any
1592 1599 # roots that are not ancestors.
1593 1600
1594 1601 # If one of the roots was nullid, everything is included anyway.
1595 1602 if lowestrev > nullrev:
1596 1603 # But, since we weren't, let's recompute the lowest rev to not
1597 1604 # include roots that aren't ancestors.
1598 1605
1599 1606 # Filter out roots that aren't ancestors of heads
1600 1607 roots = [root for root in roots if root in ancestors]
1601 1608 # Recompute the lowest revision
1602 1609 if roots:
1603 1610 lowestrev = min([self.rev(root) for root in roots])
1604 1611 else:
1605 1612 # No more roots? Return empty list
1606 1613 return nonodes
1607 1614 else:
1608 1615 # We are descending from nullid, and don't need to care about
1609 1616 # any other roots.
1610 1617 lowestrev = nullrev
1611 1618 roots = [self.nullid]
1612 1619 # Transform our roots list into a set.
1613 1620 descendants = set(roots)
1614 1621 # Also, keep the original roots so we can filter out roots that aren't
1615 1622 # 'real' roots (i.e. are descended from other roots).
1616 1623 roots = descendants.copy()
1617 1624 # Our topologically sorted list of output nodes.
1618 1625 orderedout = []
1619 1626 # Don't start at nullid since we don't want nullid in our output list,
1620 1627 # and if nullid shows up in descendants, empty parents will look like
1621 1628 # they're descendants.
1622 1629 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1623 1630 n = self.node(r)
1624 1631 isdescendant = False
1625 1632 if lowestrev == nullrev: # Everybody is a descendant of nullid
1626 1633 isdescendant = True
1627 1634 elif n in descendants:
1628 1635 # n is already a descendant
1629 1636 isdescendant = True
1630 1637 # This check only needs to be done here because all the roots
1631 1638 # will start being marked as descendants before the loop.
1632 1639 if n in roots:
1633 1640 # If n was a root, check if it's a 'real' root.
1634 1641 p = tuple(self.parents(n))
1635 1642 # If any of its parents are descendants, it's not a root.
1636 1643 if (p[0] in descendants) or (p[1] in descendants):
1637 1644 roots.remove(n)
1638 1645 else:
1639 1646 p = tuple(self.parents(n))
1640 1647 # A node is a descendant if either of its parents are
1641 1648 # descendants. (We seeded the descendants set with the roots
1642 1649 # up there, remember?)
1643 1650 if (p[0] in descendants) or (p[1] in descendants):
1644 1651 descendants.add(n)
1645 1652 isdescendant = True
1646 1653 if isdescendant and ((ancestors is None) or (n in ancestors)):
1647 1654 # Only include nodes that are both descendants and ancestors.
1648 1655 orderedout.append(n)
1649 1656 if (ancestors is not None) and (n in heads):
1650 1657 # We're trying to figure out which heads are reachable
1651 1658 # from roots.
1652 1659 # Mark this head as having been reached
1653 1660 heads[n] = True
1654 1661 elif ancestors is None:
1655 1662 # Otherwise, we're trying to discover the heads.
1656 1663 # Assume this is a head because if it isn't, the next step
1657 1664 # will eventually remove it.
1658 1665 heads[n] = True
1659 1666 # But, obviously its parents aren't.
1660 1667 for p in self.parents(n):
1661 1668 heads.pop(p, None)
1662 1669 heads = [head for head, flag in heads.items() if flag]
1663 1670 roots = list(roots)
1664 1671 assert orderedout
1665 1672 assert roots
1666 1673 assert heads
1667 1674 return (orderedout, roots, heads)
1668 1675
1669 1676 def headrevs(self, revs=None):
1670 1677 if revs is None:
1671 1678 try:
1672 1679 return self.index.headrevs()
1673 1680 except AttributeError:
1674 1681 return self._headrevs()
1675 1682 if rustdagop is not None and self.index.rust_ext_compat:
1676 1683 return rustdagop.headrevs(self.index, revs)
1677 1684 return dagop.headrevs(revs, self._uncheckedparentrevs)
1678 1685
1679 1686 def computephases(self, roots):
1680 1687 return self.index.computephasesmapsets(roots)
1681 1688
1682 1689 def _headrevs(self):
1683 1690 count = len(self)
1684 1691 if not count:
1685 1692 return [nullrev]
1686 1693 # we won't iterate over filtered revs, so nobody is a head at start
1687 1694 ishead = [0] * (count + 1)
1688 1695 index = self.index
1689 1696 for r in self:
1690 1697 ishead[r] = 1 # I may be a head
1691 1698 e = index[r]
1692 1699 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1693 1700 return [r for r, val in enumerate(ishead) if val]
1694 1701
1695 1702 def heads(self, start=None, stop=None):
1696 1703 """return the list of all nodes that have no children
1697 1704
1698 1705 if start is specified, only heads that are descendants of
1699 1706 start will be returned
1700 1707 if stop is specified, it will consider all the revs from stop
1701 1708 as if they had no children
1702 1709 """
1703 1710 if start is None and stop is None:
1704 1711 if not len(self):
1705 1712 return [self.nullid]
1706 1713 return [self.node(r) for r in self.headrevs()]
1707 1714
1708 1715 if start is None:
1709 1716 start = nullrev
1710 1717 else:
1711 1718 start = self.rev(start)
1712 1719
1713 1720 stoprevs = {self.rev(n) for n in stop or []}
1714 1721
1715 1722 revs = dagop.headrevssubset(
1716 1723 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1717 1724 )
1718 1725
1719 1726 return [self.node(rev) for rev in revs]
1720 1727
1721 1728 def children(self, node):
1722 1729 """find the children of a given node"""
1723 1730 c = []
1724 1731 p = self.rev(node)
1725 1732 for r in self.revs(start=p + 1):
1726 1733 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1727 1734 if prevs:
1728 1735 for pr in prevs:
1729 1736 if pr == p:
1730 1737 c.append(self.node(r))
1731 1738 elif p == nullrev:
1732 1739 c.append(self.node(r))
1733 1740 return c
1734 1741
1735 1742 def commonancestorsheads(self, a, b):
1736 1743 """calculate all the heads of the common ancestors of nodes a and b"""
1737 1744 a, b = self.rev(a), self.rev(b)
1738 1745 ancs = self._commonancestorsheads(a, b)
1739 1746 return pycompat.maplist(self.node, ancs)
1740 1747
1741 1748 def _commonancestorsheads(self, *revs):
1742 1749 """calculate all the heads of the common ancestors of revs"""
1743 1750 try:
1744 1751 ancs = self.index.commonancestorsheads(*revs)
1745 1752 except (AttributeError, OverflowError): # C implementation failed
1746 1753 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1747 1754 return ancs
1748 1755
1749 1756 def isancestor(self, a, b):
1750 1757 """return True if node a is an ancestor of node b
1751 1758
1752 1759 A revision is considered an ancestor of itself."""
1753 1760 a, b = self.rev(a), self.rev(b)
1754 1761 return self.isancestorrev(a, b)
1755 1762
1756 1763 def isancestorrev(self, a, b):
1757 1764 """return True if revision a is an ancestor of revision b
1758 1765
1759 1766 A revision is considered an ancestor of itself.
1760 1767
1761 1768 The implementation of this is trivial but the use of
1762 1769 reachableroots is not."""
1763 1770 if a == nullrev:
1764 1771 return True
1765 1772 elif a == b:
1766 1773 return True
1767 1774 elif a > b:
1768 1775 return False
1769 1776 return bool(self.reachableroots(a, [b], [a], includepath=False))
1770 1777
1771 1778 def reachableroots(self, minroot, heads, roots, includepath=False):
1772 1779 """return (heads(::(<roots> and <roots>::<heads>)))
1773 1780
1774 1781 If includepath is True, return (<roots>::<heads>)."""
1775 1782 try:
1776 1783 return self.index.reachableroots2(
1777 1784 minroot, heads, roots, includepath
1778 1785 )
1779 1786 except AttributeError:
1780 1787 return dagop._reachablerootspure(
1781 1788 self.parentrevs, minroot, roots, heads, includepath
1782 1789 )
1783 1790
1784 1791 def ancestor(self, a, b):
1785 1792 """calculate the "best" common ancestor of nodes a and b"""
1786 1793
1787 1794 a, b = self.rev(a), self.rev(b)
1788 1795 try:
1789 1796 ancs = self.index.ancestors(a, b)
1790 1797 except (AttributeError, OverflowError):
1791 1798 ancs = ancestor.ancestors(self.parentrevs, a, b)
1792 1799 if ancs:
1793 1800 # choose a consistent winner when there's a tie
1794 1801 return min(map(self.node, ancs))
1795 1802 return self.nullid
1796 1803
1797 1804 def _match(self, id):
1798 1805 if isinstance(id, int):
1799 1806 # rev
1800 1807 return self.node(id)
1801 1808 if len(id) == self.nodeconstants.nodelen:
1802 1809 # possibly a binary node
1803 1810 # odds of a binary node being all hex in ASCII are 1 in 10**25
1804 1811 try:
1805 1812 node = id
1806 1813 self.rev(node) # quick search the index
1807 1814 return node
1808 1815 except error.LookupError:
1809 1816 pass # may be partial hex id
1810 1817 try:
1811 1818 # str(rev)
1812 1819 rev = int(id)
1813 1820 if b"%d" % rev != id:
1814 1821 raise ValueError
1815 1822 if rev < 0:
1816 1823 rev = len(self) + rev
1817 1824 if rev < 0 or rev >= len(self):
1818 1825 raise ValueError
1819 1826 return self.node(rev)
1820 1827 except (ValueError, OverflowError):
1821 1828 pass
1822 1829 if len(id) == 2 * self.nodeconstants.nodelen:
1823 1830 try:
1824 1831 # a full hex nodeid?
1825 1832 node = bin(id)
1826 1833 self.rev(node)
1827 1834 return node
1828 1835 except (binascii.Error, error.LookupError):
1829 1836 pass
1830 1837
1831 1838 def _partialmatch(self, id):
1832 1839 # we don't care about wdirfilenodeids as they should always be full hashes
1833 1840 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1834 1841 ambiguous = False
1835 1842 try:
1836 1843 partial = self.index.partialmatch(id)
1837 1844 if partial and self.hasnode(partial):
1838 1845 if maybewdir:
1839 1846 # single 'ff...' match in radix tree, ambiguous with wdir
1840 1847 ambiguous = True
1841 1848 else:
1842 1849 return partial
1843 1850 elif maybewdir:
1844 1851 # no 'ff...' match in radix tree, wdir identified
1845 1852 raise error.WdirUnsupported
1846 1853 else:
1847 1854 return None
1848 1855 except error.RevlogError:
1849 1856 # parsers.c radix tree lookup gave multiple matches
1850 1857 # fast path: for unfiltered changelog, radix tree is accurate
1851 1858 if not getattr(self, 'filteredrevs', None):
1852 1859 ambiguous = True
1853 1860 # fall through to slow path that filters hidden revisions
1854 1861 except (AttributeError, ValueError):
1855 1862 # we are pure python, or key is not hex
1856 1863 pass
1857 1864 if ambiguous:
1858 1865 raise error.AmbiguousPrefixLookupError(
1859 1866 id, self.display_id, _(b'ambiguous identifier')
1860 1867 )
1861 1868
1862 1869 if id in self._pcache:
1863 1870 return self._pcache[id]
1864 1871
1865 1872 if len(id) <= 40:
1866 1873 # hex(node)[:...]
1867 1874 l = len(id) // 2 * 2 # grab an even number of digits
1868 1875 try:
1869 1876 # we're dropping the last digit, so let's check that it's hex,
1870 1877 # to avoid the expensive computation below if it's not
1871 1878 if len(id) % 2 > 0:
1872 1879 if not (id[-1] in hexdigits):
1873 1880 return None
1874 1881 prefix = bin(id[:l])
1875 1882 except binascii.Error:
1876 1883 pass
1877 1884 else:
1878 1885 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1879 1886 nl = [
1880 1887 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1881 1888 ]
1882 1889 if self.nodeconstants.nullhex.startswith(id):
1883 1890 nl.append(self.nullid)
1884 1891 if len(nl) > 0:
1885 1892 if len(nl) == 1 and not maybewdir:
1886 1893 self._pcache[id] = nl[0]
1887 1894 return nl[0]
1888 1895 raise error.AmbiguousPrefixLookupError(
1889 1896 id, self.display_id, _(b'ambiguous identifier')
1890 1897 )
1891 1898 if maybewdir:
1892 1899 raise error.WdirUnsupported
1893 1900 return None
1894 1901
1895 1902 def lookup(self, id):
1896 1903 """locate a node based on:
1897 1904 - revision number or str(revision number)
1898 1905 - nodeid or subset of hex nodeid
1899 1906 """
1900 1907 n = self._match(id)
1901 1908 if n is not None:
1902 1909 return n
1903 1910 n = self._partialmatch(id)
1904 1911 if n:
1905 1912 return n
1906 1913
1907 1914 raise error.LookupError(id, self.display_id, _(b'no match found'))
1908 1915
1909 1916 def shortest(self, node, minlength=1):
1910 1917 """Find the shortest unambiguous prefix that matches node."""
1911 1918
1912 1919 def isvalid(prefix):
1913 1920 try:
1914 1921 matchednode = self._partialmatch(prefix)
1915 1922 except error.AmbiguousPrefixLookupError:
1916 1923 return False
1917 1924 except error.WdirUnsupported:
1918 1925 # single 'ff...' match
1919 1926 return True
1920 1927 if matchednode is None:
1921 1928 raise error.LookupError(node, self.display_id, _(b'no node'))
1922 1929 return True
1923 1930
1924 1931 def maybewdir(prefix):
1925 1932 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1926 1933
1927 1934 hexnode = hex(node)
1928 1935
1929 1936 def disambiguate(hexnode, minlength):
1930 1937 """Disambiguate against wdirid."""
1931 1938 for length in range(minlength, len(hexnode) + 1):
1932 1939 prefix = hexnode[:length]
1933 1940 if not maybewdir(prefix):
1934 1941 return prefix
1935 1942
1936 1943 if not getattr(self, 'filteredrevs', None):
1937 1944 try:
1938 1945 length = max(self.index.shortest(node), minlength)
1939 1946 return disambiguate(hexnode, length)
1940 1947 except error.RevlogError:
1941 1948 if node != self.nodeconstants.wdirid:
1942 1949 raise error.LookupError(
1943 1950 node, self.display_id, _(b'no node')
1944 1951 )
1945 1952 except AttributeError:
1946 1953 # Fall through to pure code
1947 1954 pass
1948 1955
1949 1956 if node == self.nodeconstants.wdirid:
1950 1957 for length in range(minlength, len(hexnode) + 1):
1951 1958 prefix = hexnode[:length]
1952 1959 if isvalid(prefix):
1953 1960 return prefix
1954 1961
1955 1962 for length in range(minlength, len(hexnode) + 1):
1956 1963 prefix = hexnode[:length]
1957 1964 if isvalid(prefix):
1958 1965 return disambiguate(hexnode, length)
1959 1966
1960 1967 def cmp(self, node, text):
1961 1968 """compare text with a given file revision
1962 1969
1963 1970 returns True if text is different than what is stored.
1964 1971 """
1965 1972 p1, p2 = self.parents(node)
1966 1973 return storageutil.hashrevisionsha1(text, p1, p2) != node
1967 1974
1968 1975 def _getsegmentforrevs(self, startrev, endrev):
1969 1976 """Obtain a segment of raw data corresponding to a range of revisions.
1970 1977
1971 1978 Accepts the start and end revisions. Reading goes through the
1972 1979 revlog's internal segment file, so no file handle needs to be
1973 1980 passed in.
1974 1981
1975 1982 Requests for data may be satisfied by a cache.
1976 1983
1977 1984 Returns a 2-tuple of (offset, data) for the requested range of
1978 1985 revisions. Offset is the integer offset from the beginning of the
1979 1986 revlog and data is a str or buffer of the raw byte data.
1980 1987
1981 1988 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1982 1989 to determine where each revision's data begins and ends.
1983 1990 """
1984 1991 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1985 1992 # (functions are expensive).
1986 1993 index = self.index
1987 1994 istart = index[startrev]
1988 1995 start = int(istart[0] >> 16)
1989 1996 if startrev == endrev:
1990 1997 end = start + istart[1]
1991 1998 else:
1992 1999 iend = index[endrev]
1993 2000 end = int(iend[0] >> 16) + iend[1]
1994 2001
1995 2002 if self._inline:
1996 2003 start += (startrev + 1) * self.index.entry_size
1997 2004 end += (endrev + 1) * self.index.entry_size
1998 2005 length = end - start
1999 2006
2000 2007 return start, self._segmentfile.read_chunk(start, length)
2001 2008
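# Editor's note: a worked example of the inline-offset adjustment above.  In
# an inline revlog the ".i" file interleaves index entries and revision data,
# so the physical position of a revision's data is its logical data offset
# plus one index entry per revision up to and including it.  The 64-byte
# entry size matches the v1 index format; the other numbers are invented.
SKETCH_ENTRY_SIZE = 64

def sketch_inline_physical_range(rev, logical_start, chunk_length):
    start = logical_start + (rev + 1) * SKETCH_ENTRY_SIZE
    return start, start + chunk_length

# revision 2, whose compressed data logically starts at byte 300 and is 50
# bytes long, sits at physical bytes [492, 542) of the .i file:
assert sketch_inline_physical_range(2, 300, 50) == (492, 542)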
2002 2009 def _chunk(self, rev):
2003 2010 """Obtain a single decompressed chunk for a revision.
2004 2011
2005 2012 Accepts an integer revision. Reading goes through the revlog's
2006 2013 internal segment file, so no file handle needs to be passed in and
2007 2014 no seek position is disturbed.
2008 2015
2009 2016 Returns a str holding uncompressed data for the requested revision.
2010 2017 """
2011 2018 compression_mode = self.index[rev][10]
2012 2019 data = self._getsegmentforrevs(rev, rev)[1]
2013 2020 if compression_mode == COMP_MODE_PLAIN:
2014 2021 return data
2015 2022 elif compression_mode == COMP_MODE_DEFAULT:
2016 2023 return self._decompressor(data)
2017 2024 elif compression_mode == COMP_MODE_INLINE:
2018 2025 return self.decompress(data)
2019 2026 else:
2020 2027 msg = b'unknown compression mode %d'
2021 2028 msg %= compression_mode
2022 2029 raise error.RevlogError(msg)
2023 2030
2024 2031 def _chunks(self, revs, targetsize=None):
2025 2032 """Obtain decompressed chunks for the specified revisions.
2026 2033
2027 2034 Accepts an iterable of numeric revisions that are assumed to be in
2028 2035 ascending order.
2031 2038
2032 2039 This function is similar to calling ``self._chunk()`` multiple times,
2033 2040 but is faster.
2034 2041
2035 2042 Returns a list with decompressed data for each requested revision.
2036 2043 """
2037 2044 if not revs:
2038 2045 return []
2039 2046 start = self.start
2040 2047 length = self.length
2041 2048 inline = self._inline
2042 2049 iosize = self.index.entry_size
2043 2050 buffer = util.buffer
2044 2051
2045 2052 l = []
2046 2053 ladd = l.append
2047 2054
2048 2055 if not self._withsparseread:
2049 2056 slicedchunks = (revs,)
2050 2057 else:
2051 2058 slicedchunks = deltautil.slicechunk(
2052 2059 self, revs, targetsize=targetsize
2053 2060 )
2054 2061
2055 2062 for revschunk in slicedchunks:
2056 2063 firstrev = revschunk[0]
2057 2064 # Skip trailing revisions with empty diff
2058 2065 for lastrev in revschunk[::-1]:
2059 2066 if length(lastrev) != 0:
2060 2067 break
2061 2068
2062 2069 try:
2063 2070 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2064 2071 except OverflowError:
2065 2072 # issue4215 - we can't cache a run of chunks greater than
2066 2073 # 2G on Windows
2067 2074 return [self._chunk(rev) for rev in revschunk]
2068 2075
2069 2076 decomp = self.decompress
2070 2077 # self._decompressor might be None, but will not be used in that case
2071 2078 def_decomp = self._decompressor
2072 2079 for rev in revschunk:
2073 2080 chunkstart = start(rev)
2074 2081 if inline:
2075 2082 chunkstart += (rev + 1) * iosize
2076 2083 chunklength = length(rev)
2077 2084 comp_mode = self.index[rev][10]
2078 2085 c = buffer(data, chunkstart - offset, chunklength)
2079 2086 if comp_mode == COMP_MODE_PLAIN:
2080 2087 ladd(c)
2081 2088 elif comp_mode == COMP_MODE_INLINE:
2082 2089 ladd(decomp(c))
2083 2090 elif comp_mode == COMP_MODE_DEFAULT:
2084 2091 ladd(def_decomp(c))
2085 2092 else:
2086 2093 msg = b'unknown compression mode %d'
2087 2094 msg %= comp_mode
2088 2095 raise error.RevlogError(msg)
2089 2096
2090 2097 return l
2091 2098
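# Illustrative sketch (standalone, not revlog API): the loop above reads one
# contiguous segment per slice and then carves out each revision's chunk by
# rebasing its absolute start against the segment offset, along these lines:
def _toy_slice_segment(segment, segment_offset, items):
    """Return per-item slices; ``items`` is a list of (absolute_start, length)."""
    return [
        segment[start - segment_offset : start - segment_offset + length]
        for start, length in items
    ]

# e.g. _toy_slice_segment(b'abcdef', 10, [(10, 2), (13, 3)]) == [b'ab', b'def']
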
2092 2099 def deltaparent(self, rev):
2093 2100 """return deltaparent of the given revision"""
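# With general-delta, the delta base is recorded explicitly in the index
# entry (a base equal to the revision itself marks a full snapshot);
# without general-delta, a revision is always a delta against the
# previous revision.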
2094 2101 base = self.index[rev][3]
2095 2102 if base == rev:
2096 2103 return nullrev
2097 2104 elif self._generaldelta:
2098 2105 return base
2099 2106 else:
2100 2107 return rev - 1
2101 2108
2102 2109 def issnapshot(self, rev):
2103 2110 """tells whether rev is a snapshot"""
2104 2111 if not self._sparserevlog:
2105 2112 return self.deltaparent(rev) == nullrev
2106 2113 elif hasattr(self.index, 'issnapshot'):
2107 2114 # directly assign the method to cache the testing and access
2108 2115 self.issnapshot = self.index.issnapshot
2109 2116 return self.issnapshot(rev)
2110 2117 if rev == nullrev:
2111 2118 return True
2112 2119 entry = self.index[rev]
2113 2120 base = entry[3]
2114 2121 if base == rev:
2115 2122 return True
2116 2123 if base == nullrev:
2117 2124 return True
2118 2125 p1 = entry[5]
2119 2126 while self.length(p1) == 0:
2120 2127 b = self.deltaparent(p1)
2121 2128 if b == p1:
2122 2129 break
2123 2130 p1 = b
2124 2131 p2 = entry[6]
2125 2132 while self.length(p2) == 0:
2126 2133 b = self.deltaparent(p2)
2127 2134 if b == p2:
2128 2135 break
2129 2136 p2 = b
2130 2137 if base == p1 or base == p2:
2131 2138 return False
2132 2139 return self.issnapshot(base)
2133 2140
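# Illustrative sketch (standalone, simplified): the recursion above boils
# down to "a revision is a snapshot if it is stored as full text, or if its
# delta base is itself a snapshot that is not one of its parents". Toy
# version over a plain list, with -1 standing in for nullrev and without
# the empty-diff parent skipping done above:
def _toy_issnapshot(entries, rev):
    """entries[rev] is a (deltabase, p1, p2) tuple of revision numbers."""
    if rev == -1:
        return True
    base, p1, p2 = entries[rev]
    if base == rev or base == -1:
        return True
    if base in (p1, p2):
        return False
    return _toy_issnapshot(entries, base)
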
2134 2141 def snapshotdepth(self, rev):
2135 2142 """number of snapshots in the chain before this one"""
2136 2143 if not self.issnapshot(rev):
2137 2144 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2138 2145 return len(self._deltachain(rev)[0]) - 1
2139 2146
2140 2147 def revdiff(self, rev1, rev2):
2141 2148 """return or calculate a delta between two revisions
2142 2149
2143 2150 The delta calculated is in binary form and is intended to be written to
2144 2151 revlog data directly. So this function needs raw revision data.
2145 2152 """
2146 2153 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2147 2154 return bytes(self._chunk(rev2))
2148 2155
2149 2156 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2150 2157
2151 2158 def revision(self, nodeorrev):
2152 2159 """return an uncompressed revision of a given node or revision
2153 2160 number.
2154 2161 """
2155 2162 return self._revisiondata(nodeorrev)
2156 2163
2157 2164 def sidedata(self, nodeorrev):
2158 2165 """a map of extra data related to the changeset but not part of the hash
2159 2166
2160 2167 This function currently returns a dictionary. However, a more advanced
2161 2168 mapping object will likely be used in the future for more
2162 2169 efficient/lazy processing.
2163 2170 """
2164 2171 # deal with <nodeorrev> argument type
2165 2172 if isinstance(nodeorrev, int):
2166 2173 rev = nodeorrev
2167 2174 else:
2168 2175 rev = self.rev(nodeorrev)
2169 2176 return self._sidedata(rev)
2170 2177
2171 2178 def _revisiondata(self, nodeorrev, raw=False):
2172 2179 # deal with <nodeorrev> argument type
2173 2180 if isinstance(nodeorrev, int):
2174 2181 rev = nodeorrev
2175 2182 node = self.node(rev)
2176 2183 else:
2177 2184 node = nodeorrev
2178 2185 rev = None
2179 2186
2180 2187 # fast path the special `nullid` rev
2181 2188 if node == self.nullid:
2182 2189 return b""
2183 2190
2184 2191 # ``rawtext`` is the text as stored inside the revlog. Might be the
2185 2192 # revision or might need to be processed to retrieve the revision.
2186 2193 rev, rawtext, validated = self._rawtext(node, rev)
2187 2194
2188 2195 if raw and validated:
2189 2196 # if we don't want to process the raw text and the raw
2190 2197 # text is cached, we can exit early.
2191 2198 return rawtext
2192 2199 if rev is None:
2193 2200 rev = self.rev(node)
2194 2201 # the revlog's flag for this revision
2195 2202 # (usually alter its state or content)
2196 2203 flags = self.flags(rev)
2197 2204
2198 2205 if validated and flags == REVIDX_DEFAULT_FLAGS:
2199 2206 # no extra flags set, no flag processor runs, text = rawtext
2200 2207 return rawtext
2201 2208
2202 2209 if raw:
2203 2210 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2204 2211 text = rawtext
2205 2212 else:
2206 2213 r = flagutil.processflagsread(self, rawtext, flags)
2207 2214 text, validatehash = r
2208 2215 if validatehash:
2209 2216 self.checkhash(text, node, rev=rev)
2210 2217 if not validated:
2211 2218 self._revisioncache = (node, rev, rawtext)
2212 2219
2213 2220 return text
2214 2221
2215 2222 def _rawtext(self, node, rev):
2216 2223 """return the possibly unvalidated rawtext for a revision
2217 2224
2218 2225 returns (rev, rawtext, validated)
2219 2226 """
2220 2227
2221 2228 # revision in the cache (could be useful to apply delta)
2222 2229 cachedrev = None
2223 2230 # An intermediate text to apply deltas to
2224 2231 basetext = None
2225 2232
2226 2233 # Check if we have the entry in cache
2227 2234 # The cache entry looks like (node, rev, rawtext)
2228 2235 if self._revisioncache:
2229 2236 if self._revisioncache[0] == node:
2230 2237 return (rev, self._revisioncache[2], True)
2231 2238 cachedrev = self._revisioncache[1]
2232 2239
2233 2240 if rev is None:
2234 2241 rev = self.rev(node)
2235 2242
2236 2243 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2237 2244 if stopped:
2238 2245 basetext = self._revisioncache[2]
2239 2246
2240 2247 # drop cache to save memory, the caller is expected to
2241 2248 # update self._revisioncache after validating the text
2242 2249 self._revisioncache = None
2243 2250
2244 2251 targetsize = None
2245 2252 rawsize = self.index[rev][2]
2246 2253 if 0 <= rawsize:
2247 2254 targetsize = 4 * rawsize
2248 2255
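# Rebuild the raw text by fetching every chunk of the delta chain and
# applying the deltas on top of the base text (either the cached rawtext
# the chain walk stopped at, or the first chunk of the chain itself).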
2249 2256 bins = self._chunks(chain, targetsize=targetsize)
2250 2257 if basetext is None:
2251 2258 basetext = bytes(bins[0])
2252 2259 bins = bins[1:]
2253 2260
2254 2261 rawtext = mdiff.patches(basetext, bins)
2255 2262 del basetext # let us have a chance to free memory early
2256 2263 return (rev, rawtext, False)
2257 2264
2258 2265 def _sidedata(self, rev):
2259 2266 """Return the sidedata for a given revision number."""
2260 2267 index_entry = self.index[rev]
2261 2268 sidedata_offset = index_entry[8]
2262 2269 sidedata_size = index_entry[9]
2263 2270
2264 2271 if self._inline:
2265 2272 sidedata_offset += self.index.entry_size * (1 + rev)
2266 2273 if sidedata_size == 0:
2267 2274 return {}
2268 2275
2269 2276 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2270 2277 filename = self._sidedatafile
2271 2278 end = self._docket.sidedata_end
2272 2279 offset = sidedata_offset
2273 2280 length = sidedata_size
2274 2281 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2275 2282 raise error.RevlogError(m)
2276 2283
2277 2284 comp_segment = self._segmentfile_sidedata.read_chunk(
2278 2285 sidedata_offset, sidedata_size
2279 2286 )
2280 2287
2281 2288 comp = self.index[rev][11]
2282 2289 if comp == COMP_MODE_PLAIN:
2283 2290 segment = comp_segment
2284 2291 elif comp == COMP_MODE_DEFAULT:
2285 2292 segment = self._decompressor(comp_segment)
2286 2293 elif comp == COMP_MODE_INLINE:
2287 2294 segment = self.decompress(comp_segment)
2288 2295 else:
2289 2296 msg = b'unknown compression mode %d'
2290 2297 msg %= comp
2291 2298 raise error.RevlogError(msg)
2292 2299
2293 2300 sidedata = sidedatautil.deserialize_sidedata(segment)
2294 2301 return sidedata
2295 2302
2296 2303 def rawdata(self, nodeorrev):
2297 2304 """return an uncompressed raw data of a given node or revision number."""
2298 2305 return self._revisiondata(nodeorrev, raw=True)
2299 2306
2300 2307 def hash(self, text, p1, p2):
2301 2308 """Compute a node hash.
2302 2309
2303 2310 Available as a function so that subclasses can replace the hash
2304 2311 as needed.
2305 2312 """
2306 2313 return storageutil.hashrevisionsha1(text, p1, p2)
2307 2314
2308 2315 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2309 2316 """Check node hash integrity.
2310 2317
2311 2318 Available as a function so that subclasses can extend hash mismatch
2312 2319 behaviors as needed.
2313 2320 """
2314 2321 try:
2315 2322 if p1 is None and p2 is None:
2316 2323 p1, p2 = self.parents(node)
2317 2324 if node != self.hash(text, p1, p2):
2318 2325 # Clear the revision cache on hash failure. The revision cache
2319 2326 # only stores the raw revision and clearing the cache does have
2320 2327 # the side-effect that we won't have a cache hit when the raw
2321 2328 # revision data is accessed. But this case should be rare and
2322 2329 # it is extra work to teach the cache about the hash
2323 2330 # verification state.
2324 2331 if self._revisioncache and self._revisioncache[0] == node:
2325 2332 self._revisioncache = None
2326 2333
2327 2334 revornode = rev
2328 2335 if revornode is None:
2329 2336 revornode = templatefilters.short(hex(node))
2330 2337 raise error.RevlogError(
2331 2338 _(b"integrity check failed on %s:%s")
2332 2339 % (self.display_id, pycompat.bytestr(revornode))
2333 2340 )
2334 2341 except error.RevlogError:
2335 2342 if self._censorable and storageutil.iscensoredtext(text):
2336 2343 raise error.CensoredNodeError(self.display_id, node, text)
2337 2344 raise
2338 2345
2339 2346 @property
2340 2347 def _split_index_file(self):
2341 2348 """the path at which to expect the index of an ongoing splitting operation
2342 2349
2343 2350 The file will only exist if a splitting operation is in progress, but
2344 2351 it is always expected at the same location."""
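# For example, a radix of b'data/some/file.txt' yields
# b'data-s/some/file.txt.i', while a store-root radix such as
# b'00manifest' yields b'00manifest.i.s'.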
2345 2352 parts = self.radix.split(b'/')
2346 2353 if len(parts) > 1:
2347 2354 # appends '-s' to the `data/` or `meta/` base directory
2348 2355 head = parts[0] + b'-s'
2349 2356 mids = parts[1:-1]
2350 2357 tail = parts[-1] + b'.i'
2351 2358 pieces = [head] + mids + [tail]
2352 2359 return b'/'.join(pieces)
2353 2360 else:
2354 2361 # the revlog is stored at the root of the store (changelog or
2355 2362 # manifest), no risk of collision.
2356 2363 return self.radix + b'.i.s'
2357 2364
2358 2365 def _enforceinlinesize(self, tr, side_write=True):
2359 2366 """Check if the revlog is too big for inline and convert if so.
2360 2367
2361 2368 This should be called after revisions are added to the revlog. If the
2362 2369 revlog has grown too large to be an inline revlog, it will convert it
2363 2370 to use multiple index and data files.
2364 2371 """
2365 2372 tiprev = len(self) - 1
2366 2373 total_size = self.start(tiprev) + self.length(tiprev)
2367 2374 if not self._inline or total_size < _maxinline:
2368 2375 return
2369 2376
2370 2377 troffset = tr.findoffset(self._indexfile)
2371 2378 if troffset is None:
2372 2379 raise error.RevlogError(
2373 2380 _(b"%s not found in the transaction") % self._indexfile
2374 2381 )
2375 2382 if troffset:
2376 2383 tr.addbackup(self._indexfile, for_offset=True)
2377 2384 tr.add(self._datafile, 0)
2378 2385
2379 2386 existing_handles = False
2380 2387 if self._writinghandles is not None:
2381 2388 existing_handles = True
2382 2389 fp = self._writinghandles[0]
2383 2390 fp.flush()
2384 2391 fp.close()
2385 2392 # We can't use the cached file handle after close(). So prevent
2386 2393 # its usage.
2387 2394 self._writinghandles = None
2388 2395 self._segmentfile.writing_handle = None
2389 2396 # No need to deal with the sidedata writing handle as it is only
2390 2397 # relevant for revlog-v2, which is never inline and so never reaches
2391 2398 # this code
2392 2399 if side_write:
2393 2400 old_index_file_path = self._indexfile
2394 2401 new_index_file_path = self._split_index_file
2395 2402 opener = self.opener
2396 2403 weak_self = weakref.ref(self)
2397 2404
2398 2405 # the "split" index replaces the real index when the transaction is finalized
2399 2406 def finalize_callback(tr):
2400 2407 opener.rename(
2401 2408 new_index_file_path,
2402 2409 old_index_file_path,
2403 2410 checkambig=True,
2404 2411 )
2405 2412 maybe_self = weak_self()
2406 2413 if maybe_self is not None:
2407 2414 maybe_self._indexfile = old_index_file_path
2408 2415
2409 2416 def abort_callback(tr):
2410 2417 maybe_self = weak_self()
2411 2418 if maybe_self is not None:
2412 2419 maybe_self._indexfile = old_index_file_path
2413 2420
2414 2421 tr.registertmp(new_index_file_path)
2415 2422 if self.target[1] is not None:
2416 2423 callback_id = b'000-revlog-split-%d-%s' % self.target
2417 2424 else:
2418 2425 callback_id = b'000-revlog-split-%d' % self.target[0]
2419 2426 tr.addfinalize(callback_id, finalize_callback)
2420 2427 tr.addabort(callback_id, abort_callback)
2421 2428
2422 2429 new_dfh = self._datafp(b'w+')
2423 2430 new_dfh.truncate(0) # drop any potentially existing data
2424 2431 try:
2425 2432 with self.reading():
2426 2433 for r in self:
2427 2434 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2428 2435 new_dfh.flush()
2429 2436
2430 2437 if side_write:
2431 2438 self._indexfile = new_index_file_path
2432 2439 with self.__index_new_fp() as fp:
2433 2440 self._format_flags &= ~FLAG_INLINE_DATA
2434 2441 self._inline = False
2435 2442 for i in self:
2436 2443 e = self.index.entry_binary(i)
2437 2444 if i == 0 and self._docket is None:
2438 2445 header = self._format_flags | self._format_version
2439 2446 header = self.index.pack_header(header)
2440 2447 e = header + e
2441 2448 fp.write(e)
2442 2449 if self._docket is not None:
2443 2450 self._docket.index_end = fp.tell()
2444 2451
2445 2452 # If we don't use side-write, the temp file replaces the real
2446 2453 # index when we exit the context manager
2447 2454
2448 2455 nodemaputil.setup_persistent_nodemap(tr, self)
2449 2456 self._segmentfile = randomaccessfile.randomaccessfile(
2450 2457 self.opener,
2451 2458 self._datafile,
2452 2459 self._chunkcachesize,
2453 2460 )
2454 2461
2455 2462 if existing_handles:
2456 2463 # switched from inline to conventional; reopen the index
2457 2464 ifh = self.__index_write_fp()
2458 2465 self._writinghandles = (ifh, new_dfh, None)
2459 2466 self._segmentfile.writing_handle = new_dfh
2460 2467 new_dfh = None
2461 2468 # No need to deal with the sidedata writing handle as it is only
2462 2469 # relevant for revlog-v2, which is never inline and so never reaches
2463 2470 # this code
2464 2471 finally:
2465 2472 if new_dfh is not None:
2466 2473 new_dfh.close()
2467 2474
2468 2475 def _nodeduplicatecallback(self, transaction, node):
2469 2476 """called when trying to add a node already stored."""
2470 2477
2471 2478 @contextlib.contextmanager
2472 2479 def reading(self):
2473 2480 """Context manager that keeps data and sidedata files open for reading"""
2474 2481 if len(self.index) == 0:
2475 2482 yield # nothing to be read
2476 2483 else:
2477 2484 with self._segmentfile.reading():
2478 2485 with self._segmentfile_sidedata.reading():
2479 2486 yield
2480 2487
2481 2488 @contextlib.contextmanager
2482 2489 def _writing(self, transaction):
2483 2490 if self._trypending:
2484 2491 msg = b'try to write in a `trypending` revlog: %s'
2485 2492 msg %= self.display_id
2486 2493 raise error.ProgrammingError(msg)
2487 2494 if self._writinghandles is not None:
2488 2495 yield
2489 2496 else:
2490 2497 ifh = dfh = sdfh = None
2491 2498 try:
2492 2499 r = len(self)
2493 2500 # opening the data file.
2494 2501 dsize = 0
2495 2502 if r:
2496 2503 dsize = self.end(r - 1)
2497 2504 dfh = None
2498 2505 if not self._inline:
2499 2506 try:
2500 2507 dfh = self._datafp(b"r+")
2501 2508 if self._docket is None:
2502 2509 dfh.seek(0, os.SEEK_END)
2503 2510 else:
2504 2511 dfh.seek(self._docket.data_end, os.SEEK_SET)
2505 2512 except FileNotFoundError:
2506 2513 dfh = self._datafp(b"w+")
2507 2514 transaction.add(self._datafile, dsize)
2508 2515 if self._sidedatafile is not None:
2509 2516 # revlog-v2 does not inline, help Pytype
2510 2517 assert dfh is not None
2511 2518 try:
2512 2519 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2513 2520 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2514 2521 except FileNotFoundError:
2515 2522 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2516 2523 transaction.add(
2517 2524 self._sidedatafile, self._docket.sidedata_end
2518 2525 )
2519 2526
2520 2527 # opening the index file.
2521 2528 isize = r * self.index.entry_size
2522 2529 ifh = self.__index_write_fp()
2523 2530 if self._inline:
2524 2531 transaction.add(self._indexfile, dsize + isize)
2525 2532 else:
2526 2533 transaction.add(self._indexfile, isize)
2527 2534 # expose all file handles for writing.
2528 2535 self._writinghandles = (ifh, dfh, sdfh)
2529 2536 self._segmentfile.writing_handle = ifh if self._inline else dfh
2530 2537 self._segmentfile_sidedata.writing_handle = sdfh
2531 2538 yield
2532 2539 if self._docket is not None:
2533 2540 self._write_docket(transaction)
2534 2541 finally:
2535 2542 self._writinghandles = None
2536 2543 self._segmentfile.writing_handle = None
2537 2544 self._segmentfile_sidedata.writing_handle = None
2538 2545 if dfh is not None:
2539 2546 dfh.close()
2540 2547 if sdfh is not None:
2541 2548 sdfh.close()
2542 2549 # closing the index file last to avoid exposing references to
2543 2550 # potentially unflushed data content.
2544 2551 if ifh is not None:
2545 2552 ifh.close()
2546 2553
2547 2554 def _write_docket(self, transaction):
2548 2555 """write the current docket on disk
2549 2556
2550 2557 Exists as a method to help the changelog implement transaction logic
2551 2558
2552 2559 We could also imagine using the same transaction logic for all revlogs
2553 2560 since dockets are cheap.
2554 2561 self._docket.write(transaction)
2555 2562
2556 2563 def addrevision(
2557 2564 self,
2558 2565 text,
2559 2566 transaction,
2560 2567 link,
2561 2568 p1,
2562 2569 p2,
2563 2570 cachedelta=None,
2564 2571 node=None,
2565 2572 flags=REVIDX_DEFAULT_FLAGS,
2566 2573 deltacomputer=None,
2567 2574 sidedata=None,
2568 2575 ):
2569 2576 """add a revision to the log
2570 2577
2571 2578 text - the revision data to add
2572 2579 transaction - the transaction object used for rollback
2573 2580 link - the linkrev data to add
2574 2581 p1, p2 - the parent nodeids of the revision
2575 2582 cachedelta - an optional precomputed delta
2576 2583 node - nodeid of revision; typically node is not specified, and it is
2577 2584 computed by default as hash(text, p1, p2), however subclasses might
2578 2585 use different hashing method (and override checkhash() in such case)
2579 2586 flags - the known flags to set on the revision
2580 2587 deltacomputer - an optional deltacomputer instance shared between
2581 2588 multiple calls
2582 2589 """
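# Illustrative call (hypothetical names: ``rlog`` is a revlog open for
# writing, ``tr`` an active transaction, ``p1``/``p2`` parent node ids and
# ``linkrev`` the linked changelog revision):
#
#   rev = rlog.addrevision(b'new file content\n', tr, linkrev, p1, p2)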
2583 2590 if link == nullrev:
2584 2591 raise error.RevlogError(
2585 2592 _(b"attempted to add linkrev -1 to %s") % self.display_id
2586 2593 )
2587 2594
2588 2595 if sidedata is None:
2589 2596 sidedata = {}
2590 2597 elif sidedata and not self.hassidedata:
2591 2598 raise error.ProgrammingError(
2592 2599 _(b"trying to add sidedata to a revlog that does not support them")
2593 2600 )
2594 2601
2595 2602 if flags:
2596 2603 node = node or self.hash(text, p1, p2)
2597 2604
2598 2605 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2599 2606
2600 2607 # If the flag processor modifies the revision data, ignore any provided
2601 2608 # cachedelta.
2602 2609 if rawtext != text:
2603 2610 cachedelta = None
2604 2611
2605 2612 if len(rawtext) > _maxentrysize:
2606 2613 raise error.RevlogError(
2607 2614 _(
2608 2615 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2609 2616 )
2610 2617 % (self.display_id, len(rawtext))
2611 2618 )
2612 2619
2613 2620 node = node or self.hash(rawtext, p1, p2)
2614 2621 rev = self.index.get_rev(node)
2615 2622 if rev is not None:
2616 2623 return rev
2617 2624
2618 2625 if validatehash:
2619 2626 self.checkhash(rawtext, node, p1=p1, p2=p2)
2620 2627
2621 2628 return self.addrawrevision(
2622 2629 rawtext,
2623 2630 transaction,
2624 2631 link,
2625 2632 p1,
2626 2633 p2,
2627 2634 node,
2628 2635 flags,
2629 2636 cachedelta=cachedelta,
2630 2637 deltacomputer=deltacomputer,
2631 2638 sidedata=sidedata,
2632 2639 )
2633 2640
2634 2641 def addrawrevision(
2635 2642 self,
2636 2643 rawtext,
2637 2644 transaction,
2638 2645 link,
2639 2646 p1,
2640 2647 p2,
2641 2648 node,
2642 2649 flags,
2643 2650 cachedelta=None,
2644 2651 deltacomputer=None,
2645 2652 sidedata=None,
2646 2653 ):
2647 2654 """add a raw revision with known flags, node and parents
2648 2655 useful when reusing a revision not stored in this revlog (ex: received
2649 2656 over wire, or read from an external bundle).
2650 2657 """
2651 2658 with self._writing(transaction):
2652 2659 return self._addrevision(
2653 2660 node,
2654 2661 rawtext,
2655 2662 transaction,
2656 2663 link,
2657 2664 p1,
2658 2665 p2,
2659 2666 flags,
2660 2667 cachedelta,
2661 2668 deltacomputer=deltacomputer,
2662 2669 sidedata=sidedata,
2663 2670 )
2664 2671
2665 2672 def compress(self, data):
2666 2673 """Generate a possibly-compressed representation of data."""
2667 2674 if not data:
2668 2675 return b'', data
2669 2676
2670 2677 compressed = self._compressor.compress(data)
2671 2678
2672 2679 if compressed:
2673 2680 # The revlog compressor added the header in the returned data.
2674 2681 return b'', compressed
2675 2682
2676 2683 if data[0:1] == b'\0':
2677 2684 return b'', data
2678 2685 return b'u', data
2679 2686
2680 2687 def decompress(self, data):
2681 2688 """Decompress a revlog chunk.
2682 2689
2683 2690 The chunk is expected to begin with a header identifying the
2684 2691 format type so it can be routed to an appropriate decompressor.
2685 2692 """
2686 2693 if not data:
2687 2694 return data
2688 2695
2689 2696 # Revlogs are read much more frequently than they are written and many
2690 2697 # chunks only take microseconds to decompress, so performance is
2691 2698 # important here.
2692 2699 #
2693 2700 # We can make a few assumptions about revlogs:
2694 2701 #
2695 2702 # 1) the majority of chunks will be compressed (as opposed to inline
2696 2703 # raw data).
2697 2704 # 2) decompressing *any* data will likely be at least 10x slower than
2698 2705 # returning raw inline data.
2699 2706 # 3) we want to prioritize common and officially supported compression
2700 2707 # engines
2701 2708 #
2702 2709 # It follows that we want to optimize for "decompress compressed data
2703 2710 # when encoded with common and officially supported compression engines"
2704 2711 # case over "raw data" and "data encoded by less common or non-official
2705 2712 # compression engines." That is why we have the inline lookup first
2706 2713 # followed by the compengines lookup.
2707 2714 #
2708 2715 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2709 2716 # compressed chunks. And this matters for changelog and manifest reads.
2710 2717 t = data[0:1]
2711 2718
2712 2719 if t == b'x':
2713 2720 try:
2714 2721 return _zlibdecompress(data)
2715 2722 except zlib.error as e:
2716 2723 raise error.RevlogError(
2717 2724 _(b'revlog decompress error: %s')
2718 2725 % stringutil.forcebytestr(e)
2719 2726 )
2720 2727 # '\0' is more common than 'u' so it goes first.
2721 2728 elif t == b'\0':
2722 2729 return data
2723 2730 elif t == b'u':
2724 2731 return util.buffer(data, 1)
2725 2732
2726 2733 compressor = self._get_decompressor(t)
2727 2734
2728 2735 return compressor.decompress(data)
2729 2736
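# Illustrative sketch (standalone, zlib only): the same header-byte routing
# used by ``compress``/``decompress`` above -- b'u' marks data stored
# uncompressed, b'\0' marks raw data kept as-is, and b'x' is the first byte
# of a zlib stream.
import zlib

def _toy_decompress(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'\0':
        return data
    if t == b'u':
        return data[1:]
    if t == b'x':
        return zlib.decompress(data)
    raise ValueError('unknown chunk header: %r' % t)

# e.g. _toy_decompress(b'u' + b'hello') == b'hello'
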
2730 2737 def _addrevision(
2731 2738 self,
2732 2739 node,
2733 2740 rawtext,
2734 2741 transaction,
2735 2742 link,
2736 2743 p1,
2737 2744 p2,
2738 2745 flags,
2739 2746 cachedelta,
2740 2747 alwayscache=False,
2741 2748 deltacomputer=None,
2742 2749 sidedata=None,
2743 2750 ):
2744 2751 """internal function to add revisions to the log
2745 2752
2746 2753 see addrevision for argument descriptions.
2747 2754
2748 2755 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2749 2756
2750 2757 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2751 2758 be used.
2752 2759
2753 2760 invariants:
2754 2761 - rawtext is optional (can be None); if not set, cachedelta must be set.
2755 2762 If both are set, they must correspond to each other.
2756 2763 """
2757 2764 if node == self.nullid:
2758 2765 raise error.RevlogError(
2759 2766 _(b"%s: attempt to add null revision") % self.display_id
2760 2767 )
2761 2768 if (
2762 2769 node == self.nodeconstants.wdirid
2763 2770 or node in self.nodeconstants.wdirfilenodeids
2764 2771 ):
2765 2772 raise error.RevlogError(
2766 2773 _(b"%s: attempt to add wdir revision") % self.display_id
2767 2774 )
2768 2775 if self._writinghandles is None:
2769 2776 msg = b'adding revision outside `revlog._writing` context'
2770 2777 raise error.ProgrammingError(msg)
2771 2778
2772 2779 btext = [rawtext]
2773 2780
2774 2781 curr = len(self)
2775 2782 prev = curr - 1
2776 2783
2777 2784 offset = self._get_data_offset(prev)
2778 2785
2779 2786 if self._concurrencychecker:
2780 2787 ifh, dfh, sdfh = self._writinghandles
2781 2788 # XXX no checking for the sidedata file
2782 2789 if self._inline:
2783 2790 # offset is "as if" it were in the .d file, so we need to add on
2784 2791 # the size of the entry metadata.
2785 2792 self._concurrencychecker(
2786 2793 ifh, self._indexfile, offset + curr * self.index.entry_size
2787 2794 )
2788 2795 else:
2789 2796 # Entries in the .i are a consistent size.
2790 2797 self._concurrencychecker(
2791 2798 ifh, self._indexfile, curr * self.index.entry_size
2792 2799 )
2793 2800 self._concurrencychecker(dfh, self._datafile, offset)
2794 2801
2795 2802 p1r, p2r = self.rev(p1), self.rev(p2)
2796 2803
2797 2804 # full versions are inserted when the needed deltas
2798 2805 # become comparable to the uncompressed text
2799 2806 if rawtext is None:
2800 2807 # we need the rawtext size before it is changed by flag processors, which
2801 2808 # is the non-raw size. Use revlog explicitly to avoid filelog's extra
2802 2809 # logic that might remove metadata size.
2803 2810 textlen = mdiff.patchedsize(
2804 2811 revlog.size(self, cachedelta[0]), cachedelta[1]
2805 2812 )
2806 2813 else:
2807 2814 textlen = len(rawtext)
2808 2815
2809 2816 if deltacomputer is None:
2810 2817 write_debug = None
2811 2818 if self._debug_delta:
2812 2819 write_debug = transaction._report
2813 2820 deltacomputer = deltautil.deltacomputer(
2814 2821 self, write_debug=write_debug
2815 2822 )
2816 2823
2817 2824 if cachedelta is not None and len(cachedelta) == 2:
2818 2825 # If the cached delta has no information about how it should be
2819 2826 # reused, add the default reuse instruction according to the
2820 2827 # revlog's configuration.
2821 2828 if self._generaldelta and self._lazydeltabase:
2822 2829 delta_base_reuse = DELTA_BASE_REUSE_TRY
2823 2830 else:
2824 2831 delta_base_reuse = DELTA_BASE_REUSE_NO
2825 2832 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2826 2833
2827 2834 revinfo = revlogutils.revisioninfo(
2828 2835 node,
2829 2836 p1,
2830 2837 p2,
2831 2838 btext,
2832 2839 textlen,
2833 2840 cachedelta,
2834 2841 flags,
2835 2842 )
2836 2843
2837 2844 deltainfo = deltacomputer.finddeltainfo(revinfo)
2838 2845
2839 2846 compression_mode = COMP_MODE_INLINE
2840 2847 if self._docket is not None:
2841 2848 default_comp = self._docket.default_compression_header
2842 2849 r = deltautil.delta_compression(default_comp, deltainfo)
2843 2850 compression_mode, deltainfo = r
2844 2851
2845 2852 sidedata_compression_mode = COMP_MODE_INLINE
2846 2853 if sidedata and self.hassidedata:
2847 2854 sidedata_compression_mode = COMP_MODE_PLAIN
2848 2855 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2849 2856 sidedata_offset = self._docket.sidedata_end
2850 2857 h, comp_sidedata = self.compress(serialized_sidedata)
2851 2858 if (
2852 2859 h != b'u'
2853 2860 and comp_sidedata[0:1] != b'\0'
2854 2861 and len(comp_sidedata) < len(serialized_sidedata)
2855 2862 ):
2856 2863 assert not h
2857 2864 if (
2858 2865 comp_sidedata[0:1]
2859 2866 == self._docket.default_compression_header
2860 2867 ):
2861 2868 sidedata_compression_mode = COMP_MODE_DEFAULT
2862 2869 serialized_sidedata = comp_sidedata
2863 2870 else:
2864 2871 sidedata_compression_mode = COMP_MODE_INLINE
2865 2872 serialized_sidedata = comp_sidedata
2866 2873 else:
2867 2874 serialized_sidedata = b""
2868 2875 # Don't store the offset if the sidedata is empty; that way we can
2869 2876 # easily detect empty sidedata, and it will be no different from
2870 2877 # entries we add manually.
2871 2878 sidedata_offset = 0
2872 2879
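# "rank" is the size of a revision's set of ancestors, the revision itself
# included: 1 for a root, the parent's rank plus one on a linear chain,
# and, for a merge, one parent's rank plus the revisions reachable only
# from the other parent, plus one for the merge itself.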
2873 2880 rank = RANK_UNKNOWN
2874 2881 if self._compute_rank:
2875 2882 if (p1r, p2r) == (nullrev, nullrev):
2876 2883 rank = 1
2877 2884 elif p1r != nullrev and p2r == nullrev:
2878 2885 rank = 1 + self.fast_rank(p1r)
2879 2886 elif p1r == nullrev and p2r != nullrev:
2880 2887 rank = 1 + self.fast_rank(p2r)
2881 2888 else: # merge node
2882 2889 if rustdagop is not None and self.index.rust_ext_compat:
2883 2890 rank = rustdagop.rank(self.index, p1r, p2r)
2884 2891 else:
2885 2892 pmin, pmax = sorted((p1r, p2r))
2886 2893 rank = 1 + self.fast_rank(pmax)
2887 2894 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2888 2895
2889 2896 e = revlogutils.entry(
2890 2897 flags=flags,
2891 2898 data_offset=offset,
2892 2899 data_compressed_length=deltainfo.deltalen,
2893 2900 data_uncompressed_length=textlen,
2894 2901 data_compression_mode=compression_mode,
2895 2902 data_delta_base=deltainfo.base,
2896 2903 link_rev=link,
2897 2904 parent_rev_1=p1r,
2898 2905 parent_rev_2=p2r,
2899 2906 node_id=node,
2900 2907 sidedata_offset=sidedata_offset,
2901 2908 sidedata_compressed_length=len(serialized_sidedata),
2902 2909 sidedata_compression_mode=sidedata_compression_mode,
2903 2910 rank=rank,
2904 2911 )
2905 2912
2906 2913 self.index.append(e)
2907 2914 entry = self.index.entry_binary(curr)
2908 2915 if curr == 0 and self._docket is None:
2909 2916 header = self._format_flags | self._format_version
2910 2917 header = self.index.pack_header(header)
2911 2918 entry = header + entry
2912 2919 self._writeentry(
2913 2920 transaction,
2914 2921 entry,
2915 2922 deltainfo.data,
2916 2923 link,
2917 2924 offset,
2918 2925 serialized_sidedata,
2919 2926 sidedata_offset,
2920 2927 )
2921 2928
2922 2929 rawtext = btext[0]
2923 2930
2924 2931 if alwayscache and rawtext is None:
2925 2932 rawtext = deltacomputer.buildtext(revinfo)
2926 2933
2927 2934 if type(rawtext) == bytes: # only accept immutable objects
2928 2935 self._revisioncache = (node, curr, rawtext)
2929 2936 self._chainbasecache[curr] = deltainfo.chainbase
2930 2937 return curr
2931 2938
2932 2939 def _get_data_offset(self, prev):
2933 2940 """Returns the current offset in the (in-transaction) data file.
2934 2941 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2935 2942 file to store that information: since sidedata can be rewritten to the
2936 2943 end of the data file within a transaction, you can have cases where, for
2937 2944 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2938 2945 to `n - 1`'s sidedata being written after `n`'s data.
2939 2946
2940 2947 TODO cache this in a docket file before getting out of experimental."""
2941 2948 if self._docket is None:
2942 2949 return self.end(prev)
2943 2950 else:
2944 2951 return self._docket.data_end
2945 2952
2946 2953 def _writeentry(
2947 2954 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2948 2955 ):
2949 2956 # Files opened in a+ mode have inconsistent behavior on various
2950 2957 # platforms. Windows requires that a file positioning call be made
2951 2958 # when the file handle transitions between reads and writes. See
2952 2959 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2953 2960 # platforms, Python or the platform itself can be buggy. Some versions
2954 2961 # of Solaris have been observed to not append at the end of the file
2955 2962 # if the file was seeked to before the end. See issue4943 for more.
2956 2963 #
2957 2964 # We work around this issue by inserting a seek() before writing.
2958 2965 # Note: This is likely not necessary on Python 3. However, because
2959 2966 # the file handle is reused for reads and may be seeked there, we need
2960 2967 # to be careful before changing this.
2961 2968 if self._writinghandles is None:
2962 2969 msg = b'adding revision outside `revlog._writing` context'
2963 2970 raise error.ProgrammingError(msg)
2964 2971 ifh, dfh, sdfh = self._writinghandles
2965 2972 if self._docket is None:
2966 2973 ifh.seek(0, os.SEEK_END)
2967 2974 else:
2968 2975 ifh.seek(self._docket.index_end, os.SEEK_SET)
2969 2976 if dfh:
2970 2977 if self._docket is None:
2971 2978 dfh.seek(0, os.SEEK_END)
2972 2979 else:
2973 2980 dfh.seek(self._docket.data_end, os.SEEK_SET)
2974 2981 if sdfh:
2975 2982 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2976 2983
2977 2984 curr = len(self) - 1
2978 2985 if not self._inline:
2979 2986 transaction.add(self._datafile, offset)
2980 2987 if self._sidedatafile:
2981 2988 transaction.add(self._sidedatafile, sidedata_offset)
2982 2989 transaction.add(self._indexfile, curr * len(entry))
2983 2990 if data[0]:
2984 2991 dfh.write(data[0])
2985 2992 dfh.write(data[1])
2986 2993 if sidedata:
2987 2994 sdfh.write(sidedata)
2988 2995 ifh.write(entry)
2989 2996 else:
2990 2997 offset += curr * self.index.entry_size
2991 2998 transaction.add(self._indexfile, offset)
2992 2999 ifh.write(entry)
2993 3000 ifh.write(data[0])
2994 3001 ifh.write(data[1])
2995 3002 assert not sidedata
2996 3003 self._enforceinlinesize(transaction)
2997 3004 if self._docket is not None:
2998 3005 # revlog-v2 always has 3 writing handles, help Pytype
2999 3006 wh1 = self._writinghandles[0]
3000 3007 wh2 = self._writinghandles[1]
3001 3008 wh3 = self._writinghandles[2]
3002 3009 assert wh1 is not None
3003 3010 assert wh2 is not None
3004 3011 assert wh3 is not None
3005 3012 self._docket.index_end = wh1.tell()
3006 3013 self._docket.data_end = wh2.tell()
3007 3014 self._docket.sidedata_end = wh3.tell()
3008 3015
3009 3016 nodemaputil.setup_persistent_nodemap(transaction, self)
3010 3017
3011 3018 def addgroup(
3012 3019 self,
3013 3020 deltas,
3014 3021 linkmapper,
3015 3022 transaction,
3016 3023 alwayscache=False,
3017 3024 addrevisioncb=None,
3018 3025 duplicaterevisioncb=None,
3019 3026 debug_info=None,
3020 3027 delta_base_reuse_policy=None,
3021 3028 ):
3022 3029 """
3023 3030 add a delta group
3024 3031
3025 3032 Given a set of deltas, add them to the revision log. The
3026 3033 first delta is against its parent, which should be in our
3027 3034 log; the rest are against the previous delta.
3028 3035
3029 3036 If ``addrevisioncb`` is defined, it will be called with arguments of
3030 3037 this revlog and the node that was added.
3031 3038 """
3032 3039
3033 3040 if self._adding_group:
3034 3041 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3035 3042
3036 3043 # read the default delta-base reuse policy from revlog config if the
3037 3044 # group did not specify one.
3038 3045 if delta_base_reuse_policy is None:
3039 3046 if self._generaldelta and self._lazydeltabase:
3040 3047 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3041 3048 else:
3042 3049 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3043 3050
3044 3051 self._adding_group = True
3045 3052 empty = True
3046 3053 try:
3047 3054 with self._writing(transaction):
3048 3055 write_debug = None
3049 3056 if self._debug_delta:
3050 3057 write_debug = transaction._report
3051 3058 deltacomputer = deltautil.deltacomputer(
3052 3059 self,
3053 3060 write_debug=write_debug,
3054 3061 debug_info=debug_info,
3055 3062 )
3056 3063 # loop through our set of deltas
3057 3064 for data in deltas:
3058 3065 (
3059 3066 node,
3060 3067 p1,
3061 3068 p2,
3062 3069 linknode,
3063 3070 deltabase,
3064 3071 delta,
3065 3072 flags,
3066 3073 sidedata,
3067 3074 ) = data
3068 3075 link = linkmapper(linknode)
3069 3076 flags = flags or REVIDX_DEFAULT_FLAGS
3070 3077
3071 3078 rev = self.index.get_rev(node)
3072 3079 if rev is not None:
3073 3080 # this can happen if two branches make the same change
3074 3081 self._nodeduplicatecallback(transaction, rev)
3075 3082 if duplicaterevisioncb:
3076 3083 duplicaterevisioncb(self, rev)
3077 3084 empty = False
3078 3085 continue
3079 3086
3080 3087 for p in (p1, p2):
3081 3088 if not self.index.has_node(p):
3082 3089 raise error.LookupError(
3083 3090 p, self.radix, _(b'unknown parent')
3084 3091 )
3085 3092
3086 3093 if not self.index.has_node(deltabase):
3087 3094 raise error.LookupError(
3088 3095 deltabase, self.display_id, _(b'unknown delta base')
3089 3096 )
3090 3097
3091 3098 baserev = self.rev(deltabase)
3092 3099
3093 3100 if baserev != nullrev and self.iscensored(baserev):
3094 3101 # if base is censored, delta must be full replacement in a
3095 3102 # single patch operation
3096 3103 hlen = struct.calcsize(b">lll")
3097 3104 oldlen = self.rawsize(baserev)
3098 3105 newlen = len(delta) - hlen
3099 3106 if delta[:hlen] != mdiff.replacediffheader(
3100 3107 oldlen, newlen
3101 3108 ):
3102 3109 raise error.CensoredBaseError(
3103 3110 self.display_id, self.node(baserev)
3104 3111 )
3105 3112
3106 3113 if not flags and self._peek_iscensored(baserev, delta):
3107 3114 flags |= REVIDX_ISCENSORED
3108 3115
3109 3116 # We assume consumers of addrevisioncb will want to retrieve
3110 3117 # the added revision, which will require a call to
3111 3118 # revision(). revision() will fast path if there is a cache
3112 3119 # hit. So, we tell _addrevision() to always cache in this case.
3113 3120 # We're only using addgroup() in the context of changegroup
3114 3121 # generation so the revision data can always be handled as raw
3115 3122 # by the flagprocessor.
3116 3123 rev = self._addrevision(
3117 3124 node,
3118 3125 None,
3119 3126 transaction,
3120 3127 link,
3121 3128 p1,
3122 3129 p2,
3123 3130 flags,
3124 3131 (baserev, delta, delta_base_reuse_policy),
3125 3132 alwayscache=alwayscache,
3126 3133 deltacomputer=deltacomputer,
3127 3134 sidedata=sidedata,
3128 3135 )
3129 3136
3130 3137 if addrevisioncb:
3131 3138 addrevisioncb(self, rev)
3132 3139 empty = False
3133 3140 finally:
3134 3141 self._adding_group = False
3135 3142 return not empty
3136 3143
3137 3144 def iscensored(self, rev):
3138 3145 """Check if a file revision is censored."""
3139 3146 if not self._censorable:
3140 3147 return False
3141 3148
3142 3149 return self.flags(rev) & REVIDX_ISCENSORED
3143 3150
3144 3151 def _peek_iscensored(self, baserev, delta):
3145 3152 """Quickly check if a delta produces a censored revision."""
3146 3153 if not self._censorable:
3147 3154 return False
3148 3155
3149 3156 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3150 3157
3151 3158 def getstrippoint(self, minlink):
3152 3159 """find the minimum rev that must be stripped to strip the linkrev
3153 3160
3154 3161 Returns a tuple containing the minimum rev and a set of all revs that
3155 3162 have linkrevs that will be broken by this strip.
3156 3163 """
3157 3164 return storageutil.resolvestripinfo(
3158 3165 minlink,
3159 3166 len(self) - 1,
3160 3167 self.headrevs(),
3161 3168 self.linkrev,
3162 3169 self.parentrevs,
3163 3170 )
3164 3171
3165 3172 def strip(self, minlink, transaction):
3166 3173 """truncate the revlog on the first revision with a linkrev >= minlink
3167 3174
3168 3175 This function is called when we're stripping revision minlink and
3169 3176 its descendants from the repository.
3170 3177
3171 3178 We have to remove all revisions with linkrev >= minlink, because
3172 3179 the equivalent changelog revisions will be renumbered after the
3173 3180 strip.
3174 3181
3175 3182 So we truncate the revlog on the first of these revisions, and
3176 3183 trust that the caller has saved the revisions that shouldn't be
3177 3184 removed and that it'll re-add them after this truncation.
3178 3185 """
3179 3186 if len(self) == 0:
3180 3187 return
3181 3188
3182 3189 rev, _ = self.getstrippoint(minlink)
3183 3190 if rev == len(self):
3184 3191 return
3185 3192
3186 3193 # first truncate the files on disk
3187 3194 data_end = self.start(rev)
3188 3195 if not self._inline:
3189 3196 transaction.add(self._datafile, data_end)
3190 3197 end = rev * self.index.entry_size
3191 3198 else:
3192 3199 end = data_end + (rev * self.index.entry_size)
3193 3200
3194 3201 if self._sidedatafile:
3195 3202 sidedata_end = self.sidedata_cut_off(rev)
3196 3203 transaction.add(self._sidedatafile, sidedata_end)
3197 3204
3198 3205 transaction.add(self._indexfile, end)
3199 3206 if self._docket is not None:
3200 3207 # XXX we could leverage the docket while stripping. However, it is
3201 3208 # not powerful enough at the time of this comment
3202 3209 self._docket.index_end = end
3203 3210 self._docket.data_end = data_end
3204 3211 self._docket.sidedata_end = sidedata_end
3205 3212 self._docket.write(transaction, stripping=True)
3206 3213
3207 3214 # then reset internal state in memory to forget those revisions
3208 3215 self._revisioncache = None
3209 3216 self._chaininfocache = util.lrucachedict(500)
3210 3217 self._segmentfile.clear_cache()
3211 3218 self._segmentfile_sidedata.clear_cache()
3212 3219
3213 3220 del self.index[rev:-1]
3214 3221
3215 3222 def checksize(self):
3216 3223 """Check size of index and data files
3217 3224
3218 3225 return a (dd, di) tuple.
3219 3226 - dd: extra bytes for the "data" file
3220 3227 - di: extra bytes for the "index" file
3221 3228
3222 3229 A healthy revlog will return (0, 0).
3223 3230 """
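# For example, a data file that is 12 bytes longer than the index entries
# account for yields (12, 0), while a truncated index ending in a partial
# entry shows up in ``di``.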
3224 3231 expected = 0
3225 3232 if len(self):
3226 3233 expected = max(0, self.end(len(self) - 1))
3227 3234
3228 3235 try:
3229 3236 with self._datafp() as f:
3230 3237 f.seek(0, io.SEEK_END)
3231 3238 actual = f.tell()
3232 3239 dd = actual - expected
3233 3240 except FileNotFoundError:
3234 3241 dd = 0
3235 3242
3236 3243 try:
3237 3244 f = self.opener(self._indexfile)
3238 3245 f.seek(0, io.SEEK_END)
3239 3246 actual = f.tell()
3240 3247 f.close()
3241 3248 s = self.index.entry_size
3242 3249 i = max(0, actual // s)
3243 3250 di = actual - (i * s)
3244 3251 if self._inline:
3245 3252 databytes = 0
3246 3253 for r in self:
3247 3254 databytes += max(0, self.length(r))
3248 3255 dd = 0
3249 3256 di = actual - len(self) * s - databytes
3250 3257 except FileNotFoundError:
3251 3258 di = 0
3252 3259
3253 3260 return (dd, di)
3254 3261
3255 3262 def files(self):
3256 3263 res = [self._indexfile]
3257 3264 if self._docket_file is None:
3258 3265 if not self._inline:
3259 3266 res.append(self._datafile)
3260 3267 else:
3261 3268 res.append(self._docket_file)
3262 3269 res.extend(self._docket.old_index_filepaths(include_empty=False))
3263 3270 if self._docket.data_end:
3264 3271 res.append(self._datafile)
3265 3272 res.extend(self._docket.old_data_filepaths(include_empty=False))
3266 3273 if self._docket.sidedata_end:
3267 3274 res.append(self._sidedatafile)
3268 3275 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3269 3276 return res
3270 3277
3271 3278 def emitrevisions(
3272 3279 self,
3273 3280 nodes,
3274 3281 nodesorder=None,
3275 3282 revisiondata=False,
3276 3283 assumehaveparentrevisions=False,
3277 3284 deltamode=repository.CG_DELTAMODE_STD,
3278 3285 sidedata_helpers=None,
3279 3286 debug_info=None,
3280 3287 ):
3281 3288 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3282 3289 raise error.ProgrammingError(
3283 3290 b'unhandled value for nodesorder: %s' % nodesorder
3284 3291 )
3285 3292
3286 3293 if nodesorder is None and not self._generaldelta:
3287 3294 nodesorder = b'storage'
3288 3295
3289 3296 if (
3290 3297 not self._storedeltachains
3291 3298 and deltamode != repository.CG_DELTAMODE_PREV
3292 3299 ):
3293 3300 deltamode = repository.CG_DELTAMODE_FULL
3294 3301
3295 3302 return storageutil.emitrevisions(
3296 3303 self,
3297 3304 nodes,
3298 3305 nodesorder,
3299 3306 revlogrevisiondelta,
3300 3307 deltaparentfn=self.deltaparent,
3301 3308 candeltafn=self._candelta,
3302 3309 rawsizefn=self.rawsize,
3303 3310 revdifffn=self.revdiff,
3304 3311 flagsfn=self.flags,
3305 3312 deltamode=deltamode,
3306 3313 revisiondata=revisiondata,
3307 3314 assumehaveparentrevisions=assumehaveparentrevisions,
3308 3315 sidedata_helpers=sidedata_helpers,
3309 3316 debug_info=debug_info,
3310 3317 )
3311 3318
3312 3319 DELTAREUSEALWAYS = b'always'
3313 3320 DELTAREUSESAMEREVS = b'samerevs'
3314 3321 DELTAREUSENEVER = b'never'
3315 3322
3316 3323 DELTAREUSEFULLADD = b'fulladd'
3317 3324
3318 3325 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3319 3326
3320 3327 def clone(
3321 3328 self,
3322 3329 tr,
3323 3330 destrevlog,
3324 3331 addrevisioncb=None,
3325 3332 deltareuse=DELTAREUSESAMEREVS,
3326 3333 forcedeltabothparents=None,
3327 3334 sidedata_helpers=None,
3328 3335 ):
3329 3336 """Copy this revlog to another, possibly with format changes.
3330 3337
3331 3338 The destination revlog will contain the same revisions and nodes.
3332 3339 However, it may not be bit-for-bit identical due to e.g. delta encoding
3333 3340 differences.
3334 3341
3335 3342 The ``deltareuse`` argument controls how deltas from the existing revlog
3336 3343 are preserved in the destination revlog. The argument can have the
3337 3344 following values:
3338 3345
3339 3346 DELTAREUSEALWAYS
3340 3347 Deltas will always be reused (if possible), even if the destination
3341 3348 revlog would not select the same revisions for the delta. This is the
3342 3349 fastest mode of operation.
3343 3350 DELTAREUSESAMEREVS
3344 3351 Deltas will be reused if the destination revlog would pick the same
3345 3352 revisions for the delta. This mode strikes a balance between speed
3346 3353 and optimization.
3347 3354 DELTAREUSENEVER
3348 3355 Deltas will never be reused. This is the slowest mode of execution.
3349 3356 This mode can be used to recompute deltas (e.g. if the diff/delta
3350 3357 algorithm changes).
3351 3358 DELTAREUSEFULLADD
3352 3359 Revisions will be re-added as if they were new content. This is
3353 3360 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3354 3361 e.g. large file detection and handling.
3355 3362
3356 3363 Delta computation can be slow, so the choice of delta reuse policy can
3357 3364 significantly affect run time.
3358 3365
3359 3366 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3360 3367 two extremes. Deltas will be reused if they are appropriate. But if the
3361 3368 delta could choose a better revision, it will do so. This means if you
3362 3369 are converting a non-generaldelta revlog to a generaldelta revlog,
3363 3370 deltas will be recomputed if the delta's parent isn't a parent of the
3364 3371 revision.
3365 3372
3366 3373 In addition to the delta policy, the ``forcedeltabothparents``
3367 3374 argument controls whether to force computing deltas against both parents
3368 3375 for merges. If not set, the destination revlog's current setting is used.
3369 3376
3370 3377 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3371 3378 `sidedata_helpers`.
3372 3379 """
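# Illustrative usage (hypothetical ``src``/``dst`` revlogs and an open
# transaction ``tr``; not taken from this changeset):
#
#   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
#
# would recompute every delta while copying, per the policy list above.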
3373 3380 if deltareuse not in self.DELTAREUSEALL:
3374 3381 raise ValueError(
3375 3382 _(b'value for deltareuse invalid: %s') % deltareuse
3376 3383 )
3377 3384
3378 3385 if len(destrevlog):
3379 3386 raise ValueError(_(b'destination revlog is not empty'))
3380 3387
3381 3388 if getattr(self, 'filteredrevs', None):
3382 3389 raise ValueError(_(b'source revlog has filtered revisions'))
3383 3390 if getattr(destrevlog, 'filteredrevs', None):
3384 3391 raise ValueError(_(b'destination revlog has filtered revisions'))
3385 3392
3386 3393 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3387 3394 # if possible.
3388 3395 old_delta_config = destrevlog.delta_config
3389 3396 destrevlog.delta_config = destrevlog.delta_config.copy()
3390 3397
3391 3398 try:
3392 3399 if deltareuse == self.DELTAREUSEALWAYS:
3393 3400 destrevlog.delta_config.lazy_delta_base = True
3394 3401 destrevlog.delta_config.lazy_delta = True
3395 3402 elif deltareuse == self.DELTAREUSESAMEREVS:
3396 3403 destrevlog.delta_config.lazy_delta_base = False
3397 3404 destrevlog.delta_config.lazy_delta = True
3398 3405 elif deltareuse == self.DELTAREUSENEVER:
3399 3406 destrevlog.delta_config.lazy_delta_base = False
3400 3407 destrevlog.delta_config.lazy_delta = False
3401 3408
3402 3409 delta_both_parents = (
3403 3410 forcedeltabothparents or old_delta_config.delta_both_parents
3404 3411 )
3405 3412 destrevlog.delta_config.delta_both_parents = delta_both_parents
3406 3413
3407 3414 with self.reading():
3408 3415 self._clone(
3409 3416 tr,
3410 3417 destrevlog,
3411 3418 addrevisioncb,
3412 3419 deltareuse,
3413 3420 forcedeltabothparents,
3414 3421 sidedata_helpers,
3415 3422 )
3416 3423
3417 3424 finally:
3418 3425 destrevlog.delta_config = old_delta_config
3419 3426
3420 3427 def _clone(
3421 3428 self,
3422 3429 tr,
3423 3430 destrevlog,
3424 3431 addrevisioncb,
3425 3432 deltareuse,
3426 3433 forcedeltabothparents,
3427 3434 sidedata_helpers,
3428 3435 ):
3429 3436 """perform the core duty of `revlog.clone` after parameter processing"""
3430 3437 write_debug = None
3431 3438 if self._debug_delta:
3432 3439 write_debug = tr._report
3433 3440 deltacomputer = deltautil.deltacomputer(
3434 3441 destrevlog,
3435 3442 write_debug=write_debug,
3436 3443 )
3437 3444 index = self.index
3438 3445 for rev in self:
3439 3446 entry = index[rev]
3440 3447
3441 3448 # Some classes override linkrev to take filtered revs into
3442 3449 # account. Use raw entry from index.
3443 3450 flags = entry[0] & 0xFFFF
3444 3451 linkrev = entry[4]
3445 3452 p1 = index[entry[5]][7]
3446 3453 p2 = index[entry[6]][7]
3447 3454 node = entry[7]
3448 3455
3449 3456 # (Possibly) reuse the delta from the revlog if allowed and
3450 3457 # the revlog chunk is a delta.
3451 3458 cachedelta = None
3452 3459 rawtext = None
3453 3460 if deltareuse == self.DELTAREUSEFULLADD:
3454 3461 text = self._revisiondata(rev)
3455 3462 sidedata = self.sidedata(rev)
3456 3463
3457 3464 if sidedata_helpers is not None:
3458 3465 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3459 3466 self, sidedata_helpers, sidedata, rev
3460 3467 )
3461 3468 flags = flags | new_flags[0] & ~new_flags[1]
3462 3469
3463 3470 destrevlog.addrevision(
3464 3471 text,
3465 3472 tr,
3466 3473 linkrev,
3467 3474 p1,
3468 3475 p2,
3469 3476 cachedelta=cachedelta,
3470 3477 node=node,
3471 3478 flags=flags,
3472 3479 deltacomputer=deltacomputer,
3473 3480 sidedata=sidedata,
3474 3481 )
3475 3482 else:
3476 3483 if destrevlog._lazydelta:
3477 3484 dp = self.deltaparent(rev)
3478 3485 if dp != nullrev:
3479 3486 cachedelta = (dp, bytes(self._chunk(rev)))
3480 3487
3481 3488 sidedata = None
3482 3489 if not cachedelta:
3483 3490 rawtext = self._revisiondata(rev)
3484 3491 sidedata = self.sidedata(rev)
3485 3492 if sidedata is None:
3486 3493 sidedata = self.sidedata(rev)
3487 3494
3488 3495 if sidedata_helpers is not None:
3489 3496 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3490 3497 self, sidedata_helpers, sidedata, rev
3491 3498 )
3492 3499 flags = flags | new_flags[0] & ~new_flags[1]
3493 3500
3494 3501 with destrevlog._writing(tr):
3495 3502 destrevlog._addrevision(
3496 3503 node,
3497 3504 rawtext,
3498 3505 tr,
3499 3506 linkrev,
3500 3507 p1,
3501 3508 p2,
3502 3509 flags,
3503 3510 cachedelta,
3504 3511 deltacomputer=deltacomputer,
3505 3512 sidedata=sidedata,
3506 3513 )
3507 3514
3508 3515 if addrevisioncb:
3509 3516 addrevisioncb(self, rev, node)
3510 3517
3511 3518 def censorrevision(self, tr, censornode, tombstone=b''):
3512 3519 if self._format_version == REVLOGV0:
3513 3520 raise error.RevlogError(
3514 3521 _(b'cannot censor with version %d revlogs')
3515 3522 % self._format_version
3516 3523 )
3517 3524 elif self._format_version == REVLOGV1:
3518 3525 rewrite.v1_censor(self, tr, censornode, tombstone)
3519 3526 else:
3520 3527 rewrite.v2_censor(self, tr, censornode, tombstone)
3521 3528
3522 3529 def verifyintegrity(self, state):
3523 3530 """Verifies the integrity of the revlog.
3524 3531
3525 3532 Yields ``revlogproblem`` instances describing problems that are
3526 3533 found.
3527 3534 """
3528 3535 dd, di = self.checksize()
3529 3536 if dd:
3530 3537 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3531 3538 if di:
3532 3539 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3533 3540
3534 3541 version = self._format_version
3535 3542
3536 3543 # The verifier tells us what version revlog we should be.
3537 3544 if version != state[b'expectedversion']:
3538 3545 yield revlogproblem(
3539 3546 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3540 3547 % (self.display_id, version, state[b'expectedversion'])
3541 3548 )
3542 3549
3543 3550 state[b'skipread'] = set()
3544 3551 state[b'safe_renamed'] = set()
3545 3552
3546 3553 for rev in self:
3547 3554 node = self.node(rev)
3548 3555
3549 3556 # Verify contents. 4 cases to care about:
3550 3557 #
3551 3558 # common: the most common case
3552 3559 # rename: with a rename
3553 3560 # meta: file content starts with b'\1\n', the metadata
3554 3561 # header defined in filelog.py, but without a rename
3555 3562 # ext: content stored externally
3556 3563 #
3557 3564 # More formally, their differences are shown below:
3558 3565 #
3559 3566 # | common | rename | meta | ext
3560 3567 # -------------------------------------------------------
3561 3568 # flags() | 0 | 0 | 0 | not 0
3562 3569 # renamed() | False | True | False | ?
3563 3570 # rawtext[0:2]=='\1\n'| False | True | True | ?
3564 3571 #
3565 3572 # "rawtext" means the raw text stored in revlog data, which
3566 3573 # could be retrieved by "rawdata(rev)". "text"
3567 3574 # mentioned below is "revision(rev)".
3568 3575 #
3569 3576 # There are 3 different lengths stored physically:
3570 3577 # 1. L1: rawsize, stored in revlog index
3571 3578 # 2. L2: len(rawtext), stored in revlog data
3572 3579 # 3. L3: len(text), stored in revlog data if flags==0, or
3573 3580 # possibly somewhere else if flags!=0
3574 3581 #
3575 3582 # L1 should be equal to L2. L3 could be different from them.
3576 3583 # "text" may or may not affect commit hash depending on flag
3577 3584 # processors (see flagutil.addflagprocessor).
3578 3585 #
3579 3586 # | common | rename | meta | ext
3580 3587 # -------------------------------------------------
3581 3588 # rawsize() | L1 | L1 | L1 | L1
3582 3589 # size() | L1 | L2-LM | L1(*) | L1 (?)
3583 3590 # len(rawtext) | L2 | L2 | L2 | L2
3584 3591 # len(text) | L2 | L2 | L2 | L3
3585 3592 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3586 3593 #
3587 3594 # LM: length of metadata, depending on rawtext
3588 3595 # (*): not ideal, see comment in filelog.size
3589 3596 # (?): could be "- len(meta)" if the resolved content has
3590 3597 # rename metadata
3591 3598 #
3592 3599 # Checks needed to be done:
3593 3600 # 1. length check: L1 == L2, in all cases.
3594 3601 # 2. hash check: depending on flag processor, we may need to
3595 3602 # use either "text" (external), or "rawtext" (in revlog).
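# Worked example for the "rename" column (hypothetical values): the filelog
# metadata block is delimited by b'\1\n', so
#
#   meta = b'\1\ncopy: old/name.txt\ncopyrev: <40 hex chars>\n\1\n'
#   rawtext = meta + file_content      # file_content is what read() returns
#   LM = len(meta)
#
# hence L1 == L2 == len(rawtext) == LM + len(read()), while size()
# reports L2 - LM.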
3596 3603
3597 3604 try:
3598 3605 skipflags = state.get(b'skipflags', 0)
3599 3606 if skipflags:
3600 3607 skipflags &= self.flags(rev)
3601 3608
3602 3609 _verify_revision(self, skipflags, state, node)
3603 3610
3604 3611 l1 = self.rawsize(rev)
3605 3612 l2 = len(self.rawdata(node))
3606 3613
3607 3614 if l1 != l2:
3608 3615 yield revlogproblem(
3609 3616 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3610 3617 node=node,
3611 3618 )
3612 3619
3613 3620 except error.CensoredNodeError:
3614 3621 if state[b'erroroncensored']:
3615 3622 yield revlogproblem(
3616 3623 error=_(b'censored file data'), node=node
3617 3624 )
3618 3625 state[b'skipread'].add(node)
3619 3626 except Exception as e:
3620 3627 yield revlogproblem(
3621 3628 error=_(b'unpacking %s: %s')
3622 3629 % (short(node), stringutil.forcebytestr(e)),
3623 3630 node=node,
3624 3631 )
3625 3632 state[b'skipread'].add(node)
3626 3633
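# Hypothetical driver sketch (``rl`` stands for some revlog instance;
# ``hg verify`` does something richer): ``verifyintegrity`` is a generator,
# so a caller seeds ``state`` with the keys read above and collects the
# yielded ``revlogproblem`` objects, which carry ``error``/``warning``/``node``:
#
#   state = {
#       b'expectedversion': rl._format_version,  # suppress the version warning
#       b'erroroncensored': True,                # report censored data as errors
#       b'skipflags': 0,                         # do not skip flagged revisions
#   }
#   problems = list(rl.verifyintegrity(state))
#   errors = [p.error for p in problems if p.error]
#   warnings = [p.warning for p in problems if p.warning]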
3627 3634 def storageinfo(
3628 3635 self,
3629 3636 exclusivefiles=False,
3630 3637 sharedfiles=False,
3631 3638 revisionscount=False,
3632 3639 trackedsize=False,
3633 3640 storedsize=False,
3634 3641 ):
3635 3642 d = {}
3636 3643
3637 3644 if exclusivefiles:
3638 3645 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3639 3646 if not self._inline:
3640 3647 d[b'exclusivefiles'].append((self.opener, self._datafile))
3641 3648
3642 3649 if sharedfiles:
3643 3650 d[b'sharedfiles'] = []
3644 3651
3645 3652 if revisionscount:
3646 3653 d[b'revisionscount'] = len(self)
3647 3654
3648 3655 if trackedsize:
3649 3656 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3650 3657
3651 3658 if storedsize:
3652 3659 d[b'storedsize'] = sum(
3653 3660 self.opener.stat(path).st_size for path in self.files()
3654 3661 )
3655 3662
3656 3663 return d
3657 3664
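# Hypothetical usage sketch (``rl`` stands for some revlog instance): every
# item in ``storageinfo`` is opt-in because some are costly to compute --
# ``trackedsize`` walks every revision and ``storedsize`` stats every file:
#
#   info = rl.storageinfo(revisionscount=True, storedsize=True)
#   num_revs = info[b'revisionscount']
#   bytes_on_disk = info[b'storedsize']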
3658 3665 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3659 3666 if not self.hassidedata:
3660 3667 return
3661 3668 # revlog formats with sidedata support do not support inline storage
3662 3669 assert not self._inline
3663 3670 if not helpers[1] and not helpers[2]:
3664 3671 # Nothing to generate or remove
3665 3672 return
3666 3673
3667 3674 new_entries = []
3668 3675 # append the new sidedata
3669 3676 with self._writing(transaction):
3670 3677 ifh, dfh, sdfh = self._writinghandles
3671 3678 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3672 3679
3673 3680 current_offset = sdfh.tell()
3674 3681 for rev in range(startrev, endrev + 1):
3675 3682 entry = self.index[rev]
3676 3683 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3677 3684 store=self,
3678 3685 sidedata_helpers=helpers,
3679 3686 sidedata={},
3680 3687 rev=rev,
3681 3688 )
3682 3689
3683 3690 serialized_sidedata = sidedatautil.serialize_sidedata(
3684 3691 new_sidedata
3685 3692 )
3686 3693
3687 3694 sidedata_compression_mode = COMP_MODE_INLINE
3688 3695 if serialized_sidedata and self.hassidedata:
3689 3696 sidedata_compression_mode = COMP_MODE_PLAIN
3690 3697 h, comp_sidedata = self.compress(serialized_sidedata)
3691 3698 if (
3692 3699 h != b'u'
3693 3700 and comp_sidedata[0:1] != b'\0'
3694 3701 and len(comp_sidedata) < len(serialized_sidedata)
3695 3702 ):
3696 3703 assert not h
3697 3704 if (
3698 3705 comp_sidedata[0:1]
3699 3706 == self._docket.default_compression_header
3700 3707 ):
3701 3708 sidedata_compression_mode = COMP_MODE_DEFAULT
3702 3709 serialized_sidedata = comp_sidedata
3703 3710 else:
3704 3711 sidedata_compression_mode = COMP_MODE_INLINE
3705 3712 serialized_sidedata = comp_sidedata
3706 3713 if entry[8] != 0 or entry[9] != 0:
3707 3714 # rewriting entries that already have sidedata is not
3708 3715 # supported yet, because it introduces garbage data in the
3709 3716 # revlog.
3710 3717 msg = b"rewriting existing sidedata is not supported yet"
3711 3718 raise error.Abort(msg)
3712 3719
3713 3720 # Apply (potential) flags to add and to remove after running
3714 3721 # the sidedata helpers
3715 3722 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3716 3723 entry_update = (
3717 3724 current_offset,
3718 3725 len(serialized_sidedata),
3719 3726 new_offset_flags,
3720 3727 sidedata_compression_mode,
3721 3728 )
3722 3729
3723 3730 # the sidedata computation might have moved the file cursors around
3724 3731 sdfh.seek(current_offset, os.SEEK_SET)
3725 3732 sdfh.write(serialized_sidedata)
3726 3733 new_entries.append(entry_update)
3727 3734 current_offset += len(serialized_sidedata)
3728 3735 self._docket.sidedata_end = sdfh.tell()
3729 3736
3730 3737 # rewrite the new index entries
3731 3738 ifh.seek(startrev * self.index.entry_size)
3732 3739 for i, e in enumerate(new_entries):
3733 3740 rev = startrev + i
3734 3741 self.index.replace_sidedata_info(rev, *e)
3735 3742 packed = self.index.entry_binary(rev)
3736 3743 if rev == 0 and self._docket is None:
3737 3744 header = self._format_flags | self._format_version
3738 3745 header = self.index.pack_header(header)
3739 3746 packed = header + packed
3740 3747 ifh.write(packed)
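# Simplified restatement (hypothetical helper, not part of the changeset) of
# the sidedata compression-mode choice in the loop above: COMP_MODE_PLAIN
# stores the raw bytes, COMP_MODE_DEFAULT marks data compressed with the
# docket's default engine, and COMP_MODE_INLINE keeps the compression header
# inline in the stored data.  ``compress`` returns a ``(header, data)`` pair,
# with ``b'u'`` meaning the data was kept uncompressed:
#
#   def pick_sidedata_mode(raw, compress, default_header):
#       if not raw:
#           return COMP_MODE_INLINE, raw
#       h, comp = compress(raw)
#       if h != b'u' and comp[0:1] != b'\0' and len(comp) < len(raw):
#           if comp[0:1] == default_header:
#               return COMP_MODE_DEFAULT, comp
#           return COMP_MODE_INLINE, comp
#       return COMP_MODE_PLAIN, raw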