revlog: skip opener option to pass chunk_cache_size value...
marmoute
r51925:774c0034 default
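
The change replaces the dedicated b'chunkcachesize' opener option with the chunk_cache_size field on the revlog.DataConfig object that resolverevlogstorevfsoptions() builds. A minimal runnable sketch of the resulting pattern, using a stand-in DataConfig class and an assumed config value rather than Mercurial's real class and ui plumbing:

from dataclasses import dataclass

@dataclass
class DataConfig:
    # stand-in for revlog.DataConfig; the real class defines its own defaults
    chunk_cache_size: int = 65536

options = {}
data_config = options[b'data-config'] = DataConfig()

# stand-in for ui.configint(b'format', b'chunkcachesize')
chunkcachesize = 262144
if chunkcachesize is not None:
    # previously: options[b'chunkcachesize'] = chunkcachesize
    data_config.chunk_cache_size = chunkcachesize

assert options[b'data-config'].chunk_cache_size == 262144
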
@@ -1,4047 +1,4047 @@
1 1 # localrepo.py - read/write repository class for mercurial
2 2 # coding: utf-8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9
10 10 import functools
11 11 import os
12 12 import random
13 13 import re
14 14 import sys
15 15 import time
16 16 import weakref
17 17
18 18 from concurrent import futures
19 19 from typing import (
20 20 Optional,
21 21 )
22 22
23 23 from .i18n import _
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullrev,
28 28 sha1nodeconstants,
29 29 short,
30 30 )
31 31 from . import (
32 32 bookmarks,
33 33 branchmap,
34 34 bundle2,
35 35 bundlecaches,
36 36 changegroup,
37 37 color,
38 38 commit,
39 39 context,
40 40 dirstate,
41 41 discovery,
42 42 encoding,
43 43 error,
44 44 exchange,
45 45 extensions,
46 46 filelog,
47 47 hook,
48 48 lock as lockmod,
49 49 match as matchmod,
50 50 mergestate as mergestatemod,
51 51 mergeutil,
52 52 namespaces,
53 53 narrowspec,
54 54 obsolete,
55 55 pathutil,
56 56 phases,
57 57 policy,
58 58 pushkey,
59 59 pycompat,
60 60 rcutil,
61 61 repoview,
62 62 requirements as requirementsmod,
63 63 revlog,
64 64 revset,
65 65 revsetlang,
66 66 scmutil,
67 67 sparse,
68 68 store as storemod,
69 69 subrepoutil,
70 70 tags as tagsmod,
71 71 transaction,
72 72 txnutil,
73 73 util,
74 74 vfs as vfsmod,
75 75 wireprototypes,
76 76 )
77 77
78 78 from .interfaces import (
79 79 repository,
80 80 util as interfaceutil,
81 81 )
82 82
83 83 from .utils import (
84 84 hashutil,
85 85 procutil,
86 86 stringutil,
87 87 urlutil,
88 88 )
89 89
90 90 from .revlogutils import (
91 91 concurrency_checker as revlogchecker,
92 92 constants as revlogconst,
93 93 sidedata as sidedatamod,
94 94 )
95 95
96 96 release = lockmod.release
97 97 urlerr = util.urlerr
98 98 urlreq = util.urlreq
99 99
100 100 RE_SKIP_DIRSTATE_ROLLBACK = re.compile(
101 101 b"^((dirstate|narrowspec.dirstate).*|branch$)"
102 102 )
103 103
104 104 # set of (path, vfs-location) tuples. vfs-location is:
105 105 # - 'plain' for vfs relative paths
106 106 # - '' for svfs relative paths
107 107 _cachedfiles = set()
108 108
109 109
110 110 class _basefilecache(scmutil.filecache):
111 111 """All filecache usage on repos is done for logic that should be unfiltered"""
112 112
113 113 def __get__(self, repo, type=None):
114 114 if repo is None:
115 115 return self
116 116 # proxy to unfiltered __dict__ since filtered repo has no entry
117 117 unfi = repo.unfiltered()
118 118 try:
119 119 return unfi.__dict__[self.sname]
120 120 except KeyError:
121 121 pass
122 122 return super(_basefilecache, self).__get__(unfi, type)
123 123
124 124 def set(self, repo, value):
125 125 return super(_basefilecache, self).set(repo.unfiltered(), value)
126 126
127 127
128 128 class repofilecache(_basefilecache):
129 129 """filecache for files in .hg but outside of .hg/store"""
130 130
131 131 def __init__(self, *paths):
132 132 super(repofilecache, self).__init__(*paths)
133 133 for path in paths:
134 134 _cachedfiles.add((path, b'plain'))
135 135
136 136 def join(self, obj, fname):
137 137 return obj.vfs.join(fname)
138 138
139 139
140 140 class storecache(_basefilecache):
141 141 """filecache for files in the store"""
142 142
143 143 def __init__(self, *paths):
144 144 super(storecache, self).__init__(*paths)
145 145 for path in paths:
146 146 _cachedfiles.add((path, b''))
147 147
148 148 def join(self, obj, fname):
149 149 return obj.sjoin(fname)
150 150
151 151
152 152 class changelogcache(storecache):
153 153 """filecache for the changelog"""
154 154
155 155 def __init__(self):
156 156 super(changelogcache, self).__init__()
157 157 _cachedfiles.add((b'00changelog.i', b''))
158 158 _cachedfiles.add((b'00changelog.n', b''))
159 159
160 160 def tracked_paths(self, obj):
161 161 paths = [self.join(obj, b'00changelog.i')]
162 162 if obj.store.opener.options.get(b'persistent-nodemap', False):
163 163 paths.append(self.join(obj, b'00changelog.n'))
164 164 return paths
165 165
166 166
167 167 class manifestlogcache(storecache):
168 168 """filecache for the manifestlog"""
169 169
170 170 def __init__(self):
171 171 super(manifestlogcache, self).__init__()
172 172 _cachedfiles.add((b'00manifest.i', b''))
173 173 _cachedfiles.add((b'00manifest.n', b''))
174 174
175 175 def tracked_paths(self, obj):
176 176 paths = [self.join(obj, b'00manifest.i')]
177 177 if obj.store.opener.options.get(b'persistent-nodemap', False):
178 178 paths.append(self.join(obj, b'00manifest.n'))
179 179 return paths
180 180
181 181
182 182 class mixedrepostorecache(_basefilecache):
183 183 """filecache for a mix of files in .hg/store and outside"""
184 184
185 185 def __init__(self, *pathsandlocations):
186 186 # scmutil.filecache only uses the path for passing back into our
187 187 # join(), so we can safely pass a list of paths and locations
188 188 super(mixedrepostorecache, self).__init__(*pathsandlocations)
189 189 _cachedfiles.update(pathsandlocations)
190 190
191 191 def join(self, obj, fnameandlocation):
192 192 fname, location = fnameandlocation
193 193 if location == b'plain':
194 194 return obj.vfs.join(fname)
195 195 else:
196 196 if location != b'':
197 197 raise error.ProgrammingError(
198 198 b'unexpected location: %s' % location
199 199 )
200 200 return obj.sjoin(fname)
201 201
202 202
203 203 def isfilecached(repo, name):
204 204 """check if a repo has already cached the "name" filecache-ed property
205 205
206 206 This returns a (cachedobj-or-None, iscached) tuple.
207 207 """
208 208 cacheentry = repo.unfiltered()._filecache.get(name, None)
209 209 if not cacheentry:
210 210 return None, False
211 211 return cacheentry.obj, True
212 212
213 213
214 214 class unfilteredpropertycache(util.propertycache):
215 215 """propertycache that applies to the unfiltered repo only"""
216 216
217 217 def __get__(self, repo, type=None):
218 218 unfi = repo.unfiltered()
219 219 if unfi is repo:
220 220 return super(unfilteredpropertycache, self).__get__(unfi)
221 221 return getattr(unfi, self.name)
222 222
223 223
224 224 class filteredpropertycache(util.propertycache):
225 225 """propertycache that must take filtering into account"""
226 226
227 227 def cachevalue(self, obj, value):
228 228 object.__setattr__(obj, self.name, value)
229 229
230 230
231 231 def hasunfilteredcache(repo, name):
232 232 """check if a repo has an unfilteredpropertycache value for <name>"""
233 233 return name in vars(repo.unfiltered())
234 234
235 235
236 236 def unfilteredmethod(orig):
237 237 """decorate a method that always needs to be run on the unfiltered version"""
238 238
239 239 @functools.wraps(orig)
240 240 def wrapper(repo, *args, **kwargs):
241 241 return orig(repo.unfiltered(), *args, **kwargs)
242 242
243 243 return wrapper
244 244
245 245
246 246 moderncaps = {
247 247 b'lookup',
248 248 b'branchmap',
249 249 b'pushkey',
250 250 b'known',
251 251 b'getbundle',
252 252 b'unbundle',
253 253 }
254 254 legacycaps = moderncaps.union({b'changegroupsubset'})
255 255
256 256
257 257 @interfaceutil.implementer(repository.ipeercommandexecutor)
258 258 class localcommandexecutor:
259 259 def __init__(self, peer):
260 260 self._peer = peer
261 261 self._sent = False
262 262 self._closed = False
263 263
264 264 def __enter__(self):
265 265 return self
266 266
267 267 def __exit__(self, exctype, excvalue, exctb):
268 268 self.close()
269 269
270 270 def callcommand(self, command, args):
271 271 if self._sent:
272 272 raise error.ProgrammingError(
273 273 b'callcommand() cannot be used after sendcommands()'
274 274 )
275 275
276 276 if self._closed:
277 277 raise error.ProgrammingError(
278 278 b'callcommand() cannot be used after close()'
279 279 )
280 280
281 281 # We don't need to support anything fancy. Just call the named
282 282 # method on the peer and return a resolved future.
283 283 fn = getattr(self._peer, pycompat.sysstr(command))
284 284
285 285 f = futures.Future()
286 286
287 287 try:
288 288 result = fn(**pycompat.strkwargs(args))
289 289 except Exception:
290 290 pycompat.future_set_exception_info(f, sys.exc_info()[1:])
291 291 else:
292 292 f.set_result(result)
293 293
294 294 return f
295 295
296 296 def sendcommands(self):
297 297 self._sent = True
298 298
299 299 def close(self):
300 300 self._closed = True
301 301
302 302
303 303 @interfaceutil.implementer(repository.ipeercommands)
304 304 class localpeer(repository.peer):
305 305 '''peer for a local repo; reflects only the most recent API'''
306 306
307 307 def __init__(self, repo, caps=None, path=None, remotehidden=False):
308 308 super(localpeer, self).__init__(
309 309 repo.ui, path=path, remotehidden=remotehidden
310 310 )
311 311
312 312 if caps is None:
313 313 caps = moderncaps.copy()
314 314 if remotehidden:
315 315 self._repo = repo.filtered(b'served.hidden')
316 316 else:
317 317 self._repo = repo.filtered(b'served')
318 318 if repo._wanted_sidedata:
319 319 formatted = bundle2.format_remote_wanted_sidedata(repo)
320 320 caps.add(b'exp-wanted-sidedata=' + formatted)
321 321
322 322 self._caps = repo._restrictcapabilities(caps)
323 323
324 324 # Begin of _basepeer interface.
325 325
326 326 def url(self):
327 327 return self._repo.url()
328 328
329 329 def local(self):
330 330 return self._repo
331 331
332 332 def canpush(self):
333 333 return True
334 334
335 335 def close(self):
336 336 self._repo.close()
337 337
338 338 # End of _basepeer interface.
339 339
340 340 # Begin of _basewirecommands interface.
341 341
342 342 def branchmap(self):
343 343 return self._repo.branchmap()
344 344
345 345 def capabilities(self):
346 346 return self._caps
347 347
348 348 def get_cached_bundle_inline(self, path):
349 349 # not needed with local peer
350 350 raise NotImplementedError
351 351
352 352 def clonebundles(self):
353 353 return bundlecaches.get_manifest(self._repo)
354 354
355 355 def debugwireargs(self, one, two, three=None, four=None, five=None):
356 356 """Used to test argument passing over the wire"""
357 357 return b"%s %s %s %s %s" % (
358 358 one,
359 359 two,
360 360 pycompat.bytestr(three),
361 361 pycompat.bytestr(four),
362 362 pycompat.bytestr(five),
363 363 )
364 364
365 365 def getbundle(
366 366 self,
367 367 source,
368 368 heads=None,
369 369 common=None,
370 370 bundlecaps=None,
371 371 remote_sidedata=None,
372 372 **kwargs
373 373 ):
374 374 chunks = exchange.getbundlechunks(
375 375 self._repo,
376 376 source,
377 377 heads=heads,
378 378 common=common,
379 379 bundlecaps=bundlecaps,
380 380 remote_sidedata=remote_sidedata,
381 381 **kwargs
382 382 )[1]
383 383 cb = util.chunkbuffer(chunks)
384 384
385 385 if exchange.bundle2requested(bundlecaps):
386 386 # When requesting a bundle2, getbundle returns a stream to make the
387 387 # wire level function happier. We need to build a proper object
388 388 # from it in local peer.
389 389 return bundle2.getunbundler(self.ui, cb)
390 390 else:
391 391 return changegroup.getunbundler(b'01', cb, None)
392 392
393 393 def heads(self):
394 394 return self._repo.heads()
395 395
396 396 def known(self, nodes):
397 397 return self._repo.known(nodes)
398 398
399 399 def listkeys(self, namespace):
400 400 return self._repo.listkeys(namespace)
401 401
402 402 def lookup(self, key):
403 403 return self._repo.lookup(key)
404 404
405 405 def pushkey(self, namespace, key, old, new):
406 406 return self._repo.pushkey(namespace, key, old, new)
407 407
408 408 def stream_out(self):
409 409 raise error.Abort(_(b'cannot perform stream clone against local peer'))
410 410
411 411 def unbundle(self, bundle, heads, url):
412 412 """apply a bundle on a repo
413 413
414 414 This function handles the repo locking itself."""
415 415 try:
416 416 try:
417 417 bundle = exchange.readbundle(self.ui, bundle, None)
418 418 ret = exchange.unbundle(self._repo, bundle, heads, b'push', url)
419 419 if hasattr(ret, 'getchunks'):
420 420 # This is a bundle20 object, turn it into an unbundler.
421 421 # This little dance should be dropped eventually when the
422 422 # API is finally improved.
423 423 stream = util.chunkbuffer(ret.getchunks())
424 424 ret = bundle2.getunbundler(self.ui, stream)
425 425 return ret
426 426 except Exception as exc:
427 427 # If the exception contains output salvaged from a bundle2
428 428 # reply, we need to make sure it is printed before continuing
429 429 # to fail. So we build a bundle2 with such output and consume
430 430 # it directly.
431 431 #
432 432 # This is not very elegant but allows a "simple" solution for
433 433 # issue4594
434 434 output = getattr(exc, '_bundle2salvagedoutput', ())
435 435 if output:
436 436 bundler = bundle2.bundle20(self._repo.ui)
437 437 for out in output:
438 438 bundler.addpart(out)
439 439 stream = util.chunkbuffer(bundler.getchunks())
440 440 b = bundle2.getunbundler(self.ui, stream)
441 441 bundle2.processbundle(self._repo, b)
442 442 raise
443 443 except error.PushRaced as exc:
444 444 raise error.ResponseError(
445 445 _(b'push failed:'), stringutil.forcebytestr(exc)
446 446 )
447 447
448 448 # End of _basewirecommands interface.
449 449
450 450 # Begin of peer interface.
451 451
452 452 def commandexecutor(self):
453 453 return localcommandexecutor(self)
454 454
455 455 # End of peer interface.
456 456
457 457
458 458 @interfaceutil.implementer(repository.ipeerlegacycommands)
459 459 class locallegacypeer(localpeer):
460 460 """peer extension which implements legacy methods too; used for tests with
461 461 restricted capabilities"""
462 462
463 463 def __init__(self, repo, path=None, remotehidden=False):
464 464 super(locallegacypeer, self).__init__(
465 465 repo, caps=legacycaps, path=path, remotehidden=remotehidden
466 466 )
467 467
468 468 # Begin of baselegacywirecommands interface.
469 469
470 470 def between(self, pairs):
471 471 return self._repo.between(pairs)
472 472
473 473 def branches(self, nodes):
474 474 return self._repo.branches(nodes)
475 475
476 476 def changegroup(self, nodes, source):
477 477 outgoing = discovery.outgoing(
478 478 self._repo, missingroots=nodes, ancestorsof=self._repo.heads()
479 479 )
480 480 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
481 481
482 482 def changegroupsubset(self, bases, heads, source):
483 483 outgoing = discovery.outgoing(
484 484 self._repo, missingroots=bases, ancestorsof=heads
485 485 )
486 486 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
487 487
488 488 # End of baselegacywirecommands interface.
489 489
490 490
491 491 # Functions receiving (ui, features) that extensions can register to impact
492 492 # the ability to load repositories with custom requirements. Only
493 493 # functions defined in loaded extensions are called.
494 494 #
495 495 # The function receives a set of requirement strings that the repository
496 496 # is capable of opening. Functions will typically add elements to the
497 497 # set to reflect that the extension knows how to handle those requirements.
498 498 featuresetupfuncs = set()
499 499
500 500
501 501 def _getsharedvfs(hgvfs, requirements):
502 502 """returns the vfs object pointing to the root of the shared source
503 503 repo for a shared repository
504 504
505 505 hgvfs is the vfs pointing at .hg/ of the current repo (the shared one)
506 506 requirements is a set of requirements of the current repo (the shared one)
507 507 """
508 508 # The ``shared`` or ``relshared`` requirements indicate the
509 509 # store lives in the path contained in the ``.hg/sharedpath`` file.
510 510 # This is an absolute path for ``shared`` and relative to
511 511 # ``.hg/`` for ``relshared``.
512 512 sharedpath = hgvfs.read(b'sharedpath').rstrip(b'\n')
513 513 if requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements:
514 514 sharedpath = util.normpath(hgvfs.join(sharedpath))
515 515
516 516 sharedvfs = vfsmod.vfs(sharedpath, realpath=True)
517 517
518 518 if not sharedvfs.exists():
519 519 raise error.RepoError(
520 520 _(b'.hg/sharedpath points to nonexistent directory %s')
521 521 % sharedvfs.base
522 522 )
523 523 return sharedvfs
524 524
525 525
526 526 def _readrequires(vfs, allowmissing):
527 527 """reads the requires file present at the root of this vfs
528 528 and returns a set of requirements
529 529
530 530 If allowmissing is True, we suppress FileNotFoundError if raised"""
531 531 # requires file contains a newline-delimited list of
532 532 # features/capabilities the opener (us) must have in order to use
533 533 # the repository. This file was introduced in Mercurial 0.9.2,
534 534 # which means very old repositories may not have one. We assume
535 535 # a missing file translates to no requirements.
536 536 read = vfs.tryread if allowmissing else vfs.read
537 537 return set(read(b'requires').splitlines())
538 538
539 539
540 540 def makelocalrepository(baseui, path: bytes, intents=None):
541 541 """Create a local repository object.
542 542
543 543 Given arguments needed to construct a local repository, this function
544 544 performs various early repository loading functionality (such as
545 545 reading the ``.hg/requires`` and ``.hg/hgrc`` files), validates that
546 546 the repository can be opened, derives a type suitable for representing
547 547 that repository, and returns an instance of it.
548 548
549 549 The returned object conforms to the ``repository.completelocalrepository``
550 550 interface.
551 551
552 552 The repository type is derived by calling a series of factory functions
553 553 for each aspect/interface of the final repository. These are defined by
554 554 ``REPO_INTERFACES``.
555 555
556 556 Each factory function is called to produce a type implementing a specific
557 557 interface. The cumulative list of returned types will be combined into a
558 558 new type and that type will be instantiated to represent the local
559 559 repository.
560 560
561 561 The factory functions each receive various state that may be consulted
562 562 as part of deriving a type.
563 563
564 564 Extensions should wrap these factory functions to customize repository type
565 565 creation. Note that an extension's wrapped function may be called even if
566 566 that extension is not loaded for the repo being constructed. Extensions
567 567 should check if their ``__name__`` appears in the
568 568 ``extensionmodulenames`` set passed to the factory function and no-op if
569 569 not.
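
For example (a sketch, not part of this module), an extension could wrap one
of these factory functions with ``extensions.wrapfunction``::

    def wrapped_makefilestorage(orig, requirements, features, **kwargs):
        # inspect or adjust arguments here, then delegate to the original
        return orig(requirements, features, **kwargs)

    extensions.wrapfunction(
        localrepo, 'makefilestorage', wrapped_makefilestorage
    )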
570 570 """
571 571 ui = baseui.copy()
572 572 # Prevent copying repo configuration.
573 573 ui.copy = baseui.copy
574 574
575 575 # Working directory VFS rooted at repository root.
576 576 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
577 577
578 578 # Main VFS for .hg/ directory.
579 579 hgpath = wdirvfs.join(b'.hg')
580 580 hgvfs = vfsmod.vfs(hgpath, cacheaudited=True)
581 581 # Whether this repository is a shared one or not
582 582 shared = False
583 583 # If this repository is shared, vfs pointing to shared repo
584 584 sharedvfs = None
585 585
586 586 # The .hg/ path should exist and should be a directory. All other
587 587 # cases are errors.
588 588 if not hgvfs.isdir():
589 589 try:
590 590 hgvfs.stat()
591 591 except FileNotFoundError:
592 592 pass
593 593 except ValueError as e:
594 594 # Can be raised on Python 3.8 when path is invalid.
595 595 raise error.Abort(
596 596 _(b'invalid path %s: %s') % (path, stringutil.forcebytestr(e))
597 597 )
598 598
599 599 raise error.RepoError(_(b'repository %s not found') % path)
600 600
601 601 requirements = _readrequires(hgvfs, True)
602 602 shared = (
603 603 requirementsmod.SHARED_REQUIREMENT in requirements
604 604 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
605 605 )
606 606 storevfs = None
607 607 if shared:
608 608 # This is a shared repo
609 609 sharedvfs = _getsharedvfs(hgvfs, requirements)
610 610 storevfs = vfsmod.vfs(sharedvfs.join(b'store'))
611 611 else:
612 612 storevfs = vfsmod.vfs(hgvfs.join(b'store'))
613 613
614 614 # if .hg/requires contains the sharesafe requirement, it means
615 615 # there exists a `.hg/store/requires` too and we should read it
616 616 # NOTE: presence of SHARESAFE_REQUIREMENT implies that the store requirement
617 617 # is present. We never write SHARESAFE_REQUIREMENT for a repo if the store
618 618 # is not present; refer to checkrequirementscompat() for that
619 619 #
620 620 # However, if SHARESAFE_REQUIREMENT is not present, it means that the
621 621 # repository was shared the old way. We check the share source .hg/requires
622 622 # for SHARESAFE_REQUIREMENT to detect whether the current repository needs
623 623 # to be reshared
624 624 hint = _(b"see `hg help config.format.use-share-safe` for more information")
625 625 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
626 626 if (
627 627 shared
628 628 and requirementsmod.SHARESAFE_REQUIREMENT
629 629 not in _readrequires(sharedvfs, True)
630 630 ):
631 631 mismatch_warn = ui.configbool(
632 632 b'share', b'safe-mismatch.source-not-safe.warn'
633 633 )
634 634 mismatch_config = ui.config(
635 635 b'share', b'safe-mismatch.source-not-safe'
636 636 )
637 637 mismatch_verbose_upgrade = ui.configbool(
638 638 b'share', b'safe-mismatch.source-not-safe:verbose-upgrade'
639 639 )
640 640 if mismatch_config in (
641 641 b'downgrade-allow',
642 642 b'allow',
643 643 b'downgrade-abort',
644 644 ):
645 645 # prevent cyclic import localrepo -> upgrade -> localrepo
646 646 from . import upgrade
647 647
648 648 upgrade.downgrade_share_to_non_safe(
649 649 ui,
650 650 hgvfs,
651 651 sharedvfs,
652 652 requirements,
653 653 mismatch_config,
654 654 mismatch_warn,
655 655 mismatch_verbose_upgrade,
656 656 )
657 657 elif mismatch_config == b'abort':
658 658 raise error.Abort(
659 659 _(b"share source does not support share-safe requirement"),
660 660 hint=hint,
661 661 )
662 662 else:
663 663 raise error.Abort(
664 664 _(
665 665 b"share-safe mismatch with source.\nUnrecognized"
666 666 b" value '%s' of `share.safe-mismatch.source-not-safe`"
667 667 b" set."
668 668 )
669 669 % mismatch_config,
670 670 hint=hint,
671 671 )
672 672 else:
673 673 requirements |= _readrequires(storevfs, False)
674 674 elif shared:
675 675 sourcerequires = _readrequires(sharedvfs, False)
676 676 if requirementsmod.SHARESAFE_REQUIREMENT in sourcerequires:
677 677 mismatch_config = ui.config(b'share', b'safe-mismatch.source-safe')
678 678 mismatch_warn = ui.configbool(
679 679 b'share', b'safe-mismatch.source-safe.warn'
680 680 )
681 681 mismatch_verbose_upgrade = ui.configbool(
682 682 b'share', b'safe-mismatch.source-safe:verbose-upgrade'
683 683 )
684 684 if mismatch_config in (
685 685 b'upgrade-allow',
686 686 b'allow',
687 687 b'upgrade-abort',
688 688 ):
689 689 # prevent cyclic import localrepo -> upgrade -> localrepo
690 690 from . import upgrade
691 691
692 692 upgrade.upgrade_share_to_safe(
693 693 ui,
694 694 hgvfs,
695 695 storevfs,
696 696 requirements,
697 697 mismatch_config,
698 698 mismatch_warn,
699 699 mismatch_verbose_upgrade,
700 700 )
701 701 elif mismatch_config == b'abort':
702 702 raise error.Abort(
703 703 _(
704 704 b'version mismatch: source uses share-safe'
705 705 b' functionality while the current share does not'
706 706 ),
707 707 hint=hint,
708 708 )
709 709 else:
710 710 raise error.Abort(
711 711 _(
712 712 b"share-safe mismatch with source.\nUnrecognized"
713 713 b" value '%s' of `share.safe-mismatch.source-safe` set."
714 714 )
715 715 % mismatch_config,
716 716 hint=hint,
717 717 )
718 718
719 719 # The .hg/hgrc file may load extensions or contain config options
720 720 # that influence repository construction. Attempt to load it and
721 721 # process any new extensions that it may have pulled in.
722 722 if loadhgrc(ui, wdirvfs, hgvfs, requirements, sharedvfs):
723 723 afterhgrcload(ui, wdirvfs, hgvfs, requirements)
724 724 extensions.loadall(ui)
725 725 extensions.populateui(ui)
726 726
727 727 # Set of module names of extensions loaded for this repository.
728 728 extensionmodulenames = {m.__name__ for n, m in extensions.extensions(ui)}
729 729
730 730 supportedrequirements = gathersupportedrequirements(ui)
731 731
732 732 # We first validate the requirements are known.
733 733 ensurerequirementsrecognized(requirements, supportedrequirements)
734 734
735 735 # Then we validate that the known set is reasonable to use together.
736 736 ensurerequirementscompatible(ui, requirements)
737 737
738 738 # TODO there are unhandled edge cases related to opening repositories with
739 739 # shared storage. If storage is shared, we should also test for requirements
740 740 # compatibility in the pointed-to repo. This entails loading the .hg/hgrc in
741 741 # that repo, as that repo may load extensions needed to open it. This is a
742 742 # bit complicated because we don't want the other hgrc to overwrite settings
743 743 # in this hgrc.
744 744 #
745 745 # This bug is somewhat mitigated by the fact that we copy the .hg/requires
746 746 # file when sharing repos. But if a requirement is added after the share is
747 747 # performed, thereby introducing a new requirement for the opener, we
748 748 # will not see that and could encounter a run-time error interacting with
749 749 # that shared store since it has an unknown-to-us requirement.
750 750
751 751 # At this point, we know we should be capable of opening the repository.
752 752 # Now get on with doing that.
753 753
754 754 features = set()
755 755
756 756 # The "store" part of the repository holds versioned data. How it is
757 757 # accessed is determined by various requirements. If `shared` or
758 758 # `relshared` requirements are present, this indicates the current repository
759 759 # is a share and the store exists in the path mentioned in `.hg/sharedpath`
760 760 if shared:
761 761 storebasepath = sharedvfs.base
762 762 cachepath = sharedvfs.join(b'cache')
763 763 features.add(repository.REPO_FEATURE_SHARED_STORAGE)
764 764 else:
765 765 storebasepath = hgvfs.base
766 766 cachepath = hgvfs.join(b'cache')
767 767 wcachepath = hgvfs.join(b'wcache')
768 768
769 769 # The store has changed over time and the exact layout is dictated by
770 770 # requirements. The store interface abstracts differences across all
771 771 # of them.
772 772 store = makestore(
773 773 requirements,
774 774 storebasepath,
775 775 lambda base: vfsmod.vfs(base, cacheaudited=True),
776 776 )
777 777 hgvfs.createmode = store.createmode
778 778
779 779 storevfs = store.vfs
780 780 storevfs.options = resolvestorevfsoptions(ui, requirements, features)
781 781
782 782 if (
783 783 requirementsmod.REVLOGV2_REQUIREMENT in requirements
784 784 or requirementsmod.CHANGELOGV2_REQUIREMENT in requirements
785 785 ):
786 786 features.add(repository.REPO_FEATURE_SIDE_DATA)
787 787 # the revlogv2 docket introduced race condition that we need to fix
788 788 features.discard(repository.REPO_FEATURE_STREAM_CLONE)
789 789
790 790 # The cache vfs is used to manage cache files.
791 791 cachevfs = vfsmod.vfs(cachepath, cacheaudited=True)
792 792 cachevfs.createmode = store.createmode
793 793 # The cache vfs is used to manage cache files related to the working copy
794 794 wcachevfs = vfsmod.vfs(wcachepath, cacheaudited=True)
795 795 wcachevfs.createmode = store.createmode
796 796
797 797 # Now resolve the type for the repository object. We do this by repeatedly
798 798 # calling a factory function to produce types for specific aspects of the
799 799 # repo's operation. The aggregate returned types are used as base classes
800 800 # for a dynamically-derived type, which will represent our new repository.
801 801
802 802 bases = []
803 803 extrastate = {}
804 804
805 805 for iface, fn in REPO_INTERFACES:
806 806 # We pass all potentially useful state to give extensions tons of
807 807 # flexibility.
808 808 typ = fn()(
809 809 ui=ui,
810 810 intents=intents,
811 811 requirements=requirements,
812 812 features=features,
813 813 wdirvfs=wdirvfs,
814 814 hgvfs=hgvfs,
815 815 store=store,
816 816 storevfs=storevfs,
817 817 storeoptions=storevfs.options,
818 818 cachevfs=cachevfs,
819 819 wcachevfs=wcachevfs,
820 820 extensionmodulenames=extensionmodulenames,
821 821 extrastate=extrastate,
822 822 baseclasses=bases,
823 823 )
824 824
825 825 if not isinstance(typ, type):
826 826 raise error.ProgrammingError(
827 827 b'unable to construct type for %s' % iface
828 828 )
829 829
830 830 bases.append(typ)
831 831
832 832 # type() allows you to use characters in type names that wouldn't be
833 833 # recognized as Python symbols in source code. We abuse that to add
834 834 # rich information about our constructed repo.
835 835 name = pycompat.sysstr(
836 836 b'derivedrepo:%s<%s>' % (wdirvfs.base, b','.join(sorted(requirements)))
837 837 )
838 838
839 839 cls = type(name, tuple(bases), {})
840 840
841 841 return cls(
842 842 baseui=baseui,
843 843 ui=ui,
844 844 origroot=path,
845 845 wdirvfs=wdirvfs,
846 846 hgvfs=hgvfs,
847 847 requirements=requirements,
848 848 supportedrequirements=supportedrequirements,
849 849 sharedpath=storebasepath,
850 850 store=store,
851 851 cachevfs=cachevfs,
852 852 wcachevfs=wcachevfs,
853 853 features=features,
854 854 intents=intents,
855 855 )
856 856
857 857
858 858 def loadhgrc(
859 859 ui,
860 860 wdirvfs: vfsmod.vfs,
861 861 hgvfs: vfsmod.vfs,
862 862 requirements,
863 863 sharedvfs: Optional[vfsmod.vfs] = None,
864 864 ):
865 865 """Load hgrc files/content into a ui instance.
866 866
867 867 This is called during repository opening to load any additional
868 868 config files or settings relevant to the current repository.
869 869
870 870 Returns a bool indicating whether any additional configs were loaded.
871 871
872 872 Extensions should monkeypatch this function to modify how per-repo
873 873 configs are loaded. For example, an extension may wish to pull in
874 874 configs from alternate files or sources.
875 875
876 876 sharedvfs is a vfs object pointing to the source repo if the current one is
877 877 a shared one
878 878 """
879 879 if not rcutil.use_repo_hgrc():
880 880 return False
881 881
882 882 ret = False
883 883 # first load config from the shared source if we have to
884 884 if requirementsmod.SHARESAFE_REQUIREMENT in requirements and sharedvfs:
885 885 try:
886 886 ui.readconfig(sharedvfs.join(b'hgrc'), root=sharedvfs.base)
887 887 ret = True
888 888 except IOError:
889 889 pass
890 890
891 891 try:
892 892 ui.readconfig(hgvfs.join(b'hgrc'), root=wdirvfs.base)
893 893 ret = True
894 894 except IOError:
895 895 pass
896 896
897 897 try:
898 898 ui.readconfig(hgvfs.join(b'hgrc-not-shared'), root=wdirvfs.base)
899 899 ret = True
900 900 except IOError:
901 901 pass
902 902
903 903 return ret
904 904
905 905
906 906 def afterhgrcload(ui, wdirvfs, hgvfs, requirements):
907 907 """Perform additional actions after .hg/hgrc is loaded.
908 908
909 909 This function is called during repository loading immediately after
910 910 the .hg/hgrc file is loaded and before per-repo extensions are loaded.
911 911
912 912 The function can be used to validate configs, automatically add
913 913 options (including extensions) based on requirements, etc.
914 914 """
915 915
916 916 # Map of requirements to list of extensions to load automatically when
917 917 # requirement is present.
918 918 autoextensions = {
919 919 b'git': [b'git'],
920 920 b'largefiles': [b'largefiles'],
921 921 b'lfs': [b'lfs'],
922 922 }
923 923
924 924 for requirement, names in sorted(autoextensions.items()):
925 925 if requirement not in requirements:
926 926 continue
927 927
928 928 for name in names:
929 929 if not ui.hasconfig(b'extensions', name):
930 930 ui.setconfig(b'extensions', name, b'', source=b'autoload')
931 931
932 932
933 933 def gathersupportedrequirements(ui):
934 934 """Determine the complete set of recognized requirements."""
935 935 # Start with all requirements supported by this file.
936 936 supported = set(localrepository._basesupported)
937 937
938 938 # Execute ``featuresetupfuncs`` entries if they belong to an extension
939 939 # relevant to this ui instance.
940 940 modules = {m.__name__ for n, m in extensions.extensions(ui)}
941 941
942 942 for fn in featuresetupfuncs:
943 943 if fn.__module__ in modules:
944 944 fn(ui, supported)
945 945
946 946 # Add derived requirements from registered compression engines.
947 947 for name in util.compengines:
948 948 engine = util.compengines[name]
949 949 if engine.available() and engine.revlogheader():
950 950 supported.add(b'exp-compression-%s' % name)
951 951 if engine.name() == b'zstd':
952 952 supported.add(requirementsmod.REVLOG_COMPRESSION_ZSTD)
953 953
954 954 return supported
955 955
956 956
957 957 def ensurerequirementsrecognized(requirements, supported):
958 958 """Validate that a set of local requirements is recognized.
959 959
960 960 Receives a set of requirements. Raises an ``error.RepoError`` if there
961 961 exists any requirement in that set that currently loaded code doesn't
962 962 recognize.
963 963
964 964 Returns a set of supported requirements.
965 965 """
966 966 missing = set()
967 967
968 968 for requirement in requirements:
969 969 if requirement in supported:
970 970 continue
971 971
972 972 if not requirement or not requirement[0:1].isalnum():
973 973 raise error.RequirementError(_(b'.hg/requires file is corrupt'))
974 974
975 975 missing.add(requirement)
976 976
977 977 if missing:
978 978 raise error.RequirementError(
979 979 _(b'repository requires features unknown to this Mercurial: %s')
980 980 % b' '.join(sorted(missing)),
981 981 hint=_(
982 982 b'see https://mercurial-scm.org/wiki/MissingRequirement '
983 983 b'for more information'
984 984 ),
985 985 )
986 986
987 987
988 988 def ensurerequirementscompatible(ui, requirements):
989 989 """Validates that a set of recognized requirements is mutually compatible.
990 990
991 991 Some requirements may not be compatible with others or require
992 992 config options that aren't enabled. This function is called during
993 993 repository opening to ensure that the set of requirements needed
994 994 to open a repository is sane and compatible with config options.
995 995
996 996 Extensions can monkeypatch this function to perform additional
997 997 checking.
998 998
999 999 ``error.RepoError`` should be raised on failure.
1000 1000 """
1001 1001 if (
1002 1002 requirementsmod.SPARSE_REQUIREMENT in requirements
1003 1003 and not sparse.enabled
1004 1004 ):
1005 1005 raise error.RepoError(
1006 1006 _(
1007 1007 b'repository is using sparse feature but '
1008 1008 b'sparse is not enabled; enable the '
1009 1009 b'"sparse" extensions to access'
1010 1010 )
1011 1011 )
1012 1012
1013 1013
1014 1014 def makestore(requirements, path, vfstype):
1015 1015 """Construct a storage object for a repository."""
1016 1016 if requirementsmod.STORE_REQUIREMENT in requirements:
1017 1017 if requirementsmod.FNCACHE_REQUIREMENT in requirements:
1018 1018 dotencode = requirementsmod.DOTENCODE_REQUIREMENT in requirements
1019 1019 return storemod.fncachestore(path, vfstype, dotencode)
1020 1020
1021 1021 return storemod.encodedstore(path, vfstype)
1022 1022
1023 1023 return storemod.basicstore(path, vfstype)
1024 1024
1025 1025
1026 1026 def resolvestorevfsoptions(ui, requirements, features):
1027 1027 """Resolve the options to pass to the store vfs opener.
1028 1028
1029 1029 The returned dict is used to influence behavior of the storage layer.
1030 1030 """
1031 1031 options = {}
1032 1032
1033 1033 if requirementsmod.TREEMANIFEST_REQUIREMENT in requirements:
1034 1034 options[b'treemanifest'] = True
1035 1035
1036 1036 # experimental config: format.manifestcachesize
1037 1037 manifestcachesize = ui.configint(b'format', b'manifestcachesize')
1038 1038 if manifestcachesize is not None:
1039 1039 options[b'manifestcachesize'] = manifestcachesize
1040 1040
1041 1041 # In the absence of another requirement superseding a revlog-related
1042 1042 # requirement, we have to assume the repo is using revlog version 0.
1043 1043 # This revlog format is super old and we don't bother trying to parse
1044 1044 # opener options for it because those options wouldn't do anything
1045 1045 # meaningful on such old repos.
1046 1046 if (
1047 1047 requirementsmod.REVLOGV1_REQUIREMENT in requirements
1048 1048 or requirementsmod.REVLOGV2_REQUIREMENT in requirements
1049 1049 ):
1050 1050 options.update(resolverevlogstorevfsoptions(ui, requirements, features))
1051 1051 else: # explicitly mark repo as using revlogv0
1052 1052 options[b'revlogv0'] = True
1053 1053
1054 1054 if requirementsmod.COPIESSDC_REQUIREMENT in requirements:
1055 1055 options[b'copies-storage'] = b'changeset-sidedata'
1056 1056 else:
1057 1057 writecopiesto = ui.config(b'experimental', b'copies.write-to')
1058 1058 copiesextramode = (b'changeset-only', b'compatibility')
1059 1059 if writecopiesto in copiesextramode:
1060 1060 options[b'copies-storage'] = b'extra'
1061 1061
1062 1062 return options
1063 1063
1064 1064
1065 1065 def resolverevlogstorevfsoptions(ui, requirements, features):
1066 1066 """Resolve opener options specific to revlogs."""
1067 1067
1068 1068 options = {}
1069 1069 options[b'flagprocessors'] = {}
1070 1070
1071 1071 feature_config = options[b'feature-config'] = revlog.FeatureConfig()
1072 1072 data_config = options[b'data-config'] = revlog.DataConfig()
1073 1073 delta_config = options[b'delta-config'] = revlog.DeltaConfig()
1074 1074
1075 1075 if requirementsmod.REVLOGV1_REQUIREMENT in requirements:
1076 1076 options[b'revlogv1'] = True
1077 1077 if requirementsmod.REVLOGV2_REQUIREMENT in requirements:
1078 1078 options[b'revlogv2'] = True
1079 1079 if requirementsmod.CHANGELOGV2_REQUIREMENT in requirements:
1080 1080 options[b'changelogv2'] = True
1081 1081 cmp_rank = ui.configbool(b'experimental', b'changelog-v2.compute-rank')
1082 1082 options[b'changelogv2.compute-rank'] = cmp_rank
1083 1083
1084 1084 if requirementsmod.GENERALDELTA_REQUIREMENT in requirements:
1085 1085 options[b'generaldelta'] = True
1086 1086
1087 1087 # experimental config: format.chunkcachesize
1088 1088 chunkcachesize = ui.configint(b'format', b'chunkcachesize')
1089 1089 if chunkcachesize is not None:
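# set on the DataConfig object; the dedicated opener option is no longer used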
1090 options[b'chunkcachesize'] = chunkcachesize
1090 data_config.chunk_cache_size = chunkcachesize
1091 1091
1092 1092 deltabothparents = ui.configbool(
1093 1093 b'storage', b'revlog.optimize-delta-parent-choice'
1094 1094 )
1095 1095 options[b'deltabothparents'] = deltabothparents
1096 1096 dps_cgds = ui.configint(
1097 1097 b'storage',
1098 1098 b'revlog.delta-parent-search.candidate-group-chunk-size',
1099 1099 )
1100 1100 options[b'delta-parent-search.candidate-group-chunk-size'] = dps_cgds
1101 1101 options[b'debug-delta'] = ui.configbool(b'debug', b'revlog.debug-delta')
1102 1102
1103 1103 issue6528 = ui.configbool(b'storage', b'revlog.issue6528.fix-incoming')
1104 1104 options[b'issue6528.fix-incoming'] = issue6528
1105 1105
1106 1106 lazydelta = ui.configbool(b'storage', b'revlog.reuse-external-delta')
1107 1107 lazydeltabase = False
1108 1108 if lazydelta:
1109 1109 lazydeltabase = ui.configbool(
1110 1110 b'storage', b'revlog.reuse-external-delta-parent'
1111 1111 )
1112 1112 if lazydeltabase is None:
1113 1113 lazydeltabase = not scmutil.gddeltaconfig(ui)
1114 1114 options[b'lazydelta'] = lazydelta
1115 1115 options[b'lazydeltabase'] = lazydeltabase
1116 1116
1117 1117 chainspan = ui.configbytes(b'experimental', b'maxdeltachainspan')
1118 1118 if 0 <= chainspan:
1119 1119 options[b'maxdeltachainspan'] = chainspan
1120 1120
1121 1121 mmapindexthreshold = ui.configbytes(b'experimental', b'mmapindexthreshold')
1122 1122 if mmapindexthreshold is not None:
1123 1123 options[b'mmapindexthreshold'] = mmapindexthreshold
1124 1124
1125 1125 withsparseread = ui.configbool(b'experimental', b'sparse-read')
1126 1126 srdensitythres = float(
1127 1127 ui.config(b'experimental', b'sparse-read.density-threshold')
1128 1128 )
1129 1129 srmingapsize = ui.configbytes(b'experimental', b'sparse-read.min-gap-size')
1130 1130 options[b'with-sparse-read'] = withsparseread
1131 1131 options[b'sparse-read-density-threshold'] = srdensitythres
1132 1132 options[b'sparse-read-min-gap-size'] = srmingapsize
1133 1133
1134 1134 sparserevlog = requirementsmod.SPARSEREVLOG_REQUIREMENT in requirements
1135 1135 options[b'sparse-revlog'] = sparserevlog
1136 1136 if sparserevlog:
1137 1137 options[b'generaldelta'] = True
1138 1138
1139 1139 maxchainlen = None
1140 1140 if sparserevlog:
1141 1141 maxchainlen = revlogconst.SPARSE_REVLOG_MAX_CHAIN_LENGTH
1142 1142 # experimental config: format.maxchainlen
1143 1143 maxchainlen = ui.configint(b'format', b'maxchainlen', maxchainlen)
1144 1144 if maxchainlen is not None:
1145 1145 options[b'maxchainlen'] = maxchainlen
1146 1146
1147 1147 for r in requirements:
1148 1148 # we allow multiple compression engine requirements to co-exist because
1149 1149 # strictly speaking, revlog seems to support mixed compression styles.
1150 1150 #
1151 1151 # The compression used for new entries will be "the last one"
1152 1152 prefix = r.startswith
1153 1153 if prefix(b'revlog-compression-') or prefix(b'exp-compression-'):
1154 1154 options[b'compengine'] = r.split(b'-', 2)[2]
1155 1155
1156 1156 options[b'zlib.level'] = ui.configint(b'storage', b'revlog.zlib.level')
1157 1157 if options[b'zlib.level'] is not None:
1158 1158 if not (0 <= options[b'zlib.level'] <= 9):
1159 1159 msg = _(b'invalid value for `storage.revlog.zlib.level` config: %d')
1160 1160 raise error.Abort(msg % options[b'zlib.level'])
1161 1161 options[b'zstd.level'] = ui.configint(b'storage', b'revlog.zstd.level')
1162 1162 if options[b'zstd.level'] is not None:
1163 1163 if not (0 <= options[b'zstd.level'] <= 22):
1164 1164 msg = _(b'invalid value for `storage.revlog.zstd.level` config: %d')
1165 1165 raise error.Abort(msg % options[b'zstd.level'])
1166 1166
1167 1167 if requirementsmod.NARROW_REQUIREMENT in requirements:
1168 1168 options[b'enableellipsis'] = True
1169 1169
1170 1170 if ui.configbool(b'experimental', b'rust.index'):
1171 1171 options[b'rust.index'] = True
1172 1172 if requirementsmod.NODEMAP_REQUIREMENT in requirements:
1173 1173 slow_path = ui.config(
1174 1174 b'storage', b'revlog.persistent-nodemap.slow-path'
1175 1175 )
1176 1176 if slow_path not in (b'allow', b'warn', b'abort'):
1177 1177 default = ui.config_default(
1178 1178 b'storage', b'revlog.persistent-nodemap.slow-path'
1179 1179 )
1180 1180 msg = _(
1181 1181 b'unknown value for config '
1182 1182 b'"storage.revlog.persistent-nodemap.slow-path": "%s"\n'
1183 1183 )
1184 1184 ui.warn(msg % slow_path)
1185 1185 if not ui.quiet:
1186 1186 ui.warn(_(b'falling back to default value: %s\n') % default)
1187 1187 slow_path = default
1188 1188
1189 1189 msg = _(
1190 1190 b"accessing `persistent-nodemap` repository without associated "
1191 1191 b"fast implementation."
1192 1192 )
1193 1193 hint = _(
1194 1194 b"check `hg help config.format.use-persistent-nodemap` "
1195 1195 b"for details"
1196 1196 )
1197 1197 if not revlog.HAS_FAST_PERSISTENT_NODEMAP:
1198 1198 if slow_path == b'warn':
1199 1199 msg = b"warning: " + msg + b'\n'
1200 1200 ui.warn(msg)
1201 1201 if not ui.quiet:
1202 1202 hint = b'(' + hint + b')\n'
1203 1203 ui.warn(hint)
1204 1204 if slow_path == b'abort':
1205 1205 raise error.Abort(msg, hint=hint)
1206 1206 options[b'persistent-nodemap'] = True
1207 1207 if requirementsmod.DIRSTATE_V2_REQUIREMENT in requirements:
1208 1208 slow_path = ui.config(b'storage', b'dirstate-v2.slow-path')
1209 1209 if slow_path not in (b'allow', b'warn', b'abort'):
1210 1210 default = ui.config_default(b'storage', b'dirstate-v2.slow-path')
1211 1211 msg = _(b'unknown value for config "dirstate-v2.slow-path": "%s"\n')
1212 1212 ui.warn(msg % slow_path)
1213 1213 if not ui.quiet:
1214 1214 ui.warn(_(b'falling back to default value: %s\n') % default)
1215 1215 slow_path = default
1216 1216
1217 1217 msg = _(
1218 1218 b"accessing `dirstate-v2` repository without associated "
1219 1219 b"fast implementation."
1220 1220 )
1221 1221 hint = _(
1222 1222 b"check `hg help config.format.use-dirstate-v2` " b"for details"
1223 1223 )
1224 1224 if not dirstate.HAS_FAST_DIRSTATE_V2:
1225 1225 if slow_path == b'warn':
1226 1226 msg = b"warning: " + msg + b'\n'
1227 1227 ui.warn(msg)
1228 1228 if not ui.quiet:
1229 1229 hint = b'(' + hint + b')\n'
1230 1230 ui.warn(hint)
1231 1231 if slow_path == b'abort':
1232 1232 raise error.Abort(msg, hint=hint)
1233 1233 if ui.configbool(b'storage', b'revlog.persistent-nodemap.mmap'):
1234 1234 options[b'persistent-nodemap.mmap'] = True
1235 1235 if ui.configbool(b'devel', b'persistent-nodemap'):
1236 1236 options[b'devel-force-nodemap'] = True
1237 1237
1238 1238 return options
1239 1239
1240 1240
1241 1241 def makemain(**kwargs):
1242 1242 """Produce a type conforming to ``ilocalrepositorymain``."""
1243 1243 return localrepository
1244 1244
1245 1245
1246 1246 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1247 1247 class revlogfilestorage:
1248 1248 """File storage when using revlogs."""
1249 1249
1250 1250 def file(self, path):
1251 1251 if path.startswith(b'/'):
1252 1252 path = path[1:]
1253 1253
1254 1254 try_split = (
1255 1255 self.currenttransaction() is not None
1256 1256 or txnutil.mayhavepending(self.root)
1257 1257 )
1258 1258
1259 1259 return filelog.filelog(self.svfs, path, try_split=try_split)
1260 1260
1261 1261
1262 1262 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1263 1263 class revlognarrowfilestorage:
1264 1264 """File storage when using revlogs and narrow files."""
1265 1265
1266 1266 def file(self, path):
1267 1267 if path.startswith(b'/'):
1268 1268 path = path[1:]
1269 1269
1270 1270 try_split = (
1271 1271 self.currenttransaction() is not None
1272 1272 or txnutil.mayhavepending(self.root)
1273 1273 )
1274 1274 return filelog.narrowfilelog(
1275 1275 self.svfs, path, self._storenarrowmatch, try_split=try_split
1276 1276 )
1277 1277
1278 1278
1279 1279 def makefilestorage(requirements, features, **kwargs):
1280 1280 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1281 1281 features.add(repository.REPO_FEATURE_REVLOG_FILE_STORAGE)
1282 1282 features.add(repository.REPO_FEATURE_STREAM_CLONE)
1283 1283
1284 1284 if requirementsmod.NARROW_REQUIREMENT in requirements:
1285 1285 return revlognarrowfilestorage
1286 1286 else:
1287 1287 return revlogfilestorage
1288 1288
1289 1289
1290 1290 # List of repository interfaces and factory functions for them. Each
1291 1291 # will be called in order during ``makelocalrepository()`` to iteratively
1292 1292 # derive the final type for a local repository instance. We capture the
1293 1293 # function as a lambda so we don't hold a reference and the module-level
1294 1294 # functions can be wrapped.
1295 1295 REPO_INTERFACES = [
1296 1296 (repository.ilocalrepositorymain, lambda: makemain),
1297 1297 (repository.ilocalrepositoryfilestorage, lambda: makefilestorage),
1298 1298 ]
1299 1299
1300 1300
1301 1301 @interfaceutil.implementer(repository.ilocalrepositorymain)
1302 1302 class localrepository:
1303 1303 """Main class for representing local repositories.
1304 1304
1305 1305 All local repositories are instances of this class.
1306 1306
1307 1307 Constructed on its own, instances of this class are not usable as
1308 1308 repository objects. To obtain a usable repository object, call
1309 1309 ``hg.repository()``, ``localrepo.instance()``, or
1310 1310 ``localrepo.makelocalrepository()``. The latter is the lowest-level.
1311 1311 ``instance()`` adds support for creating new repositories.
1312 1312 ``hg.repository()`` adds more extension integration, including calling
1313 1313 ``reposetup()``. Generally speaking, ``hg.repository()`` should be
1314 1314 used.
1315 1315 """
1316 1316
1317 1317 _basesupported = {
1318 1318 requirementsmod.ARCHIVED_PHASE_REQUIREMENT,
1319 1319 requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT,
1320 1320 requirementsmod.CHANGELOGV2_REQUIREMENT,
1321 1321 requirementsmod.COPIESSDC_REQUIREMENT,
1322 1322 requirementsmod.DIRSTATE_TRACKED_HINT_V1,
1323 1323 requirementsmod.DIRSTATE_V2_REQUIREMENT,
1324 1324 requirementsmod.DOTENCODE_REQUIREMENT,
1325 1325 requirementsmod.FNCACHE_REQUIREMENT,
1326 1326 requirementsmod.GENERALDELTA_REQUIREMENT,
1327 1327 requirementsmod.INTERNAL_PHASE_REQUIREMENT,
1328 1328 requirementsmod.NODEMAP_REQUIREMENT,
1329 1329 requirementsmod.RELATIVE_SHARED_REQUIREMENT,
1330 1330 requirementsmod.REVLOGV1_REQUIREMENT,
1331 1331 requirementsmod.REVLOGV2_REQUIREMENT,
1332 1332 requirementsmod.SHARED_REQUIREMENT,
1333 1333 requirementsmod.SHARESAFE_REQUIREMENT,
1334 1334 requirementsmod.SPARSE_REQUIREMENT,
1335 1335 requirementsmod.SPARSEREVLOG_REQUIREMENT,
1336 1336 requirementsmod.STORE_REQUIREMENT,
1337 1337 requirementsmod.TREEMANIFEST_REQUIREMENT,
1338 1338 }
1339 1339
1340 1340 # list of prefixes for files which can be written without 'wlock'
1341 1341 # Extensions should extend this list when needed
1342 1342 _wlockfreeprefix = {
1343 1343 # We might consider requiring 'wlock' for the next
1344 1344 # two, but pretty much all the existing code assumes
1345 1345 # wlock is not needed so we keep them excluded for
1346 1346 # now.
1347 1347 b'hgrc',
1348 1348 b'requires',
1349 1349 # XXX cache is a complicated business; someone
1350 1350 # should investigate this in depth at some point
1351 1351 b'cache/',
1352 1352 # XXX bisect was still a bit too messy at the time
1353 1353 # this changeset was introduced. Someone should fix
1354 1354 # the remaining bit and drop this line
1355 1355 b'bisect.state',
1356 1356 }
1357 1357
1358 1358 def __init__(
1359 1359 self,
1360 1360 baseui,
1361 1361 ui,
1362 1362 origroot: bytes,
1363 1363 wdirvfs: vfsmod.vfs,
1364 1364 hgvfs: vfsmod.vfs,
1365 1365 requirements,
1366 1366 supportedrequirements,
1367 1367 sharedpath: bytes,
1368 1368 store,
1369 1369 cachevfs: vfsmod.vfs,
1370 1370 wcachevfs: vfsmod.vfs,
1371 1371 features,
1372 1372 intents=None,
1373 1373 ):
1374 1374 """Create a new local repository instance.
1375 1375
1376 1376 Most callers should use ``hg.repository()``, ``localrepo.instance()``,
1377 1377 or ``localrepo.makelocalrepository()`` for obtaining a new repository
1378 1378 object.
1379 1379
1380 1380 Arguments:
1381 1381
1382 1382 baseui
1383 1383 ``ui.ui`` instance that ``ui`` argument was based off of.
1384 1384
1385 1385 ui
1386 1386 ``ui.ui`` instance for use by the repository.
1387 1387
1388 1388 origroot
1389 1389 ``bytes`` path to working directory root of this repository.
1390 1390
1391 1391 wdirvfs
1392 1392 ``vfs.vfs`` rooted at the working directory.
1393 1393
1394 1394 hgvfs
1395 1395 ``vfs.vfs`` rooted at .hg/
1396 1396
1397 1397 requirements
1398 1398 ``set`` of bytestrings representing repository opening requirements.
1399 1399
1400 1400 supportedrequirements
1401 1401 ``set`` of bytestrings representing repository requirements that we
1402 1402 know how to open. May be a superset of ``requirements``.
1403 1403
1404 1404 sharedpath
1405 1405 ``bytes`` Defining path to storage base directory. Points to a
1406 1406 ``.hg/`` directory somewhere.
1407 1407
1408 1408 store
1409 1409 ``store.basicstore`` (or derived) instance providing access to
1410 1410 versioned storage.
1411 1411
1412 1412 cachevfs
1413 1413 ``vfs.vfs`` used for cache files.
1414 1414
1415 1415 wcachevfs
1416 1416 ``vfs.vfs`` used for cache files related to the working copy.
1417 1417
1418 1418 features
1419 1419 ``set`` of bytestrings defining features/capabilities of this
1420 1420 instance.
1421 1421
1422 1422 intents
1423 1423 ``set`` of system strings indicating what this repo will be used
1424 1424 for.
1425 1425 """
1426 1426 self.baseui = baseui
1427 1427 self.ui = ui
1428 1428 self.origroot = origroot
1429 1429 # vfs rooted at working directory.
1430 1430 self.wvfs = wdirvfs
1431 1431 self.root = wdirvfs.base
1432 1432 # vfs rooted at .hg/. Used to access most non-store paths.
1433 1433 self.vfs = hgvfs
1434 1434 self.path = hgvfs.base
1435 1435 self.requirements = requirements
1436 1436 self.nodeconstants = sha1nodeconstants
1437 1437 self.nullid = self.nodeconstants.nullid
1438 1438 self.supported = supportedrequirements
1439 1439 self.sharedpath = sharedpath
1440 1440 self.store = store
1441 1441 self.cachevfs = cachevfs
1442 1442 self.wcachevfs = wcachevfs
1443 1443 self.features = features
1444 1444
1445 1445 self.filtername = None
1446 1446
1447 1447 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1448 1448 b'devel', b'check-locks'
1449 1449 ):
1450 1450 self.vfs.audit = self._getvfsward(self.vfs.audit)
1451 1451 # A list of callbacks to shape the phase if no data were found.
1452 1452 # Callbacks are in the form: func(repo, roots) --> processed root.
1453 1453 # This list is to be filled by extensions during repo setup
1454 1454 self._phasedefaults = []
1455 1455
1456 1456 color.setup(self.ui)
1457 1457
1458 1458 self.spath = self.store.path
1459 1459 self.svfs = self.store.vfs
1460 1460 self.sjoin = self.store.join
1461 1461 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1462 1462 b'devel', b'check-locks'
1463 1463 ):
1464 1464 if hasattr(self.svfs, 'vfs'): # this is filtervfs
1465 1465 self.svfs.vfs.audit = self._getsvfsward(self.svfs.vfs.audit)
1466 1466 else: # standard vfs
1467 1467 self.svfs.audit = self._getsvfsward(self.svfs.audit)
1468 1468
1469 1469 self._dirstatevalidatewarned = False
1470 1470
1471 1471 self._branchcaches = branchmap.BranchMapCache()
1472 1472 self._revbranchcache = None
1473 1473 self._filterpats = {}
1474 1474 self._datafilters = {}
1475 1475 self._transref = self._lockref = self._wlockref = None
1476 1476
1477 1477 # A cache for various files under .hg/ that tracks file changes,
1478 1478 # (used by the filecache decorator)
1479 1479 #
1480 1480 # Maps a property name to its util.filecacheentry
1481 1481 self._filecache = {}
1482 1482
1483 1483 # hold sets of revision to be filtered
1484 1484 # should be cleared when something might have changed the filter value:
1485 1485 # - new changesets,
1486 1486 # - phase change,
1487 1487 # - new obsolescence marker,
1488 1488 # - working directory parent change,
1489 1489 # - bookmark changes
1490 1490 self.filteredrevcache = {}
1491 1491
1492 1492 self._dirstate = None
1493 1493 # post-dirstate-status hooks
1494 1494 self._postdsstatus = []
1495 1495
1496 1496 self._pending_narrow_pats = None
1497 1497 self._pending_narrow_pats_dirstate = None
1498 1498
1499 1499 # generic mapping between names and nodes
1500 1500 self.names = namespaces.namespaces()
1501 1501
1502 1502 # Key to signature value.
1503 1503 self._sparsesignaturecache = {}
1504 1504 # Signature to cached matcher instance.
1505 1505 self._sparsematchercache = {}
1506 1506
1507 1507 self._extrafilterid = repoview.extrafilter(ui)
1508 1508
1509 1509 self.filecopiesmode = None
1510 1510 if requirementsmod.COPIESSDC_REQUIREMENT in self.requirements:
1511 1511 self.filecopiesmode = b'changeset-sidedata'
1512 1512
1513 1513 self._wanted_sidedata = set()
1514 1514 self._sidedata_computers = {}
1515 1515 sidedatamod.set_sidedata_spec_for_repo(self)
1516 1516
1517 1517 def _getvfsward(self, origfunc):
1518 1518 """build a ward for self.vfs"""
1519 1519 rref = weakref.ref(self)
1520 1520
1521 1521 def checkvfs(path, mode=None):
1522 1522 ret = origfunc(path, mode=mode)
1523 1523 repo = rref()
1524 1524 if (
1525 1525 repo is None
1526 1526 or not hasattr(repo, '_wlockref')
1527 1527 or not hasattr(repo, '_lockref')
1528 1528 ):
1529 1529 return
1530 1530 if mode in (None, b'r', b'rb'):
1531 1531 return
1532 1532 if path.startswith(repo.path):
1533 1533 # truncate name relative to the repository (.hg)
1534 1534 path = path[len(repo.path) + 1 :]
1535 1535 if path.startswith(b'cache/'):
1536 1536 msg = b'accessing cache with vfs instead of cachevfs: "%s"'
1537 1537 repo.ui.develwarn(msg % path, stacklevel=3, config=b"cache-vfs")
1538 1538 # path prefixes covered by 'lock'
1539 1539 vfs_path_prefixes = (
1540 1540 b'journal.',
1541 1541 b'undo.',
1542 1542 b'strip-backup/',
1543 1543 b'cache/',
1544 1544 )
1545 1545 if any(path.startswith(prefix) for prefix in vfs_path_prefixes):
1546 1546 if repo._currentlock(repo._lockref) is None:
1547 1547 repo.ui.develwarn(
1548 1548 b'write with no lock: "%s"' % path,
1549 1549 stacklevel=3,
1550 1550 config=b'check-locks',
1551 1551 )
1552 1552 elif repo._currentlock(repo._wlockref) is None:
1553 1553 # rest of vfs files are covered by 'wlock'
1554 1554 #
1555 1555 # exclude special files
1556 1556 for prefix in self._wlockfreeprefix:
1557 1557 if path.startswith(prefix):
1558 1558 return
1559 1559 repo.ui.develwarn(
1560 1560 b'write with no wlock: "%s"' % path,
1561 1561 stacklevel=3,
1562 1562 config=b'check-locks',
1563 1563 )
1564 1564 return ret
1565 1565
1566 1566 return checkvfs
1567 1567
1568 1568 def _getsvfsward(self, origfunc):
1569 1569 """build a ward for self.svfs"""
1570 1570 rref = weakref.ref(self)
1571 1571
1572 1572 def checksvfs(path, mode=None):
1573 1573 ret = origfunc(path, mode=mode)
1574 1574 repo = rref()
1575 1575 if repo is None or not hasattr(repo, '_lockref'):
1576 1576 return
1577 1577 if mode in (None, b'r', b'rb'):
1578 1578 return
1579 1579 if path.startswith(repo.sharedpath):
1580 1580 # truncate name relative to the repository (.hg)
1581 1581 path = path[len(repo.sharedpath) + 1 :]
1582 1582 if repo._currentlock(repo._lockref) is None:
1583 1583 repo.ui.develwarn(
1584 1584 b'write with no lock: "%s"' % path, stacklevel=4
1585 1585 )
1586 1586 return ret
1587 1587
1588 1588 return checksvfs
1589 1589
1590 1590 @property
1591 1591 def vfs_map(self):
1592 1592 return {
1593 1593 b'': self.svfs,
1594 1594 b'plain': self.vfs,
1595 1595 b'store': self.svfs,
1596 1596 }
1597 1597
1598 1598 def close(self):
1599 1599 self._writecaches()
1600 1600
1601 1601 def _writecaches(self):
1602 1602 if self._revbranchcache:
1603 1603 self._revbranchcache.write()
1604 1604
1605 1605 def _restrictcapabilities(self, caps):
1606 1606 if self.ui.configbool(b'experimental', b'bundle2-advertise'):
1607 1607 caps = set(caps)
1608 1608 capsblob = bundle2.encodecaps(
1609 1609 bundle2.getrepocaps(self, role=b'client')
1610 1610 )
1611 1611 caps.add(b'bundle2=' + urlreq.quote(capsblob))
1612 1612 if self.ui.configbool(b'experimental', b'narrow'):
1613 1613 caps.add(wireprototypes.NARROWCAP)
1614 1614 return caps
1615 1615
1616 1616 # Don't cache auditor/nofsauditor, or you'll end up with a reference cycle:
1617 1617 # self -> auditor -> self._checknested -> self
1618 1618
1619 1619 @property
1620 1620 def auditor(self):
1621 1621 # This is only used by context.workingctx.match in order to
1622 1622 # detect files in subrepos.
1623 1623 return pathutil.pathauditor(self.root, callback=self._checknested)
1624 1624
1625 1625 @property
1626 1626 def nofsauditor(self):
1627 1627 # This is only used by context.basectx.match in order to detect
1628 1628 # files in subrepos.
1629 1629 return pathutil.pathauditor(
1630 1630 self.root, callback=self._checknested, realfs=False, cached=True
1631 1631 )
1632 1632
1633 1633 def _checknested(self, path):
1634 1634 """Determine if path is a legal nested repository."""
1635 1635 if not path.startswith(self.root):
1636 1636 return False
1637 1637 subpath = path[len(self.root) + 1 :]
1638 1638 normsubpath = util.pconvert(subpath)
1639 1639
1640 1640 # XXX: Checking against the current working copy is wrong in
1641 1641 # the sense that it can reject things like
1642 1642 #
1643 1643 # $ hg cat -r 10 sub/x.txt
1644 1644 #
1645 1645 # if sub/ is no longer a subrepository in the working copy
1646 1646 # parent revision.
1647 1647 #
1648 1648 # However, it can of course also allow things that would have
1649 1649 # been rejected before, such as the above cat command if sub/
1650 1650 # is a subrepository now, but was a normal directory before.
1651 1651 # The old path auditor would have rejected it by mistake since it
1652 1652 # panics when it sees sub/.hg/.
1653 1653 #
1654 1654 # All in all, checking against the working copy seems sensible
1655 1655 # since we want to prevent access to nested repositories on
1656 1656 # the filesystem *now*.
1657 1657 ctx = self[None]
1658 1658 parts = util.splitpath(subpath)
1659 1659 while parts:
1660 1660 prefix = b'/'.join(parts)
1661 1661 if prefix in ctx.substate:
1662 1662 if prefix == normsubpath:
1663 1663 return True
1664 1664 else:
1665 1665 sub = ctx.sub(prefix)
1666 1666 return sub.checknested(subpath[len(prefix) + 1 :])
1667 1667 else:
1668 1668 parts.pop()
1669 1669 return False
1670 1670
1671 1671 def peer(self, path=None, remotehidden=False):
1672 1672 return localpeer(
1673 1673 self, path=path, remotehidden=remotehidden
1674 1674 ) # not cached to avoid reference cycle
1675 1675
1676 1676 def unfiltered(self):
1677 1677 """Return unfiltered version of the repository
1678 1678
1679 1679 Intended to be overwritten by filtered repo."""
1680 1680 return self
1681 1681
1682 1682 def filtered(self, name, visibilityexceptions=None):
1683 1683 """Return a filtered version of a repository
1684 1684
1685 1685 The `name` parameter is the identifier of the requested view. This
1686 1686 will return a repoview object set "exactly" to the specified view.
1687 1687
1688 1688 This function does not apply recursive filtering to a repository. For
1689 1689 example calling `repo.filtered("served")` will return a repoview using
1690 1690 the "served" view, regardless of the initial view used by `repo`.
1691 1691
1692 1692 In other words, there is always only one level of `repoview` "filtering".
1693 1693 """
1694 1694 if self._extrafilterid is not None and b'%' not in name:
1695 1695 name = name + b'%' + self._extrafilterid
1696 1696
1697 1697 cls = repoview.newtype(self.unfiltered().__class__)
1698 1698 return cls(self, name, visibilityexceptions)
1699 1699
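# Illustrative sketch (not part of localrepo.py): because filtering is not
# recursive, calling filtered() on an already-filtered view still yields a
# repoview built directly on the unfiltered repository, set exactly to the
# requested view.
def serve_views(repo):
    served = repo.filtered(b'served')
    # this does not stack filters on top of 'served'; it is simply the
    # 'visible' view of the same underlying repository
    visible = served.filtered(b'visible')
    return served, visible
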
1700 1700 @mixedrepostorecache(
1701 1701 (b'bookmarks', b'plain'),
1702 1702 (b'bookmarks.current', b'plain'),
1703 1703 (b'bookmarks', b''),
1704 1704 (b'00changelog.i', b''),
1705 1705 )
1706 1706 def _bookmarks(self):
1707 1707 # Since the multiple files involved in the transaction cannot be
1708 1708 # written atomically (with the current repository format), there is a race
1709 1709 # condition here.
1710 1710 #
1711 1711 # 1) changelog content A is read
1712 1712 # 2) outside transaction update changelog to content B
1713 1713 # 3) outside transaction update bookmark file referring to content B
1714 1714 # 4) bookmarks file content is read and filtered against changelog-A
1715 1715 #
1716 1716 # When this happens, bookmarks against nodes missing from A are dropped.
1717 1717 #
1718 1718 # Having this happen during a read is not great, but it becomes worse
1719 1719 # when it happens during a write, because the bookmarks to the "unknown"
1720 1720 # nodes will be dropped for good. However, writes happen within locks.
1721 1721 # This locking makes it possible to have a race-free, consistent read.
1722 1722 # For this purpose, data read from disk before locking is
1723 1723 # "invalidated" right after the locks are taken. These invalidations are
1724 1724 # "light": the `filecache` mechanism keeps the data in memory and will
1725 1725 # reuse it if the underlying files did not change. Not parsing the
1726 1726 # same data multiple times helps performance.
1727 1727 #
1728 1728 # Unfortunately, in the case described above, the files tracked by the
1729 1729 # bookmarks file cache might not have changed, but the in-memory
1730 1730 # content is still "wrong" because we used an older changelog content
1731 1731 # to process the on-disk data. So after locking, the changelog would be
1732 1732 # refreshed but `_bookmarks` would be preserved.
1733 1733 # Adding `00changelog.i` to the list of tracked files is not
1734 1734 # enough, because at the time we build the content for `_bookmarks` in
1735 1735 # (4), the changelog file has already diverged from the content used
1736 1736 # for loading `changelog` in (1)
1737 1737 #
1738 1738 # To prevent the issue, we force the changelog to be explicitly
1739 1739 # reloaded while computing `_bookmarks`. The data race can still happen
1740 1740 # without the lock (with a narrower window), but it would no longer go
1741 1741 # undetected during the lock time refresh.
1742 1742 #
1743 1743 # The new schedule is as follows:
1744 1744 #
1745 1745 # 1) filecache logic detect that `_bookmarks` needs to be computed
1746 1746 # 2) cachestat for `bookmarks` and `changelog` are captured (for book)
1747 1747 # 3) We force `changelog` filecache to be tested
1748 1748 # 4) cachestat for `changelog` are captured (for changelog)
1749 1749 # 5) `_bookmarks` is computed and cached
1750 1750 #
1751 1751 # The step in (3) ensures we have a changelog at least as recent as the
1752 1752 # cache stat computed in (1). As a result, at locking time:
1753 1753 # * if the changelog did not change since (1) -> we can reuse the data
1754 1754 # * otherwise -> the bookmarks get refreshed.
1755 1755 self._refreshchangelog()
1756 1756 return bookmarks.bmstore(self)
1757 1757
1758 1758 def _refreshchangelog(self):
1759 1759 """make sure the in memory changelog match the on-disk one"""
1760 1760 if 'changelog' in vars(self) and self.currenttransaction() is None:
1761 1761 del self.changelog
1762 1762
1763 1763 @property
1764 1764 def _activebookmark(self):
1765 1765 return self._bookmarks.active
1766 1766
1767 1767 # _phasesets depend on changelog. what we need is to call
1768 1768 # _phasecache.invalidate() if '00changelog.i' was changed, but it
1769 1769 # can't be easily expressed in filecache mechanism.
1770 1770 @storecache(b'phaseroots', b'00changelog.i')
1771 1771 def _phasecache(self):
1772 1772 return phases.phasecache(self, self._phasedefaults)
1773 1773
1774 1774 @storecache(b'obsstore')
1775 1775 def obsstore(self):
1776 1776 return obsolete.makestore(self.ui, self)
1777 1777
1778 1778 @changelogcache()
1779 1779 def changelog(repo):
1780 1780 # load dirstate before changelog to avoid a race; see issue6303
1781 1781 repo.dirstate.prefetch_parents()
1782 1782 return repo.store.changelog(
1783 1783 txnutil.mayhavepending(repo.root),
1784 1784 concurrencychecker=revlogchecker.get_checker(repo.ui, b'changelog'),
1785 1785 )
1786 1786
1787 1787 @manifestlogcache()
1788 1788 def manifestlog(self):
1789 1789 return self.store.manifestlog(self, self._storenarrowmatch)
1790 1790
1791 1791 @unfilteredpropertycache
1792 1792 def dirstate(self):
1793 1793 if self._dirstate is None:
1794 1794 self._dirstate = self._makedirstate()
1795 1795 else:
1796 1796 self._dirstate.refresh()
1797 1797 return self._dirstate
1798 1798
1799 1799 def _makedirstate(self):
1800 1800 """Extension point for wrapping the dirstate per-repo."""
1801 1801 sparsematchfn = None
1802 1802 if sparse.use_sparse(self):
1803 1803 sparsematchfn = lambda: sparse.matcher(self)
1804 1804 v2_req = requirementsmod.DIRSTATE_V2_REQUIREMENT
1805 1805 th = requirementsmod.DIRSTATE_TRACKED_HINT_V1
1806 1806 use_dirstate_v2 = v2_req in self.requirements
1807 1807 use_tracked_hint = th in self.requirements
1808 1808
1809 1809 return dirstate.dirstate(
1810 1810 self.vfs,
1811 1811 self.ui,
1812 1812 self.root,
1813 1813 self._dirstatevalidate,
1814 1814 sparsematchfn,
1815 1815 self.nodeconstants,
1816 1816 use_dirstate_v2,
1817 1817 use_tracked_hint=use_tracked_hint,
1818 1818 )
1819 1819
1820 1820 def _dirstatevalidate(self, node):
1821 1821 okay = True
1822 1822 try:
1823 1823 self.changelog.rev(node)
1824 1824 except error.LookupError:
1825 1825 # If the parents are unknown, it might just be because the changelog
1826 1826 # in memory is lagging behind the dirstate in memory. So try to
1827 1827 # refresh the changelog first.
1828 1828 #
1829 1829 # We only do so if we don't hold the lock; if we do hold the lock,
1830 1830 # the invalidation at that time should have taken care of this and
1831 1831 # something is very fishy.
1832 1832 if self.currentlock() is None:
1833 1833 self.invalidate()
1834 1834 try:
1835 1835 self.changelog.rev(node)
1836 1836 except error.LookupError:
1837 1837 okay = False
1838 1838 else:
1839 1839 # XXX we should consider raising an error here.
1840 1840 okay = False
1841 1841 if okay:
1842 1842 return node
1843 1843 else:
1844 1844 if not self._dirstatevalidatewarned:
1845 1845 self._dirstatevalidatewarned = True
1846 1846 self.ui.warn(
1847 1847 _(b"warning: ignoring unknown working parent %s!\n")
1848 1848 % short(node)
1849 1849 )
1850 1850 return self.nullid
1851 1851
1852 1852 @storecache(narrowspec.FILENAME)
1853 1853 def narrowpats(self):
1854 1854 """matcher patterns for this repository's narrowspec
1855 1855
1856 1856 A tuple of (includes, excludes).
1857 1857 """
1858 1858 # the narrow management should probably move into its own object
1859 1859 val = self._pending_narrow_pats
1860 1860 if val is None:
1861 1861 val = narrowspec.load(self)
1862 1862 return val
1863 1863
1864 1864 @storecache(narrowspec.FILENAME)
1865 1865 def _storenarrowmatch(self):
1866 1866 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1867 1867 return matchmod.always()
1868 1868 include, exclude = self.narrowpats
1869 1869 return narrowspec.match(self.root, include=include, exclude=exclude)
1870 1870
1871 1871 @storecache(narrowspec.FILENAME)
1872 1872 def _narrowmatch(self):
1873 1873 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1874 1874 return matchmod.always()
1875 1875 narrowspec.checkworkingcopynarrowspec(self)
1876 1876 include, exclude = self.narrowpats
1877 1877 return narrowspec.match(self.root, include=include, exclude=exclude)
1878 1878
1879 1879 def narrowmatch(self, match=None, includeexact=False):
1880 1880 """matcher corresponding to the repo's narrowspec
1881 1881
1882 1882 If `match` is given, then that will be intersected with the narrow
1883 1883 matcher.
1884 1884
1885 1885 If `includeexact` is True, then any exact matches from `match` will
1886 1886 be included even if they're outside the narrowspec.
1887 1887 """
1888 1888 if match:
1889 1889 if includeexact and not self._narrowmatch.always():
1890 1890 # do not exclude explicitly-specified paths so that they can
1891 1891 # be warned later on
1892 1892 em = matchmod.exact(match.files())
1893 1893 nm = matchmod.unionmatcher([self._narrowmatch, em])
1894 1894 return matchmod.intersectmatchers(match, nm)
1895 1895 return matchmod.intersectmatchers(match, self._narrowmatch)
1896 1896 return self._narrowmatch
1897 1897
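# Illustrative sketch (not part of localrepo.py): intersecting a
# command-line matcher with the narrowspec, assuming `repo` is a (possibly
# narrowed) localrepository and `pats` is a list of user-supplied patterns.
from mercurial import match as matchmod

def narrowed_matcher(repo, pats):
    m = matchmod.match(repo.root, repo.getcwd(), pats)
    # keep exact user-specified paths even when they fall outside the
    # narrowspec, so the caller can warn about them later
    return repo.narrowmatch(m, includeexact=True)
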
1898 1898 def setnarrowpats(self, newincludes, newexcludes):
1899 1899 narrowspec.save(self, newincludes, newexcludes)
1900 1900 self.invalidate(clearfilecache=True)
1901 1901
1902 1902 @unfilteredpropertycache
1903 1903 def _quick_access_changeid_null(self):
1904 1904 return {
1905 1905 b'null': (nullrev, self.nodeconstants.nullid),
1906 1906 nullrev: (nullrev, self.nodeconstants.nullid),
1907 1907 self.nullid: (nullrev, self.nullid),
1908 1908 }
1909 1909
1910 1910 @unfilteredpropertycache
1911 1911 def _quick_access_changeid_wc(self):
1912 1912 # also fast path access to the working copy parents
1913 1913 # however, only do it for filters that ensure the wc is visible.
1914 1914 quick = self._quick_access_changeid_null.copy()
1915 1915 cl = self.unfiltered().changelog
1916 1916 for node in self.dirstate.parents():
1917 1917 if node == self.nullid:
1918 1918 continue
1919 1919 rev = cl.index.get_rev(node)
1920 1920 if rev is None:
1921 1921 # unknown working copy parent case:
1922 1922 #
1923 1923 # skip the fast path and let higher code deal with it
1924 1924 continue
1925 1925 pair = (rev, node)
1926 1926 quick[rev] = pair
1927 1927 quick[node] = pair
1928 1928 # also add the parents of the parents
1929 1929 for r in cl.parentrevs(rev):
1930 1930 if r == nullrev:
1931 1931 continue
1932 1932 n = cl.node(r)
1933 1933 pair = (r, n)
1934 1934 quick[r] = pair
1935 1935 quick[n] = pair
1936 1936 p1node = self.dirstate.p1()
1937 1937 if p1node != self.nullid:
1938 1938 quick[b'.'] = quick[p1node]
1939 1939 return quick
1940 1940
1941 1941 @unfilteredmethod
1942 1942 def _quick_access_changeid_invalidate(self):
1943 1943 if '_quick_access_changeid_wc' in vars(self):
1944 1944 del self.__dict__['_quick_access_changeid_wc']
1945 1945
1946 1946 @property
1947 1947 def _quick_access_changeid(self):
1948 1948 """a helper dictionary for __getitem__ calls
1949 1949 
1950 1950 This contains a list of symbols we can recognise right away without
1951 1951 further processing.
1952 1952 """
1953 1953 if self.filtername in repoview.filter_has_wc:
1954 1954 return self._quick_access_changeid_wc
1955 1955 return self._quick_access_changeid_null
1956 1956
1957 1957 def __getitem__(self, changeid):
1958 1958 # dealing with special cases
1959 1959 if changeid is None:
1960 1960 return context.workingctx(self)
1961 1961 if isinstance(changeid, context.basectx):
1962 1962 return changeid
1963 1963
1964 1964 # dealing with multiple revisions
1965 1965 if isinstance(changeid, slice):
1966 1966 # wdirrev isn't contiguous so the slice shouldn't include it
1967 1967 return [
1968 1968 self[i]
1969 1969 for i in range(*changeid.indices(len(self)))
1970 1970 if i not in self.changelog.filteredrevs
1971 1971 ]
1972 1972
1973 1973 # dealing with some special values
1974 1974 quick_access = self._quick_access_changeid.get(changeid)
1975 1975 if quick_access is not None:
1976 1976 rev, node = quick_access
1977 1977 return context.changectx(self, rev, node, maybe_filtered=False)
1978 1978 if changeid == b'tip':
1979 1979 node = self.changelog.tip()
1980 1980 rev = self.changelog.rev(node)
1981 1981 return context.changectx(self, rev, node)
1982 1982
1983 1983 # dealing with arbitrary values
1984 1984 try:
1985 1985 if isinstance(changeid, int):
1986 1986 node = self.changelog.node(changeid)
1987 1987 rev = changeid
1988 1988 elif changeid == b'.':
1989 1989 # this is a hack to delay/avoid loading obsmarkers
1990 1990 # when we know that '.' won't be hidden
1991 1991 node = self.dirstate.p1()
1992 1992 rev = self.unfiltered().changelog.rev(node)
1993 1993 elif len(changeid) == self.nodeconstants.nodelen:
1994 1994 try:
1995 1995 node = changeid
1996 1996 rev = self.changelog.rev(changeid)
1997 1997 except error.FilteredLookupError:
1998 1998 changeid = hex(changeid) # for the error message
1999 1999 raise
2000 2000 except LookupError:
2001 2001 # check if it might have come from damaged dirstate
2002 2002 #
2003 2003 # XXX we could avoid the unfiltered if we had a recognizable
2004 2004 # exception for filtered changeset access
2005 2005 if (
2006 2006 self.local()
2007 2007 and changeid in self.unfiltered().dirstate.parents()
2008 2008 ):
2009 2009 msg = _(b"working directory has unknown parent '%s'!")
2010 2010 raise error.Abort(msg % short(changeid))
2011 2011 changeid = hex(changeid) # for the error message
2012 2012 raise
2013 2013
2014 2014 elif len(changeid) == 2 * self.nodeconstants.nodelen:
2015 2015 node = bin(changeid)
2016 2016 rev = self.changelog.rev(node)
2017 2017 else:
2018 2018 raise error.ProgrammingError(
2019 2019 b"unsupported changeid '%s' of type %s"
2020 2020 % (changeid, pycompat.bytestr(type(changeid)))
2021 2021 )
2022 2022
2023 2023 return context.changectx(self, rev, node)
2024 2024
2025 2025 except (error.FilteredIndexError, error.FilteredLookupError):
2026 2026 raise error.FilteredRepoLookupError(
2027 2027 _(b"filtered revision '%s'") % pycompat.bytestr(changeid)
2028 2028 )
2029 2029 except (IndexError, LookupError):
2030 2030 raise error.RepoLookupError(
2031 2031 _(b"unknown revision '%s'") % pycompat.bytestr(changeid)
2032 2032 )
2033 2033 except error.WdirUnsupported:
2034 2034 return context.workingctx(self)
2035 2035
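# Illustrative sketch (not part of localrepo.py): the kinds of changeids
# accepted by __getitem__ above, assuming `repo` is a localrepository with
# at least one revision.
from mercurial.node import bin

def lookup_examples(repo):
    wctx = repo[None]        # working directory context
    tip = repo[b'tip']       # symbolic name handled explicitly above
    first = repo[0]          # integer revision number
    dot = repo[b'.']         # first parent of the working directory
    # 20-byte binary nodes and 40-character hex strings are also accepted
    same = repo[bin(tip.hex())]
    return wctx, tip, first, dot, same
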
2036 2036 def __contains__(self, changeid):
2037 2037 """True if the given changeid exists"""
2038 2038 try:
2039 2039 self[changeid]
2040 2040 return True
2041 2041 except error.RepoLookupError:
2042 2042 return False
2043 2043
2044 2044 def __nonzero__(self):
2045 2045 return True
2046 2046
2047 2047 __bool__ = __nonzero__
2048 2048
2049 2049 def __len__(self):
2050 2050 # no need to pay the cost of repoview.changelog
2051 2051 unfi = self.unfiltered()
2052 2052 return len(unfi.changelog)
2053 2053
2054 2054 def __iter__(self):
2055 2055 return iter(self.changelog)
2056 2056
2057 2057 def revs(self, expr: bytes, *args):
2058 2058 """Find revisions matching a revset.
2059 2059
2060 2060 The revset is specified as a string ``expr`` that may contain
2061 2061 %-formatting to escape certain types. See ``revsetlang.formatspec``.
2062 2062
2063 2063 Revset aliases from the configuration are not expanded. To expand
2064 2064 user aliases, consider calling ``scmutil.revrange()`` or
2065 2065 ``repo.anyrevs([expr], user=True)``.
2066 2066
2067 2067 Returns a smartset.abstractsmartset, which is a list-like interface
2068 2068 that contains integer revisions.
2069 2069 """
2070 2070 tree = revsetlang.spectree(expr, *args)
2071 2071 return revset.makematcher(tree)(self)
2072 2072
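# Illustrative sketch (not part of localrepo.py): using revs() with
# %-formatting so that caller-provided values need no manual quoting.
# `nodes` is assumed to be a list of binary node ids and `branch` a bytes
# branch name.
def draft_descendants(repo, nodes, branch):
    # %ln expands a list of nodes, %s a quoted string
    # (see revsetlang.formatspec for the full list of specifiers)
    return repo.revs(b'(%ln::) and branch(%s) and draft()', nodes, branch)
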
2073 2073 def set(self, expr: bytes, *args):
2074 2074 """Find revisions matching a revset and emit changectx instances.
2075 2075
2076 2076 This is a convenience wrapper around ``revs()`` that iterates the
2077 2077 result and is a generator of changectx instances.
2078 2078
2079 2079 Revset aliases from the configuration are not expanded. To expand
2080 2080 user aliases, consider calling ``scmutil.revrange()``.
2081 2081 """
2082 2082 for r in self.revs(expr, *args):
2083 2083 yield self[r]
2084 2084
2085 2085 def anyrevs(self, specs: bytes, user=False, localalias=None):
2086 2086 """Find revisions matching one of the given revsets.
2087 2087
2088 2088 Revset aliases from the configuration are not expanded by default. To
2089 2089 expand user aliases, specify ``user=True``. To provide some local
2090 2090 definitions overriding user aliases, set ``localalias`` to
2091 2091 ``{name: definitionstring}``.
2092 2092 """
2093 2093 if specs == [b'null']:
2094 2094 return revset.baseset([nullrev])
2095 2095 if specs == [b'.']:
2096 2096 quick_data = self._quick_access_changeid.get(b'.')
2097 2097 if quick_data is not None:
2098 2098 return revset.baseset([quick_data[0]])
2099 2099 if user:
2100 2100 m = revset.matchany(
2101 2101 self.ui,
2102 2102 specs,
2103 2103 lookup=revset.lookupfn(self),
2104 2104 localalias=localalias,
2105 2105 )
2106 2106 else:
2107 2107 m = revset.matchany(None, specs, localalias=localalias)
2108 2108 return m(self)
2109 2109
2110 2110 def url(self) -> bytes:
2111 2111 return b'file:' + self.root
2112 2112
2113 2113 def hook(self, name, throw=False, **args):
2114 2114 """Call a hook, passing this repo instance.
2115 2115
2116 2116 This is a convenience method to aid invoking hooks. Extensions likely
2117 2117 won't call this unless they have registered a custom hook or are
2118 2118 replacing code that is expected to call a hook.
2119 2119 """
2120 2120 return hook.hook(self.ui, self, name, throw, **args)
2121 2121
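# Illustrative sketch (not part of localrepo.py): an extension firing its
# own hook through the convenience wrapper above. The hook name
# 'myextension-update' is hypothetical; `node` is a binary changeset id.
from mercurial.node import hex as hexnode

def fire_custom_hook(repo, node):
    repo.hook(b'myextension-update', throw=False, node=hexnode(node))
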
2122 2122 @filteredpropertycache
2123 2123 def _tagscache(self):
2124 2124 """Returns a tagscache object that contains various tags related
2125 2125 caches."""
2126 2126
2127 2127 # This simplifies its cache management by having one decorated
2128 2128 # function (this one) and the rest simply fetch things from it.
2129 2129 class tagscache:
2130 2130 def __init__(self):
2131 2131 # These two define the set of tags for this repository. tags
2132 2132 # maps tag name to node; tagtypes maps tag name to 'global' or
2133 2133 # 'local'. (Global tags are defined by .hgtags across all
2134 2134 # heads, and local tags are defined in .hg/localtags.)
2135 2135 # They constitute the in-memory cache of tags.
2136 2136 self.tags = self.tagtypes = None
2137 2137
2138 2138 self.nodetagscache = self.tagslist = None
2139 2139
2140 2140 cache = tagscache()
2141 2141 cache.tags, cache.tagtypes = self._findtags()
2142 2142
2143 2143 return cache
2144 2144
2145 2145 def tags(self):
2146 2146 '''return a mapping of tag to node'''
2147 2147 t = {}
2148 2148 if self.changelog.filteredrevs:
2149 2149 tags, tt = self._findtags()
2150 2150 else:
2151 2151 tags = self._tagscache.tags
2152 2152 rev = self.changelog.rev
2153 2153 for k, v in tags.items():
2154 2154 try:
2155 2155 # ignore tags to unknown nodes
2156 2156 rev(v)
2157 2157 t[k] = v
2158 2158 except (error.LookupError, ValueError):
2159 2159 pass
2160 2160 return t
2161 2161
2162 2162 def _findtags(self):
2163 2163 """Do the hard work of finding tags. Return a pair of dicts
2164 2164 (tags, tagtypes) where tags maps tag name to node, and tagtypes
2165 2165 maps tag name to a string like \'global\' or \'local\'.
2166 2166 Subclasses or extensions are free to add their own tags, but
2167 2167 should be aware that the returned dicts will be retained for the
2168 2168 duration of the localrepo object."""
2169 2169
2170 2170 # XXX what tagtype should subclasses/extensions use? Currently
2171 2171 # mq and bookmarks add tags, but do not set the tagtype at all.
2172 2172 # Should each extension invent its own tag type? Should there
2173 2173 # be one tagtype for all such "virtual" tags? Or is the status
2174 2174 # quo fine?
2175 2175
2176 2176 # map tag name to (node, hist)
2177 2177 alltags = tagsmod.findglobaltags(self.ui, self)
2178 2178 # map tag name to tag type
2179 2179 tagtypes = {tag: b'global' for tag in alltags}
2180 2180
2181 2181 tagsmod.readlocaltags(self.ui, self, alltags, tagtypes)
2182 2182
2183 2183 # Build the return dicts. Have to re-encode tag names because
2184 2184 # the tags module always uses UTF-8 (in order not to lose info
2185 2185 # writing to the cache), but the rest of Mercurial wants them in
2186 2186 # local encoding.
2187 2187 tags = {}
2188 2188 for name, (node, hist) in alltags.items():
2189 2189 if node != self.nullid:
2190 2190 tags[encoding.tolocal(name)] = node
2191 2191 tags[b'tip'] = self.changelog.tip()
2192 2192 tagtypes = {
2193 2193 encoding.tolocal(name): value for (name, value) in tagtypes.items()
2194 2194 }
2195 2195 return (tags, tagtypes)
2196 2196
2197 2197 def tagtype(self, tagname):
2198 2198 """
2199 2199 return the type of the given tag. result can be:
2200 2200
2201 2201 'local' : a local tag
2202 2202 'global' : a global tag
2203 2203 None : tag does not exist
2204 2204 """
2205 2205
2206 2206 return self._tagscache.tagtypes.get(tagname)
2207 2207
2208 2208 def tagslist(self):
2209 2209 '''return a list of tags ordered by revision'''
2210 2210 if not self._tagscache.tagslist:
2211 2211 l = []
2212 2212 for t, n in self.tags().items():
2213 2213 l.append((self.changelog.rev(n), t, n))
2214 2214 self._tagscache.tagslist = [(t, n) for r, t, n in sorted(l)]
2215 2215
2216 2216 return self._tagscache.tagslist
2217 2217
2218 2218 def nodetags(self, node):
2219 2219 '''return the tags associated with a node'''
2220 2220 if not self._tagscache.nodetagscache:
2221 2221 nodetagscache = {}
2222 2222 for t, n in self._tagscache.tags.items():
2223 2223 nodetagscache.setdefault(n, []).append(t)
2224 2224 for tags in nodetagscache.values():
2225 2225 tags.sort()
2226 2226 self._tagscache.nodetagscache = nodetagscache
2227 2227 return self._tagscache.nodetagscache.get(node, [])
2228 2228
2229 2229 def nodebookmarks(self, node):
2230 2230 """return the list of bookmarks pointing to the specified node"""
2231 2231 return self._bookmarks.names(node)
2232 2232
2233 2233 def branchmap(self):
2234 2234 """returns a dictionary {branch: [branchheads]} with branchheads
2235 2235 ordered by increasing revision number"""
2236 2236 return self._branchcaches[self]
2237 2237
2238 2238 @unfilteredmethod
2239 2239 def revbranchcache(self):
2240 2240 if not self._revbranchcache:
2241 2241 self._revbranchcache = branchmap.revbranchcache(self.unfiltered())
2242 2242 return self._revbranchcache
2243 2243
2244 2244 def register_changeset(self, rev, changelogrevision):
2245 2245 self.revbranchcache().setdata(rev, changelogrevision)
2246 2246
2247 2247 def branchtip(self, branch, ignoremissing=False):
2248 2248 """return the tip node for a given branch
2249 2249
2250 2250 If ignoremissing is True, then this method will not raise an error.
2251 2251 This is helpful for callers that only expect None for a missing branch
2252 2252 (e.g. namespace).
2253 2253
2254 2254 """
2255 2255 try:
2256 2256 return self.branchmap().branchtip(branch)
2257 2257 except KeyError:
2258 2258 if not ignoremissing:
2259 2259 raise error.RepoLookupError(_(b"unknown branch '%s'") % branch)
2260 2260 else:
2261 2261 pass
2262 2262
2263 2263 def lookup(self, key):
2264 2264 node = scmutil.revsymbol(self, key).node()
2265 2265 if node is None:
2266 2266 raise error.RepoLookupError(_(b"unknown revision '%s'") % key)
2267 2267 return node
2268 2268
2269 2269 def lookupbranch(self, key):
2270 2270 if self.branchmap().hasbranch(key):
2271 2271 return key
2272 2272
2273 2273 return scmutil.revsymbol(self, key).branch()
2274 2274
2275 2275 def known(self, nodes):
2276 2276 cl = self.changelog
2277 2277 get_rev = cl.index.get_rev
2278 2278 filtered = cl.filteredrevs
2279 2279 result = []
2280 2280 for n in nodes:
2281 2281 r = get_rev(n)
2282 2282 resp = not (r is None or r in filtered)
2283 2283 result.append(resp)
2284 2284 return result
2285 2285
2286 2286 def local(self):
2287 2287 return self
2288 2288
2289 2289 def publishing(self):
2290 2290 # it's safe (and desirable) to trust the publish flag unconditionally
2291 2291 # so that we don't finalize changes shared between users via ssh or nfs
2292 2292 return self.ui.configbool(b'phases', b'publish', untrusted=True)
2293 2293
2294 2294 def cancopy(self):
2295 2295 # so statichttprepo's override of local() works
2296 2296 if not self.local():
2297 2297 return False
2298 2298 if not self.publishing():
2299 2299 return True
2300 2300 # if publishing we can't copy if there is filtered content
2301 2301 return not self.filtered(b'visible').changelog.filteredrevs
2302 2302
2303 2303 def shared(self):
2304 2304 '''the type of shared repository (None if not shared)'''
2305 2305 if self.sharedpath != self.path:
2306 2306 return b'store'
2307 2307 return None
2308 2308
2309 2309 def wjoin(self, f: bytes, *insidef: bytes) -> bytes:
2310 2310 return self.vfs.reljoin(self.root, f, *insidef)
2311 2311
2312 2312 def setparents(self, p1, p2=None):
2313 2313 if p2 is None:
2314 2314 p2 = self.nullid
2315 2315 self[None].setparents(p1, p2)
2316 2316 self._quick_access_changeid_invalidate()
2317 2317
2318 2318 def filectx(self, path: bytes, changeid=None, fileid=None, changectx=None):
2319 2319 """changeid must be a changeset revision, if specified.
2320 2320 fileid can be a file revision or node."""
2321 2321 return context.filectx(
2322 2322 self, path, changeid, fileid, changectx=changectx
2323 2323 )
2324 2324
2325 2325 def getcwd(self) -> bytes:
2326 2326 return self.dirstate.getcwd()
2327 2327
2328 2328 def pathto(self, f: bytes, cwd: Optional[bytes] = None) -> bytes:
2329 2329 return self.dirstate.pathto(f, cwd)
2330 2330
2331 2331 def _loadfilter(self, filter):
2332 2332 if filter not in self._filterpats:
2333 2333 l = []
2334 2334 for pat, cmd in self.ui.configitems(filter):
2335 2335 if cmd == b'!':
2336 2336 continue
2337 2337 mf = matchmod.match(self.root, b'', [pat])
2338 2338 fn = None
2339 2339 params = cmd
2340 2340 for name, filterfn in self._datafilters.items():
2341 2341 if cmd.startswith(name):
2342 2342 fn = filterfn
2343 2343 params = cmd[len(name) :].lstrip()
2344 2344 break
2345 2345 if not fn:
2346 2346 fn = lambda s, c, **kwargs: procutil.filter(s, c)
2347 2347 fn.__name__ = 'commandfilter'
2348 2348 # Wrap old filters not supporting keyword arguments
2349 2349 if not pycompat.getargspec(fn)[2]:
2350 2350 oldfn = fn
2351 2351 fn = lambda s, c, oldfn=oldfn, **kwargs: oldfn(s, c)
2352 2352 fn.__name__ = 'compat-' + oldfn.__name__
2353 2353 l.append((mf, fn, params))
2354 2354 self._filterpats[filter] = l
2355 2355 return self._filterpats[filter]
2356 2356
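# Illustrative sketch (not part of localrepo.py): _loadfilter above reads
# the [encode] and [decode] config sections, where each entry maps a file
# pattern to a filter command. A hypothetical hgrc could contain:
#
#   [encode]
#   # uncompress gzip files on checkin to improve delta compression
#   *.gz = pipe: gunzip
#
#   [decode]
#   *.gz = pipe: gzip
#
# An entry whose command is "!" is skipped, and a command starting with a
# name registered via adddatafilter() is dispatched to that Python filter
# instead of being piped through a shell command.
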
2357 2357 def _filter(self, filterpats, filename, data):
2358 2358 for mf, fn, cmd in filterpats:
2359 2359 if mf(filename):
2360 2360 self.ui.debug(
2361 2361 b"filtering %s through %s\n"
2362 2362 % (filename, cmd or pycompat.sysbytes(fn.__name__))
2363 2363 )
2364 2364 data = fn(data, cmd, ui=self.ui, repo=self, filename=filename)
2365 2365 break
2366 2366
2367 2367 return data
2368 2368
2369 2369 @unfilteredpropertycache
2370 2370 def _encodefilterpats(self):
2371 2371 return self._loadfilter(b'encode')
2372 2372
2373 2373 @unfilteredpropertycache
2374 2374 def _decodefilterpats(self):
2375 2375 return self._loadfilter(b'decode')
2376 2376
2377 2377 def adddatafilter(self, name, filter):
2378 2378 self._datafilters[name] = filter
2379 2379
2380 2380 def wread(self, filename: bytes) -> bytes:
2381 2381 if self.wvfs.islink(filename):
2382 2382 data = self.wvfs.readlink(filename)
2383 2383 else:
2384 2384 data = self.wvfs.read(filename)
2385 2385 return self._filter(self._encodefilterpats, filename, data)
2386 2386
2387 2387 def wwrite(
2388 2388 self,
2389 2389 filename: bytes,
2390 2390 data: bytes,
2391 2391 flags: bytes,
2392 2392 backgroundclose=False,
2393 2393 **kwargs
2394 2394 ) -> int:
2395 2395 """write ``data`` into ``filename`` in the working directory
2396 2396
2397 2397 This returns length of written (maybe decoded) data.
2398 2398 """
2399 2399 data = self._filter(self._decodefilterpats, filename, data)
2400 2400 if b'l' in flags:
2401 2401 self.wvfs.symlink(data, filename)
2402 2402 else:
2403 2403 self.wvfs.write(
2404 2404 filename, data, backgroundclose=backgroundclose, **kwargs
2405 2405 )
2406 2406 if b'x' in flags:
2407 2407 self.wvfs.setflags(filename, False, True)
2408 2408 else:
2409 2409 self.wvfs.setflags(filename, False, False)
2410 2410 return len(data)
2411 2411
2412 2412 def wwritedata(self, filename: bytes, data: bytes) -> bytes:
2413 2413 return self._filter(self._decodefilterpats, filename, data)
2414 2414
2415 2415 def currenttransaction(self):
2416 2416 """return the current transaction or None if none exists"""
2417 2417 if self._transref:
2418 2418 tr = self._transref()
2419 2419 else:
2420 2420 tr = None
2421 2421
2422 2422 if tr and tr.running():
2423 2423 return tr
2424 2424 return None
2425 2425
2426 2426 def transaction(self, desc, report=None):
2427 2427 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
2428 2428 b'devel', b'check-locks'
2429 2429 ):
2430 2430 if self._currentlock(self._lockref) is None:
2431 2431 raise error.ProgrammingError(b'transaction requires locking')
2432 2432 tr = self.currenttransaction()
2433 2433 if tr is not None:
2434 2434 return tr.nest(name=desc)
2435 2435
2436 2436 # abort here if the journal already exists
2437 2437 if self.svfs.exists(b"journal"):
2438 2438 raise error.RepoError(
2439 2439 _(b"abandoned transaction found"),
2440 2440 hint=_(b"run 'hg recover' to clean up transaction"),
2441 2441 )
2442 2442
2443 2443 # At that point your dirstate should be clean:
2444 2444 #
2445 2445 # - If you don't have the wlock, why would you still have a dirty
2446 2446 # dirstate ?
2447 2447 #
2448 2448 # - If you hold the wlock, you should not be opening a transaction in
2449 2449 # the middle of a `dirstate.changing_*` block. The transaction needs to
2450 2450 # be open before that and wrap the change-context.
2451 2451 #
2452 2452 # - If you are not within a `dirstate.changing_*` context, why is our
2453 2453 # dirstate dirty?
2454 2454 if self.dirstate._dirty:
2455 2455 m = "cannot open a transaction with a dirty dirstate"
2456 2456 raise error.ProgrammingError(m)
2457 2457
2458 2458 idbase = b"%.40f#%f" % (random.random(), time.time())
2459 2459 ha = hex(hashutil.sha1(idbase).digest())
2460 2460 txnid = b'TXN:' + ha
2461 2461 self.hook(b'pretxnopen', throw=True, txnname=desc, txnid=txnid)
2462 2462
2463 2463 self._writejournal(desc)
2464 2464 if report:
2465 2465 rp = report
2466 2466 else:
2467 2467 rp = self.ui.warn
2468 2468 vfsmap = self.vfs_map
2469 2469 # we must avoid cyclic reference between repo and transaction.
2470 2470 reporef = weakref.ref(self)
2471 2471 # Code to track tag movement
2472 2472 #
2473 2473 # Since tags are all handled as file content, it is actually quite hard
2474 2474 # to track these movements from a code perspective. So we fall back to
2475 2475 # tracking at the repository level. One could envision tracking changes
2476 2476 # to the '.hgtags' file through changegroup apply, but that fails to
2477 2477 # cope with cases where a transaction exposes new heads without a changegroup
2478 2478 # being involved (eg: phase movement).
2479 2479 #
2480 2480 # For now, we gate the feature behind a flag since this likely comes
2481 2481 # with performance impacts. The current code runs more often than needed
2482 2482 # and does not use caches as much as it could. The current focus is on
2483 2483 # the behavior of the feature so we disable it by default. The flag
2484 2484 # will be removed when we are happy with the performance impact.
2485 2485 #
2486 2486 # Once this feature is no longer experimental move the following
2487 2487 # documentation to the appropriate help section:
2488 2488 #
2489 2489 # The ``HG_TAG_MOVED`` variable will be set if the transaction touched
2490 2490 # tags (new or changed or deleted tags). In addition the details of
2491 2491 # these changes are made available in a file at:
2492 2492 # ``REPOROOT/.hg/changes/tags.changes``.
2493 2493 # Make sure you check for HG_TAG_MOVED before reading that file as it
2494 2494 # might exist from a previous transaction even if no tags were touched
2495 2495 # in this one. Changes are recorded in a line-based format::
2496 2496 #
2497 2497 # <action> <hex-node> <tag-name>\n
2498 2498 #
2499 2499 # Actions are defined as follows:
2500 2500 # "-R": tag is removed,
2501 2501 # "+A": tag is added,
2502 2502 # "-M": tag is moved (old value),
2503 2503 # "+M": tag is moved (new value),
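# For example, a hypothetical transaction that moves tag "v1.2" and adds
# tag "v1.3" would record lines such as:
#
# -M <old-hex-node> v1.2
# +M <new-hex-node> v1.2
# +A <hex-node> v1.3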
2504 2504 tracktags = lambda x: None
2505 2505 # experimental config: experimental.hook-track-tags
2506 2506 shouldtracktags = self.ui.configbool(
2507 2507 b'experimental', b'hook-track-tags'
2508 2508 )
2509 2509 if desc != b'strip' and shouldtracktags:
2510 2510 oldheads = self.changelog.headrevs()
2511 2511
2512 2512 def tracktags(tr2):
2513 2513 repo = reporef()
2514 2514 assert repo is not None # help pytype
2515 2515 oldfnodes = tagsmod.fnoderevs(repo.ui, repo, oldheads)
2516 2516 newheads = repo.changelog.headrevs()
2517 2517 newfnodes = tagsmod.fnoderevs(repo.ui, repo, newheads)
2518 2518 # notes: we compare lists here.
2519 2519 # As we do it only once, building a set would not be cheaper
2520 2520 changes = tagsmod.difftags(repo.ui, repo, oldfnodes, newfnodes)
2521 2521 if changes:
2522 2522 tr2.hookargs[b'tag_moved'] = b'1'
2523 2523 with repo.vfs(
2524 2524 b'changes/tags.changes', b'w', atomictemp=True
2525 2525 ) as changesfile:
2526 2526 # note: we do not register the file with the transaction
2527 2527 # because we need it to still exist when the transaction
2528 2528 # is closed (for txnclose hooks)
2529 2529 tagsmod.writediff(changesfile, changes)
2530 2530
2531 2531 def validate(tr2):
2532 2532 """will run pre-closing hooks"""
2533 2533 # XXX the transaction API is a bit lacking here so we take a hacky
2534 2534 # path for now
2535 2535 #
2536 2536 # We cannot add this as a "pending" hook since the 'tr.hookargs'
2537 2537 # dict is copied before these run. In addition, we need the data
2538 2538 # available to in-memory hooks too.
2539 2539 #
2540 2540 # Moreover, we also need to make sure this runs before txnclose
2541 2541 # hooks and there is no "pending" mechanism that would execute
2542 2542 # logic only if hooks are about to run.
2543 2543 #
2544 2544 # Fixing this limitation of the transaction is also needed to track
2545 2545 # other families of changes (bookmarks, phases, obsolescence).
2546 2546 #
2547 2547 # This will have to be fixed before we remove the experimental
2548 2548 # gating.
2549 2549 tracktags(tr2)
2550 2550 repo = reporef()
2551 2551 assert repo is not None # help pytype
2552 2552
2553 2553 singleheadopt = (b'experimental', b'single-head-per-branch')
2554 2554 singlehead = repo.ui.configbool(*singleheadopt)
2555 2555 if singlehead:
2556 2556 singleheadsub = repo.ui.configsuboptions(*singleheadopt)[1]
2557 2557 accountclosed = singleheadsub.get(
2558 2558 b"account-closed-heads", False
2559 2559 )
2560 2560 if singleheadsub.get(b"public-changes-only", False):
2561 2561 filtername = b"immutable"
2562 2562 else:
2563 2563 filtername = b"visible"
2564 2564 scmutil.enforcesinglehead(
2565 2565 repo, tr2, desc, accountclosed, filtername
2566 2566 )
2567 2567 if hook.hashook(repo.ui, b'pretxnclose-bookmark'):
2568 2568 for name, (old, new) in sorted(
2569 2569 tr.changes[b'bookmarks'].items()
2570 2570 ):
2571 2571 args = tr.hookargs.copy()
2572 2572 args.update(bookmarks.preparehookargs(name, old, new))
2573 2573 repo.hook(
2574 2574 b'pretxnclose-bookmark',
2575 2575 throw=True,
2576 2576 **pycompat.strkwargs(args)
2577 2577 )
2578 2578 if hook.hashook(repo.ui, b'pretxnclose-phase'):
2579 2579 cl = repo.unfiltered().changelog
2580 2580 for revs, (old, new) in tr.changes[b'phases']:
2581 2581 for rev in revs:
2582 2582 args = tr.hookargs.copy()
2583 2583 node = hex(cl.node(rev))
2584 2584 args.update(phases.preparehookargs(node, old, new))
2585 2585 repo.hook(
2586 2586 b'pretxnclose-phase',
2587 2587 throw=True,
2588 2588 **pycompat.strkwargs(args)
2589 2589 )
2590 2590
2591 2591 repo.hook(
2592 2592 b'pretxnclose', throw=True, **pycompat.strkwargs(tr.hookargs)
2593 2593 )
2594 2594
2595 2595 def releasefn(tr, success):
2596 2596 repo = reporef()
2597 2597 if repo is None:
2598 2598 # If the repo has been GC'd (and this release function is being
2599 2599 # called from transaction.__del__), there's not much we can do,
2600 2600 # so just leave the unfinished transaction there and let the
2601 2601 # user run `hg recover`.
2602 2602 return
2603 2603 if success:
2604 2604 # this should be explicitly invoked here, because
2605 2605 # in-memory changes aren't written out when closing the
2606 2606 # transaction, if tr.addfilegenerator (via
2607 2607 # dirstate.write or so) isn't invoked while the
2608 2608 # transaction is running
2609 2609 repo.dirstate.write(None)
2610 2610 else:
2611 2611 # discard all changes (including ones already written
2612 2612 # out) in this transaction
2613 2613 repo.invalidate(clearfilecache=True)
2614 2614
2615 2615 tr = transaction.transaction(
2616 2616 rp,
2617 2617 self.svfs,
2618 2618 vfsmap,
2619 2619 b"journal",
2620 2620 b"undo",
2621 2621 lambda: None,
2622 2622 self.store.createmode,
2623 2623 validator=validate,
2624 2624 releasefn=releasefn,
2625 2625 checkambigfiles=_cachedfiles,
2626 2626 name=desc,
2627 2627 )
2628 2628 for vfs_id, path in self._journalfiles():
2629 2629 tr.add_journal(vfs_id, path)
2630 2630 tr.changes[b'origrepolen'] = len(self)
2631 2631 tr.changes[b'obsmarkers'] = set()
2632 2632 tr.changes[b'phases'] = []
2633 2633 tr.changes[b'bookmarks'] = {}
2634 2634
2635 2635 tr.hookargs[b'txnid'] = txnid
2636 2636 tr.hookargs[b'txnname'] = desc
2637 2637 tr.hookargs[b'changes'] = tr.changes
2638 2638 # note: writing the fncache only during finalize means that the file is
2639 2639 # outdated when running hooks. As fncache is used for streaming clones,
2640 2640 # this is not expected to break anything that happens during the hooks.
2641 2641 tr.addfinalize(b'flush-fncache', self.store.write)
2642 2642
2643 2643 def txnclosehook(tr2):
2644 2644 """To be run if transaction is successful, will schedule a hook run"""
2645 2645 # Don't reference tr2 in hook() so we don't hold a reference.
2646 2646 # This reduces memory consumption when there are multiple
2647 2647 # transactions per lock. This can likely go away if issue5045
2648 2648 # fixes the function accumulation.
2649 2649 hookargs = tr2.hookargs
2650 2650
2651 2651 def hookfunc(unused_success):
2652 2652 repo = reporef()
2653 2653 assert repo is not None # help pytype
2654 2654
2655 2655 if hook.hashook(repo.ui, b'txnclose-bookmark'):
2656 2656 bmchanges = sorted(tr.changes[b'bookmarks'].items())
2657 2657 for name, (old, new) in bmchanges:
2658 2658 args = tr.hookargs.copy()
2659 2659 args.update(bookmarks.preparehookargs(name, old, new))
2660 2660 repo.hook(
2661 2661 b'txnclose-bookmark',
2662 2662 throw=False,
2663 2663 **pycompat.strkwargs(args)
2664 2664 )
2665 2665
2666 2666 if hook.hashook(repo.ui, b'txnclose-phase'):
2667 2667 cl = repo.unfiltered().changelog
2668 2668 phasemv = sorted(
2669 2669 tr.changes[b'phases'], key=lambda r: r[0][0]
2670 2670 )
2671 2671 for revs, (old, new) in phasemv:
2672 2672 for rev in revs:
2673 2673 args = tr.hookargs.copy()
2674 2674 node = hex(cl.node(rev))
2675 2675 args.update(phases.preparehookargs(node, old, new))
2676 2676 repo.hook(
2677 2677 b'txnclose-phase',
2678 2678 throw=False,
2679 2679 **pycompat.strkwargs(args)
2680 2680 )
2681 2681
2682 2682 repo.hook(
2683 2683 b'txnclose', throw=False, **pycompat.strkwargs(hookargs)
2684 2684 )
2685 2685
2686 2686 repo = reporef()
2687 2687 assert repo is not None # help pytype
2688 2688 repo._afterlock(hookfunc)
2689 2689
2690 2690 tr.addfinalize(b'txnclose-hook', txnclosehook)
2691 2691 # Include a leading "-" to make it happen before the transaction summary
2692 2692 # reports registered via scmutil.registersummarycallback() whose names
2693 2693 # are 00-txnreport etc. That way, the caches will be warm when the
2694 2694 # callbacks run.
2695 2695 tr.addpostclose(b'-warm-cache', self._buildcacheupdater(tr))
2696 2696
2697 2697 def txnaborthook(tr2):
2698 2698 """To be run if transaction is aborted"""
2699 2699 repo = reporef()
2700 2700 assert repo is not None # help pytype
2701 2701 repo.hook(
2702 2702 b'txnabort', throw=False, **pycompat.strkwargs(tr2.hookargs)
2703 2703 )
2704 2704
2705 2705 tr.addabort(b'txnabort-hook', txnaborthook)
2706 2706 # avoid eager cache invalidation. in-memory data should be identical
2707 2707 # to stored data if transaction has no error.
2708 2708 tr.addpostclose(b'refresh-filecachestats', self._refreshfilecachestats)
2709 2709 self._transref = weakref.ref(tr)
2710 2710 scmutil.registersummarycallback(self, tr, desc)
2711 2711 # This only exists to deal with rollback's need to have viable
2712 2712 # parents at the end of the operation. So back up viable parents at the
2713 2713 # time of this operation.
2714 2714 #
2715 2715 # We only do it when the `wlock` is taken, otherwise others might be
2716 2716 # altering the dirstate under us.
2717 2717 #
2718 2718 # This is really not a great way to do this (first, because we cannot
2719 2719 # always do it). More viable alternatives exist:
2720 2720 #
2721 2721 # - backing up only the working copy parents in dedicated files and doing
2722 2722 # a clean "keep-update" to them on `hg rollback`.
2723 2723 #
2724 2724 # - slightly changing the behavior and applying logic similar to "hg
2725 2725 # strip" to pick a working copy destination on `hg rollback`
2726 2726 if self.currentwlock() is not None:
2727 2727 ds = self.dirstate
2728 2728 if not self.vfs.exists(b'branch'):
2729 2729 # force a file to be written if none exists
2730 2730 ds.setbranch(b'default', None)
2731 2731
2732 2732 def backup_dirstate(tr):
2733 2733 for f in ds.all_file_names():
2734 2734 # hardlink backup is okay because `dirstate` is always
2735 2735 # atomically written and possible data files are append-only
2736 2736 # and resistant to trailing data.
2737 2737 tr.addbackup(f, hardlink=True, location=b'plain')
2738 2738
2739 2739 tr.addvalidator(b'dirstate-backup', backup_dirstate)
2740 2740 return tr
2741 2741
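# Illustrative sketch (not part of localrepo.py): the expected calling
# pattern, taking the store lock before opening a transaction as the
# check-locks logic above requires. The transaction name b'illustrative-op'
# is made up.
def run_store_write(repo):
    with repo.lock():
        with repo.transaction(b'illustrative-op') as tr:
            # write store data here and register files/backups with `tr`;
            # leaving the block normally closes the transaction, an
            # exception aborts it
            pass
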
2742 2742 def _journalfiles(self):
2743 2743 return (
2744 2744 (self.svfs, b'journal'),
2745 2745 (self.vfs, b'journal.desc'),
2746 2746 )
2747 2747
2748 2748 def undofiles(self):
2749 2749 return [(vfs, undoname(x)) for vfs, x in self._journalfiles()]
2750 2750
2751 2751 @unfilteredmethod
2752 2752 def _writejournal(self, desc):
2753 2753 self.vfs.write(b"journal.desc", b"%d\n%s\n" % (len(self), desc))
2754 2754
2755 2755 def recover(self):
2756 2756 with self.lock():
2757 2757 if self.svfs.exists(b"journal"):
2758 2758 self.ui.status(_(b"rolling back interrupted transaction\n"))
2759 2759 vfsmap = self.vfs_map
2760 2760 transaction.rollback(
2761 2761 self.svfs,
2762 2762 vfsmap,
2763 2763 b"journal",
2764 2764 self.ui.warn,
2765 2765 checkambigfiles=_cachedfiles,
2766 2766 )
2767 2767 self.invalidate()
2768 2768 return True
2769 2769 else:
2770 2770 self.ui.warn(_(b"no interrupted transaction available\n"))
2771 2771 return False
2772 2772
2773 2773 def rollback(self, dryrun=False, force=False):
2774 2774 wlock = lock = None
2775 2775 try:
2776 2776 wlock = self.wlock()
2777 2777 lock = self.lock()
2778 2778 if self.svfs.exists(b"undo"):
2779 2779 return self._rollback(dryrun, force)
2780 2780 else:
2781 2781 self.ui.warn(_(b"no rollback information available\n"))
2782 2782 return 1
2783 2783 finally:
2784 2784 release(lock, wlock)
2785 2785
2786 2786 @unfilteredmethod # Until we get smarter cache management
2787 2787 def _rollback(self, dryrun, force):
2788 2788 ui = self.ui
2789 2789
2790 2790 parents = self.dirstate.parents()
2791 2791 try:
2792 2792 args = self.vfs.read(b'undo.desc').splitlines()
2793 2793 (oldlen, desc, detail) = (int(args[0]), args[1], None)
2794 2794 if len(args) >= 3:
2795 2795 detail = args[2]
2796 2796 oldtip = oldlen - 1
2797 2797
2798 2798 if detail and ui.verbose:
2799 2799 msg = _(
2800 2800 b'repository tip rolled back to revision %d'
2801 2801 b' (undo %s: %s)\n'
2802 2802 ) % (oldtip, desc, detail)
2803 2803 else:
2804 2804 msg = _(
2805 2805 b'repository tip rolled back to revision %d (undo %s)\n'
2806 2806 ) % (oldtip, desc)
2807 2807 parentgone = any(self[p].rev() > oldtip for p in parents)
2808 2808 except IOError:
2809 2809 msg = _(b'rolling back unknown transaction\n')
2810 2810 desc = None
2811 2811 parentgone = True
2812 2812
2813 2813 if not force and self[b'.'] != self[b'tip'] and desc == b'commit':
2814 2814 raise error.Abort(
2815 2815 _(
2816 2816 b'rollback of last commit while not checked out '
2817 2817 b'may lose data'
2818 2818 ),
2819 2819 hint=_(b'use -f to force'),
2820 2820 )
2821 2821
2822 2822 ui.status(msg)
2823 2823 if dryrun:
2824 2824 return 0
2825 2825
2826 2826 self.destroying()
2827 2827 vfsmap = self.vfs_map
2828 2828 skip_journal_pattern = None
2829 2829 if not parentgone:
2830 2830 skip_journal_pattern = RE_SKIP_DIRSTATE_ROLLBACK
2831 2831 transaction.rollback(
2832 2832 self.svfs,
2833 2833 vfsmap,
2834 2834 b'undo',
2835 2835 ui.warn,
2836 2836 checkambigfiles=_cachedfiles,
2837 2837 skip_journal_pattern=skip_journal_pattern,
2838 2838 )
2839 2839 self.invalidate()
2840 2840 self.dirstate.invalidate()
2841 2841
2842 2842 if parentgone:
2843 2843 # replace this with some explicit parent update in the future.
2844 2844 has_node = self.changelog.index.has_node
2845 2845 if not all(has_node(p) for p in self.dirstate._pl):
2846 2846 # There was no dirstate to backup initially, we need to drop
2847 2847 # the existing one.
2848 2848 with self.dirstate.changing_parents(self):
2849 2849 self.dirstate.setparents(self.nullid)
2850 2850 self.dirstate.clear()
2851 2851
2852 2852 parents = tuple([p.rev() for p in self[None].parents()])
2853 2853 if len(parents) > 1:
2854 2854 ui.status(
2855 2855 _(
2856 2856 b'working directory now based on '
2857 2857 b'revisions %d and %d\n'
2858 2858 )
2859 2859 % parents
2860 2860 )
2861 2861 else:
2862 2862 ui.status(
2863 2863 _(b'working directory now based on revision %d\n') % parents
2864 2864 )
2865 2865 mergestatemod.mergestate.clean(self)
2866 2866
2867 2867 # TODO: if we know which new heads may result from this rollback, pass
2868 2868 # them to destroy(), which will prevent the branchhead cache from being
2869 2869 # invalidated.
2870 2870 self.destroyed()
2871 2871 return 0
2872 2872
2873 2873 def _buildcacheupdater(self, newtransaction):
2874 2874 """called during transaction to build the callback updating cache
2875 2875
2876 2876 Lives on the repository to help extensions that might want to augment
2877 2877 this logic. For this purpose, the created transaction is passed to the
2878 2878 method.
2879 2879 """
2880 2880 # we must avoid cyclic reference between repo and transaction.
2881 2881 reporef = weakref.ref(self)
2882 2882
2883 2883 def updater(tr):
2884 2884 repo = reporef()
2885 2885 assert repo is not None # help pytype
2886 2886 repo.updatecaches(tr)
2887 2887
2888 2888 return updater
2889 2889
2890 2890 @unfilteredmethod
2891 2891 def updatecaches(self, tr=None, full=False, caches=None):
2892 2892 """warm appropriate caches
2893 2893
2894 2894 If this function is called after a transaction closed, the transaction
2895 2895 will be available in the 'tr' argument. This can be used to selectively
2896 2896 update caches relevant to the changes in that transaction.
2897 2897
2898 2898 If 'full' is set, make sure all caches the function knows about have
2899 2899 up-to-date data. Even the ones usually loaded more lazily.
2900 2900
2901 2901 The `full` argument can take a special "post-clone" value. In this case
2902 2902 the cache warming is done after a clone, and some of the slower caches might
2903 2903 be skipped, namely the `.fnodetags` one. This argument is 5.8 specific
2904 2904 as we plan for a cleaner way to deal with this for 5.9.
2905 2905 """
2906 2906 if tr is not None and tr.hookargs.get(b'source') == b'strip':
2907 2907 # During strip, many caches are invalid but
2908 2908 # a later call to `destroyed` will refresh them.
2909 2909 return
2910 2910
2911 2911 unfi = self.unfiltered()
2912 2912
2913 2913 if full:
2914 2914 msg = (
2915 2915 "`full` argument for `repo.updatecaches` is deprecated\n"
2916 2916 "(use `caches=repository.CACHE_ALL` instead)"
2917 2917 )
2918 2918 self.ui.deprecwarn(msg, b"5.9")
2919 2919 caches = repository.CACHES_ALL
2920 2920 if full == b"post-clone":
2921 2921 caches = repository.CACHES_POST_CLONE
2922 2922 caches = repository.CACHES_ALL
2923 2923 elif caches is None:
2924 2924 caches = repository.CACHES_DEFAULT
2925 2925
2926 2926 if repository.CACHE_BRANCHMAP_SERVED in caches:
2927 2927 if tr is None or tr.changes[b'origrepolen'] < len(self):
2928 2928 # accessing the 'served' branchmap should refresh all the others,
2929 2929 self.ui.debug(b'updating the branch cache\n')
2930 2930 self.filtered(b'served').branchmap()
2931 2931 self.filtered(b'served.hidden').branchmap()
2932 2932 # flush all possibly delayed write.
2933 2933 self._branchcaches.write_delayed(self)
2934 2934
2935 2935 if repository.CACHE_CHANGELOG_CACHE in caches:
2936 2936 self.changelog.update_caches(transaction=tr)
2937 2937
2938 2938 if repository.CACHE_MANIFESTLOG_CACHE in caches:
2939 2939 self.manifestlog.update_caches(transaction=tr)
2940 2940 for entry in self.store.walk():
2941 2941 if not entry.is_revlog:
2942 2942 continue
2943 2943 if not entry.is_manifestlog:
2944 2944 continue
2945 2945 manifestrevlog = entry.get_revlog_instance(self).get_revlog()
2946 2946 if manifestrevlog is not None:
2947 2947 manifestrevlog.update_caches(transaction=tr)
2948 2948
2949 2949 if repository.CACHE_REV_BRANCH in caches:
2950 2950 rbc = unfi.revbranchcache()
2951 2951 for r in unfi.changelog:
2952 2952 rbc.branchinfo(r)
2953 2953 rbc.write()
2954 2954
2955 2955 if repository.CACHE_FULL_MANIFEST in caches:
2956 2956 # ensure the working copy parents are in the manifestfulltextcache
2957 2957 for ctx in self[b'.'].parents():
2958 2958 ctx.manifest() # accessing the manifest is enough
2959 2959
2960 2960 if repository.CACHE_FILE_NODE_TAGS in caches:
2961 2961 # accessing fnode cache warms the cache
2962 2962 tagsmod.fnoderevs(self.ui, unfi, unfi.changelog.revs())
2963 2963
2964 2964 if repository.CACHE_TAGS_DEFAULT in caches:
2965 2965 # accessing tags warms the cache
2966 2966 self.tags()
2967 2967 if repository.CACHE_TAGS_SERVED in caches:
2968 2968 self.filtered(b'served').tags()
2969 2969
2970 2970 if repository.CACHE_BRANCHMAP_ALL in caches:
2971 2971 # The CACHE_BRANCHMAP_ALL updates lazily-loaded caches immediately,
2972 2972 # so we're forcing a write to cause these caches to be warmed up
2973 2973 # even if they haven't explicitly been requested yet (if they've
2974 2974 # never been used by hg, they won't ever have been written, even if
2975 2975 # they're a subset of another kind of cache that *has* been used).
2976 2976 for filt in repoview.filtertable.keys():
2977 2977 filtered = self.filtered(filt)
2978 2978 filtered.branchmap().write(filtered)
2979 2979
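# Illustrative sketch (not part of localrepo.py): requesting an explicit
# set of caches to warm, e.g. right after a clone, assuming `repo` is a
# localrepository.
from mercurial.interfaces import repository as repositorymod

def warm_post_clone_caches(repo):
    # CACHES_POST_CLONE skips the slower caches that a fresh clone can
    # rebuild lazily; CACHES_ALL would warm everything the method knows about
    repo.updatecaches(caches=repositorymod.CACHES_POST_CLONE)
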
2980 2980 def invalidatecaches(self):
2981 2981 if '_tagscache' in vars(self):
2982 2982 # can't use delattr on proxy
2983 2983 del self.__dict__['_tagscache']
2984 2984
2985 2985 self._branchcaches.clear()
2986 2986 self.invalidatevolatilesets()
2987 2987 self._sparsesignaturecache.clear()
2988 2988
2989 2989 def invalidatevolatilesets(self):
2990 2990 self.filteredrevcache.clear()
2991 2991 obsolete.clearobscaches(self)
2992 2992 self._quick_access_changeid_invalidate()
2993 2993
2994 2994 def invalidatedirstate(self):
2995 2995 """Invalidates the dirstate, causing the next call to dirstate
2996 2996 to check if it was modified since the last time it was read,
2997 2997 rereading it if it has.
2998 2998
2999 2999 This is different from dirstate.invalidate() in that it doesn't always
3000 3000 reread the dirstate. Use dirstate.invalidate() if you want to
3001 3001 explicitly read the dirstate again (i.e. restoring it to a previous
3002 3002 known good state)."""
3003 3003 unfi = self.unfiltered()
3004 3004 if 'dirstate' in unfi.__dict__:
3005 3005 assert not self.dirstate.is_changing_any
3006 3006 del unfi.__dict__['dirstate']
3007 3007
3008 3008 def invalidate(self, clearfilecache=False):
3009 3009 """Invalidates both store and non-store parts other than dirstate
3010 3010
3011 3011 If a transaction is running, invalidation of store is omitted,
3012 3012 because discarding in-memory changes might cause inconsistency
3013 3013         (e.g. an incomplete fncache causes unintentional failure, but a
3014 3014         redundant one doesn't).
3015 3015 """
3016 3016 unfiltered = self.unfiltered() # all file caches are stored unfiltered
3017 3017 for k in list(self._filecache.keys()):
3018 3018 if (
3019 3019 k == b'changelog'
3020 3020 and self.currenttransaction()
3021 3021 and self.changelog._delayed
3022 3022 ):
3023 3023 # The changelog object may store unwritten revisions. We don't
3024 3024 # want to lose them.
3025 3025 # TODO: Solve the problem instead of working around it.
3026 3026 continue
3027 3027
3028 3028 if clearfilecache:
3029 3029 del self._filecache[k]
3030 3030 try:
3031 3031 # XXX ideally, the key would be a unicode string to match the
3032 3032                 # fact it refers to an attribute name. However, changing this was
3033 3033                 # a bit of scope creep compared to the series cleaning up
3034 3034                 # del/set/getattr, so we kept things simple here.
3035 3035 delattr(unfiltered, pycompat.sysstr(k))
3036 3036 except AttributeError:
3037 3037 pass
3038 3038 self.invalidatecaches()
3039 3039 if not self.currenttransaction():
3040 3040 # TODO: Changing contents of store outside transaction
3041 3041 # causes inconsistency. We should make in-memory store
3042 3042 # changes detectable, and abort if changed.
3043 3043 self.store.invalidatecaches()
3044 3044
3045 3045 def invalidateall(self):
3046 3046 """Fully invalidates both store and non-store parts, causing the
3047 3047 subsequent operation to reread any outside changes."""
3048 3048 # extension should hook this to invalidate its caches
3049 3049 self.invalidate()
3050 3050 self.invalidatedirstate()
3051 3051
3052 3052 @unfilteredmethod
3053 3053 def _refreshfilecachestats(self, tr):
3054 3054 """Reload stats of cached files so that they are flagged as valid"""
3055 3055 for k, ce in self._filecache.items():
3056 3056 k = pycompat.sysstr(k)
3057 3057 if k == 'dirstate' or k not in self.__dict__:
3058 3058 continue
3059 3059 ce.refresh()
3060 3060
3061 3061 def _lock(
3062 3062 self,
3063 3063 vfs,
3064 3064 lockname,
3065 3065 wait,
3066 3066 releasefn,
3067 3067 acquirefn,
3068 3068 desc,
3069 3069 ):
3070 3070 timeout = 0
3071 3071 warntimeout = 0
3072 3072 if wait:
3073 3073 timeout = self.ui.configint(b"ui", b"timeout")
3074 3074 warntimeout = self.ui.configint(b"ui", b"timeout.warn")
3075 3075 # internal config: ui.signal-safe-lock
3076 3076 signalsafe = self.ui.configbool(b'ui', b'signal-safe-lock')
3077 3077
3078 3078 l = lockmod.trylock(
3079 3079 self.ui,
3080 3080 vfs,
3081 3081 lockname,
3082 3082 timeout,
3083 3083 warntimeout,
3084 3084 releasefn=releasefn,
3085 3085 acquirefn=acquirefn,
3086 3086 desc=desc,
3087 3087 signalsafe=signalsafe,
3088 3088 )
3089 3089 return l
3090 3090
3091 3091 def _afterlock(self, callback):
3092 3092 """add a callback to be run when the repository is fully unlocked
3093 3093
3094 3094 The callback will be executed when the outermost lock is released
3095 3095 (with wlock being higher level than 'lock')."""
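        # Illustrative sketch (the callback name is a placeholder, not part
        # of this module): callbacks receive a single success flag, exactly
        # like the commithook registered by commit() further down.
        #
        #     def on_unlock(success):
        #         if success:
        #             repo.ui.debug(b'repository fully unlocked\n')
        #     repo._afterlock(on_unlock)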
3096 3096 for ref in (self._wlockref, self._lockref):
3097 3097 l = ref and ref()
3098 3098 if l and l.held:
3099 3099 l.postrelease.append(callback)
3100 3100 break
3101 3101         else: # no lock has been found.
3102 3102 callback(True)
3103 3103
3104 3104 def lock(self, wait=True):
3105 3105 """Lock the repository store (.hg/store) and return a weak reference
3106 3106 to the lock. Use this before modifying the store (e.g. committing or
3107 3107         stripping). If you are opening a transaction, get a lock as well.
3108 3108
3109 3109         If both 'lock' and 'wlock' must be acquired, ensure you always acquire
3110 3110 'wlock' first to avoid a dead-lock hazard."""
3111 3111 l = self._currentlock(self._lockref)
3112 3112 if l is not None:
3113 3113 l.lock()
3114 3114 return l
3115 3115
3116 3116 l = self._lock(
3117 3117 vfs=self.svfs,
3118 3118 lockname=b"lock",
3119 3119 wait=wait,
3120 3120 releasefn=None,
3121 3121 acquirefn=self.invalidate,
3122 3122 desc=_(b'repository %s') % self.origroot,
3123 3123 )
3124 3124 self._lockref = weakref.ref(l)
3125 3125 return l
3126 3126
3127 3127 def wlock(self, wait=True):
3128 3128 """Lock the non-store parts of the repository (everything under
3129 3129 .hg except .hg/store) and return a weak reference to the lock.
3130 3130
3131 3131 Use this before modifying files in .hg.
3132 3132
3133 3133         If both 'lock' and 'wlock' must be acquired, ensure you always acquire
3134 3134 'wlock' first to avoid a dead-lock hazard."""
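        # Sketch of the documented ordering (hypothetical caller): code that
        # needs both locks nests them wlock-first, as commit() does below.
        #
        #     with repo.wlock(), repo.lock():
        #         ...  # safe to touch both .hg and .hg/store here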
3135 3135 l = self._wlockref() if self._wlockref else None
3136 3136 if l is not None and l.held:
3137 3137 l.lock()
3138 3138 return l
3139 3139
3140 3140 # We do not need to check for non-waiting lock acquisition. Such
3141 3141         # acquisition would not cause a deadlock as it would just fail.
3142 3142 if wait and (
3143 3143 self.ui.configbool(b'devel', b'all-warnings')
3144 3144 or self.ui.configbool(b'devel', b'check-locks')
3145 3145 ):
3146 3146 if self._currentlock(self._lockref) is not None:
3147 3147 self.ui.develwarn(b'"wlock" acquired after "lock"')
3148 3148
3149 3149 def unlock():
3150 3150 if self.dirstate.is_changing_any:
3151 3151 msg = b"wlock release in the middle of a changing parents"
3152 3152 self.ui.develwarn(msg)
3153 3153 self.dirstate.invalidate()
3154 3154 else:
3155 3155 if self.dirstate._dirty:
3156 3156 msg = b"dirty dirstate on wlock release"
3157 3157 self.ui.develwarn(msg)
3158 3158 self.dirstate.write(None)
3159 3159
3160 3160 unfi = self.unfiltered()
3161 3161 if 'dirstate' in unfi.__dict__:
3162 3162 del unfi.__dict__['dirstate']
3163 3163
3164 3164 l = self._lock(
3165 3165 self.vfs,
3166 3166 b"wlock",
3167 3167 wait,
3168 3168 unlock,
3169 3169 self.invalidatedirstate,
3170 3170 _(b'working directory of %s') % self.origroot,
3171 3171 )
3172 3172 self._wlockref = weakref.ref(l)
3173 3173 return l
3174 3174
3175 3175 def _currentlock(self, lockref):
3176 3176 """Returns the lock if it's held, or None if it's not."""
3177 3177 if lockref is None:
3178 3178 return None
3179 3179 l = lockref()
3180 3180 if l is None or not l.held:
3181 3181 return None
3182 3182 return l
3183 3183
3184 3184 def currentwlock(self):
3185 3185 """Returns the wlock if it's held, or None if it's not."""
3186 3186 return self._currentlock(self._wlockref)
3187 3187
3188 3188 def currentlock(self):
3189 3189 """Returns the lock if it's held, or None if it's not."""
3190 3190 return self._currentlock(self._lockref)
3191 3191
3192 3192 def checkcommitpatterns(self, wctx, match, status, fail):
3193 3193 """check for commit arguments that aren't committable"""
3194 3194 if match.isexact() or match.prefix():
3195 3195 matched = set(status.modified + status.added + status.removed)
3196 3196
3197 3197 for f in match.files():
3198 3198 f = self.dirstate.normalize(f)
3199 3199 if f == b'.' or f in matched or f in wctx.substate:
3200 3200 continue
3201 3201 if f in status.deleted:
3202 3202 fail(f, _(b'file not found!'))
3203 3203 # Is it a directory that exists or used to exist?
3204 3204 if self.wvfs.isdir(f) or wctx.p1().hasdir(f):
3205 3205 d = f + b'/'
3206 3206 for mf in matched:
3207 3207 if mf.startswith(d):
3208 3208 break
3209 3209 else:
3210 3210 fail(f, _(b"no match under directory!"))
3211 3211 elif f not in self.dirstate:
3212 3212 fail(f, _(b"file not tracked!"))
3213 3213
3214 3214 @unfilteredmethod
3215 3215 def commit(
3216 3216 self,
3217 3217 text=b"",
3218 3218 user=None,
3219 3219 date=None,
3220 3220 match=None,
3221 3221 force=False,
3222 3222 editor=None,
3223 3223 extra=None,
3224 3224 ):
3225 3225 """Add a new revision to current repository.
3226 3226
3227 3227 Revision information is gathered from the working directory,
3228 3228 match can be used to filter the committed files. If editor is
3229 3229 supplied, it is called to get a commit message.
3230 3230 """
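        # Illustrative call (message and user are placeholder values): the
        # method takes wlock/lock itself, so a simple caller only needs:
        #
        #     node = repo.commit(text=b'fix frobnication',
        #                        user=b'Alice <alice@example.org>')
        #     # 'node' is the new changeset node, or None if nothing changed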
3231 3231 if extra is None:
3232 3232 extra = {}
3233 3233
3234 3234 def fail(f, msg):
3235 3235 raise error.InputError(b'%s: %s' % (f, msg))
3236 3236
3237 3237 if not match:
3238 3238 match = matchmod.always()
3239 3239
3240 3240 if not force:
3241 3241 match.bad = fail
3242 3242
3243 3243 # lock() for recent changelog (see issue4368)
3244 3244 with self.wlock(), self.lock():
3245 3245 wctx = self[None]
3246 3246 merge = len(wctx.parents()) > 1
3247 3247
3248 3248 if not force and merge and not match.always():
3249 3249 raise error.Abort(
3250 3250 _(
3251 3251 b'cannot partially commit a merge '
3252 3252 b'(do not specify files or patterns)'
3253 3253 )
3254 3254 )
3255 3255
3256 3256 status = self.status(match=match, clean=force)
3257 3257 if force:
3258 3258 status.modified.extend(
3259 3259 status.clean
3260 3260 ) # mq may commit clean files
3261 3261
3262 3262 # check subrepos
3263 3263 subs, commitsubs, newstate = subrepoutil.precommit(
3264 3264 self.ui, wctx, status, match, force=force
3265 3265 )
3266 3266
3267 3267 # make sure all explicit patterns are matched
3268 3268 if not force:
3269 3269 self.checkcommitpatterns(wctx, match, status, fail)
3270 3270
3271 3271 cctx = context.workingcommitctx(
3272 3272 self, status, text, user, date, extra
3273 3273 )
3274 3274
3275 3275 ms = mergestatemod.mergestate.read(self)
3276 3276 mergeutil.checkunresolved(ms)
3277 3277
3278 3278 # internal config: ui.allowemptycommit
3279 3279 if cctx.isempty() and not self.ui.configbool(
3280 3280 b'ui', b'allowemptycommit'
3281 3281 ):
3282 3282 self.ui.debug(b'nothing to commit, clearing merge state\n')
3283 3283 ms.reset()
3284 3284 return None
3285 3285
3286 3286 if merge and cctx.deleted():
3287 3287 raise error.Abort(_(b"cannot commit merge with missing files"))
3288 3288
3289 3289 if editor:
3290 3290 cctx._text = editor(self, cctx, subs)
3291 3291 edited = text != cctx._text
3292 3292
3293 3293 # Save commit message in case this transaction gets rolled back
3294 3294 # (e.g. by a pretxncommit hook). Leave the content alone on
3295 3295 # the assumption that the user will use the same editor again.
3296 3296 msg_path = self.savecommitmessage(cctx._text)
3297 3297
3298 3298 # commit subs and write new state
3299 3299 if subs:
3300 3300 uipathfn = scmutil.getuipathfn(self)
3301 3301 for s in sorted(commitsubs):
3302 3302 sub = wctx.sub(s)
3303 3303 self.ui.status(
3304 3304 _(b'committing subrepository %s\n')
3305 3305 % uipathfn(subrepoutil.subrelpath(sub))
3306 3306 )
3307 3307 sr = sub.commit(cctx._text, user, date)
3308 3308 newstate[s] = (newstate[s][0], sr)
3309 3309 subrepoutil.writestate(self, newstate)
3310 3310
3311 3311 p1, p2 = self.dirstate.parents()
3312 3312 hookp1, hookp2 = hex(p1), (p2 != self.nullid and hex(p2) or b'')
3313 3313 try:
3314 3314 self.hook(
3315 3315 b"precommit", throw=True, parent1=hookp1, parent2=hookp2
3316 3316 )
3317 3317 with self.transaction(b'commit'):
3318 3318 ret = self.commitctx(cctx, True)
3319 3319 # update bookmarks, dirstate and mergestate
3320 3320 bookmarks.update(self, [p1, p2], ret)
3321 3321 cctx.markcommitted(ret)
3322 3322 ms.reset()
3323 3323 except: # re-raises
3324 3324 if edited:
3325 3325 self.ui.write(
3326 3326 _(b'note: commit message saved in %s\n') % msg_path
3327 3327 )
3328 3328 self.ui.write(
3329 3329 _(
3330 3330 b"note: use 'hg commit --logfile "
3331 3331 b"%s --edit' to reuse it\n"
3332 3332 )
3333 3333 % msg_path
3334 3334 )
3335 3335 raise
3336 3336
3337 3337 def commithook(unused_success):
3338 3338             # hack for commands that use a temporary commit (e.g. histedit)
3339 3339 # temporary commit got stripped before hook release
3340 3340 if self.changelog.hasnode(ret):
3341 3341 self.hook(
3342 3342 b"commit", node=hex(ret), parent1=hookp1, parent2=hookp2
3343 3343 )
3344 3344
3345 3345 self._afterlock(commithook)
3346 3346 return ret
3347 3347
3348 3348 @unfilteredmethod
3349 3349 def commitctx(self, ctx, error=False, origctx=None):
3350 3350 return commit.commitctx(self, ctx, error=error, origctx=origctx)
3351 3351
3352 3352 @unfilteredmethod
3353 3353 def destroying(self):
3354 3354 """Inform the repository that nodes are about to be destroyed.
3355 3355 Intended for use by strip and rollback, so there's a common
3356 3356 place for anything that has to be done before destroying history.
3357 3357
3358 3358 This is mostly useful for saving state that is in memory and waiting
3359 3359 to be flushed when the current lock is released. Because a call to
3360 3360 destroyed is imminent, the repo will be invalidated causing those
3361 3361 changes to stay in memory (waiting for the next unlock), or vanish
3362 3362 completely.
3363 3363 """
3364 3364 # When using the same lock to commit and strip, the phasecache is left
3365 3365 # dirty after committing. Then when we strip, the repo is invalidated,
3366 3366 # causing those changes to disappear.
3367 3367 if '_phasecache' in vars(self):
3368 3368 self._phasecache.write()
3369 3369
3370 3370 @unfilteredmethod
3371 3371 def destroyed(self):
3372 3372 """Inform the repository that nodes have been destroyed.
3373 3373 Intended for use by strip and rollback, so there's a common
3374 3374 place for anything that has to be done after destroying history.
3375 3375 """
3376 3376 # When one tries to:
3377 3377 # 1) destroy nodes thus calling this method (e.g. strip)
3378 3378 # 2) use phasecache somewhere (e.g. commit)
3379 3379 #
3380 3380 # then 2) will fail because the phasecache contains nodes that were
3381 3381 # removed. We can either remove phasecache from the filecache,
3382 3382 # causing it to reload next time it is accessed, or simply filter
3383 3383 # the removed nodes now and write the updated cache.
3384 3384 self._phasecache.filterunknown(self)
3385 3385 self._phasecache.write()
3386 3386
3387 3387 # refresh all repository caches
3388 3388 self.updatecaches()
3389 3389
3390 3390 # Ensure the persistent tag cache is updated. Doing it now
3391 3391 # means that the tag cache only has to worry about destroyed
3392 3392 # heads immediately after a strip/rollback. That in turn
3393 3393 # guarantees that "cachetip == currenttip" (comparing both rev
3394 3394 # and node) always means no nodes have been added or destroyed.
3395 3395
3396 3396 # XXX this is suboptimal when qrefresh'ing: we strip the current
3397 3397 # head, refresh the tag cache, then immediately add a new head.
3398 3398 # But I think doing it this way is necessary for the "instant
3399 3399 # tag cache retrieval" case to work.
3400 3400 self.invalidate()
3401 3401
3402 3402 def status(
3403 3403 self,
3404 3404 node1=b'.',
3405 3405 node2=None,
3406 3406 match=None,
3407 3407 ignored=False,
3408 3408 clean=False,
3409 3409 unknown=False,
3410 3410 listsubrepos=False,
3411 3411 ):
3412 3412 '''a convenience method that calls node1.status(node2)'''
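        # Typical use (sketch): the returned object is an scmutil.status
        # tuple exposing .modified, .added, .removed, .deleted, .unknown,
        # .ignored and .clean lists.
        #
        #     st = repo.status()   # working directory against '.'
        #     dirty = bool(st.modified or st.added or st.removed)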
3413 3413 return self[node1].status(
3414 3414 node2, match, ignored, clean, unknown, listsubrepos
3415 3415 )
3416 3416
3417 3417 def addpostdsstatus(self, ps):
3418 3418 """Add a callback to run within the wlock, at the point at which status
3419 3419 fixups happen.
3420 3420
3421 3421 On status completion, callback(wctx, status) will be called with the
3422 3422 wlock held, unless the dirstate has changed from underneath or the wlock
3423 3423 couldn't be grabbed.
3424 3424
3425 3425 Callbacks should not capture and use a cached copy of the dirstate --
3426 3426 it might change in the meanwhile. Instead, they should access the
3427 3427 dirstate via wctx.repo().dirstate.
3428 3428
3429 3429 This list is emptied out after each status run -- extensions should
3430 3430 make sure it adds to this list each time dirstate.status is called.
3431 3431 Extensions should also make sure they don't call this for statuses
3432 3432 that don't involve the dirstate.
3433 3433 """
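        # Hedged example (the function name is invented for illustration):
        # an extension registers a callable taking (wctx, status):
        #
        #     def poststatus(wctx, status):
        #         wctx.repo().ui.debug(
        #             b'%d files modified\n' % len(status.modified)
        #         )
        #     repo.addpostdsstatus(poststatus)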
3434 3434
3435 3435 # The list is located here for uniqueness reasons -- it is actually
3436 3436 # managed by the workingctx, but that isn't unique per-repo.
3437 3437 self._postdsstatus.append(ps)
3438 3438
3439 3439 def postdsstatus(self):
3440 3440 """Used by workingctx to get the list of post-dirstate-status hooks."""
3441 3441 return self._postdsstatus
3442 3442
3443 3443 def clearpostdsstatus(self):
3444 3444 """Used by workingctx to clear post-dirstate-status hooks."""
3445 3445 del self._postdsstatus[:]
3446 3446
3447 3447 def heads(self, start=None):
3448 3448 if start is None:
3449 3449 cl = self.changelog
3450 3450 headrevs = reversed(cl.headrevs())
3451 3451 return [cl.node(rev) for rev in headrevs]
3452 3452
3453 3453 heads = self.changelog.heads(start)
3454 3454 # sort the output in rev descending order
3455 3455 return sorted(heads, key=self.changelog.rev, reverse=True)
3456 3456
3457 3457 def branchheads(self, branch=None, start=None, closed=False):
3458 3458 """return a (possibly filtered) list of heads for the given branch
3459 3459
3460 3460 Heads are returned in topological order, from newest to oldest.
3461 3461 If branch is None, use the dirstate branch.
3462 3462 If start is not None, return only heads reachable from start.
3463 3463 If closed is True, return heads that are marked as closed as well.
3464 3464 """
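        # Sketch (branch name is a placeholder): heads are returned as binary
        # nodes, newest first, so callers typically hexlify them for output.
        #
        #     for node in repo.branchheads(b'default', closed=True):
        #         repo.ui.write(b'%s\n' % hex(node))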
3465 3465 if branch is None:
3466 3466 branch = self[None].branch()
3467 3467 branches = self.branchmap()
3468 3468 if not branches.hasbranch(branch):
3469 3469 return []
3470 3470 # the cache returns heads ordered lowest to highest
3471 3471 bheads = list(reversed(branches.branchheads(branch, closed=closed)))
3472 3472 if start is not None:
3473 3473 # filter out the heads that cannot be reached from startrev
3474 3474 fbheads = set(self.changelog.nodesbetween([start], bheads)[2])
3475 3475 bheads = [h for h in bheads if h in fbheads]
3476 3476 return bheads
3477 3477
3478 3478 def branches(self, nodes):
3479 3479 if not nodes:
3480 3480 nodes = [self.changelog.tip()]
3481 3481 b = []
3482 3482 for n in nodes:
3483 3483 t = n
3484 3484 while True:
3485 3485 p = self.changelog.parents(n)
3486 3486 if p[1] != self.nullid or p[0] == self.nullid:
3487 3487 b.append((t, n, p[0], p[1]))
3488 3488 break
3489 3489 n = p[0]
3490 3490 return b
3491 3491
3492 3492 def between(self, pairs):
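        # For each (top, bottom) pair, walk the first-parent chain from top
        # towards bottom and record the nodes met at exponentially growing
        # distances (1, 2, 4, 8, ...); historically this backs the legacy
        # 'between' wire-protocol command used during discovery.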
3493 3493 r = []
3494 3494
3495 3495 for top, bottom in pairs:
3496 3496 n, l, i = top, [], 0
3497 3497 f = 1
3498 3498
3499 3499 while n != bottom and n != self.nullid:
3500 3500 p = self.changelog.parents(n)[0]
3501 3501 if i == f:
3502 3502 l.append(n)
3503 3503 f = f * 2
3504 3504 n = p
3505 3505 i += 1
3506 3506
3507 3507 r.append(l)
3508 3508
3509 3509 return r
3510 3510
3511 3511 def checkpush(self, pushop):
3512 3512 """Extensions can override this function if additional checks have
3513 3513 to be performed before pushing, or call it if they override push
3514 3514 command.
3515 3515 """
3516 3516
3517 3517 @unfilteredpropertycache
3518 3518 def prepushoutgoinghooks(self):
3519 3519         """Return util.hooks consisting of a pushop with repo, remote, outgoing
3520 3520 methods, which are called before pushing changesets.
3521 3521 """
3522 3522 return util.hooks()
3523 3523
3524 3524 def pushkey(self, namespace, key, old, new):
3525 3525 try:
3526 3526 tr = self.currenttransaction()
3527 3527 hookargs = {}
3528 3528 if tr is not None:
3529 3529 hookargs.update(tr.hookargs)
3530 3530 hookargs = pycompat.strkwargs(hookargs)
3531 3531 hookargs['namespace'] = namespace
3532 3532 hookargs['key'] = key
3533 3533 hookargs['old'] = old
3534 3534 hookargs['new'] = new
3535 3535 self.hook(b'prepushkey', throw=True, **hookargs)
3536 3536 except error.HookAbort as exc:
3537 3537 self.ui.write_err(_(b"pushkey-abort: %s\n") % exc)
3538 3538 if exc.hint:
3539 3539 self.ui.write_err(_(b"(%s)\n") % exc.hint)
3540 3540 return False
3541 3541 self.ui.debug(b'pushing key for "%s:%s"\n' % (namespace, key))
3542 3542 ret = pushkey.push(self, namespace, key, old, new)
3543 3543
3544 3544 def runhook(unused_success):
3545 3545 self.hook(
3546 3546 b'pushkey',
3547 3547 namespace=namespace,
3548 3548 key=key,
3549 3549 old=old,
3550 3550 new=new,
3551 3551 ret=ret,
3552 3552 )
3553 3553
3554 3554 self._afterlock(runhook)
3555 3555 return ret
3556 3556
3557 3557 def listkeys(self, namespace):
3558 3558 self.hook(b'prelistkeys', throw=True, namespace=namespace)
3559 3559 self.ui.debug(b'listing keys for "%s"\n' % namespace)
3560 3560 values = pushkey.list(self, namespace)
3561 3561 self.hook(b'listkeys', namespace=namespace, values=values)
3562 3562 return values
3563 3563
3564 3564 def debugwireargs(self, one, two, three=None, four=None, five=None):
3565 3565 '''used to test argument passing over the wire'''
3566 3566 return b"%s %s %s %s %s" % (
3567 3567 one,
3568 3568 two,
3569 3569 pycompat.bytestr(three),
3570 3570 pycompat.bytestr(four),
3571 3571 pycompat.bytestr(five),
3572 3572 )
3573 3573
3574 3574 def savecommitmessage(self, text):
3575 3575 fp = self.vfs(b'last-message.txt', b'wb')
3576 3576 try:
3577 3577 fp.write(text)
3578 3578 finally:
3579 3579 fp.close()
3580 3580 return self.pathto(fp.name[len(self.root) + 1 :])
3581 3581
3582 3582 def register_wanted_sidedata(self, category):
3583 3583 if repository.REPO_FEATURE_SIDE_DATA not in self.features:
3584 3584 # Only revlogv2 repos can want sidedata.
3585 3585 return
3586 3586 self._wanted_sidedata.add(pycompat.bytestr(category))
3587 3587
3588 3588 def register_sidedata_computer(
3589 3589 self, kind, category, keys, computer, flags, replace=False
3590 3590 ):
3591 3591 if kind not in revlogconst.ALL_KINDS:
3592 3592 msg = _(b"unexpected revlog kind '%s'.")
3593 3593 raise error.ProgrammingError(msg % kind)
3594 3594 category = pycompat.bytestr(category)
3595 3595 already_registered = category in self._sidedata_computers.get(kind, [])
3596 3596 if already_registered and not replace:
3597 3597 msg = _(
3598 3598 b"cannot register a sidedata computer twice for category '%s'."
3599 3599 )
3600 3600 raise error.ProgrammingError(msg % category)
3601 3601 if replace and not already_registered:
3602 3602 msg = _(
3603 3603 b"cannot replace a sidedata computer that isn't registered "
3604 3604 b"for category '%s'."
3605 3605 )
3606 3606 raise error.ProgrammingError(msg % category)
3607 3607 self._sidedata_computers.setdefault(kind, {})
3608 3608 self._sidedata_computers[kind][category] = (keys, computer, flags)
3609 3609
3610 3610
3611 3611 def undoname(fn: bytes) -> bytes:
3612 3612 base, name = os.path.split(fn)
3613 3613 assert name.startswith(b'journal')
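    # worked example (illustrative path only):
    #     undoname(b'.hg/store/journal.bookmarks') -> b'.hg/store/undo.bookmarks'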
3614 3614 return os.path.join(base, name.replace(b'journal', b'undo', 1))
3615 3615
3616 3616
3617 3617 def instance(ui, path: bytes, create, intents=None, createopts=None):
3618 3618 # prevent cyclic import localrepo -> upgrade -> localrepo
3619 3619 from . import upgrade
3620 3620
3621 3621 localpath = urlutil.urllocalpath(path)
3622 3622 if create:
3623 3623 createrepository(ui, localpath, createopts=createopts)
3624 3624
3625 3625 def repo_maker():
3626 3626 return makelocalrepository(ui, localpath, intents=intents)
3627 3627
3628 3628 repo = repo_maker()
3629 3629 repo = upgrade.may_auto_upgrade(repo, repo_maker)
3630 3630 return repo
3631 3631
3632 3632
3633 3633 def islocal(path: bytes) -> bool:
3634 3634 return True
3635 3635
3636 3636
3637 3637 def defaultcreateopts(ui, createopts=None):
3638 3638 """Populate the default creation options for a repository.
3639 3639
3640 3640 A dictionary of explicitly requested creation options can be passed
3641 3641 in. Missing keys will be populated.
3642 3642 """
3643 3643 createopts = dict(createopts or {})
3644 3644
3645 3645 if b'backend' not in createopts:
3646 3646 # experimental config: storage.new-repo-backend
3647 3647 createopts[b'backend'] = ui.config(b'storage', b'new-repo-backend')
3648 3648
3649 3649 return createopts
3650 3650
3651 3651
3652 3652 def clone_requirements(ui, createopts, srcrepo):
3653 3653 """clone the requirements of a local repo for a local clone
3654 3654
3655 3655 The store requirements are unchanged while the working copy requirements
3656 3656     depend on the configuration.
3657 3657 """
3658 3658 target_requirements = set()
3659 3659 if not srcrepo.requirements:
3660 3660 # this is a legacy revlog "v0" repository, we cannot do anything fancy
3661 3661 # with it.
3662 3662 return target_requirements
3663 3663 createopts = defaultcreateopts(ui, createopts=createopts)
3664 3664 for r in newreporequirements(ui, createopts):
3665 3665 if r in requirementsmod.WORKING_DIR_REQUIREMENTS:
3666 3666 target_requirements.add(r)
3667 3667
3668 3668 for r in srcrepo.requirements:
3669 3669 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS:
3670 3670 target_requirements.add(r)
3671 3671 return target_requirements
3672 3672
3673 3673
3674 3674 def newreporequirements(ui, createopts):
3675 3675 """Determine the set of requirements for a new local repository.
3676 3676
3677 3677 Extensions can wrap this function to specify custom requirements for
3678 3678 new repositories.
3679 3679 """
3680 3680
3681 3681 if b'backend' not in createopts:
3682 3682 raise error.ProgrammingError(
3683 3683 b'backend key not present in createopts; '
3684 3684 b'was defaultcreateopts() called?'
3685 3685 )
3686 3686
3687 3687 if createopts[b'backend'] != b'revlogv1':
3688 3688 raise error.Abort(
3689 3689 _(
3690 3690 b'unable to determine repository requirements for '
3691 3691 b'storage backend: %s'
3692 3692 )
3693 3693 % createopts[b'backend']
3694 3694 )
3695 3695
3696 3696 requirements = {requirementsmod.REVLOGV1_REQUIREMENT}
3697 3697 if ui.configbool(b'format', b'usestore'):
3698 3698 requirements.add(requirementsmod.STORE_REQUIREMENT)
3699 3699 if ui.configbool(b'format', b'usefncache'):
3700 3700 requirements.add(requirementsmod.FNCACHE_REQUIREMENT)
3701 3701 if ui.configbool(b'format', b'dotencode'):
3702 3702 requirements.add(requirementsmod.DOTENCODE_REQUIREMENT)
3703 3703
3704 3704 compengines = ui.configlist(b'format', b'revlog-compression')
3705 3705 for compengine in compengines:
3706 3706 if compengine in util.compengines:
3707 3707 engine = util.compengines[compengine]
3708 3708 if engine.available() and engine.revlogheader():
3709 3709 break
3710 3710 else:
3711 3711 raise error.Abort(
3712 3712 _(
3713 3713 b'compression engines %s defined by '
3714 3714 b'format.revlog-compression not available'
3715 3715 )
3716 3716 % b', '.join(b'"%s"' % e for e in compengines),
3717 3717 hint=_(
3718 3718 b'run "hg debuginstall" to list available '
3719 3719 b'compression engines'
3720 3720 ),
3721 3721 )
3722 3722
3723 3723 # zlib is the historical default and doesn't need an explicit requirement.
3724 3724 if compengine == b'zstd':
3725 3725 requirements.add(b'revlog-compression-zstd')
3726 3726 elif compengine != b'zlib':
3727 3727 requirements.add(b'exp-compression-%s' % compengine)
3728 3728
3729 3729 if scmutil.gdinitconfig(ui):
3730 3730 requirements.add(requirementsmod.GENERALDELTA_REQUIREMENT)
3731 3731 if ui.configbool(b'format', b'sparse-revlog'):
3732 3732 requirements.add(requirementsmod.SPARSEREVLOG_REQUIREMENT)
3733 3733
3734 3734 # experimental config: format.use-dirstate-v2
3735 3735 # Keep this logic in sync with `has_dirstate_v2()` in `tests/hghave.py`
3736 3736 if ui.configbool(b'format', b'use-dirstate-v2'):
3737 3737 requirements.add(requirementsmod.DIRSTATE_V2_REQUIREMENT)
3738 3738
3739 3739 # experimental config: format.exp-use-copies-side-data-changeset
3740 3740 if ui.configbool(b'format', b'exp-use-copies-side-data-changeset'):
3741 3741 requirements.add(requirementsmod.CHANGELOGV2_REQUIREMENT)
3742 3742 requirements.add(requirementsmod.COPIESSDC_REQUIREMENT)
3743 3743 if ui.configbool(b'experimental', b'treemanifest'):
3744 3744 requirements.add(requirementsmod.TREEMANIFEST_REQUIREMENT)
3745 3745
3746 3746 changelogv2 = ui.config(b'format', b'exp-use-changelog-v2')
3747 3747 if changelogv2 == b'enable-unstable-format-and-corrupt-my-data':
3748 3748 requirements.add(requirementsmod.CHANGELOGV2_REQUIREMENT)
3749 3749
3750 3750 revlogv2 = ui.config(b'experimental', b'revlogv2')
3751 3751 if revlogv2 == b'enable-unstable-format-and-corrupt-my-data':
3752 3752 requirements.discard(requirementsmod.REVLOGV1_REQUIREMENT)
3753 3753 requirements.add(requirementsmod.REVLOGV2_REQUIREMENT)
3754 3754 # experimental config: format.internal-phase
3755 3755 if ui.configbool(b'format', b'use-internal-phase'):
3756 3756 requirements.add(requirementsmod.INTERNAL_PHASE_REQUIREMENT)
3757 3757
3758 3758 # experimental config: format.exp-archived-phase
3759 3759 if ui.configbool(b'format', b'exp-archived-phase'):
3760 3760 requirements.add(requirementsmod.ARCHIVED_PHASE_REQUIREMENT)
3761 3761
3762 3762 if createopts.get(b'narrowfiles'):
3763 3763 requirements.add(requirementsmod.NARROW_REQUIREMENT)
3764 3764
3765 3765 if createopts.get(b'lfs'):
3766 3766 requirements.add(b'lfs')
3767 3767
3768 3768 if ui.configbool(b'format', b'bookmarks-in-store'):
3769 3769 requirements.add(requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT)
3770 3770
3771 3771 # The feature is disabled unless a fast implementation is available.
3772 3772 persistent_nodemap_default = policy.importrust('revlog') is not None
3773 3773 if ui.configbool(
3774 3774 b'format', b'use-persistent-nodemap', persistent_nodemap_default
3775 3775 ):
3776 3776 requirements.add(requirementsmod.NODEMAP_REQUIREMENT)
3777 3777
3778 3778 # if share-safe is enabled, let's create the new repository with the new
3779 3779 # requirement
3780 3780 if ui.configbool(b'format', b'use-share-safe'):
3781 3781 requirements.add(requirementsmod.SHARESAFE_REQUIREMENT)
3782 3782
3783 3783 # if we are creating a share-repoΒΉ we have to handle requirement
3784 3784 # differently.
3785 3785 #
3786 3786 # [1] (i.e. reusing the store from another repository, just having a
3787 3787 # working copy)
3788 3788 if b'sharedrepo' in createopts:
3789 3789 source_requirements = set(createopts[b'sharedrepo'].requirements)
3790 3790
3791 3791 if requirementsmod.SHARESAFE_REQUIREMENT not in source_requirements:
3792 3792 # share to an old school repository, we have to copy the
3793 3793 # requirements and hope for the best.
3794 3794 requirements = source_requirements
3795 3795 else:
3796 3796 # We have control on the working copy only, so "copy" the non
3797 3797 # working copy part over, ignoring previous logic.
3798 3798 to_drop = set()
3799 3799 for req in requirements:
3800 3800 if req in requirementsmod.WORKING_DIR_REQUIREMENTS:
3801 3801 continue
3802 3802 if req in source_requirements:
3803 3803 continue
3804 3804 to_drop.add(req)
3805 3805 requirements -= to_drop
3806 3806 requirements |= source_requirements
3807 3807
3808 3808 if createopts.get(b'sharedrelative'):
3809 3809 requirements.add(requirementsmod.RELATIVE_SHARED_REQUIREMENT)
3810 3810 else:
3811 3811 requirements.add(requirementsmod.SHARED_REQUIREMENT)
3812 3812
3813 3813 if ui.configbool(b'format', b'use-dirstate-tracked-hint'):
3814 3814 version = ui.configint(b'format', b'use-dirstate-tracked-hint.version')
3815 3815 msg = _(b"ignoring unknown tracked key version: %d\n")
3816 3816 hint = _(
3817 3817 b"see `hg help config.format.use-dirstate-tracked-hint-version"
3818 3818 )
3819 3819 if version != 1:
3820 3820 ui.warn(msg % version, hint=hint)
3821 3821 else:
3822 3822 requirements.add(requirementsmod.DIRSTATE_TRACKED_HINT_V1)
3823 3823
3824 3824 return requirements
3825 3825
3826 3826
3827 3827 def checkrequirementscompat(ui, requirements):
3828 3828 """Checks compatibility of repository requirements enabled and disabled.
3829 3829
3830 3830     Returns a set of requirements which need to be dropped because dependent
3831 3831     requirements are not enabled. Also warns users about it."""
3832 3832
3833 3833 dropped = set()
3834 3834
3835 3835 if requirementsmod.STORE_REQUIREMENT not in requirements:
3836 3836 if requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT in requirements:
3837 3837 ui.warn(
3838 3838 _(
3839 3839 b'ignoring enabled \'format.bookmarks-in-store\' config '
3840 3840                     b'because it is incompatible with disabled '
3841 3841 b'\'format.usestore\' config\n'
3842 3842 )
3843 3843 )
3844 3844 dropped.add(requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT)
3845 3845
3846 3846 if (
3847 3847 requirementsmod.SHARED_REQUIREMENT in requirements
3848 3848 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
3849 3849 ):
3850 3850 raise error.Abort(
3851 3851 _(
3852 3852 b"cannot create shared repository as source was created"
3853 3853 b" with 'format.usestore' config disabled"
3854 3854 )
3855 3855 )
3856 3856
3857 3857 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
3858 3858 if ui.hasconfig(b'format', b'use-share-safe'):
3859 3859 msg = _(
3860 3860 b"ignoring enabled 'format.use-share-safe' config because "
3861 3861 b"it is incompatible with disabled 'format.usestore'"
3862 3862 b" config\n"
3863 3863 )
3864 3864 ui.warn(msg)
3865 3865 dropped.add(requirementsmod.SHARESAFE_REQUIREMENT)
3866 3866
3867 3867 return dropped
3868 3868
3869 3869
3870 3870 def filterknowncreateopts(ui, createopts):
3871 3871 """Filters a dict of repo creation options against options that are known.
3872 3872
3873 3873 Receives a dict of repo creation options and returns a dict of those
3874 3874 options that we don't know how to handle.
3875 3875
3876 3876 This function is called as part of repository creation. If the
3877 3877 returned dict contains any items, repository creation will not
3878 3878 be allowed, as it means there was a request to create a repository
3879 3879 with options not recognized by loaded code.
3880 3880
3881 3881 Extensions can wrap this function to filter out creation options
3882 3882 they know how to handle.
3883 3883 """
3884 3884 known = {
3885 3885 b'backend',
3886 3886 b'lfs',
3887 3887 b'narrowfiles',
3888 3888 b'sharedrepo',
3889 3889 b'sharedrelative',
3890 3890 b'shareditems',
3891 3891 b'shallowfilestore',
3892 3892 }
3893 3893
3894 3894 return {k: v for k, v in createopts.items() if k not in known}
3895 3895
3896 3896
3897 3897 def createrepository(ui, path: bytes, createopts=None, requirements=None):
3898 3898 """Create a new repository in a vfs.
3899 3899
3900 3900 ``path`` path to the new repo's working directory.
3901 3901 ``createopts`` options for the new repository.
3902 3902     ``requirements`` predefined set of requirements.
3903 3903 (incompatible with ``createopts``)
3904 3904
3905 3905 The following keys for ``createopts`` are recognized:
3906 3906
3907 3907 backend
3908 3908 The storage backend to use.
3909 3909 lfs
3910 3910 Repository will be created with ``lfs`` requirement. The lfs extension
3911 3911 will automatically be loaded when the repository is accessed.
3912 3912 narrowfiles
3913 3913 Set up repository to support narrow file storage.
3914 3914 sharedrepo
3915 3915 Repository object from which storage should be shared.
3916 3916 sharedrelative
3917 3917 Boolean indicating if the path to the shared repo should be
3918 3918 stored as relative. By default, the pointer to the "parent" repo
3919 3919 is stored as an absolute path.
3920 3920 shareditems
3921 3921 Set of items to share to the new repository (in addition to storage).
3922 3922 shallowfilestore
3923 3923 Indicates that storage for files should be shallow (not all ancestor
3924 3924 revisions are known).
3925 3925 """
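    # Minimal sketch of a caller (the path is a placeholder):
    #
    #     from mercurial import ui as uimod
    #     createrepository(uimod.ui.load(), b'/tmp/newrepo')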
3926 3926
3927 3927 if requirements is not None:
3928 3928 if createopts is not None:
3929 3929 msg = b'cannot specify both createopts and requirements'
3930 3930 raise error.ProgrammingError(msg)
3931 3931 createopts = {}
3932 3932 else:
3933 3933 createopts = defaultcreateopts(ui, createopts=createopts)
3934 3934
3935 3935 unknownopts = filterknowncreateopts(ui, createopts)
3936 3936
3937 3937 if not isinstance(unknownopts, dict):
3938 3938 raise error.ProgrammingError(
3939 3939 b'filterknowncreateopts() did not return a dict'
3940 3940 )
3941 3941
3942 3942 if unknownopts:
3943 3943 raise error.Abort(
3944 3944 _(
3945 3945 b'unable to create repository because of unknown '
3946 3946 b'creation option: %s'
3947 3947 )
3948 3948 % b', '.join(sorted(unknownopts)),
3949 3949 hint=_(b'is a required extension not loaded?'),
3950 3950 )
3951 3951
3952 3952 requirements = newreporequirements(ui, createopts=createopts)
3953 3953 requirements -= checkrequirementscompat(ui, requirements)
3954 3954
3955 3955 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
3956 3956
3957 3957 hgvfs = vfsmod.vfs(wdirvfs.join(b'.hg'))
3958 3958 if hgvfs.exists():
3959 3959 raise error.RepoError(_(b'repository %s already exists') % path)
3960 3960
3961 3961 if b'sharedrepo' in createopts:
3962 3962 sharedpath = createopts[b'sharedrepo'].sharedpath
3963 3963
3964 3964 if createopts.get(b'sharedrelative'):
3965 3965 try:
3966 3966 sharedpath = os.path.relpath(sharedpath, hgvfs.base)
3967 3967 sharedpath = util.pconvert(sharedpath)
3968 3968 except (IOError, ValueError) as e:
3969 3969 # ValueError is raised on Windows if the drive letters differ
3970 3970 # on each path.
3971 3971 raise error.Abort(
3972 3972 _(b'cannot calculate relative path'),
3973 3973 hint=stringutil.forcebytestr(e),
3974 3974 )
3975 3975
3976 3976 if not wdirvfs.exists():
3977 3977 wdirvfs.makedirs()
3978 3978
3979 3979 hgvfs.makedir(notindexed=True)
3980 3980 if b'sharedrepo' not in createopts:
3981 3981 hgvfs.mkdir(b'cache')
3982 3982 hgvfs.mkdir(b'wcache')
3983 3983
3984 3984 has_store = requirementsmod.STORE_REQUIREMENT in requirements
3985 3985 if has_store and b'sharedrepo' not in createopts:
3986 3986 hgvfs.mkdir(b'store')
3987 3987
3988 3988 # We create an invalid changelog outside the store so very old
3989 3989 # Mercurial versions (which didn't know about the requirements
3990 3990 # file) encounter an error on reading the changelog. This
3991 3991 # effectively locks out old clients and prevents them from
3992 3992 # mucking with a repo in an unknown format.
3993 3993 #
3994 3994 # The revlog header has version 65535, which won't be recognized by
3995 3995 # such old clients.
3996 3996 hgvfs.append(
3997 3997 b'00changelog.i',
3998 3998 b'\0\0\xFF\xFF dummy changelog to prevent using the old repo '
3999 3999 b'layout',
4000 4000 )
4001 4001
4002 4002 # Filter the requirements into working copy and store ones
4003 4003 wcreq, storereq = scmutil.filterrequirements(requirements)
4004 4004 # write working copy ones
4005 4005 scmutil.writerequires(hgvfs, wcreq)
4006 4006 # If there are store requirements and the current repository
4007 4007 # is not a shared one, write stored requirements
4008 4008 # For new shared repository, we don't need to write the store
4009 4009 # requirements as they are already present in store requires
4010 4010 if storereq and b'sharedrepo' not in createopts:
4011 4011 storevfs = vfsmod.vfs(hgvfs.join(b'store'), cacheaudited=True)
4012 4012 scmutil.writerequires(storevfs, storereq)
4013 4013
4014 4014 # Write out file telling readers where to find the shared store.
4015 4015 if b'sharedrepo' in createopts:
4016 4016 hgvfs.write(b'sharedpath', sharedpath)
4017 4017
4018 4018 if createopts.get(b'shareditems'):
4019 4019 shared = b'\n'.join(sorted(createopts[b'shareditems'])) + b'\n'
4020 4020 hgvfs.write(b'shared', shared)
4021 4021
4022 4022
4023 4023 def poisonrepository(repo):
4024 4024 """Poison a repository instance so it can no longer be used."""
4025 4025 # Perform any cleanup on the instance.
4026 4026 repo.close()
4027 4027
4028 4028 # Our strategy is to replace the type of the object with one that
4029 4029 # has all attribute lookups result in error.
4030 4030 #
4031 4031 # But we have to allow the close() method because some constructors
4032 4032 # of repos call close() on repo references.
4033 4033 class poisonedrepository:
4034 4034 def __getattribute__(self, item):
4035 4035 if item == 'close':
4036 4036 return object.__getattribute__(self, item)
4037 4037
4038 4038 raise error.ProgrammingError(
4039 4039 b'repo instances should not be used after unshare'
4040 4040 )
4041 4041
4042 4042 def close(self):
4043 4043 pass
4044 4044
4045 4045 # We may have a repoview, which intercepts __setattr__. So be sure
4046 4046 # we operate at the lowest level possible.
4047 4047 object.__setattr__(repo, '__class__', poisonedrepository)
@@ -1,3747 +1,3745 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes constraints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267     # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281     """Hold configuration values about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285     # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are delta encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
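# Illustrative sketch (the opener below is assumed to be a revlog opener such
# as a repo's svfs): these config objects are plain attrs containers, so a
# larger raw-data cache can be requested by building one and passing it through
# the opener options consumed in revlog.__init__ further down:
#
#     data_cfg = DataConfig(chunk_cache_size=262144, mmap_large_index=True)
#     opener.options[b'data-config'] = data_cfg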
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311     """Hold configuration values about how new deltas are computed
312 312 
313 313     Some attributes are duplicated from DataConfig to help keep each object
314 314     self-contained.
315 315 """
316 316
317 317 # can delta be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328     # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate group by chunk of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class revlog:
341 341 """
342 342 the underlying revision storage object
343 343
344 344 A revlog consists of two parts, an index and the revision data.
345 345
346 346 The index is a file with a fixed record size containing
347 347 information on each revision, including its nodeid (hash), the
348 348 nodeids of its parents, the position and offset of its data within
349 349 the data file, and the revision it's based on. Finally, each entry
350 350 contains a linkrev entry that can serve as a pointer to external
351 351 data.
352 352
353 353 The revision data itself is a linear collection of data chunks.
354 354 Each chunk represents a revision and is usually represented as a
355 355 delta against the previous chunk. To bound lookup time, runs of
356 356 deltas are limited to about 2 times the length of the original
357 357 version data. This makes retrieval of a version proportional to
358 358 its size, or O(1) relative to the number of revisions.
359 359
360 360 Both pieces of the revlog are written to in an append-only
361 361 fashion, which means we never need to rewrite a file to insert or
362 362 remove data, and can use some simple techniques to avoid the need
363 363 for locking while reading.
364 364
365 365 If checkambig, indexfile is opened with checkambig=True at
366 366 writing, to avoid file stat ambiguity.
367 367
368 368 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 369 index will be mmapped rather than read if it is larger than the
370 370 configured threshold.
371 371
372 372 If censorable is True, the revlog can have censored revisions.
373 373
374 374 If `upperboundcomp` is not None, this is the expected maximal gain from
375 375 compression for the data content.
376 376
377 377 `concurrencychecker` is an optional function that receives 3 arguments: a
378 378 file handle, a filename, and an expected position. It should check whether
379 379 the current position in the file handle is valid, and log/warn/fail (by
380 380 raising).
381 381
382 382     See mercurial/revlogutils/constants.py for details about the content of an
383 383 index entry.
384 384 """
385 385
386 386 _flagserrorclass = error.RevlogError
387 387
388 388 @staticmethod
389 389 def is_inline_index(header_bytes):
390 390 """Determine if a revlog is inline from the initial bytes of the index"""
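        # Caller sketch (index_path is hypothetical): only the initial header
        # bytes are needed (four with the current INDEX_HEADER struct).
        #
        #     with open(index_path, 'rb') as fh:
        #         inline = revlog.is_inline_index(fh.read(4))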
391 391 header = INDEX_HEADER.unpack(header_bytes)[0]
392 392
393 393 _format_flags = header & ~0xFFFF
394 394 _format_version = header & 0xFFFF
395 395
396 396 features = FEATURES_BY_VERSION[_format_version]
397 397 return features[b'inline'](_format_flags)
398 398
399 399 def __init__(
400 400 self,
401 401 opener,
402 402 target,
403 403 radix,
404 404 postfix=None, # only exist for `tmpcensored` now
405 405 checkambig=False,
406 406 mmaplargeindex=False,
407 407 censorable=False,
408 408 upperboundcomp=None,
409 409 persistentnodemap=False,
410 410 concurrencychecker=None,
411 411 trypending=False,
412 412 try_split=False,
413 413 canonical_parent_order=True,
414 414 ):
415 415 """
416 416 create a revlog object
417 417
418 418 opener is a function that abstracts the file opening operation
419 419 and can be used to implement COW semantics or the like.
420 420
421 421 `target`: a (KIND, ID) tuple that identify the content stored in
422 422         this revlog. It helps the rest of the code understand what the revlog
423 423         is about without having to resort to heuristics and index filename
424 424         analysis. Note that this must reliably be set by normal code, but
425 425         test, debug, or performance measurement code might not set this to an
426 426         accurate value.
427 427 """
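        # Rough instantiation sketch (the vfs and paths are assumptions;
        # normal code goes through filelog/manifest/changelog instead):
        #
        #     rl = revlog(
        #         repo.svfs,                        # opener
        #         (KIND_FILELOG, b'foo.txt'),       # target: (kind, identifier)
        #         b'data/foo.txt',                  # radix (path without extension)
        #     )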
428 428 self.upperboundcomp = upperboundcomp
429 429
430 430 self.radix = radix
431 431
432 432 self._docket_file = None
433 433 self._indexfile = None
434 434 self._datafile = None
435 435 self._sidedatafile = None
436 436 self._nodemap_file = None
437 437 self.postfix = postfix
438 438 self._trypending = trypending
439 439 self._try_split = try_split
440 440 self.opener = opener
441 441 if persistentnodemap:
442 442 self._nodemap_file = nodemaputil.get_nodemap_file(self)
443 443
444 444 assert target[0] in ALL_KINDS
445 445 assert len(target) == 2
446 446 self.target = target
447 447 if b'feature-config' in self.opener.options:
448 448 self.feature_config = self.opener.options[b'feature-config'].copy()
449 449 else:
450 450 self.feature_config = FeatureConfig()
451 451 self.feature_config.censorable = censorable
452 452 self.feature_config.canonical_parent_order = canonical_parent_order
453 453 if b'data-config' in self.opener.options:
454 454 self.data_config = self.opener.options[b'data-config'].copy()
455 455 else:
456 456 self.data_config = DataConfig()
457 457 self.data_config.check_ambig = checkambig
458 458 self.data_config.mmap_large_index = mmaplargeindex
459 459 if b'delta-config' in self.opener.options:
460 460 self.delta_config = self.opener.options[b'delta-config'].copy()
461 461 else:
462 462 self.delta_config = DeltaConfig()
463 463
464 464 # 3-tuple of (node, rev, text) for a raw revision.
465 465 self._revisioncache = None
466 466 # Maps rev to chain base rev.
467 467 self._chainbasecache = util.lrucachedict(100)
468 468 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
469 469 self._chunkcache = (0, b'')
470 470
471 471 self.index = None
472 472 self._docket = None
473 473 self._nodemap_docket = None
474 474 # Mapping of partial identifiers to full nodes.
475 475 self._pcache = {}
476 476
477 477         # other optional features
478 478
479 479 # Make copy of flag processors so each revlog instance can support
480 480 # custom flags.
481 481 self._flagprocessors = dict(flagutil.flagprocessors)
482 482
483 483 # 3-tuple of file handles being used for active writing.
484 484 self._writinghandles = None
485 485 # prevent nesting of addgroup
486 486 self._adding_group = None
487 487
488 488 self._loadindex()
489 489
490 490 self._concurrencychecker = concurrencychecker
491 491
492 492 @property
493 493 def _generaldelta(self):
494 494 """temporary compatibility proxy"""
495 495 return self.delta_config.general_delta
496 496
497 497 @property
498 498 def _checkambig(self):
499 499 """temporary compatibility proxy"""
500 500 return self.data_config.check_ambig
501 501
502 502 @property
503 503 def _mmaplargeindex(self):
504 504 """temporary compatibility proxy"""
505 505 return self.data_config.mmap_large_index
506 506
507 507 @property
508 508 def _censorable(self):
509 509 """temporary compatibility proxy"""
510 510 return self.feature_config.censorable
511 511
512 512 @property
513 513 def _chunkcachesize(self):
514 514 """temporary compatibility proxy"""
515 515 return self.data_config.chunk_cache_size
516 516
517 517 @property
518 518 def _maxchainlen(self):
519 519 """temporary compatibility proxy"""
520 520 return self.delta_config.max_chain_len
521 521
522 522 @property
523 523 def _deltabothparents(self):
524 524 """temporary compatibility proxy"""
525 525 return self.delta_config.delta_both_parents
526 526
527 527 @property
528 528 def _candidate_group_chunk_size(self):
529 529 """temporary compatibility proxy"""
530 530 return self.delta_config.candidate_group_chunk_size
531 531
532 532 @property
533 533 def _debug_delta(self):
534 534 """temporary compatibility proxy"""
535 535 return self.delta_config.debug_delta
536 536
537 537 @property
538 538 def _compengine(self):
539 539 """temporary compatibility proxy"""
540 540 return self.feature_config.compression_engine
541 541
542 542 @property
543 543 def _compengineopts(self):
544 544 """temporary compatibility proxy"""
545 545 return self.feature_config.compression_engine_options
546 546
547 547 @property
548 548 def _maxdeltachainspan(self):
549 549 """temporary compatibility proxy"""
550 550 return self.delta_config.max_deltachain_span
551 551
552 552 @property
553 553 def _withsparseread(self):
554 554 """temporary compatibility proxy"""
555 555 return self.data_config.with_sparse_read
556 556
557 557 @property
558 558 def _sparserevlog(self):
559 559 """temporary compatibility proxy"""
560 560 return self.delta_config.sparse_revlog
561 561
562 562 @property
563 563 def hassidedata(self):
564 564 """temporary compatibility proxy"""
565 565 return self.feature_config.has_side_data
566 566
567 567 @property
568 568 def _srdensitythreshold(self):
569 569 """temporary compatibility proxy"""
570 570 return self.data_config.sr_density_threshold
571 571
572 572 @property
573 573 def _srmingapsize(self):
574 574 """temporary compatibility proxy"""
575 575 return self.data_config.sr_min_gap_size
576 576
577 577 @property
578 578 def _compute_rank(self):
579 579 """temporary compatibility proxy"""
580 580 return self.feature_config.compute_rank
581 581
582 582 @property
583 583 def canonical_parent_order(self):
584 584 """temporary compatibility proxy"""
585 585 return self.feature_config.canonical_parent_order
586 586
587 587 @property
588 588 def _lazydelta(self):
589 589 """temporary compatibility proxy"""
590 590 return self.delta_config.lazy_delta
591 591
592 592 @property
593 593 def _lazydeltabase(self):
594 594 """temporary compatibility proxy"""
595 595 return self.delta_config.lazy_delta_base
596 596
597 597 def _init_opts(self):
598 598         """process options (from above/config) to set up the associated default revlog mode
599 599 
600 600         These values might be affected when actually reading on-disk information.
601 601
602 602 The relevant values are returned for use in _loadindex().
603 603
604 604 * newversionflags:
605 605 version header to use if we need to create a new revlog
606 606
607 607 * mmapindexthreshold:
608 608             minimal index size at which to start using mmap
609 609
610 610 * force_nodemap:
611 611 force the usage of a "development" version of the nodemap code
612 612 """
613 613 mmapindexthreshold = None
614 614 opts = self.opener.options
615 615
616 616 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
617 617 new_header = CHANGELOGV2
618 618 compute_rank = opts.get(b'changelogv2.compute-rank', True)
619 619 self.feature_config.compute_rank = compute_rank
620 620 elif b'revlogv2' in opts:
621 621 new_header = REVLOGV2
622 622 elif b'revlogv1' in opts:
623 623 new_header = REVLOGV1 | FLAG_INLINE_DATA
624 624 if b'generaldelta' in opts:
625 625 new_header |= FLAG_GENERALDELTA
626 626 elif b'revlogv0' in self.opener.options:
627 627 new_header = REVLOGV0
628 628 else:
629 629 new_header = REVLOG_DEFAULT_VERSION
630 630
631 if b'chunkcachesize' in opts:
632 self.data_config.chunk_cache_size = opts[b'chunkcachesize']
633 631 if b'maxchainlen' in opts:
634 632 self.delta_config.max_chain_len = opts[b'maxchainlen']
635 633 if b'deltabothparents' in opts:
636 634 self.delta_config.delta_both_parents = opts[b'deltabothparents']
637 635 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
638 636 if dps_cgds:
639 637 self.delta_config.candidate_group_chunk_size = dps_cgds
640 638 if b'lazydelta' in opts:
641 639 self.delta_config.lazy_delta = bool(opts[b'lazydelta'])
642 640 if self._lazydelta and b'lazydeltabase' in opts:
643 641 self.delta_config.lazy_delta_base = opts[b'lazydeltabase']
644 642 if b'debug-delta' in opts:
645 643 self.delta_config.debug_delta = opts[b'debug-delta']
646 644 if b'compengine' in opts:
647 645 self.feature_config.compression_engine = opts[b'compengine']
648 646 comp_engine_opts = self.feature_config.compression_engine_options
649 647 if b'zlib.level' in opts:
650 648 comp_engine_opts[b'zlib.level'] = opts[b'zlib.level']
651 649 if b'zstd.level' in opts:
652 650 comp_engine_opts[b'zstd.level'] = opts[b'zstd.level']
653 651 if b'maxdeltachainspan' in opts:
654 652 self.delta_config.max_deltachain_span = opts[b'maxdeltachainspan']
655 653 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
656 654 mmapindexthreshold = opts[b'mmapindexthreshold']
657 655 self.data_config.mmap_index_threshold = mmapindexthreshold
658 656 if b'sparse-revlog' in opts:
659 657 self.delta_config.sparse_revlog = bool(opts[b'sparse-revlog'])
660 658 if self.delta_config.sparse_revlog:
661 659 # sparse-revlog forces sparse-read
662 660 self.data_config.with_sparse_read = True
663 661 elif b'with-sparse-read' in opts:
664 662 self.data_config.with_sparse_read = bool(opts[b'with-sparse-read'])
665 663 if b'sparse-read-density-threshold' in opts:
666 664 self.data_config.sr_density_threshold = opts[
667 665 b'sparse-read-density-threshold'
668 666 ]
669 667 if b'sparse-read-min-gap-size' in opts:
670 668 self.data_config.sr_min_gap_size = opts[b'sparse-read-min-gap-size']
671 669 if opts.get(b'enableellipsis'):
672 670 self.feature_config.enable_ellipsis = True
673 671 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
674 672
675 673 # revlog v0 doesn't have flag processors
676 674 for flag, processor in opts.get(b'flagprocessors', {}).items():
677 675 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
678 676
679 677 chunk_cache_size = self.data_config.chunk_cache_size
680 678 if chunk_cache_size <= 0:
681 679 raise error.RevlogError(
682 680 _(b'revlog chunk cache size %r is not greater than 0')
683 681 % chunk_cache_size
684 682 )
685 683 elif chunk_cache_size & (chunk_cache_size - 1):
686 684 raise error.RevlogError(
687 685 _(b'revlog chunk cache size %r is not a power of 2')
688 686 % chunk_cache_size
689 687 )
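        # The bitwise test above exploits the fact that a power of two has a
        # single bit set: for example 16384 & 16383 == 0 (accepted) while
        # 12288 & 12287 == 8192 != 0 (rejected).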
690 688 force_nodemap = opts.get(b'devel-force-nodemap', False)
691 689 return new_header, mmapindexthreshold, force_nodemap
692 690
693 691 def _get_data(self, filepath, mmap_threshold, size=None):
694 692 """return a file content with or without mmap
695 693
696 694 If the file is missing return the empty string"""
697 695 try:
698 696 with self.opener(filepath) as fp:
699 697 if mmap_threshold is not None:
700 698 file_size = self.opener.fstat(fp).st_size
701 699 if file_size >= mmap_threshold:
702 700 if size is not None:
703 701                             # avoid potential mmap crash
704 702 size = min(file_size, size)
705 703 # TODO: should .close() to release resources without
706 704 # relying on Python GC
707 705 if size is None:
708 706 return util.buffer(util.mmapread(fp))
709 707 else:
710 708 return util.buffer(util.mmapread(fp, size))
711 709 if size is None:
712 710 return fp.read()
713 711 else:
714 712 return fp.read(size)
715 713 except FileNotFoundError:
716 714 return b''
717 715
718 716 def get_streams(self, max_linkrev, force_inline=False):
719 717 """return a list of streams that represent this revlog
720 718
721 719 This is used by stream-clone to do bytes to bytes copies of a repository.
722 720
723 721 This streams data for all revisions that refer to a changelog revision up
724 722 to `max_linkrev`.
725 723
726 724 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
727 725
728 726         It returns a list of three-tuples:
729 727
730 728 [
731 729 (filename, bytes_stream, stream_size),
732 730 …
733 731 ]
734 732 """
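        # Rough usage sketch (``destination`` is a hypothetical writable file
        # object, not something provided by this module):
        #
        #   for name, stream, size in revlog.get_streams(max_linkrev):
        #       for chunk in stream:
        #           destination.write(chunk)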
735 733 n = len(self)
736 734 index = self.index
737 735 while n > 0:
738 736 linkrev = index[n - 1][4]
739 737 if linkrev < max_linkrev:
740 738 break
741 739 # note: this loop will rarely go through multiple iterations, since
742 740 # it only traverses commits created during the current streaming
743 741 # pull operation.
744 742 #
745 743         # If this becomes a problem, using a binary search should cap the
746 744 # runtime of this.
747 745 n = n - 1
748 746 if n == 0:
749 747 # no data to send
750 748 return []
751 749 index_size = n * index.entry_size
752 750 data_size = self.end(n - 1)
753 751
754 752 # XXX we might have been split (or stripped) since the object
755 753         # initialization. We need to close this race too, by having a way to
756 754         # pre-open the files we feed to the revlog and never closing them before
757 755 # we are done streaming.
758 756
759 757 if self._inline:
760 758
761 759 def get_stream():
762 760 with self._indexfp() as fp:
763 761 yield None
764 762 size = index_size + data_size
765 763 if size <= 65536:
766 764 yield fp.read(size)
767 765 else:
768 766 yield from util.filechunkiter(fp, limit=size)
769 767
770 768 inline_stream = get_stream()
771 769 next(inline_stream)
772 770 return [
773 771 (self._indexfile, inline_stream, index_size + data_size),
774 772 ]
775 773 elif force_inline:
776 774
777 775 def get_stream():
778 776 with self.reading():
779 777 yield None
780 778
781 779 for rev in range(n):
782 780 idx = self.index.entry_binary(rev)
783 781 if rev == 0 and self._docket is None:
784 782 # re-inject the inline flag
785 783 header = self._format_flags
786 784 header |= self._format_version
787 785 header |= FLAG_INLINE_DATA
788 786 header = self.index.pack_header(header)
789 787 idx = header + idx
790 788 yield idx
791 789 yield self._getsegmentforrevs(rev, rev)[1]
792 790
793 791 inline_stream = get_stream()
794 792 next(inline_stream)
795 793 return [
796 794 (self._indexfile, inline_stream, index_size + data_size),
797 795 ]
798 796 else:
799 797
800 798 def get_index_stream():
801 799 with self._indexfp() as fp:
802 800 yield None
803 801 if index_size <= 65536:
804 802 yield fp.read(index_size)
805 803 else:
806 804 yield from util.filechunkiter(fp, limit=index_size)
807 805
808 806 def get_data_stream():
809 807 with self._datafp() as fp:
810 808 yield None
811 809 if data_size <= 65536:
812 810 yield fp.read(data_size)
813 811 else:
814 812 yield from util.filechunkiter(fp, limit=data_size)
815 813
816 814 index_stream = get_index_stream()
817 815 next(index_stream)
818 816 data_stream = get_data_stream()
819 817 next(data_stream)
820 818 return [
821 819 (self._datafile, data_stream, data_size),
822 820 (self._indexfile, index_stream, index_size),
823 821 ]
824 822
825 823 def _loadindex(self, docket=None):
826 824
827 825 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
828 826
829 827 if self.postfix is not None:
830 828 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
831 829 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
832 830 entry_point = b'%s.i.a' % self.radix
833 831 elif self._try_split and self.opener.exists(self._split_index_file):
834 832 entry_point = self._split_index_file
835 833 else:
836 834 entry_point = b'%s.i' % self.radix
837 835
838 836 if docket is not None:
839 837 self._docket = docket
840 838 self._docket_file = entry_point
841 839 else:
842 840 self._initempty = True
843 841 entry_data = self._get_data(entry_point, mmapindexthreshold)
844 842 if len(entry_data) > 0:
845 843 header = INDEX_HEADER.unpack(entry_data[:4])[0]
846 844 self._initempty = False
847 845 else:
848 846 header = new_header
849 847
850 848 self._format_flags = header & ~0xFFFF
851 849 self._format_version = header & 0xFFFF
852 850
853 851 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
854 852 if supported_flags is None:
855 853 msg = _(b'unknown version (%d) in revlog %s')
856 854 msg %= (self._format_version, self.display_id)
857 855 raise error.RevlogError(msg)
858 856 elif self._format_flags & ~supported_flags:
859 857 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
860 858 display_flag = self._format_flags >> 16
861 859 msg %= (display_flag, self._format_version, self.display_id)
862 860 raise error.RevlogError(msg)
863 861
864 862 features = FEATURES_BY_VERSION[self._format_version]
865 863 self._inline = features[b'inline'](self._format_flags)
866 864 self.delta_config.general_delta = features[b'generaldelta'](
867 865 self._format_flags
868 866 )
869 867 self.feature_config.has_side_data = features[b'sidedata']
870 868
871 869 if not features[b'docket']:
872 870 self._indexfile = entry_point
873 871 index_data = entry_data
874 872 else:
875 873 self._docket_file = entry_point
876 874 if self._initempty:
877 875 self._docket = docketutil.default_docket(self, header)
878 876 else:
879 877 self._docket = docketutil.parse_docket(
880 878 self, entry_data, use_pending=self._trypending
881 879 )
882 880
883 881 if self._docket is not None:
884 882 self._indexfile = self._docket.index_filepath()
885 883 index_data = b''
886 884 index_size = self._docket.index_end
887 885 if index_size > 0:
888 886 index_data = self._get_data(
889 887 self._indexfile, mmapindexthreshold, size=index_size
890 888 )
891 889 if len(index_data) < index_size:
892 890 msg = _(b'too few index data for %s: got %d, expected %d')
893 891 msg %= (self.display_id, len(index_data), index_size)
894 892 raise error.RevlogError(msg)
895 893
896 894 self._inline = False
897 895 # generaldelta implied by version 2 revlogs.
898 896 self.delta_config.general_delta = True
899 897 # the logic for persistent nodemap will be dealt with within the
900 898 # main docket, so disable it for now.
901 899 self._nodemap_file = None
902 900
903 901 if self._docket is not None:
904 902 self._datafile = self._docket.data_filepath()
905 903 self._sidedatafile = self._docket.sidedata_filepath()
906 904 elif self.postfix is None:
907 905 self._datafile = b'%s.d' % self.radix
908 906 else:
909 907 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
910 908
911 909 self.nodeconstants = sha1nodeconstants
912 910 self.nullid = self.nodeconstants.nullid
913 911
914 912 # sparse-revlog can't be on without general-delta (issue6056)
915 913 if not self._generaldelta:
916 914 self.delta_config.sparse_revlog = False
917 915
918 916 self._storedeltachains = True
919 917
920 918 devel_nodemap = (
921 919 self._nodemap_file
922 920 and force_nodemap
923 921 and parse_index_v1_nodemap is not None
924 922 )
925 923
926 924 use_rust_index = False
927 925 if rustrevlog is not None:
928 926 if self._nodemap_file is not None:
929 927 use_rust_index = True
930 928 else:
931 929 use_rust_index = self.opener.options.get(b'rust.index')
932 930
933 931 self._parse_index = parse_index_v1
934 932 if self._format_version == REVLOGV0:
935 933 self._parse_index = revlogv0.parse_index_v0
936 934 elif self._format_version == REVLOGV2:
937 935 self._parse_index = parse_index_v2
938 936 elif self._format_version == CHANGELOGV2:
939 937 self._parse_index = parse_index_cl_v2
940 938 elif devel_nodemap:
941 939 self._parse_index = parse_index_v1_nodemap
942 940 elif use_rust_index:
943 941 self._parse_index = parse_index_v1_mixed
944 942 try:
945 943 d = self._parse_index(index_data, self._inline)
946 944 index, chunkcache = d
947 945 use_nodemap = (
948 946 not self._inline
949 947 and self._nodemap_file is not None
950 948 and hasattr(index, 'update_nodemap_data')
951 949 )
952 950 if use_nodemap:
953 951 nodemap_data = nodemaputil.persisted_data(self)
954 952 if nodemap_data is not None:
955 953 docket = nodemap_data[0]
956 954 if (
957 955 len(d[0]) > docket.tip_rev
958 956 and d[0][docket.tip_rev][7] == docket.tip_node
959 957 ):
960 958 # no changelog tampering
961 959 self._nodemap_docket = docket
962 960 index.update_nodemap_data(*nodemap_data)
963 961 except (ValueError, IndexError):
964 962 raise error.RevlogError(
965 963 _(b"index %s is corrupted") % self.display_id
966 964 )
967 965 self.index = index
968 966 self._segmentfile = randomaccessfile.randomaccessfile(
969 967 self.opener,
970 968 (self._indexfile if self._inline else self._datafile),
971 969 self._chunkcachesize,
972 970 chunkcache,
973 971 )
974 972 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
975 973 self.opener,
976 974 self._sidedatafile,
977 975 self._chunkcachesize,
978 976 )
979 977 # revnum -> (chain-length, sum-delta-length)
980 978 self._chaininfocache = util.lrucachedict(500)
981 979 # revlog header -> revlog compressor
982 980 self._decompressors = {}
983 981
984 982 def get_revlog(self):
985 983 """simple function to mirror API of other not-really-revlog API"""
986 984 return self
987 985
988 986 @util.propertycache
989 987 def revlog_kind(self):
990 988 return self.target[0]
991 989
992 990 @util.propertycache
993 991 def display_id(self):
994 992         """The public-facing "ID" of the revlog that we use in messages"""
995 993 if self.revlog_kind == KIND_FILELOG:
996 994 # Reference the file without the "data/" prefix, so it is familiar
997 995 # to the user.
998 996 return self.target[1]
999 997 else:
1000 998 return self.radix
1001 999
1002 1000 def _get_decompressor(self, t):
1003 1001 try:
1004 1002 compressor = self._decompressors[t]
1005 1003 except KeyError:
1006 1004 try:
1007 1005 engine = util.compengines.forrevlogheader(t)
1008 1006 compressor = engine.revlogcompressor(self._compengineopts)
1009 1007 self._decompressors[t] = compressor
1010 1008 except KeyError:
1011 1009 raise error.RevlogError(
1012 1010 _(b'unknown compression type %s') % binascii.hexlify(t)
1013 1011 )
1014 1012 return compressor
1015 1013
1016 1014 @util.propertycache
1017 1015 def _compressor(self):
1018 1016 engine = util.compengines[self._compengine]
1019 1017 return engine.revlogcompressor(self._compengineopts)
1020 1018
1021 1019 @util.propertycache
1022 1020 def _decompressor(self):
1023 1021 """the default decompressor"""
1024 1022 if self._docket is None:
1025 1023 return None
1026 1024 t = self._docket.default_compression_header
1027 1025 c = self._get_decompressor(t)
1028 1026 return c.decompress
1029 1027
1030 1028 def _indexfp(self):
1031 1029 """file object for the revlog's index file"""
1032 1030 return self.opener(self._indexfile, mode=b"r")
1033 1031
1034 1032 def __index_write_fp(self):
1035 1033 # You should not use this directly and use `_writing` instead
1036 1034 try:
1037 1035 f = self.opener(
1038 1036 self._indexfile, mode=b"r+", checkambig=self._checkambig
1039 1037 )
1040 1038 if self._docket is None:
1041 1039 f.seek(0, os.SEEK_END)
1042 1040 else:
1043 1041 f.seek(self._docket.index_end, os.SEEK_SET)
1044 1042 return f
1045 1043 except FileNotFoundError:
1046 1044 return self.opener(
1047 1045 self._indexfile, mode=b"w+", checkambig=self._checkambig
1048 1046 )
1049 1047
1050 1048 def __index_new_fp(self):
1051 1049         # You should not use this unless you are upgrading from an inline revlog
1052 1050 return self.opener(
1053 1051 self._indexfile,
1054 1052 mode=b"w",
1055 1053 checkambig=self._checkambig,
1056 1054 atomictemp=True,
1057 1055 )
1058 1056
1059 1057 def _datafp(self, mode=b'r'):
1060 1058 """file object for the revlog's data file"""
1061 1059 return self.opener(self._datafile, mode=mode)
1062 1060
1063 1061 @contextlib.contextmanager
1064 1062 def _sidedatareadfp(self):
1065 1063 """file object suitable to read sidedata"""
1066 1064 if self._writinghandles:
1067 1065 yield self._writinghandles[2]
1068 1066 else:
1069 1067 with self.opener(self._sidedatafile) as fp:
1070 1068 yield fp
1071 1069
1072 1070 def tiprev(self):
1073 1071 return len(self.index) - 1
1074 1072
1075 1073 def tip(self):
1076 1074 return self.node(self.tiprev())
1077 1075
1078 1076 def __contains__(self, rev):
1079 1077 return 0 <= rev < len(self)
1080 1078
1081 1079 def __len__(self):
1082 1080 return len(self.index)
1083 1081
1084 1082 def __iter__(self):
1085 1083 return iter(range(len(self)))
1086 1084
1087 1085 def revs(self, start=0, stop=None):
1088 1086         """iterate over all revs in this revlog (from start to stop)"""
1089 1087 return storageutil.iterrevs(len(self), start=start, stop=stop)
1090 1088
1091 1089 def hasnode(self, node):
1092 1090 try:
1093 1091 self.rev(node)
1094 1092 return True
1095 1093 except KeyError:
1096 1094 return False
1097 1095
1098 1096 def _candelta(self, baserev, rev):
1099 1097 """whether two revisions (baserev, rev) can be delta-ed or not"""
1100 1098 # Disable delta if either rev requires a content-changing flag
1101 1099 # processor (ex. LFS). This is because such flag processor can alter
1102 1100 # the rawtext content that the delta will be based on, and two clients
1103 1101 # could have a same revlog node with different flags (i.e. different
1104 1102 # rawtext contents) and the delta could be incompatible.
1105 1103 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1106 1104 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1107 1105 ):
1108 1106 return False
1109 1107 return True
1110 1108
1111 1109 def update_caches(self, transaction):
1112 1110         """update the on-disk cache
1113 1111
1114 1112 If a transaction is passed, the update may be delayed to transaction
1115 1113 commit."""
1116 1114 if self._nodemap_file is not None:
1117 1115 if transaction is None:
1118 1116 nodemaputil.update_persistent_nodemap(self)
1119 1117 else:
1120 1118 nodemaputil.setup_persistent_nodemap(transaction, self)
1121 1119
1122 1120 def clearcaches(self):
1123 1121 """Clear in-memory caches"""
1124 1122 self._revisioncache = None
1125 1123 self._chainbasecache.clear()
1126 1124 self._segmentfile.clear_cache()
1127 1125 self._segmentfile_sidedata.clear_cache()
1128 1126 self._pcache = {}
1129 1127 self._nodemap_docket = None
1130 1128 self.index.clearcaches()
1131 1129         # The python code is the one responsible for validating the docket, so we
1132 1130         # end up having to refresh it here.
1133 1131 use_nodemap = (
1134 1132 not self._inline
1135 1133 and self._nodemap_file is not None
1136 1134 and hasattr(self.index, 'update_nodemap_data')
1137 1135 )
1138 1136 if use_nodemap:
1139 1137 nodemap_data = nodemaputil.persisted_data(self)
1140 1138 if nodemap_data is not None:
1141 1139 self._nodemap_docket = nodemap_data[0]
1142 1140 self.index.update_nodemap_data(*nodemap_data)
1143 1141
1144 1142 def rev(self, node):
1145 1143 """return the revision number associated with a <nodeid>"""
1146 1144 try:
1147 1145 return self.index.rev(node)
1148 1146 except TypeError:
1149 1147 raise
1150 1148 except error.RevlogError:
1151 1149 # parsers.c radix tree lookup failed
1152 1150 if (
1153 1151 node == self.nodeconstants.wdirid
1154 1152 or node in self.nodeconstants.wdirfilenodeids
1155 1153 ):
1156 1154 raise error.WdirUnsupported
1157 1155 raise error.LookupError(node, self.display_id, _(b'no node'))
1158 1156
1159 1157 # Accessors for index entries.
1160 1158
1161 1159 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1162 1160 # are flags.
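    # In other words the first field packs both values as
    # ``(offset << 16) | flags``: ``start()`` recovers the offset with a right
    # shift and ``flags()`` masks the low 16 bits.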
1163 1161 def start(self, rev):
1164 1162 return int(self.index[rev][0] >> 16)
1165 1163
1166 1164 def sidedata_cut_off(self, rev):
1167 1165 sd_cut_off = self.index[rev][8]
1168 1166 if sd_cut_off != 0:
1169 1167 return sd_cut_off
1170 1168 # This is some annoying dance, because entries without sidedata
1171 1169         # currently use 0 as their offset. (instead of previous-offset +
1172 1170         # previous-size)
1173 1171         #
1174 1172         # We should reconsider this sidedata → 0 sidedata_offset policy.
1175 1173 # In the meantime, we need this.
1176 1174 while 0 <= rev:
1177 1175 e = self.index[rev]
1178 1176 if e[9] != 0:
1179 1177 return e[8] + e[9]
1180 1178 rev -= 1
1181 1179 return 0
1182 1180
1183 1181 def flags(self, rev):
1184 1182 return self.index[rev][0] & 0xFFFF
1185 1183
1186 1184 def length(self, rev):
1187 1185 return self.index[rev][1]
1188 1186
1189 1187 def sidedata_length(self, rev):
1190 1188 if not self.hassidedata:
1191 1189 return 0
1192 1190 return self.index[rev][9]
1193 1191
1194 1192 def rawsize(self, rev):
1195 1193 """return the length of the uncompressed text for a given revision"""
1196 1194 l = self.index[rev][2]
1197 1195 if l >= 0:
1198 1196 return l
1199 1197
1200 1198 t = self.rawdata(rev)
1201 1199 return len(t)
1202 1200
1203 1201 def size(self, rev):
1204 1202 """length of non-raw text (processed by a "read" flag processor)"""
1205 1203 # fast path: if no "read" flag processor could change the content,
1206 1204 # size is rawsize. note: ELLIPSIS is known to not change the content.
1207 1205 flags = self.flags(rev)
1208 1206 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1209 1207 return self.rawsize(rev)
1210 1208
1211 1209 return len(self.revision(rev))
1212 1210
1213 1211 def fast_rank(self, rev):
1214 1212 """Return the rank of a revision if already known, or None otherwise.
1215 1213
1216 1214 The rank of a revision is the size of the sub-graph it defines as a
1217 1215 head. Equivalently, the rank of a revision `r` is the size of the set
1218 1216 `ancestors(r)`, `r` included.
1219 1217
1220 1218 This method returns the rank retrieved from the revlog in constant
1221 1219 time. It makes no attempt at computing unknown values for versions of
1222 1220 the revlog which do not persist the rank.
1223 1221 """
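        # Worked example: in a linear history 0 <- 1 <- 2 stored in a
        # changelog-v2 revlog that persists ranks, fast_rank(2) would be 3,
        # i.e. the size of {0, 1, 2}; on formats without stored ranks this
        # method returns None instead.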
1224 1222 rank = self.index[rev][ENTRY_RANK]
1225 1223 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1226 1224 return None
1227 1225 if rev == nullrev:
1228 1226 return 0 # convention
1229 1227 return rank
1230 1228
1231 1229 def chainbase(self, rev):
1232 1230 base = self._chainbasecache.get(rev)
1233 1231 if base is not None:
1234 1232 return base
1235 1233
1236 1234 index = self.index
1237 1235 iterrev = rev
1238 1236 base = index[iterrev][3]
1239 1237 while base != iterrev:
1240 1238 iterrev = base
1241 1239 base = index[iterrev][3]
1242 1240
1243 1241 self._chainbasecache[rev] = base
1244 1242 return base
1245 1243
1246 1244 def linkrev(self, rev):
1247 1245 return self.index[rev][4]
1248 1246
1249 1247 def parentrevs(self, rev):
1250 1248 try:
1251 1249 entry = self.index[rev]
1252 1250 except IndexError:
1253 1251 if rev == wdirrev:
1254 1252 raise error.WdirUnsupported
1255 1253 raise
1256 1254
1257 1255 if self.canonical_parent_order and entry[5] == nullrev:
1258 1256 return entry[6], entry[5]
1259 1257 else:
1260 1258 return entry[5], entry[6]
1261 1259
1262 1260 # fast parentrevs(rev) where rev isn't filtered
1263 1261 _uncheckedparentrevs = parentrevs
1264 1262
1265 1263 def node(self, rev):
1266 1264 try:
1267 1265 return self.index[rev][7]
1268 1266 except IndexError:
1269 1267 if rev == wdirrev:
1270 1268 raise error.WdirUnsupported
1271 1269 raise
1272 1270
1273 1271 # Derived from index values.
1274 1272
1275 1273 def end(self, rev):
1276 1274 return self.start(rev) + self.length(rev)
1277 1275
1278 1276 def parents(self, node):
1279 1277 i = self.index
1280 1278 d = i[self.rev(node)]
1281 1279 # inline node() to avoid function call overhead
1282 1280 if self.canonical_parent_order and d[5] == self.nullid:
1283 1281 return i[d[6]][7], i[d[5]][7]
1284 1282 else:
1285 1283 return i[d[5]][7], i[d[6]][7]
1286 1284
1287 1285 def chainlen(self, rev):
1288 1286 return self._chaininfo(rev)[0]
1289 1287
1290 1288 def _chaininfo(self, rev):
1291 1289 chaininfocache = self._chaininfocache
1292 1290 if rev in chaininfocache:
1293 1291 return chaininfocache[rev]
1294 1292 index = self.index
1295 1293 generaldelta = self._generaldelta
1296 1294 iterrev = rev
1297 1295 e = index[iterrev]
1298 1296 clen = 0
1299 1297 compresseddeltalen = 0
1300 1298 while iterrev != e[3]:
1301 1299 clen += 1
1302 1300 compresseddeltalen += e[1]
1303 1301 if generaldelta:
1304 1302 iterrev = e[3]
1305 1303 else:
1306 1304 iterrev -= 1
1307 1305 if iterrev in chaininfocache:
1308 1306 t = chaininfocache[iterrev]
1309 1307 clen += t[0]
1310 1308 compresseddeltalen += t[1]
1311 1309 break
1312 1310 e = index[iterrev]
1313 1311 else:
1314 1312 # Add text length of base since decompressing that also takes
1315 1313 # work. For cache hits the length is already included.
1316 1314 compresseddeltalen += e[1]
1317 1315 r = (clen, compresseddeltalen)
1318 1316 chaininfocache[rev] = r
1319 1317 return r
1320 1318
1321 1319 def _deltachain(self, rev, stoprev=None):
1322 1320 """Obtain the delta chain for a revision.
1323 1321
1324 1322 ``stoprev`` specifies a revision to stop at. If not specified, we
1325 1323 stop at the base of the chain.
1326 1324
1327 1325 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1328 1326 revs in ascending order and ``stopped`` is a bool indicating whether
1329 1327 ``stoprev`` was hit.
1330 1328 """
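        # For instance, with general delta and revision 9 stored as a delta
        # against 5, itself a delta against the chain base 2, this would
        # return ([2, 5, 9], False); with ``stoprev=5`` it would return
        # ([9], True).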
1331 1329 # Try C implementation.
1332 1330 try:
1333 1331 return self.index.deltachain(rev, stoprev, self._generaldelta)
1334 1332 except AttributeError:
1335 1333 pass
1336 1334
1337 1335 chain = []
1338 1336
1339 1337 # Alias to prevent attribute lookup in tight loop.
1340 1338 index = self.index
1341 1339 generaldelta = self._generaldelta
1342 1340
1343 1341 iterrev = rev
1344 1342 e = index[iterrev]
1345 1343 while iterrev != e[3] and iterrev != stoprev:
1346 1344 chain.append(iterrev)
1347 1345 if generaldelta:
1348 1346 iterrev = e[3]
1349 1347 else:
1350 1348 iterrev -= 1
1351 1349 e = index[iterrev]
1352 1350
1353 1351 if iterrev == stoprev:
1354 1352 stopped = True
1355 1353 else:
1356 1354 chain.append(iterrev)
1357 1355 stopped = False
1358 1356
1359 1357 chain.reverse()
1360 1358 return chain, stopped
1361 1359
1362 1360 def ancestors(self, revs, stoprev=0, inclusive=False):
1363 1361 """Generate the ancestors of 'revs' in reverse revision order.
1364 1362 Does not generate revs lower than stoprev.
1365 1363
1366 1364 See the documentation for ancestor.lazyancestors for more details."""
1367 1365
1368 1366 # first, make sure start revisions aren't filtered
1369 1367 revs = list(revs)
1370 1368 checkrev = self.node
1371 1369 for r in revs:
1372 1370 checkrev(r)
1373 1371 # and we're sure ancestors aren't filtered as well
1374 1372
1375 1373 if rustancestor is not None and self.index.rust_ext_compat:
1376 1374 lazyancestors = rustancestor.LazyAncestors
1377 1375 arg = self.index
1378 1376 else:
1379 1377 lazyancestors = ancestor.lazyancestors
1380 1378 arg = self._uncheckedparentrevs
1381 1379 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1382 1380
1383 1381 def descendants(self, revs):
1384 1382 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1385 1383
1386 1384 def findcommonmissing(self, common=None, heads=None):
1387 1385 """Return a tuple of the ancestors of common and the ancestors of heads
1388 1386 that are not ancestors of common. In revset terminology, we return the
1389 1387 tuple:
1390 1388
1391 1389 ::common, (::heads) - (::common)
1392 1390
1393 1391 The list is sorted by revision number, meaning it is
1394 1392 topologically sorted.
1395 1393
1396 1394 'heads' and 'common' are both lists of node IDs. If heads is
1397 1395 not supplied, uses all of the revlog's heads. If common is not
1398 1396 supplied, uses nullid."""
1399 1397 if common is None:
1400 1398 common = [self.nullid]
1401 1399 if heads is None:
1402 1400 heads = self.heads()
1403 1401
1404 1402 common = [self.rev(n) for n in common]
1405 1403 heads = [self.rev(n) for n in heads]
1406 1404
1407 1405 # we want the ancestors, but inclusive
1408 1406 class lazyset:
1409 1407 def __init__(self, lazyvalues):
1410 1408 self.addedvalues = set()
1411 1409 self.lazyvalues = lazyvalues
1412 1410
1413 1411 def __contains__(self, value):
1414 1412 return value in self.addedvalues or value in self.lazyvalues
1415 1413
1416 1414 def __iter__(self):
1417 1415 added = self.addedvalues
1418 1416 for r in added:
1419 1417 yield r
1420 1418 for r in self.lazyvalues:
1421 1419 if not r in added:
1422 1420 yield r
1423 1421
1424 1422 def add(self, value):
1425 1423 self.addedvalues.add(value)
1426 1424
1427 1425 def update(self, values):
1428 1426 self.addedvalues.update(values)
1429 1427
1430 1428 has = lazyset(self.ancestors(common))
1431 1429 has.add(nullrev)
1432 1430 has.update(common)
1433 1431
1434 1432 # take all ancestors from heads that aren't in has
1435 1433 missing = set()
1436 1434 visit = collections.deque(r for r in heads if r not in has)
1437 1435 while visit:
1438 1436 r = visit.popleft()
1439 1437 if r in missing:
1440 1438 continue
1441 1439 else:
1442 1440 missing.add(r)
1443 1441 for p in self.parentrevs(r):
1444 1442 if p not in has:
1445 1443 visit.append(p)
1446 1444 missing = list(missing)
1447 1445 missing.sort()
1448 1446 return has, [self.node(miss) for miss in missing]
1449 1447
1450 1448 def incrementalmissingrevs(self, common=None):
1451 1449 """Return an object that can be used to incrementally compute the
1452 1450 revision numbers of the ancestors of arbitrary sets that are not
1453 1451 ancestors of common. This is an ancestor.incrementalmissingancestors
1454 1452 object.
1455 1453
1456 1454 'common' is a list of revision numbers. If common is not supplied, uses
1457 1455 nullrev.
1458 1456 """
1459 1457 if common is None:
1460 1458 common = [nullrev]
1461 1459
1462 1460 if rustancestor is not None and self.index.rust_ext_compat:
1463 1461 return rustancestor.MissingAncestors(self.index, common)
1464 1462 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1465 1463
1466 1464 def findmissingrevs(self, common=None, heads=None):
1467 1465 """Return the revision numbers of the ancestors of heads that
1468 1466 are not ancestors of common.
1469 1467
1470 1468 More specifically, return a list of revision numbers corresponding to
1471 1469 nodes N such that every N satisfies the following constraints:
1472 1470
1473 1471 1. N is an ancestor of some node in 'heads'
1474 1472 2. N is not an ancestor of any node in 'common'
1475 1473
1476 1474 The list is sorted by revision number, meaning it is
1477 1475 topologically sorted.
1478 1476
1479 1477 'heads' and 'common' are both lists of revision numbers. If heads is
1480 1478 not supplied, uses all of the revlog's heads. If common is not
1481 1479 supplied, uses nullid."""
1482 1480 if common is None:
1483 1481 common = [nullrev]
1484 1482 if heads is None:
1485 1483 heads = self.headrevs()
1486 1484
1487 1485 inc = self.incrementalmissingrevs(common=common)
1488 1486 return inc.missingancestors(heads)
1489 1487
1490 1488 def findmissing(self, common=None, heads=None):
1491 1489 """Return the ancestors of heads that are not ancestors of common.
1492 1490
1493 1491 More specifically, return a list of nodes N such that every N
1494 1492 satisfies the following constraints:
1495 1493
1496 1494 1. N is an ancestor of some node in 'heads'
1497 1495 2. N is not an ancestor of any node in 'common'
1498 1496
1499 1497 The list is sorted by revision number, meaning it is
1500 1498 topologically sorted.
1501 1499
1502 1500 'heads' and 'common' are both lists of node IDs. If heads is
1503 1501 not supplied, uses all of the revlog's heads. If common is not
1504 1502 supplied, uses nullid."""
1505 1503 if common is None:
1506 1504 common = [self.nullid]
1507 1505 if heads is None:
1508 1506 heads = self.heads()
1509 1507
1510 1508 common = [self.rev(n) for n in common]
1511 1509 heads = [self.rev(n) for n in heads]
1512 1510
1513 1511 inc = self.incrementalmissingrevs(common=common)
1514 1512 return [self.node(r) for r in inc.missingancestors(heads)]
1515 1513
1516 1514 def nodesbetween(self, roots=None, heads=None):
1517 1515 """Return a topological path from 'roots' to 'heads'.
1518 1516
1519 1517 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1520 1518 topologically sorted list of all nodes N that satisfy both of
1521 1519 these constraints:
1522 1520
1523 1521 1. N is a descendant of some node in 'roots'
1524 1522 2. N is an ancestor of some node in 'heads'
1525 1523
1526 1524 Every node is considered to be both a descendant and an ancestor
1527 1525 of itself, so every reachable node in 'roots' and 'heads' will be
1528 1526 included in 'nodes'.
1529 1527
1530 1528 'outroots' is the list of reachable nodes in 'roots', i.e., the
1531 1529 subset of 'roots' that is returned in 'nodes'. Likewise,
1532 1530 'outheads' is the subset of 'heads' that is also in 'nodes'.
1533 1531
1534 1532 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1535 1533 unspecified, uses nullid as the only root. If 'heads' is
1536 1534 unspecified, uses list of all of the revlog's heads."""
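        # Small illustration: in a linear history A <- B <- C,
        # nodesbetween(roots=[A], heads=[C]) would return ([A, B, C], [A], [C])
        # per the contract described above.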
1537 1535 nonodes = ([], [], [])
1538 1536 if roots is not None:
1539 1537 roots = list(roots)
1540 1538 if not roots:
1541 1539 return nonodes
1542 1540 lowestrev = min([self.rev(n) for n in roots])
1543 1541 else:
1544 1542 roots = [self.nullid] # Everybody's a descendant of nullid
1545 1543 lowestrev = nullrev
1546 1544 if (lowestrev == nullrev) and (heads is None):
1547 1545 # We want _all_ the nodes!
1548 1546 return (
1549 1547 [self.node(r) for r in self],
1550 1548 [self.nullid],
1551 1549 list(self.heads()),
1552 1550 )
1553 1551 if heads is None:
1554 1552 # All nodes are ancestors, so the latest ancestor is the last
1555 1553 # node.
1556 1554 highestrev = len(self) - 1
1557 1555 # Set ancestors to None to signal that every node is an ancestor.
1558 1556 ancestors = None
1559 1557 # Set heads to an empty dictionary for later discovery of heads
1560 1558 heads = {}
1561 1559 else:
1562 1560 heads = list(heads)
1563 1561 if not heads:
1564 1562 return nonodes
1565 1563 ancestors = set()
1566 1564 # Turn heads into a dictionary so we can remove 'fake' heads.
1567 1565 # Also, later we will be using it to filter out the heads we can't
1568 1566 # find from roots.
1569 1567 heads = dict.fromkeys(heads, False)
1570 1568 # Start at the top and keep marking parents until we're done.
1571 1569 nodestotag = set(heads)
1572 1570 # Remember where the top was so we can use it as a limit later.
1573 1571 highestrev = max([self.rev(n) for n in nodestotag])
1574 1572 while nodestotag:
1575 1573 # grab a node to tag
1576 1574 n = nodestotag.pop()
1577 1575 # Never tag nullid
1578 1576 if n == self.nullid:
1579 1577 continue
1580 1578 # A node's revision number represents its place in a
1581 1579 # topologically sorted list of nodes.
1582 1580 r = self.rev(n)
1583 1581 if r >= lowestrev:
1584 1582 if n not in ancestors:
1585 1583 # If we are possibly a descendant of one of the roots
1586 1584 # and we haven't already been marked as an ancestor
1587 1585 ancestors.add(n) # Mark as ancestor
1588 1586 # Add non-nullid parents to list of nodes to tag.
1589 1587 nodestotag.update(
1590 1588 [p for p in self.parents(n) if p != self.nullid]
1591 1589 )
1592 1590 elif n in heads: # We've seen it before, is it a fake head?
1593 1591 # So it is, real heads should not be the ancestors of
1594 1592 # any other heads.
1595 1593 heads.pop(n)
1596 1594 if not ancestors:
1597 1595 return nonodes
1598 1596 # Now that we have our set of ancestors, we want to remove any
1599 1597 # roots that are not ancestors.
1600 1598
1601 1599 # If one of the roots was nullid, everything is included anyway.
1602 1600 if lowestrev > nullrev:
1603 1601 # But, since we weren't, let's recompute the lowest rev to not
1604 1602 # include roots that aren't ancestors.
1605 1603
1606 1604 # Filter out roots that aren't ancestors of heads
1607 1605 roots = [root for root in roots if root in ancestors]
1608 1606 # Recompute the lowest revision
1609 1607 if roots:
1610 1608 lowestrev = min([self.rev(root) for root in roots])
1611 1609 else:
1612 1610 # No more roots? Return empty list
1613 1611 return nonodes
1614 1612 else:
1615 1613 # We are descending from nullid, and don't need to care about
1616 1614 # any other roots.
1617 1615 lowestrev = nullrev
1618 1616 roots = [self.nullid]
1619 1617 # Transform our roots list into a set.
1620 1618 descendants = set(roots)
1621 1619 # Also, keep the original roots so we can filter out roots that aren't
1622 1620 # 'real' roots (i.e. are descended from other roots).
1623 1621 roots = descendants.copy()
1624 1622 # Our topologically sorted list of output nodes.
1625 1623 orderedout = []
1626 1624 # Don't start at nullid since we don't want nullid in our output list,
1627 1625 # and if nullid shows up in descendants, empty parents will look like
1628 1626 # they're descendants.
1629 1627 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1630 1628 n = self.node(r)
1631 1629 isdescendant = False
1632 1630 if lowestrev == nullrev: # Everybody is a descendant of nullid
1633 1631 isdescendant = True
1634 1632 elif n in descendants:
1635 1633 # n is already a descendant
1636 1634 isdescendant = True
1637 1635 # This check only needs to be done here because all the roots
1638 1636             # will start being marked as descendants before the loop.
1639 1637 if n in roots:
1640 1638 # If n was a root, check if it's a 'real' root.
1641 1639 p = tuple(self.parents(n))
1642 1640 # If any of its parents are descendants, it's not a root.
1643 1641 if (p[0] in descendants) or (p[1] in descendants):
1644 1642 roots.remove(n)
1645 1643 else:
1646 1644 p = tuple(self.parents(n))
1647 1645 # A node is a descendant if either of its parents are
1648 1646                 # descendants. (We seeded the descendants set with the roots
1649 1647 # up there, remember?)
1650 1648 if (p[0] in descendants) or (p[1] in descendants):
1651 1649 descendants.add(n)
1652 1650 isdescendant = True
1653 1651 if isdescendant and ((ancestors is None) or (n in ancestors)):
1654 1652 # Only include nodes that are both descendants and ancestors.
1655 1653 orderedout.append(n)
1656 1654 if (ancestors is not None) and (n in heads):
1657 1655 # We're trying to figure out which heads are reachable
1658 1656 # from roots.
1659 1657 # Mark this head as having been reached
1660 1658 heads[n] = True
1661 1659 elif ancestors is None:
1662 1660 # Otherwise, we're trying to discover the heads.
1663 1661 # Assume this is a head because if it isn't, the next step
1664 1662 # will eventually remove it.
1665 1663 heads[n] = True
1666 1664 # But, obviously its parents aren't.
1667 1665 for p in self.parents(n):
1668 1666 heads.pop(p, None)
1669 1667 heads = [head for head, flag in heads.items() if flag]
1670 1668 roots = list(roots)
1671 1669 assert orderedout
1672 1670 assert roots
1673 1671 assert heads
1674 1672 return (orderedout, roots, heads)
1675 1673
1676 1674 def headrevs(self, revs=None):
1677 1675 if revs is None:
1678 1676 try:
1679 1677 return self.index.headrevs()
1680 1678 except AttributeError:
1681 1679 return self._headrevs()
1682 1680 if rustdagop is not None and self.index.rust_ext_compat:
1683 1681 return rustdagop.headrevs(self.index, revs)
1684 1682 return dagop.headrevs(revs, self._uncheckedparentrevs)
1685 1683
1686 1684 def computephases(self, roots):
1687 1685 return self.index.computephasesmapsets(roots)
1688 1686
1689 1687 def _headrevs(self):
1690 1688 count = len(self)
1691 1689 if not count:
1692 1690 return [nullrev]
1693 1691 # we won't iter over filtered rev so nobody is a head at start
1694 1692 ishead = [0] * (count + 1)
1695 1693 index = self.index
1696 1694 for r in self:
1697 1695             ishead[r] = 1  # I may be a head
1698 1696             e = index[r]
1699 1697             ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
1700 1698 return [r for r, val in enumerate(ishead) if val]
1701 1699
1702 1700 def heads(self, start=None, stop=None):
1703 1701 """return the list of all nodes that have no children
1704 1702
1705 1703 if start is specified, only heads that are descendants of
1706 1704 start will be returned
1707 1705 if stop is specified, it will consider all the revs from stop
1708 1706 as if they had no children
1709 1707 """
1710 1708 if start is None and stop is None:
1711 1709 if not len(self):
1712 1710 return [self.nullid]
1713 1711 return [self.node(r) for r in self.headrevs()]
1714 1712
1715 1713 if start is None:
1716 1714 start = nullrev
1717 1715 else:
1718 1716 start = self.rev(start)
1719 1717
1720 1718 stoprevs = {self.rev(n) for n in stop or []}
1721 1719
1722 1720 revs = dagop.headrevssubset(
1723 1721 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1724 1722 )
1725 1723
1726 1724 return [self.node(rev) for rev in revs]
1727 1725
1728 1726 def children(self, node):
1729 1727 """find the children of a given node"""
1730 1728 c = []
1731 1729 p = self.rev(node)
1732 1730 for r in self.revs(start=p + 1):
1733 1731 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1734 1732 if prevs:
1735 1733 for pr in prevs:
1736 1734 if pr == p:
1737 1735 c.append(self.node(r))
1738 1736 elif p == nullrev:
1739 1737 c.append(self.node(r))
1740 1738 return c
1741 1739
1742 1740 def commonancestorsheads(self, a, b):
1743 1741 """calculate all the heads of the common ancestors of nodes a and b"""
1744 1742 a, b = self.rev(a), self.rev(b)
1745 1743 ancs = self._commonancestorsheads(a, b)
1746 1744 return pycompat.maplist(self.node, ancs)
1747 1745
1748 1746 def _commonancestorsheads(self, *revs):
1749 1747 """calculate all the heads of the common ancestors of revs"""
1750 1748 try:
1751 1749 ancs = self.index.commonancestorsheads(*revs)
1752 1750 except (AttributeError, OverflowError): # C implementation failed
1753 1751 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1754 1752 return ancs
1755 1753
1756 1754 def isancestor(self, a, b):
1757 1755 """return True if node a is an ancestor of node b
1758 1756
1759 1757 A revision is considered an ancestor of itself."""
1760 1758 a, b = self.rev(a), self.rev(b)
1761 1759 return self.isancestorrev(a, b)
1762 1760
1763 1761 def isancestorrev(self, a, b):
1764 1762 """return True if revision a is an ancestor of revision b
1765 1763
1766 1764 A revision is considered an ancestor of itself.
1767 1765
1768 1766 The implementation of this is trivial but the use of
1769 1767 reachableroots is not."""
1770 1768 if a == nullrev:
1771 1769 return True
1772 1770 elif a == b:
1773 1771 return True
1774 1772 elif a > b:
1775 1773 return False
1776 1774 return bool(self.reachableroots(a, [b], [a], includepath=False))
1777 1775
1778 1776 def reachableroots(self, minroot, heads, roots, includepath=False):
1779 1777 """return (heads(::(<roots> and <roots>::<heads>)))
1780 1778
1781 1779 If includepath is True, return (<roots>::<heads>)."""
1782 1780 try:
1783 1781 return self.index.reachableroots2(
1784 1782 minroot, heads, roots, includepath
1785 1783 )
1786 1784 except AttributeError:
1787 1785 return dagop._reachablerootspure(
1788 1786 self.parentrevs, minroot, roots, heads, includepath
1789 1787 )
1790 1788
1791 1789 def ancestor(self, a, b):
1792 1790 """calculate the "best" common ancestor of nodes a and b"""
1793 1791
1794 1792 a, b = self.rev(a), self.rev(b)
1795 1793 try:
1796 1794 ancs = self.index.ancestors(a, b)
1797 1795 except (AttributeError, OverflowError):
1798 1796 ancs = ancestor.ancestors(self.parentrevs, a, b)
1799 1797 if ancs:
1800 1798 # choose a consistent winner when there's a tie
1801 1799 return min(map(self.node, ancs))
1802 1800 return self.nullid
1803 1801
1804 1802 def _match(self, id):
1805 1803 if isinstance(id, int):
1806 1804 # rev
1807 1805 return self.node(id)
1808 1806 if len(id) == self.nodeconstants.nodelen:
1809 1807 # possibly a binary node
1810 1808 # odds of a binary node being all hex in ASCII are 1 in 10**25
1811 1809 try:
1812 1810 node = id
1813 1811 self.rev(node) # quick search the index
1814 1812 return node
1815 1813 except error.LookupError:
1816 1814 pass # may be partial hex id
1817 1815 try:
1818 1816 # str(rev)
1819 1817 rev = int(id)
1820 1818 if b"%d" % rev != id:
1821 1819 raise ValueError
1822 1820 if rev < 0:
1823 1821 rev = len(self) + rev
1824 1822 if rev < 0 or rev >= len(self):
1825 1823 raise ValueError
1826 1824 return self.node(rev)
1827 1825 except (ValueError, OverflowError):
1828 1826 pass
1829 1827 if len(id) == 2 * self.nodeconstants.nodelen:
1830 1828 try:
1831 1829 # a full hex nodeid?
1832 1830 node = bin(id)
1833 1831 self.rev(node)
1834 1832 return node
1835 1833 except (binascii.Error, error.LookupError):
1836 1834 pass
1837 1835
1838 1836 def _partialmatch(self, id):
1839 1837         # we don't care about wdirfilenodeids as they should always be full hashes
1840 1838 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1841 1839 ambiguous = False
1842 1840 try:
1843 1841 partial = self.index.partialmatch(id)
1844 1842 if partial and self.hasnode(partial):
1845 1843 if maybewdir:
1846 1844 # single 'ff...' match in radix tree, ambiguous with wdir
1847 1845 ambiguous = True
1848 1846 else:
1849 1847 return partial
1850 1848 elif maybewdir:
1851 1849 # no 'ff...' match in radix tree, wdir identified
1852 1850 raise error.WdirUnsupported
1853 1851 else:
1854 1852 return None
1855 1853 except error.RevlogError:
1856 1854 # parsers.c radix tree lookup gave multiple matches
1857 1855 # fast path: for unfiltered changelog, radix tree is accurate
1858 1856 if not getattr(self, 'filteredrevs', None):
1859 1857 ambiguous = True
1860 1858 # fall through to slow path that filters hidden revisions
1861 1859 except (AttributeError, ValueError):
1862 1860 # we are pure python, or key is not hex
1863 1861 pass
1864 1862 if ambiguous:
1865 1863 raise error.AmbiguousPrefixLookupError(
1866 1864 id, self.display_id, _(b'ambiguous identifier')
1867 1865 )
1868 1866
1869 1867 if id in self._pcache:
1870 1868 return self._pcache[id]
1871 1869
1872 1870 if len(id) <= 40:
1873 1871 # hex(node)[:...]
1874 1872 l = len(id) // 2 * 2 # grab an even number of digits
1875 1873 try:
1876 1874 # we're dropping the last digit, so let's check that it's hex,
1877 1875 # to avoid the expensive computation below if it's not
1878 1876 if len(id) % 2 > 0:
1879 1877 if not (id[-1] in hexdigits):
1880 1878 return None
1881 1879 prefix = bin(id[:l])
1882 1880 except binascii.Error:
1883 1881 pass
1884 1882 else:
1885 1883 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1886 1884 nl = [
1887 1885 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1888 1886 ]
1889 1887 if self.nodeconstants.nullhex.startswith(id):
1890 1888 nl.append(self.nullid)
1891 1889 if len(nl) > 0:
1892 1890 if len(nl) == 1 and not maybewdir:
1893 1891 self._pcache[id] = nl[0]
1894 1892 return nl[0]
1895 1893 raise error.AmbiguousPrefixLookupError(
1896 1894 id, self.display_id, _(b'ambiguous identifier')
1897 1895 )
1898 1896 if maybewdir:
1899 1897 raise error.WdirUnsupported
1900 1898 return None
1901 1899
1902 1900 def lookup(self, id):
1903 1901 """locate a node based on:
1904 1902 - revision number or str(revision number)
1905 1903 - nodeid or subset of hex nodeid
1906 1904 """
1907 1905 n = self._match(id)
1908 1906 if n is not None:
1909 1907 return n
1910 1908 n = self._partialmatch(id)
1911 1909 if n:
1912 1910 return n
1913 1911
1914 1912 raise error.LookupError(id, self.display_id, _(b'no match found'))
1915 1913
1916 1914 def shortest(self, node, minlength=1):
1917 1915 """Find the shortest unambiguous prefix that matches node."""
1918 1916
1919 1917 def isvalid(prefix):
1920 1918 try:
1921 1919 matchednode = self._partialmatch(prefix)
1922 1920 except error.AmbiguousPrefixLookupError:
1923 1921 return False
1924 1922 except error.WdirUnsupported:
1925 1923 # single 'ff...' match
1926 1924 return True
1927 1925 if matchednode is None:
1928 1926 raise error.LookupError(node, self.display_id, _(b'no node'))
1929 1927 return True
1930 1928
1931 1929 def maybewdir(prefix):
1932 1930 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1933 1931
1934 1932 hexnode = hex(node)
1935 1933
1936 1934 def disambiguate(hexnode, minlength):
1937 1935 """Disambiguate against wdirid."""
1938 1936 for length in range(minlength, len(hexnode) + 1):
1939 1937 prefix = hexnode[:length]
1940 1938 if not maybewdir(prefix):
1941 1939 return prefix
1942 1940
1943 1941 if not getattr(self, 'filteredrevs', None):
1944 1942 try:
1945 1943 length = max(self.index.shortest(node), minlength)
1946 1944 return disambiguate(hexnode, length)
1947 1945 except error.RevlogError:
1948 1946 if node != self.nodeconstants.wdirid:
1949 1947 raise error.LookupError(
1950 1948 node, self.display_id, _(b'no node')
1951 1949 )
1952 1950 except AttributeError:
1953 1951 # Fall through to pure code
1954 1952 pass
1955 1953
1956 1954 if node == self.nodeconstants.wdirid:
1957 1955 for length in range(minlength, len(hexnode) + 1):
1958 1956 prefix = hexnode[:length]
1959 1957 if isvalid(prefix):
1960 1958 return prefix
1961 1959
1962 1960 for length in range(minlength, len(hexnode) + 1):
1963 1961 prefix = hexnode[:length]
1964 1962 if isvalid(prefix):
1965 1963 return disambiguate(hexnode, length)
1966 1964
1967 1965 def cmp(self, node, text):
1968 1966 """compare text with a given file revision
1969 1967
1970 1968 returns True if text is different than what is stored.
1971 1969 """
1972 1970 p1, p2 = self.parents(node)
1973 1971 return storageutil.hashrevisionsha1(text, p1, p2) != node
1974 1972
1975 1973 def _getsegmentforrevs(self, startrev, endrev):
1976 1974 """Obtain a segment of raw data corresponding to a range of revisions.
1977 1975
1978 1976         Accepts the start and end revisions.
1981 1979
1982 1980 Requests for data may be satisfied by a cache.
1983 1981
1984 1982 Returns a 2-tuple of (offset, data) for the requested range of
1985 1983 revisions. Offset is the integer offset from the beginning of the
1986 1984 revlog and data is a str or buffer of the raw byte data.
1987 1985
1988 1986 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1989 1987 to determine where each revision's data begins and ends.
1990 1988 """
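        # Note on the inline adjustment below: for inline revlogs the data is
        # interleaved with index entries, so the physical start of revision
        # ``rev``'s data is ``self.start(rev) + (rev + 1) * entry_size``.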
1991 1989 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1992 1990 # (functions are expensive).
1993 1991 index = self.index
1994 1992 istart = index[startrev]
1995 1993 start = int(istart[0] >> 16)
1996 1994 if startrev == endrev:
1997 1995 end = start + istart[1]
1998 1996 else:
1999 1997 iend = index[endrev]
2000 1998 end = int(iend[0] >> 16) + iend[1]
2001 1999
2002 2000 if self._inline:
2003 2001 start += (startrev + 1) * self.index.entry_size
2004 2002 end += (endrev + 1) * self.index.entry_size
2005 2003 length = end - start
2006 2004
2007 2005 return start, self._segmentfile.read_chunk(start, length)
2008 2006
2009 2007 def _chunk(self, rev):
2010 2008 """Obtain a single decompressed chunk for a revision.
2011 2009
2012 2010         Accepts an integer revision.
2015 2013
2016 2014 Returns a str holding uncompressed data for the requested revision.
2017 2015 """
2018 2016 compression_mode = self.index[rev][10]
2019 2017 data = self._getsegmentforrevs(rev, rev)[1]
2020 2018 if compression_mode == COMP_MODE_PLAIN:
2021 2019 return data
2022 2020 elif compression_mode == COMP_MODE_DEFAULT:
2023 2021 return self._decompressor(data)
2024 2022 elif compression_mode == COMP_MODE_INLINE:
2025 2023 return self.decompress(data)
2026 2024 else:
2027 2025 msg = b'unknown compression mode %d'
2028 2026 msg %= compression_mode
2029 2027 raise error.RevlogError(msg)
2030 2028
2031 2029 def _chunks(self, revs, targetsize=None):
2032 2030 """Obtain decompressed chunks for the specified revisions.
2033 2031
2034 2032 Accepts an iterable of numeric revisions that are assumed to be in
2035 2033 ascending order.
2038 2036
2039 2037 This function is similar to calling ``self._chunk()`` multiple times,
2040 2038 but is faster.
2041 2039
2042 2040 Returns a list with decompressed data for each requested revision.
2043 2041 """
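        # Behaviorally this is equivalent to the sketch below; the real code
        # instead reads whole on-disk segments and slices them, which avoids
        # one read per revision:
        #
        #   return [self._chunk(rev) for rev in revs]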
2044 2042 if not revs:
2045 2043 return []
2046 2044 start = self.start
2047 2045 length = self.length
2048 2046 inline = self._inline
2049 2047 iosize = self.index.entry_size
2050 2048 buffer = util.buffer
2051 2049
2052 2050 l = []
2053 2051 ladd = l.append
2054 2052
2055 2053 if not self._withsparseread:
2056 2054 slicedchunks = (revs,)
2057 2055 else:
2058 2056 slicedchunks = deltautil.slicechunk(
2059 2057 self, revs, targetsize=targetsize
2060 2058 )
2061 2059
2062 2060 for revschunk in slicedchunks:
2063 2061 firstrev = revschunk[0]
2064 2062 # Skip trailing revisions with empty diff
2065 2063 for lastrev in revschunk[::-1]:
2066 2064 if length(lastrev) != 0:
2067 2065 break
2068 2066
2069 2067 try:
2070 2068 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2071 2069 except OverflowError:
2072 2070 # issue4215 - we can't cache a run of chunks greater than
2073 2071 # 2G on Windows
2074 2072 return [self._chunk(rev) for rev in revschunk]
2075 2073
2076 2074 decomp = self.decompress
2077 2075 # self._decompressor might be None, but will not be used in that case
2078 2076 def_decomp = self._decompressor
2079 2077 for rev in revschunk:
2080 2078 chunkstart = start(rev)
2081 2079 if inline:
2082 2080 chunkstart += (rev + 1) * iosize
2083 2081 chunklength = length(rev)
2084 2082 comp_mode = self.index[rev][10]
2085 2083 c = buffer(data, chunkstart - offset, chunklength)
2086 2084 if comp_mode == COMP_MODE_PLAIN:
2087 2085 ladd(c)
2088 2086 elif comp_mode == COMP_MODE_INLINE:
2089 2087 ladd(decomp(c))
2090 2088 elif comp_mode == COMP_MODE_DEFAULT:
2091 2089 ladd(def_decomp(c))
2092 2090 else:
2093 2091 msg = b'unknown compression mode %d'
2094 2092 msg %= comp_mode
2095 2093 raise error.RevlogError(msg)
2096 2094
2097 2095 return l
2098 2096
2099 2097 def deltaparent(self, rev):
2100 2098 """return deltaparent of the given revision"""
2101 2099 base = self.index[rev][3]
2102 2100 if base == rev:
2103 2101 return nullrev
2104 2102 elif self._generaldelta:
2105 2103 return base
2106 2104 else:
2107 2105 return rev - 1
2108 2106
2109 2107 def issnapshot(self, rev):
2110 2108 """tells whether rev is a snapshot"""
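        # In sparse-revlog terms, a snapshot is a revision stored either as a
        # full text or as a delta against another snapshot (never against one
        # of its own parents); the pure-Python fallback below encodes that
        # rule. Hypothetical usage sketch:
        #
        #   if rl.issnapshot(rev):
        #       depth = rl.snapshotdepth(rev)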
2111 2109 if not self._sparserevlog:
2112 2110 return self.deltaparent(rev) == nullrev
2113 2111 elif hasattr(self.index, 'issnapshot'):
2114 2112 # directly assign the method to cache the testing and access
2115 2113 self.issnapshot = self.index.issnapshot
2116 2114 return self.issnapshot(rev)
2117 2115 if rev == nullrev:
2118 2116 return True
2119 2117 entry = self.index[rev]
2120 2118 base = entry[3]
2121 2119 if base == rev:
2122 2120 return True
2123 2121 if base == nullrev:
2124 2122 return True
2125 2123 p1 = entry[5]
2126 2124 while self.length(p1) == 0:
2127 2125 b = self.deltaparent(p1)
2128 2126 if b == p1:
2129 2127 break
2130 2128 p1 = b
2131 2129 p2 = entry[6]
2132 2130 while self.length(p2) == 0:
2133 2131 b = self.deltaparent(p2)
2134 2132 if b == p2:
2135 2133 break
2136 2134 p2 = b
2137 2135 if base == p1 or base == p2:
2138 2136 return False
2139 2137 return self.issnapshot(base)
2140 2138
2141 2139 def snapshotdepth(self, rev):
2142 2140 """number of snapshots in the chain before this one"""
2143 2141 if not self.issnapshot(rev):
2144 2142 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2145 2143 return len(self._deltachain(rev)[0]) - 1
2146 2144
2147 2145 def revdiff(self, rev1, rev2):
2148 2146 """return or calculate a delta between two revisions
2149 2147
2150 2148 The delta calculated is in binary form and is intended to be written to
2151 2149 revlog data directly. So this function needs raw revision data.
2152 2150 """
2153 2151 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2154 2152 return bytes(self._chunk(rev2))
2155 2153
2156 2154 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2157 2155
2158 2156 def revision(self, nodeorrev):
2159 2157 """return an uncompressed revision of a given node or revision
2160 2158 number.
2161 2159 """
2162 2160 return self._revisiondata(nodeorrev)
2163 2161
2164 2162 def sidedata(self, nodeorrev):
2165 2163 """a map of extra data related to the changeset but not part of the hash
2166 2164
2167 2165 This function currently returns a dictionary. However, a more advanced
2168 2166 mapping object will likely be used in the future for more
2169 2167 efficient/lazy code.
2170 2168 """
2171 2169 # deal with <nodeorrev> argument type
2172 2170 if isinstance(nodeorrev, int):
2173 2171 rev = nodeorrev
2174 2172 else:
2175 2173 rev = self.rev(nodeorrev)
2176 2174 return self._sidedata(rev)
2177 2175
2178 2176 def _revisiondata(self, nodeorrev, raw=False):
2179 2177 # deal with <nodeorrev> argument type
2180 2178 if isinstance(nodeorrev, int):
2181 2179 rev = nodeorrev
2182 2180 node = self.node(rev)
2183 2181 else:
2184 2182 node = nodeorrev
2185 2183 rev = None
2186 2184
2187 2185 # fast path the special `nullid` rev
2188 2186 if node == self.nullid:
2189 2187 return b""
2190 2188
2191 2189 # ``rawtext`` is the text as stored inside the revlog. Might be the
2192 2190 # revision or might need to be processed to retrieve the revision.
2193 2191 rev, rawtext, validated = self._rawtext(node, rev)
2194 2192
2195 2193 if raw and validated:
2196 2194 # if we don't want to process the raw text and that raw
2197 2195 # text is already validated, we can exit early.
2198 2196 return rawtext
2199 2197 if rev is None:
2200 2198 rev = self.rev(node)
2201 2199 # the revlog's flags for this revision
2202 2200 # (they usually alter its state or content)
2203 2201 flags = self.flags(rev)
2204 2202
2205 2203 if validated and flags == REVIDX_DEFAULT_FLAGS:
2206 2204 # no extra flags set, no flag processor runs, text = rawtext
2207 2205 return rawtext
2208 2206
2209 2207 if raw:
2210 2208 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2211 2209 text = rawtext
2212 2210 else:
2213 2211 r = flagutil.processflagsread(self, rawtext, flags)
2214 2212 text, validatehash = r
2215 2213 if validatehash:
2216 2214 self.checkhash(text, node, rev=rev)
2217 2215 if not validated:
2218 2216 self._revisioncache = (node, rev, rawtext)
2219 2217
2220 2218 return text
2221 2219
2222 2220 def _rawtext(self, node, rev):
2223 2221 """return the possibly unvalidated rawtext for a revision
2224 2222
2225 2223 returns (rev, rawtext, validated)
2226 2224 """
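        # Illustrative sketch of the reconstruction done below: resolve the
        # delta chain for ``rev``, pick a base text (the cached rawtext when
        # it stopped the chain walk, otherwise the first chunk of the chain),
        # then fold the remaining delta chunks over it:
        #
        #   rawtext = mdiff.patches(basetext, bins)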
2227 2225
2228 2226 # revision in the cache (could be useful to apply delta)
2229 2227 cachedrev = None
2230 2228 # An intermediate text to apply deltas to
2231 2229 basetext = None
2232 2230
2233 2231 # Check if we have the entry in cache
2234 2232 # The cache entry looks like (node, rev, rawtext)
2235 2233 if self._revisioncache:
2236 2234 if self._revisioncache[0] == node:
2237 2235 return (rev, self._revisioncache[2], True)
2238 2236 cachedrev = self._revisioncache[1]
2239 2237
2240 2238 if rev is None:
2241 2239 rev = self.rev(node)
2242 2240
2243 2241 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2244 2242 if stopped:
2245 2243 basetext = self._revisioncache[2]
2246 2244
2247 2245 # drop cache to save memory, the caller is expected to
2248 2246 # update self._revisioncache after validating the text
2249 2247 self._revisioncache = None
2250 2248
2251 2249 targetsize = None
2252 2250 rawsize = self.index[rev][2]
2253 2251 if 0 <= rawsize:
2254 2252 targetsize = 4 * rawsize
2255 2253
2256 2254 bins = self._chunks(chain, targetsize=targetsize)
2257 2255 if basetext is None:
2258 2256 basetext = bytes(bins[0])
2259 2257 bins = bins[1:]
2260 2258
2261 2259 rawtext = mdiff.patches(basetext, bins)
2262 2260 del basetext # let us have a chance to free memory early
2263 2261 return (rev, rawtext, False)
2264 2262
2265 2263 def _sidedata(self, rev):
2266 2264 """Return the sidedata for a given revision number."""
2267 2265 index_entry = self.index[rev]
2268 2266 sidedata_offset = index_entry[8]
2269 2267 sidedata_size = index_entry[9]
2270 2268
2271 2269 if self._inline:
2272 2270 sidedata_offset += self.index.entry_size * (1 + rev)
2273 2271 if sidedata_size == 0:
2274 2272 return {}
2275 2273
2276 2274 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2277 2275 filename = self._sidedatafile
2278 2276 end = self._docket.sidedata_end
2279 2277 offset = sidedata_offset
2280 2278 length = sidedata_size
2281 2279 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2282 2280 raise error.RevlogError(m)
2283 2281
2284 2282 comp_segment = self._segmentfile_sidedata.read_chunk(
2285 2283 sidedata_offset, sidedata_size
2286 2284 )
2287 2285
2288 2286 comp = self.index[rev][11]
2289 2287 if comp == COMP_MODE_PLAIN:
2290 2288 segment = comp_segment
2291 2289 elif comp == COMP_MODE_DEFAULT:
2292 2290 segment = self._decompressor(comp_segment)
2293 2291 elif comp == COMP_MODE_INLINE:
2294 2292 segment = self.decompress(comp_segment)
2295 2293 else:
2296 2294 msg = b'unknown compression mode %d'
2297 2295 msg %= comp
2298 2296 raise error.RevlogError(msg)
2299 2297
2300 2298 sidedata = sidedatautil.deserialize_sidedata(segment)
2301 2299 return sidedata
2302 2300
2303 2301 def rawdata(self, nodeorrev):
2304 2302 """return an uncompressed raw data of a given node or revision number."""
2305 2303 return self._revisiondata(nodeorrev, raw=True)
2306 2304
2307 2305 def hash(self, text, p1, p2):
2308 2306 """Compute a node hash.
2309 2307
2310 2308 Available as a function so that subclasses can replace the hash
2311 2309 as needed.
2312 2310 """
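        # The default is the historical SHA-1 based node, roughly equivalent
        # to the sketch below (the actual helper lives in
        # ``storageutil.hashrevisionsha1``):
        #
        #   s = hashlib.sha1(min(p1, p2) + max(p1, p2))
        #   s.update(text)
        #   node = s.digest()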
2313 2311 return storageutil.hashrevisionsha1(text, p1, p2)
2314 2312
2315 2313 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2316 2314 """Check node hash integrity.
2317 2315
2318 2316 Available as a function so that subclasses can extend hash mismatch
2319 2317 behaviors as needed.
2320 2318 """
2321 2319 try:
2322 2320 if p1 is None and p2 is None:
2323 2321 p1, p2 = self.parents(node)
2324 2322 if node != self.hash(text, p1, p2):
2325 2323 # Clear the revision cache on hash failure. The revision cache
2326 2324 # only stores the raw revision and clearing the cache does have
2327 2325 # the side-effect that we won't have a cache hit when the raw
2328 2326 # revision data is accessed. But this case should be rare and
2329 2327 # it is extra work to teach the cache about the hash
2330 2328 # verification state.
2331 2329 if self._revisioncache and self._revisioncache[0] == node:
2332 2330 self._revisioncache = None
2333 2331
2334 2332 revornode = rev
2335 2333 if revornode is None:
2336 2334 revornode = templatefilters.short(hex(node))
2337 2335 raise error.RevlogError(
2338 2336 _(b"integrity check failed on %s:%s")
2339 2337 % (self.display_id, pycompat.bytestr(revornode))
2340 2338 )
2341 2339 except error.RevlogError:
2342 2340 if self._censorable and storageutil.iscensoredtext(text):
2343 2341 raise error.CensoredNodeError(self.display_id, node, text)
2344 2342 raise
2345 2343
2346 2344 @property
2347 2345 def _split_index_file(self):
2348 2346 """the path where the index of an ongoing splitting operation is expected
2349 2347
2350 2348 The file will only exist if a splitting operation is in progress, but
2351 2349 it is always expected at the same location."""
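        # Example of the mapping performed below (illustration only; the
        # radix is the index path without its ``.i`` extension):
        #
        #   data/some/dir/file.txt  ->  data-s/some/dir/file.txt.i
        #   00changelog             ->  00changelog.i.s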
2352 2350 parts = self.radix.split(b'/')
2353 2351 if len(parts) > 1:
2354 2352 # append a '-s' suffix to the `data/` or `meta/` base directory
2355 2353 head = parts[0] + b'-s'
2356 2354 mids = parts[1:-1]
2357 2355 tail = parts[-1] + b'.i'
2358 2356 pieces = [head] + mids + [tail]
2359 2357 return b'/'.join(pieces)
2360 2358 else:
2361 2359 # the revlog is stored at the root of the store (changelog or
2362 2360 # manifest), no risk of collision.
2363 2361 return self.radix + b'.i.s'
2364 2362
2365 2363 def _enforceinlinesize(self, tr, side_write=True):
2366 2364 """Check if the revlog is too big for inline and convert if so.
2367 2365
2368 2366 This should be called after revisions are added to the revlog. If the
2369 2367 revlog has grown too large to be an inline revlog, it will convert it
2370 2368 to use multiple index and data files.
2371 2369 """
2372 2370 tiprev = len(self) - 1
2373 2371 total_size = self.start(tiprev) + self.length(tiprev)
2374 2372 if not self._inline or total_size < _maxinline:
2375 2373 return
2376 2374
2377 2375 troffset = tr.findoffset(self._indexfile)
2378 2376 if troffset is None:
2379 2377 raise error.RevlogError(
2380 2378 _(b"%s not found in the transaction") % self._indexfile
2381 2379 )
2382 2380 if troffset:
2383 2381 tr.addbackup(self._indexfile, for_offset=True)
2384 2382 tr.add(self._datafile, 0)
2385 2383
2386 2384 existing_handles = False
2387 2385 if self._writinghandles is not None:
2388 2386 existing_handles = True
2389 2387 fp = self._writinghandles[0]
2390 2388 fp.flush()
2391 2389 fp.close()
2392 2390 # We can't use the cached file handle after close(). So prevent
2393 2391 # its usage.
2394 2392 self._writinghandles = None
2395 2393 self._segmentfile.writing_handle = None
2396 2394 # No need to deal with sidedata writing handle as it is only
2397 2395 # relevant with revlog-v2 which is never inline, not reaching
2398 2396 # this code
2399 2397 if side_write:
2400 2398 old_index_file_path = self._indexfile
2401 2399 new_index_file_path = self._split_index_file
2402 2400 opener = self.opener
2403 2401 weak_self = weakref.ref(self)
2404 2402
2405 2403 # the "split" index replaces the real index when the transaction is finalized
2406 2404 def finalize_callback(tr):
2407 2405 opener.rename(
2408 2406 new_index_file_path,
2409 2407 old_index_file_path,
2410 2408 checkambig=True,
2411 2409 )
2412 2410 maybe_self = weak_self()
2413 2411 if maybe_self is not None:
2414 2412 maybe_self._indexfile = old_index_file_path
2415 2413
2416 2414 def abort_callback(tr):
2417 2415 maybe_self = weak_self()
2418 2416 if maybe_self is not None:
2419 2417 maybe_self._indexfile = old_index_file_path
2420 2418
2421 2419 tr.registertmp(new_index_file_path)
2422 2420 if self.target[1] is not None:
2423 2421 callback_id = b'000-revlog-split-%d-%s' % self.target
2424 2422 else:
2425 2423 callback_id = b'000-revlog-split-%d' % self.target[0]
2426 2424 tr.addfinalize(callback_id, finalize_callback)
2427 2425 tr.addabort(callback_id, abort_callback)
2428 2426
2429 2427 new_dfh = self._datafp(b'w+')
2430 2428 new_dfh.truncate(0) # drop any potentially existing data
2431 2429 try:
2432 2430 with self.reading():
2433 2431 for r in self:
2434 2432 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2435 2433 new_dfh.flush()
2436 2434
2437 2435 if side_write:
2438 2436 self._indexfile = new_index_file_path
2439 2437 with self.__index_new_fp() as fp:
2440 2438 self._format_flags &= ~FLAG_INLINE_DATA
2441 2439 self._inline = False
2442 2440 for i in self:
2443 2441 e = self.index.entry_binary(i)
2444 2442 if i == 0 and self._docket is None:
2445 2443 header = self._format_flags | self._format_version
2446 2444 header = self.index.pack_header(header)
2447 2445 e = header + e
2448 2446 fp.write(e)
2449 2447 if self._docket is not None:
2450 2448 self._docket.index_end = fp.tell()
2451 2449
2452 2450 # If we don't use side-write, the temp file replaces the real
2453 2451 # index when we exit the context manager
2454 2452
2455 2453 nodemaputil.setup_persistent_nodemap(tr, self)
2456 2454 self._segmentfile = randomaccessfile.randomaccessfile(
2457 2455 self.opener,
2458 2456 self._datafile,
2459 2457 self._chunkcachesize,
2460 2458 )
2461 2459
2462 2460 if existing_handles:
2463 2461 # switched from inline to conventional; reopen the index
2464 2462 ifh = self.__index_write_fp()
2465 2463 self._writinghandles = (ifh, new_dfh, None)
2466 2464 self._segmentfile.writing_handle = new_dfh
2467 2465 new_dfh = None
2468 2466 # No need to deal with sidedata writing handle as it is only
2469 2467 # relevant with revlog-v2 which is never inline, not reaching
2470 2468 # this code
2471 2469 finally:
2472 2470 if new_dfh is not None:
2473 2471 new_dfh.close()
2474 2472
2475 2473 def _nodeduplicatecallback(self, transaction, node):
2476 2474 """called when trying to add a node already stored."""
2477 2475
2478 2476 @contextlib.contextmanager
2479 2477 def reading(self):
2480 2478 """Context manager that keeps data and sidedata files open for reading"""
2481 2479 if len(self.index) == 0:
2482 2480 yield # nothing to be read
2483 2481 else:
2484 2482 with self._segmentfile.reading():
2485 2483 with self._segmentfile_sidedata.reading():
2486 2484 yield
2487 2485
2488 2486 @contextlib.contextmanager
2489 2487 def _writing(self, transaction):
2490 2488 if self._trypending:
2491 2489 msg = b'try to write in a `trypending` revlog: %s'
2492 2490 msg %= self.display_id
2493 2491 raise error.ProgrammingError(msg)
2494 2492 if self._writinghandles is not None:
2495 2493 yield
2496 2494 else:
2497 2495 ifh = dfh = sdfh = None
2498 2496 try:
2499 2497 r = len(self)
2500 2498 # opening the data file.
2501 2499 dsize = 0
2502 2500 if r:
2503 2501 dsize = self.end(r - 1)
2504 2502 dfh = None
2505 2503 if not self._inline:
2506 2504 try:
2507 2505 dfh = self._datafp(b"r+")
2508 2506 if self._docket is None:
2509 2507 dfh.seek(0, os.SEEK_END)
2510 2508 else:
2511 2509 dfh.seek(self._docket.data_end, os.SEEK_SET)
2512 2510 except FileNotFoundError:
2513 2511 dfh = self._datafp(b"w+")
2514 2512 transaction.add(self._datafile, dsize)
2515 2513 if self._sidedatafile is not None:
2516 2514 # revlog-v2 does not inline, help Pytype
2517 2515 assert dfh is not None
2518 2516 try:
2519 2517 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2520 2518 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2521 2519 except FileNotFoundError:
2522 2520 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2523 2521 transaction.add(
2524 2522 self._sidedatafile, self._docket.sidedata_end
2525 2523 )
2526 2524
2527 2525 # opening the index file.
2528 2526 isize = r * self.index.entry_size
2529 2527 ifh = self.__index_write_fp()
2530 2528 if self._inline:
2531 2529 transaction.add(self._indexfile, dsize + isize)
2532 2530 else:
2533 2531 transaction.add(self._indexfile, isize)
2534 2532 # exposing all file handle for writing.
2535 2533 self._writinghandles = (ifh, dfh, sdfh)
2536 2534 self._segmentfile.writing_handle = ifh if self._inline else dfh
2537 2535 self._segmentfile_sidedata.writing_handle = sdfh
2538 2536 yield
2539 2537 if self._docket is not None:
2540 2538 self._write_docket(transaction)
2541 2539 finally:
2542 2540 self._writinghandles = None
2543 2541 self._segmentfile.writing_handle = None
2544 2542 self._segmentfile_sidedata.writing_handle = None
2545 2543 if dfh is not None:
2546 2544 dfh.close()
2547 2545 if sdfh is not None:
2548 2546 sdfh.close()
2549 2547 # close the index file last to avoid exposing entries that refer to
2550 2548 # potentially unflushed data content.
2551 2549 if ifh is not None:
2552 2550 ifh.close()
2553 2551
2554 2552 def _write_docket(self, transaction):
2555 2553 """write the current docket on disk
2556 2554
2557 2555 Exists as a method to help the changelog implement its transaction logic.
2558 2556 
2559 2557 We could also imagine using the same transaction logic for all revlogs
2560 2558 since dockets are cheap."""
2561 2559 self._docket.write(transaction)
2562 2560
2563 2561 def addrevision(
2564 2562 self,
2565 2563 text,
2566 2564 transaction,
2567 2565 link,
2568 2566 p1,
2569 2567 p2,
2570 2568 cachedelta=None,
2571 2569 node=None,
2572 2570 flags=REVIDX_DEFAULT_FLAGS,
2573 2571 deltacomputer=None,
2574 2572 sidedata=None,
2575 2573 ):
2576 2574 """add a revision to the log
2577 2575
2578 2576 text - the revision data to add
2579 2577 transaction - the transaction object used for rollback
2580 2578 link - the linkrev data to add
2581 2579 p1, p2 - the parent nodeids of the revision
2582 2580 cachedelta - an optional precomputed delta
2583 2581 node - nodeid of revision; typically node is not specified, and it is
2584 2582 computed by default as hash(text, p1, p2), however subclasses might
2585 2583 use different hashing method (and override checkhash() in such case)
2586 2584 flags - the known flags to set on the revision
2587 2585 deltacomputer - an optional deltacomputer instance shared between
2588 2586 multiple calls
2589 2587 """
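        # Minimal usage sketch (illustration only; ``tr`` is an open
        # transaction and both parents must already be present in this
        # revlog):
        #
        #   rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
        #   node = rl.node(rev)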
2590 2588 if link == nullrev:
2591 2589 raise error.RevlogError(
2592 2590 _(b"attempted to add linkrev -1 to %s") % self.display_id
2593 2591 )
2594 2592
2595 2593 if sidedata is None:
2596 2594 sidedata = {}
2597 2595 elif sidedata and not self.hassidedata:
2598 2596 raise error.ProgrammingError(
2599 2597 _(b"trying to add sidedata to a revlog that does not support them")
2600 2598 )
2601 2599
2602 2600 if flags:
2603 2601 node = node or self.hash(text, p1, p2)
2604 2602
2605 2603 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2606 2604
2607 2605 # If the flag processor modifies the revision data, ignore any provided
2608 2606 # cachedelta.
2609 2607 if rawtext != text:
2610 2608 cachedelta = None
2611 2609
2612 2610 if len(rawtext) > _maxentrysize:
2613 2611 raise error.RevlogError(
2614 2612 _(
2615 2613 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2616 2614 )
2617 2615 % (self.display_id, len(rawtext))
2618 2616 )
2619 2617
2620 2618 node = node or self.hash(rawtext, p1, p2)
2621 2619 rev = self.index.get_rev(node)
2622 2620 if rev is not None:
2623 2621 return rev
2624 2622
2625 2623 if validatehash:
2626 2624 self.checkhash(rawtext, node, p1=p1, p2=p2)
2627 2625
2628 2626 return self.addrawrevision(
2629 2627 rawtext,
2630 2628 transaction,
2631 2629 link,
2632 2630 p1,
2633 2631 p2,
2634 2632 node,
2635 2633 flags,
2636 2634 cachedelta=cachedelta,
2637 2635 deltacomputer=deltacomputer,
2638 2636 sidedata=sidedata,
2639 2637 )
2640 2638
2641 2639 def addrawrevision(
2642 2640 self,
2643 2641 rawtext,
2644 2642 transaction,
2645 2643 link,
2646 2644 p1,
2647 2645 p2,
2648 2646 node,
2649 2647 flags,
2650 2648 cachedelta=None,
2651 2649 deltacomputer=None,
2652 2650 sidedata=None,
2653 2651 ):
2654 2652 """add a raw revision with known flags, node and parents
2655 2653 useful when reusing a revision not stored in this revlog (ex: received
2656 2654 over wire, or read from an external bundle).
2657 2655 """
2658 2656 with self._writing(transaction):
2659 2657 return self._addrevision(
2660 2658 node,
2661 2659 rawtext,
2662 2660 transaction,
2663 2661 link,
2664 2662 p1,
2665 2663 p2,
2666 2664 flags,
2667 2665 cachedelta,
2668 2666 deltacomputer=deltacomputer,
2669 2667 sidedata=sidedata,
2670 2668 )
2671 2669
2672 2670 def compress(self, data):
2673 2671 """Generate a possibly-compressed representation of data."""
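        # The return value is a (header, data) pair. An empty header means
        # the payload either carries its own compression-engine header or is
        # raw data starting with '\0'; b'u' marks data stored uncompressed.
        # Illustrative sketch of how callers assemble the on-disk chunk:
        #
        #   header, payload = self.compress(rawtext)
        #   chunk = header + payload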
2674 2672 if not data:
2675 2673 return b'', data
2676 2674
2677 2675 compressed = self._compressor.compress(data)
2678 2676
2679 2677 if compressed:
2680 2678 # The revlog compressor added the header in the returned data.
2681 2679 return b'', compressed
2682 2680
2683 2681 if data[0:1] == b'\0':
2684 2682 return b'', data
2685 2683 return b'u', data
2686 2684
2687 2685 def decompress(self, data):
2688 2686 """Decompress a revlog chunk.
2689 2687
2690 2688 The chunk is expected to begin with a header identifying the
2691 2689 format type so it can be routed to an appropriate decompressor.
2692 2690 """
2693 2691 if not data:
2694 2692 return data
2695 2693
2696 2694 # Revlogs are read much more frequently than they are written and many
2697 2695 # chunks only take microseconds to decompress, so performance is
2698 2696 # important here.
2699 2697 #
2700 2698 # We can make a few assumptions about revlogs:
2701 2699 #
2702 2700 # 1) the majority of chunks will be compressed (as opposed to inline
2703 2701 # raw data).
2704 2702 # 2) decompressing *any* data will likely be at least 10x slower than
2705 2703 # returning raw inline data.
2706 2704 # 3) we want to prioritize common and officially supported compression
2707 2705 # engines
2708 2706 #
2709 2707 # It follows that we want to optimize for "decompress compressed data
2710 2708 # when encoded with common and officially supported compression engines"
2711 2709 # case over "raw data" and "data encoded by less common or non-official
2712 2710 # compression engines." That is why we have the inline lookup first
2713 2711 # followed by the compengines lookup.
2714 2712 #
2715 2713 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2716 2714 # compressed chunks. And this matters for changelog and manifest reads.
2717 2715 t = data[0:1]
2718 2716
2719 2717 if t == b'x':
2720 2718 try:
2721 2719 return _zlibdecompress(data)
2722 2720 except zlib.error as e:
2723 2721 raise error.RevlogError(
2724 2722 _(b'revlog decompress error: %s')
2725 2723 % stringutil.forcebytestr(e)
2726 2724 )
2727 2725 # '\0' is more common than 'u' so it goes first.
2728 2726 elif t == b'\0':
2729 2727 return data
2730 2728 elif t == b'u':
2731 2729 return util.buffer(data, 1)
2732 2730
2733 2731 compressor = self._get_decompressor(t)
2734 2732
2735 2733 return compressor.decompress(data)
2736 2734
2737 2735 def _addrevision(
2738 2736 self,
2739 2737 node,
2740 2738 rawtext,
2741 2739 transaction,
2742 2740 link,
2743 2741 p1,
2744 2742 p2,
2745 2743 flags,
2746 2744 cachedelta,
2747 2745 alwayscache=False,
2748 2746 deltacomputer=None,
2749 2747 sidedata=None,
2750 2748 ):
2751 2749 """internal function to add revisions to the log
2752 2750
2753 2751 see addrevision for argument descriptions.
2754 2752
2755 2753 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2756 2754
2757 2755 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2758 2756 be used.
2759 2757
2760 2758 invariants:
2761 2759 - rawtext is optional (can be None); if not set, cachedelta must be set.
2762 2760 if both are set, they must correspond to each other.
2763 2761 """
2764 2762 if node == self.nullid:
2765 2763 raise error.RevlogError(
2766 2764 _(b"%s: attempt to add null revision") % self.display_id
2767 2765 )
2768 2766 if (
2769 2767 node == self.nodeconstants.wdirid
2770 2768 or node in self.nodeconstants.wdirfilenodeids
2771 2769 ):
2772 2770 raise error.RevlogError(
2773 2771 _(b"%s: attempt to add wdir revision") % self.display_id
2774 2772 )
2775 2773 if self._writinghandles is None:
2776 2774 msg = b'adding revision outside `revlog._writing` context'
2777 2775 raise error.ProgrammingError(msg)
2778 2776
2779 2777 btext = [rawtext]
2780 2778
2781 2779 curr = len(self)
2782 2780 prev = curr - 1
2783 2781
2784 2782 offset = self._get_data_offset(prev)
2785 2783
2786 2784 if self._concurrencychecker:
2787 2785 ifh, dfh, sdfh = self._writinghandles
2788 2786 # XXX no checking for the sidedata file
2789 2787 if self._inline:
2790 2788 # offset is "as if" it were in the .d file, so we need to add on
2791 2789 # the size of the entry metadata.
2792 2790 self._concurrencychecker(
2793 2791 ifh, self._indexfile, offset + curr * self.index.entry_size
2794 2792 )
2795 2793 else:
2796 2794 # Entries in the .i are a consistent size.
2797 2795 self._concurrencychecker(
2798 2796 ifh, self._indexfile, curr * self.index.entry_size
2799 2797 )
2800 2798 self._concurrencychecker(dfh, self._datafile, offset)
2801 2799
2802 2800 p1r, p2r = self.rev(p1), self.rev(p2)
2803 2801
2804 2802 # full versions are inserted when the needed deltas
2805 2803 # become comparable to the uncompressed text
2806 2804 if rawtext is None:
2807 2805 # need rawtext size, before changed by flag processors, which is
2808 2806 # the non-raw size. use revlog explicitly to avoid filelog's extra
2809 2807 # logic that might remove metadata size.
2810 2808 textlen = mdiff.patchedsize(
2811 2809 revlog.size(self, cachedelta[0]), cachedelta[1]
2812 2810 )
2813 2811 else:
2814 2812 textlen = len(rawtext)
2815 2813
2816 2814 if deltacomputer is None:
2817 2815 write_debug = None
2818 2816 if self._debug_delta:
2819 2817 write_debug = transaction._report
2820 2818 deltacomputer = deltautil.deltacomputer(
2821 2819 self, write_debug=write_debug
2822 2820 )
2823 2821
2824 2822 if cachedelta is not None and len(cachedelta) == 2:
2825 2823 # If the cached delta has no information about how it should be
2826 2824 # reused, add the default reuse instruction according to the
2827 2825 # revlog's configuration.
2828 2826 if self._generaldelta and self._lazydeltabase:
2829 2827 delta_base_reuse = DELTA_BASE_REUSE_TRY
2830 2828 else:
2831 2829 delta_base_reuse = DELTA_BASE_REUSE_NO
2832 2830 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2833 2831
2834 2832 revinfo = revlogutils.revisioninfo(
2835 2833 node,
2836 2834 p1,
2837 2835 p2,
2838 2836 btext,
2839 2837 textlen,
2840 2838 cachedelta,
2841 2839 flags,
2842 2840 )
2843 2841
2844 2842 deltainfo = deltacomputer.finddeltainfo(revinfo)
2845 2843
2846 2844 compression_mode = COMP_MODE_INLINE
2847 2845 if self._docket is not None:
2848 2846 default_comp = self._docket.default_compression_header
2849 2847 r = deltautil.delta_compression(default_comp, deltainfo)
2850 2848 compression_mode, deltainfo = r
2851 2849
2852 2850 sidedata_compression_mode = COMP_MODE_INLINE
2853 2851 if sidedata and self.hassidedata:
2854 2852 sidedata_compression_mode = COMP_MODE_PLAIN
2855 2853 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2856 2854 sidedata_offset = self._docket.sidedata_end
2857 2855 h, comp_sidedata = self.compress(serialized_sidedata)
2858 2856 if (
2859 2857 h != b'u'
2860 2858 and comp_sidedata[0:1] != b'\0'
2861 2859 and len(comp_sidedata) < len(serialized_sidedata)
2862 2860 ):
2863 2861 assert not h
2864 2862 if (
2865 2863 comp_sidedata[0:1]
2866 2864 == self._docket.default_compression_header
2867 2865 ):
2868 2866 sidedata_compression_mode = COMP_MODE_DEFAULT
2869 2867 serialized_sidedata = comp_sidedata
2870 2868 else:
2871 2869 sidedata_compression_mode = COMP_MODE_INLINE
2872 2870 serialized_sidedata = comp_sidedata
2873 2871 else:
2874 2872 serialized_sidedata = b""
2875 2873 # Don't store the offset if the sidedata is empty; that way
2876 2874 # we can easily detect empty sidedata, and it will be no different
2877 2875 # from sidedata we add manually.
2878 2876 sidedata_offset = 0
2879 2877
2880 2878 rank = RANK_UNKNOWN
2881 2879 if self._compute_rank:
2882 2880 if (p1r, p2r) == (nullrev, nullrev):
2883 2881 rank = 1
2884 2882 elif p1r != nullrev and p2r == nullrev:
2885 2883 rank = 1 + self.fast_rank(p1r)
2886 2884 elif p1r == nullrev and p2r != nullrev:
2887 2885 rank = 1 + self.fast_rank(p2r)
2888 2886 else: # merge node
2889 2887 if rustdagop is not None and self.index.rust_ext_compat:
2890 2888 rank = rustdagop.rank(self.index, p1r, p2r)
2891 2889 else:
2892 2890 pmin, pmax = sorted((p1r, p2r))
2893 2891 rank = 1 + self.fast_rank(pmax)
2894 2892 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2895 2893
2896 2894 e = revlogutils.entry(
2897 2895 flags=flags,
2898 2896 data_offset=offset,
2899 2897 data_compressed_length=deltainfo.deltalen,
2900 2898 data_uncompressed_length=textlen,
2901 2899 data_compression_mode=compression_mode,
2902 2900 data_delta_base=deltainfo.base,
2903 2901 link_rev=link,
2904 2902 parent_rev_1=p1r,
2905 2903 parent_rev_2=p2r,
2906 2904 node_id=node,
2907 2905 sidedata_offset=sidedata_offset,
2908 2906 sidedata_compressed_length=len(serialized_sidedata),
2909 2907 sidedata_compression_mode=sidedata_compression_mode,
2910 2908 rank=rank,
2911 2909 )
2912 2910
2913 2911 self.index.append(e)
2914 2912 entry = self.index.entry_binary(curr)
2915 2913 if curr == 0 and self._docket is None:
2916 2914 header = self._format_flags | self._format_version
2917 2915 header = self.index.pack_header(header)
2918 2916 entry = header + entry
2919 2917 self._writeentry(
2920 2918 transaction,
2921 2919 entry,
2922 2920 deltainfo.data,
2923 2921 link,
2924 2922 offset,
2925 2923 serialized_sidedata,
2926 2924 sidedata_offset,
2927 2925 )
2928 2926
2929 2927 rawtext = btext[0]
2930 2928
2931 2929 if alwayscache and rawtext is None:
2932 2930 rawtext = deltacomputer.buildtext(revinfo)
2933 2931
2934 2932 if type(rawtext) == bytes: # only accept immutable objects
2935 2933 self._revisioncache = (node, curr, rawtext)
2936 2934 self._chainbasecache[curr] = deltainfo.chainbase
2937 2935 return curr
2938 2936
2939 2937 def _get_data_offset(self, prev):
2940 2938 """Returns the current offset in the (in-transaction) data file.
2941 2939 Versions < 2 of the revlog can get this in O(1), while revlog v2 needs a docket
2942 2940 file to store that information: since sidedata can be rewritten to the
2943 2941 end of the data file within a transaction, you can have cases where, for
2944 2942 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2945 2943 to `n - 1`'s sidedata being written after `n`'s data.
2946 2944
2947 2945 TODO cache this in a docket file before getting out of experimental."""
2948 2946 if self._docket is None:
2949 2947 return self.end(prev)
2950 2948 else:
2951 2949 return self._docket.data_end
2952 2950
2953 2951 def _writeentry(
2954 2952 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2955 2953 ):
2956 2954 # Files opened in a+ mode have inconsistent behavior on various
2957 2955 # platforms. Windows requires that a file positioning call be made
2958 2956 # when the file handle transitions between reads and writes. See
2959 2957 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2960 2958 # platforms, Python or the platform itself can be buggy. Some versions
2961 2959 # of Solaris have been observed to not append at the end of the file
2962 2960 # if the file was seeked to before the end. See issue4943 for more.
2963 2961 #
2964 2962 # We work around this issue by inserting a seek() before writing.
2965 2963 # Note: This is likely not necessary on Python 3. However, because
2966 2964 # the file handle is reused for reads and may be seeked there, we need
2967 2965 # to be careful before changing this.
2968 2966 if self._writinghandles is None:
2969 2967 msg = b'adding revision outside `revlog._writing` context'
2970 2968 raise error.ProgrammingError(msg)
2971 2969 ifh, dfh, sdfh = self._writinghandles
2972 2970 if self._docket is None:
2973 2971 ifh.seek(0, os.SEEK_END)
2974 2972 else:
2975 2973 ifh.seek(self._docket.index_end, os.SEEK_SET)
2976 2974 if dfh:
2977 2975 if self._docket is None:
2978 2976 dfh.seek(0, os.SEEK_END)
2979 2977 else:
2980 2978 dfh.seek(self._docket.data_end, os.SEEK_SET)
2981 2979 if sdfh:
2982 2980 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2983 2981
2984 2982 curr = len(self) - 1
2985 2983 if not self._inline:
2986 2984 transaction.add(self._datafile, offset)
2987 2985 if self._sidedatafile:
2988 2986 transaction.add(self._sidedatafile, sidedata_offset)
2989 2987 transaction.add(self._indexfile, curr * len(entry))
2990 2988 if data[0]:
2991 2989 dfh.write(data[0])
2992 2990 dfh.write(data[1])
2993 2991 if sidedata:
2994 2992 sdfh.write(sidedata)
2995 2993 ifh.write(entry)
2996 2994 else:
2997 2995 offset += curr * self.index.entry_size
2998 2996 transaction.add(self._indexfile, offset)
2999 2997 ifh.write(entry)
3000 2998 ifh.write(data[0])
3001 2999 ifh.write(data[1])
3002 3000 assert not sidedata
3003 3001 self._enforceinlinesize(transaction)
3004 3002 if self._docket is not None:
3005 3003 # revlog-v2 always has 3 writing handles, help Pytype
3006 3004 wh1 = self._writinghandles[0]
3007 3005 wh2 = self._writinghandles[1]
3008 3006 wh3 = self._writinghandles[2]
3009 3007 assert wh1 is not None
3010 3008 assert wh2 is not None
3011 3009 assert wh3 is not None
3012 3010 self._docket.index_end = wh1.tell()
3013 3011 self._docket.data_end = wh2.tell()
3014 3012 self._docket.sidedata_end = wh3.tell()
3015 3013
3016 3014 nodemaputil.setup_persistent_nodemap(transaction, self)
3017 3015
3018 3016 def addgroup(
3019 3017 self,
3020 3018 deltas,
3021 3019 linkmapper,
3022 3020 transaction,
3023 3021 alwayscache=False,
3024 3022 addrevisioncb=None,
3025 3023 duplicaterevisioncb=None,
3026 3024 debug_info=None,
3027 3025 delta_base_reuse_policy=None,
3028 3026 ):
3029 3027 """
3030 3028 add a delta group
3031 3029
3032 3030 given a set of deltas, add them to the revision log. the
3033 3031 first delta is against its parent, which should be in our
3034 3032 log, the rest are against the previous delta.
3035 3033
3036 3034 If ``addrevisioncb`` is defined, it will be called with arguments of
3037 3035 this revlog and the revision number that was added.
3038 3036 """
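        # Each element of ``deltas`` is expected to be an 8-tuple matching
        # the unpacking done below:
        #
        #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
        #
        # where ``delta`` is a binary patch against ``deltabase`` and
        # ``linknode`` is translated to a linkrev through ``linkmapper``.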
3039 3037
3040 3038 if self._adding_group:
3041 3039 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3042 3040
3043 3041 # read the default delta-base reuse policy from revlog config if the
3044 3042 # group did not specify one.
3045 3043 if delta_base_reuse_policy is None:
3046 3044 if self._generaldelta and self._lazydeltabase:
3047 3045 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3048 3046 else:
3049 3047 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3050 3048
3051 3049 self._adding_group = True
3052 3050 empty = True
3053 3051 try:
3054 3052 with self._writing(transaction):
3055 3053 write_debug = None
3056 3054 if self._debug_delta:
3057 3055 write_debug = transaction._report
3058 3056 deltacomputer = deltautil.deltacomputer(
3059 3057 self,
3060 3058 write_debug=write_debug,
3061 3059 debug_info=debug_info,
3062 3060 )
3063 3061 # loop through our set of deltas
3064 3062 for data in deltas:
3065 3063 (
3066 3064 node,
3067 3065 p1,
3068 3066 p2,
3069 3067 linknode,
3070 3068 deltabase,
3071 3069 delta,
3072 3070 flags,
3073 3071 sidedata,
3074 3072 ) = data
3075 3073 link = linkmapper(linknode)
3076 3074 flags = flags or REVIDX_DEFAULT_FLAGS
3077 3075
3078 3076 rev = self.index.get_rev(node)
3079 3077 if rev is not None:
3080 3078 # this can happen if two branches make the same change
3081 3079 self._nodeduplicatecallback(transaction, rev)
3082 3080 if duplicaterevisioncb:
3083 3081 duplicaterevisioncb(self, rev)
3084 3082 empty = False
3085 3083 continue
3086 3084
3087 3085 for p in (p1, p2):
3088 3086 if not self.index.has_node(p):
3089 3087 raise error.LookupError(
3090 3088 p, self.radix, _(b'unknown parent')
3091 3089 )
3092 3090
3093 3091 if not self.index.has_node(deltabase):
3094 3092 raise error.LookupError(
3095 3093 deltabase, self.display_id, _(b'unknown delta base')
3096 3094 )
3097 3095
3098 3096 baserev = self.rev(deltabase)
3099 3097
3100 3098 if baserev != nullrev and self.iscensored(baserev):
3101 3099 # if base is censored, delta must be full replacement in a
3102 3100 # single patch operation
3103 3101 hlen = struct.calcsize(b">lll")
3104 3102 oldlen = self.rawsize(baserev)
3105 3103 newlen = len(delta) - hlen
3106 3104 if delta[:hlen] != mdiff.replacediffheader(
3107 3105 oldlen, newlen
3108 3106 ):
3109 3107 raise error.CensoredBaseError(
3110 3108 self.display_id, self.node(baserev)
3111 3109 )
3112 3110
3113 3111 if not flags and self._peek_iscensored(baserev, delta):
3114 3112 flags |= REVIDX_ISCENSORED
3115 3113
3116 3114 # We assume consumers of addrevisioncb will want to retrieve
3117 3115 # the added revision, which will require a call to
3118 3116 # revision(). revision() will fast path if there is a cache
3119 3117 # hit. So, we tell _addrevision() to always cache in this case.
3120 3118 # We're only using addgroup() in the context of changegroup
3121 3119 # generation so the revision data can always be handled as raw
3122 3120 # by the flagprocessor.
3123 3121 rev = self._addrevision(
3124 3122 node,
3125 3123 None,
3126 3124 transaction,
3127 3125 link,
3128 3126 p1,
3129 3127 p2,
3130 3128 flags,
3131 3129 (baserev, delta, delta_base_reuse_policy),
3132 3130 alwayscache=alwayscache,
3133 3131 deltacomputer=deltacomputer,
3134 3132 sidedata=sidedata,
3135 3133 )
3136 3134
3137 3135 if addrevisioncb:
3138 3136 addrevisioncb(self, rev)
3139 3137 empty = False
3140 3138 finally:
3141 3139 self._adding_group = False
3142 3140 return not empty
3143 3141
3144 3142 def iscensored(self, rev):
3145 3143 """Check if a file revision is censored."""
3146 3144 if not self._censorable:
3147 3145 return False
3148 3146
3149 3147 return self.flags(rev) & REVIDX_ISCENSORED
3150 3148
3151 3149 def _peek_iscensored(self, baserev, delta):
3152 3150 """Quickly check if a delta produces a censored revision."""
3153 3151 if not self._censorable:
3154 3152 return False
3155 3153
3156 3154 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3157 3155
3158 3156 def getstrippoint(self, minlink):
3159 3157 """find the minimum rev that must be stripped to strip the linkrev
3160 3158
3161 3159 Returns a tuple containing the minimum rev and a set of all revs that
3162 3160 have linkrevs that will be broken by this strip.
3163 3161 """
3164 3162 return storageutil.resolvestripinfo(
3165 3163 minlink,
3166 3164 len(self) - 1,
3167 3165 self.headrevs(),
3168 3166 self.linkrev,
3169 3167 self.parentrevs,
3170 3168 )
3171 3169
3172 3170 def strip(self, minlink, transaction):
3173 3171 """truncate the revlog on the first revision with a linkrev >= minlink
3174 3172
3175 3173 This function is called when we're stripping revision minlink and
3176 3174 its descendants from the repository.
3177 3175
3178 3176 We have to remove all revisions with linkrev >= minlink, because
3179 3177 the equivalent changelog revisions will be renumbered after the
3180 3178 strip.
3181 3179
3182 3180 So we truncate the revlog on the first of these revisions, and
3183 3181 trust that the caller has saved the revisions that shouldn't be
3184 3182 removed and that it'll re-add them after this truncation.
3185 3183 """
3186 3184 if len(self) == 0:
3187 3185 return
3188 3186
3189 3187 rev, _ = self.getstrippoint(minlink)
3190 3188 if rev == len(self):
3191 3189 return
3192 3190
3193 3191 # first truncate the files on disk
3194 3192 data_end = self.start(rev)
3195 3193 if not self._inline:
3196 3194 transaction.add(self._datafile, data_end)
3197 3195 end = rev * self.index.entry_size
3198 3196 else:
3199 3197 end = data_end + (rev * self.index.entry_size)
3200 3198
3201 3199 if self._sidedatafile:
3202 3200 sidedata_end = self.sidedata_cut_off(rev)
3203 3201 transaction.add(self._sidedatafile, sidedata_end)
3204 3202
3205 3203 transaction.add(self._indexfile, end)
3206 3204 if self._docket is not None:
3207 3205 # XXX we could leverage the docket while stripping. However, it is
3208 3206 # not powerful enough at the time of this comment.
3209 3207 self._docket.index_end = end
3210 3208 self._docket.data_end = data_end
3211 3209 self._docket.sidedata_end = sidedata_end
3212 3210 self._docket.write(transaction, stripping=True)
3213 3211
3214 3212 # then reset internal state in memory to forget those revisions
3215 3213 self._revisioncache = None
3216 3214 self._chaininfocache = util.lrucachedict(500)
3217 3215 self._segmentfile.clear_cache()
3218 3216 self._segmentfile_sidedata.clear_cache()
3219 3217
3220 3218 del self.index[rev:-1]
3221 3219
3222 3220 def checksize(self):
3223 3221 """Check size of index and data files
3224 3222
3225 3223 return a (dd, di) tuple.
3226 3224 - dd: extra bytes for the "data" file
3227 3225 - di: extra bytes for the "index" file
3228 3226
3229 3227 A healthy revlog will return (0, 0).
3230 3228 """
3231 3229 expected = 0
3232 3230 if len(self):
3233 3231 expected = max(0, self.end(len(self) - 1))
3234 3232
3235 3233 try:
3236 3234 with self._datafp() as f:
3237 3235 f.seek(0, io.SEEK_END)
3238 3236 actual = f.tell()
3239 3237 dd = actual - expected
3240 3238 except FileNotFoundError:
3241 3239 dd = 0
3242 3240
3243 3241 try:
3244 3242 f = self.opener(self._indexfile)
3245 3243 f.seek(0, io.SEEK_END)
3246 3244 actual = f.tell()
3247 3245 f.close()
3248 3246 s = self.index.entry_size
3249 3247 i = max(0, actual // s)
3250 3248 di = actual - (i * s)
3251 3249 if self._inline:
3252 3250 databytes = 0
3253 3251 for r in self:
3254 3252 databytes += max(0, self.length(r))
3255 3253 dd = 0
3256 3254 di = actual - len(self) * s - databytes
3257 3255 except FileNotFoundError:
3258 3256 di = 0
3259 3257
3260 3258 return (dd, di)
3261 3259
3262 3260 def files(self):
3263 3261 res = [self._indexfile]
3264 3262 if self._docket_file is None:
3265 3263 if not self._inline:
3266 3264 res.append(self._datafile)
3267 3265 else:
3268 3266 res.append(self._docket_file)
3269 3267 res.extend(self._docket.old_index_filepaths(include_empty=False))
3270 3268 if self._docket.data_end:
3271 3269 res.append(self._datafile)
3272 3270 res.extend(self._docket.old_data_filepaths(include_empty=False))
3273 3271 if self._docket.sidedata_end:
3274 3272 res.append(self._sidedatafile)
3275 3273 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3276 3274 return res
3277 3275
3278 3276 def emitrevisions(
3279 3277 self,
3280 3278 nodes,
3281 3279 nodesorder=None,
3282 3280 revisiondata=False,
3283 3281 assumehaveparentrevisions=False,
3284 3282 deltamode=repository.CG_DELTAMODE_STD,
3285 3283 sidedata_helpers=None,
3286 3284 debug_info=None,
3287 3285 ):
3288 3286 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3289 3287 raise error.ProgrammingError(
3290 3288 b'unhandled value for nodesorder: %s' % nodesorder
3291 3289 )
3292 3290
3293 3291 if nodesorder is None and not self._generaldelta:
3294 3292 nodesorder = b'storage'
3295 3293
3296 3294 if (
3297 3295 not self._storedeltachains
3298 3296 and deltamode != repository.CG_DELTAMODE_PREV
3299 3297 ):
3300 3298 deltamode = repository.CG_DELTAMODE_FULL
3301 3299
3302 3300 return storageutil.emitrevisions(
3303 3301 self,
3304 3302 nodes,
3305 3303 nodesorder,
3306 3304 revlogrevisiondelta,
3307 3305 deltaparentfn=self.deltaparent,
3308 3306 candeltafn=self._candelta,
3309 3307 rawsizefn=self.rawsize,
3310 3308 revdifffn=self.revdiff,
3311 3309 flagsfn=self.flags,
3312 3310 deltamode=deltamode,
3313 3311 revisiondata=revisiondata,
3314 3312 assumehaveparentrevisions=assumehaveparentrevisions,
3315 3313 sidedata_helpers=sidedata_helpers,
3316 3314 debug_info=debug_info,
3317 3315 )
3318 3316
3319 3317 DELTAREUSEALWAYS = b'always'
3320 3318 DELTAREUSESAMEREVS = b'samerevs'
3321 3319 DELTAREUSENEVER = b'never'
3322 3320
3323 3321 DELTAREUSEFULLADD = b'fulladd'
3324 3322
3325 3323 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3326 3324
3327 3325 def clone(
3328 3326 self,
3329 3327 tr,
3330 3328 destrevlog,
3331 3329 addrevisioncb=None,
3332 3330 deltareuse=DELTAREUSESAMEREVS,
3333 3331 forcedeltabothparents=None,
3334 3332 sidedata_helpers=None,
3335 3333 ):
3336 3334 """Copy this revlog to another, possibly with format changes.
3337 3335
3338 3336 The destination revlog will contain the same revisions and nodes.
3339 3337 However, it may not be bit-for-bit identical due to e.g. delta encoding
3340 3338 differences.
3341 3339
3342 3340 The ``deltareuse`` argument controls how deltas from the existing revlog
3343 3341 are preserved in the destination revlog. The argument can have the
3344 3342 following values:
3345 3343
3346 3344 DELTAREUSEALWAYS
3347 3345 Deltas will always be reused (if possible), even if the destination
3348 3346 revlog would not select the same revisions for the delta. This is the
3349 3347 fastest mode of operation.
3350 3348 DELTAREUSESAMEREVS
3351 3349 Deltas will be reused if the destination revlog would pick the same
3352 3350 revisions for the delta. This mode strikes a balance between speed
3353 3351 and optimization.
3354 3352 DELTAREUSENEVER
3355 3353 Deltas will never be reused. This is the slowest mode of execution.
3356 3354 This mode can be used to recompute deltas (e.g. if the diff/delta
3357 3355 algorithm changes).
3358 3356 DELTAREUSEFULLADD
3359 3357 Revisions will be re-added as if they were new content. This is
3360 3358 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3361 3359 e.g. large file detection and handling.
3362 3360
3363 3361 Delta computation can be slow, so the choice of delta reuse policy can
3364 3362 significantly affect run time.
3365 3363
3366 3364 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3367 3365 two extremes. Deltas will be reused if they are appropriate. But if the
3368 3366 delta could choose a better revision, it will do so. This means if you
3369 3367 are converting a non-generaldelta revlog to a generaldelta revlog,
3370 3368 deltas will be recomputed if the delta's parent isn't a parent of the
3371 3369 revision.
3372 3370
3373 3371 In addition to the delta policy, the ``forcedeltabothparents``
3374 3372 argument controls whether to force computing deltas against both parents
3375 3373 for merges. When not set, the destination revlog's existing setting is used.
3376 3374
3377 3375 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3378 3376 `sidedata_helpers`.
3379 3377 """
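        # Hypothetical usage sketch (``dst`` must be an empty revlog opened
        # on the destination store and ``tr`` an open transaction):
        #
        #   src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)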
3380 3378 if deltareuse not in self.DELTAREUSEALL:
3381 3379 raise ValueError(
3382 3380 _(b'value for deltareuse invalid: %s') % deltareuse
3383 3381 )
3384 3382
3385 3383 if len(destrevlog):
3386 3384 raise ValueError(_(b'destination revlog is not empty'))
3387 3385
3388 3386 if getattr(self, 'filteredrevs', None):
3389 3387 raise ValueError(_(b'source revlog has filtered revisions'))
3390 3388 if getattr(destrevlog, 'filteredrevs', None):
3391 3389 raise ValueError(_(b'destination revlog has filtered revisions'))
3392 3390
3393 3391 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3394 3392 # if possible.
3395 3393 old_delta_config = destrevlog.delta_config
3396 3394 destrevlog.delta_config = destrevlog.delta_config.copy()
3397 3395
3398 3396 try:
3399 3397 if deltareuse == self.DELTAREUSEALWAYS:
3400 3398 destrevlog.delta_config.lazy_delta_base = True
3401 3399 destrevlog.delta_config.lazy_delta = True
3402 3400 elif deltareuse == self.DELTAREUSESAMEREVS:
3403 3401 destrevlog.delta_config.lazy_delta_base = False
3404 3402 destrevlog.delta_config.lazy_delta = True
3405 3403 elif deltareuse == self.DELTAREUSENEVER:
3406 3404 destrevlog.delta_config.lazy_delta_base = False
3407 3405 destrevlog.delta_config.lazy_delta = False
3408 3406
3409 3407 delta_both_parents = (
3410 3408 forcedeltabothparents or old_delta_config.delta_both_parents
3411 3409 )
3412 3410 destrevlog.delta_config.delta_both_parents = delta_both_parents
3413 3411
3414 3412 with self.reading():
3415 3413 self._clone(
3416 3414 tr,
3417 3415 destrevlog,
3418 3416 addrevisioncb,
3419 3417 deltareuse,
3420 3418 forcedeltabothparents,
3421 3419 sidedata_helpers,
3422 3420 )
3423 3421
3424 3422 finally:
3425 3423 destrevlog.delta_config = old_delta_config
3426 3424
3427 3425 def _clone(
3428 3426 self,
3429 3427 tr,
3430 3428 destrevlog,
3431 3429 addrevisioncb,
3432 3430 deltareuse,
3433 3431 forcedeltabothparents,
3434 3432 sidedata_helpers,
3435 3433 ):
3436 3434 """perform the core duty of `revlog.clone` after parameter processing"""
3437 3435 write_debug = None
3438 3436 if self._debug_delta:
3439 3437 write_debug = tr._report
3440 3438 deltacomputer = deltautil.deltacomputer(
3441 3439 destrevlog,
3442 3440 write_debug=write_debug,
3443 3441 )
3444 3442 index = self.index
3445 3443 for rev in self:
3446 3444 entry = index[rev]
3447 3445
3448 3446 # Some classes override linkrev to take filtered revs into
3449 3447 # account. Use raw entry from index.
3450 3448 flags = entry[0] & 0xFFFF
3451 3449 linkrev = entry[4]
3452 3450 p1 = index[entry[5]][7]
3453 3451 p2 = index[entry[6]][7]
3454 3452 node = entry[7]
3455 3453
3456 3454 # (Possibly) reuse the delta from the revlog if allowed and
3457 3455 # the revlog chunk is a delta.
3458 3456 cachedelta = None
3459 3457 rawtext = None
3460 3458 if deltareuse == self.DELTAREUSEFULLADD:
3461 3459 text = self._revisiondata(rev)
3462 3460 sidedata = self.sidedata(rev)
3463 3461
3464 3462 if sidedata_helpers is not None:
3465 3463 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3466 3464 self, sidedata_helpers, sidedata, rev
3467 3465 )
3468 3466 flags = flags | new_flags[0] & ~new_flags[1]
3469 3467
3470 3468 destrevlog.addrevision(
3471 3469 text,
3472 3470 tr,
3473 3471 linkrev,
3474 3472 p1,
3475 3473 p2,
3476 3474 cachedelta=cachedelta,
3477 3475 node=node,
3478 3476 flags=flags,
3479 3477 deltacomputer=deltacomputer,
3480 3478 sidedata=sidedata,
3481 3479 )
3482 3480 else:
3483 3481 if destrevlog._lazydelta:
3484 3482 dp = self.deltaparent(rev)
3485 3483 if dp != nullrev:
3486 3484 cachedelta = (dp, bytes(self._chunk(rev)))
3487 3485
3488 3486 sidedata = None
3489 3487 if not cachedelta:
3490 3488 rawtext = self._revisiondata(rev)
3491 3489 sidedata = self.sidedata(rev)
3492 3490 if sidedata is None:
3493 3491 sidedata = self.sidedata(rev)
3494 3492
3495 3493 if sidedata_helpers is not None:
3496 3494 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3497 3495 self, sidedata_helpers, sidedata, rev
3498 3496 )
3499 3497 flags = flags | new_flags[0] & ~new_flags[1]
3500 3498
3501 3499 with destrevlog._writing(tr):
3502 3500 destrevlog._addrevision(
3503 3501 node,
3504 3502 rawtext,
3505 3503 tr,
3506 3504 linkrev,
3507 3505 p1,
3508 3506 p2,
3509 3507 flags,
3510 3508 cachedelta,
3511 3509 deltacomputer=deltacomputer,
3512 3510 sidedata=sidedata,
3513 3511 )
3514 3512
3515 3513 if addrevisioncb:
3516 3514 addrevisioncb(self, rev, node)
3517 3515
3518 3516 def censorrevision(self, tr, censornode, tombstone=b''):
3519 3517 if self._format_version == REVLOGV0:
3520 3518 raise error.RevlogError(
3521 3519 _(b'cannot censor with version %d revlogs')
3522 3520 % self._format_version
3523 3521 )
3524 3522 elif self._format_version == REVLOGV1:
3525 3523 rewrite.v1_censor(self, tr, censornode, tombstone)
3526 3524 else:
3527 3525 rewrite.v2_censor(self, tr, censornode, tombstone)
3528 3526
3529 3527 def verifyintegrity(self, state):
3530 3528 """Verifies the integrity of the revlog.
3531 3529
3532 3530 Yields ``revlogproblem`` instances describing problems that are
3533 3531 found.
3534 3532 """
3535 3533 dd, di = self.checksize()
3536 3534 if dd:
3537 3535 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3538 3536 if di:
3539 3537 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3540 3538
3541 3539 version = self._format_version
3542 3540
3543 3541 # The verifier tells us what version revlog we should be.
3544 3542 if version != state[b'expectedversion']:
3545 3543 yield revlogproblem(
3546 3544 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3547 3545 % (self.display_id, version, state[b'expectedversion'])
3548 3546 )
3549 3547
3550 3548 state[b'skipread'] = set()
3551 3549 state[b'safe_renamed'] = set()
3552 3550
3553 3551 for rev in self:
3554 3552 node = self.node(rev)
3555 3553
3556 3554 # Verify contents. 4 cases to care about:
3557 3555 #
3558 3556 # common: the most common case
3559 3557 # rename: with a rename
3560 3558 # meta: file content starts with b'\1\n', the metadata
3561 3559 # header defined in filelog.py, but without a rename
3562 3560 # ext: content stored externally
3563 3561 #
3564 3562 # More formally, their differences are shown below:
3565 3563 #
3566 3564 # | common | rename | meta | ext
3567 3565 # -------------------------------------------------------
3568 3566 # flags() | 0 | 0 | 0 | not 0
3569 3567 # renamed() | False | True | False | ?
3570 3568 # rawtext[0:2]=='\1\n'| False | True | True | ?
3571 3569 #
3572 3570 # "rawtext" means the raw text stored in revlog data, which
3573 3571 # could be retrieved by "rawdata(rev)". "text"
3574 3572 # mentioned below is "revision(rev)".
3575 3573 #
3576 3574 # There are 3 different lengths stored physically:
3577 3575 # 1. L1: rawsize, stored in revlog index
3578 3576 # 2. L2: len(rawtext), stored in revlog data
3579 3577 # 3. L3: len(text), stored in revlog data if flags==0, or
3580 3578 # possibly somewhere else if flags!=0
3581 3579 #
3582 3580 # L1 should be equal to L2. L3 could be different from them.
3583 3581 # "text" may or may not affect commit hash depending on flag
3584 3582 # processors (see flagutil.addflagprocessor).
3585 3583 #
3586 3584 # | common | rename | meta | ext
3587 3585 # -------------------------------------------------
3588 3586 # rawsize() | L1 | L1 | L1 | L1
3589 3587 # size() | L1 | L2-LM | L1(*) | L1 (?)
3590 3588 # len(rawtext) | L2 | L2 | L2 | L2
3591 3589 # len(text) | L2 | L2 | L2 | L3
3592 3590 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3593 3591 #
3594 3592 # LM: length of metadata, depending on rawtext
3595 3593 # (*): not ideal, see comment in filelog.size
3596 3594 # (?): could be "- len(meta)" if the resolved content has
3597 3595 # rename metadata
3598 3596 #
3599 3597 # Checks needed to be done:
3600 3598 # 1. length check: L1 == L2, in all cases.
3601 3599 # 2. hash check: depending on flag processor, we may need to
3602 3600 # use either "text" (external), or "rawtext" (in revlog).
3603 3601
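# If the caller supplied 'skipflags', revisions carrying any of those
# flags skip the content/hash verification and only get the length
# check below.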
3604 3602 try:
3605 3603 skipflags = state.get(b'skipflags', 0)
3606 3604 if skipflags:
3607 3605 skipflags &= self.flags(rev)
3608 3606
3609 3607 _verify_revision(self, skipflags, state, node)
3610 3608
3611 3609 l1 = self.rawsize(rev)
3612 3610 l2 = len(self.rawdata(node))
3613 3611
3614 3612 if l1 != l2:
3615 3613 yield revlogproblem(
3616 3614 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3617 3615 node=node,
3618 3616 )
3619 3617
3620 3618 except error.CensoredNodeError:
3621 3619 if state[b'erroroncensored']:
3622 3620 yield revlogproblem(
3623 3621 error=_(b'censored file data'), node=node
3624 3622 )
3625 3623 state[b'skipread'].add(node)
3626 3624 except Exception as e:
3627 3625 yield revlogproblem(
3628 3626 error=_(b'unpacking %s: %s')
3629 3627 % (short(node), stringutil.forcebytestr(e)),
3630 3628 node=node,
3631 3629 )
3632 3630 state[b'skipread'].add(node)
3633 3631
3634 3632 def storageinfo(
3635 3633 self,
3636 3634 exclusivefiles=False,
3637 3635 sharedfiles=False,
3638 3636 revisionscount=False,
3639 3637 trackedsize=False,
3640 3638 storedsize=False,
3641 3639 ):
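# Only the requested keys are computed: 'trackedsize' sums the raw
# (uncompressed) size of every revision, while 'storedsize' measures
# the bytes the revlog's files actually occupy on disk.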
3642 3640 d = {}
3643 3641
3644 3642 if exclusivefiles:
3645 3643 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3646 3644 if not self._inline:
3647 3645 d[b'exclusivefiles'].append((self.opener, self._datafile))
3648 3646
3649 3647 if sharedfiles:
3650 3648 d[b'sharedfiles'] = []
3651 3649
3652 3650 if revisionscount:
3653 3651 d[b'revisionscount'] = len(self)
3654 3652
3655 3653 if trackedsize:
3656 3654 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3657 3655
3658 3656 if storedsize:
3659 3657 d[b'storedsize'] = sum(
3660 3658 self.opener.stat(path).st_size for path in self.files()
3661 3659 )
3662 3660
3663 3661 return d
3664 3662
3665 3663 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3666 3664 if not self.hassidedata:
3667 3665 return
3668 3666 # revlog formats with sidedata support do not support inline revlogs
3669 3667 assert not self._inline
3670 3668 if not helpers[1] and not helpers[2]:
3671 3669 # Nothing to generate or remove
3672 3670 return
3673 3671
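# Two passes: first append the regenerated sidedata chunks and collect
# the updated index values, then rewrite the affected index entries
# further down.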
3674 3672 new_entries = []
3675 3673 # append the new sidedata
3676 3674 with self._writing(transaction):
3677 3675 ifh, dfh, sdfh = self._writinghandles
3678 3676 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3679 3677
3680 3678 current_offset = sdfh.tell()
3681 3679 for rev in range(startrev, endrev + 1):
3682 3680 entry = self.index[rev]
3683 3681 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3684 3682 store=self,
3685 3683 sidedata_helpers=helpers,
3686 3684 sidedata={},
3687 3685 rev=rev,
3688 3686 )
3689 3687
3690 3688 serialized_sidedata = sidedatautil.serialize_sidedata(
3691 3689 new_sidedata
3692 3690 )
3693 3691
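# Choose how the chunk is stored: COMP_MODE_PLAIN keeps it
# uncompressed, COMP_MODE_DEFAULT means it is compressed with the
# docket's default engine, and COMP_MODE_INLINE embeds the compression
# header in the chunk itself.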
3694 3692 sidedata_compression_mode = COMP_MODE_INLINE
3695 3693 if serialized_sidedata and self.hassidedata:
3696 3694 sidedata_compression_mode = COMP_MODE_PLAIN
3697 3695 h, comp_sidedata = self.compress(serialized_sidedata)
3698 3696 if (
3699 3697 h != b'u'
3700 3698 and comp_sidedata[0] != b'\0'
3701 3699 and len(comp_sidedata) < len(serialized_sidedata)
3702 3700 ):
3703 3701 assert not h
3704 3702 if (
3705 3703 comp_sidedata[0]
3706 3704 == self._docket.default_compression_header
3707 3705 ):
3708 3706 sidedata_compression_mode = COMP_MODE_DEFAULT
3709 3707 serialized_sidedata = comp_sidedata
3710 3708 else:
3711 3709 sidedata_compression_mode = COMP_MODE_INLINE
3712 3710 serialized_sidedata = comp_sidedata
3713 3711 if entry[8] != 0 or entry[9] != 0:
3714 3712 # rewriting entries that already have sidedata is not
3715 3713 # supported yet, because it introduces garbage data in the
3716 3714 # revlog.
3717 3715 msg = b"rewriting existing sidedata is not supported yet"
3718 3716 raise error.Abort(msg)
3719 3717
3720 3718 # Apply the (potential) flags to add/remove returned by the sidedata
3721 3719 # helpers: flags[0] holds the bits to set, flags[1] the bits to clear
3722 3720 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3723 3721 entry_update = (
3724 3722 current_offset,
3725 3723 len(serialized_sidedata),
3726 3724 new_offset_flags,
3727 3725 sidedata_compression_mode,
3728 3726 )
3729 3727
3730 3728 # the sidedata computation might have moved the file cursors around
3731 3729 sdfh.seek(current_offset, os.SEEK_SET)
3732 3730 sdfh.write(serialized_sidedata)
3733 3731 new_entries.append(entry_update)
3734 3732 current_offset += len(serialized_sidedata)
3735 3733 self._docket.sidedata_end = sdfh.tell()
3736 3734
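# Index entries are fixed-size, so we can seek straight to 'startrev'
# and overwrite in place; for revlogs without a docket the format
# header shares the first entry's offset field and must be re-packed
# when rev 0 is rewritten.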
3737 3735 # rewrite the new index entries
3738 3736 ifh.seek(startrev * self.index.entry_size)
3739 3737 for i, e in enumerate(new_entries):
3740 3738 rev = startrev + i
3741 3739 self.index.replace_sidedata_info(rev, *e)
3742 3740 packed = self.index.entry_binary(rev)
3743 3741 if rev == 0 and self._docket is None:
3744 3742 header = self._format_flags | self._format_version
3745 3743 header = self.index.pack_header(header)
3746 3744 packed = header + packed
3747 3745 ifh.write(packed)