revlog: skip opener option to pass delta_both_parents value...
Author: marmoute
Changeset: r51926:cd16b689 (branch: default)
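The substance of this changeset is a single hunk in resolverevlogstorevfsoptions() (see the diff below): the value of the storage.revlog.optimize-delta-parent-choice config is no longer plumbed to revlogs through the store vfs opener options dict under the b'deltabothparents' key; it is written directly onto the revlog.DeltaConfig object instead. Condensed from the hunk (a summary, not a verbatim excerpt):

    # before: forwarded through the opener options dict
    deltabothparents = ui.configbool(
        b'storage', b'revlog.optimize-delta-parent-choice'
    )
    options[b'deltabothparents'] = deltabothparents

    # after: set directly on the delta configuration object
    delta_config.delta_both_parents = ui.configbool(
        b'storage', b'revlog.optimize-delta-parent-choice'
    )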
@@ -1,4047 +1,4046 b''
1 1 # localrepo.py - read/write repository class for mercurial
2 2 # coding: utf-8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9
10 10 import functools
11 11 import os
12 12 import random
13 13 import re
14 14 import sys
15 15 import time
16 16 import weakref
17 17
18 18 from concurrent import futures
19 19 from typing import (
20 20 Optional,
21 21 )
22 22
23 23 from .i18n import _
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullrev,
28 28 sha1nodeconstants,
29 29 short,
30 30 )
31 31 from . import (
32 32 bookmarks,
33 33 branchmap,
34 34 bundle2,
35 35 bundlecaches,
36 36 changegroup,
37 37 color,
38 38 commit,
39 39 context,
40 40 dirstate,
41 41 discovery,
42 42 encoding,
43 43 error,
44 44 exchange,
45 45 extensions,
46 46 filelog,
47 47 hook,
48 48 lock as lockmod,
49 49 match as matchmod,
50 50 mergestate as mergestatemod,
51 51 mergeutil,
52 52 namespaces,
53 53 narrowspec,
54 54 obsolete,
55 55 pathutil,
56 56 phases,
57 57 policy,
58 58 pushkey,
59 59 pycompat,
60 60 rcutil,
61 61 repoview,
62 62 requirements as requirementsmod,
63 63 revlog,
64 64 revset,
65 65 revsetlang,
66 66 scmutil,
67 67 sparse,
68 68 store as storemod,
69 69 subrepoutil,
70 70 tags as tagsmod,
71 71 transaction,
72 72 txnutil,
73 73 util,
74 74 vfs as vfsmod,
75 75 wireprototypes,
76 76 )
77 77
78 78 from .interfaces import (
79 79 repository,
80 80 util as interfaceutil,
81 81 )
82 82
83 83 from .utils import (
84 84 hashutil,
85 85 procutil,
86 86 stringutil,
87 87 urlutil,
88 88 )
89 89
90 90 from .revlogutils import (
91 91 concurrency_checker as revlogchecker,
92 92 constants as revlogconst,
93 93 sidedata as sidedatamod,
94 94 )
95 95
96 96 release = lockmod.release
97 97 urlerr = util.urlerr
98 98 urlreq = util.urlreq
99 99
100 100 RE_SKIP_DIRSTATE_ROLLBACK = re.compile(
101 101 b"^((dirstate|narrowspec.dirstate).*|branch$)"
102 102 )
103 103
104 104 # set of (path, vfs-location) tuples. vfs-location is:
105 105 # - 'plain' for vfs relative paths
106 106 # - '' for svfs relative paths
107 107 _cachedfiles = set()
108 108
109 109
110 110 class _basefilecache(scmutil.filecache):
111 111 """All filecache usage on repo are done for logic that should be unfiltered"""
112 112
113 113 def __get__(self, repo, type=None):
114 114 if repo is None:
115 115 return self
116 116 # proxy to unfiltered __dict__ since filtered repo has no entry
117 117 unfi = repo.unfiltered()
118 118 try:
119 119 return unfi.__dict__[self.sname]
120 120 except KeyError:
121 121 pass
122 122 return super(_basefilecache, self).__get__(unfi, type)
123 123
124 124 def set(self, repo, value):
125 125 return super(_basefilecache, self).set(repo.unfiltered(), value)
126 126
127 127
128 128 class repofilecache(_basefilecache):
129 129 """filecache for files in .hg but outside of .hg/store"""
130 130
131 131 def __init__(self, *paths):
132 132 super(repofilecache, self).__init__(*paths)
133 133 for path in paths:
134 134 _cachedfiles.add((path, b'plain'))
135 135
136 136 def join(self, obj, fname):
137 137 return obj.vfs.join(fname)
138 138
139 139
140 140 class storecache(_basefilecache):
141 141 """filecache for files in the store"""
142 142
143 143 def __init__(self, *paths):
144 144 super(storecache, self).__init__(*paths)
145 145 for path in paths:
146 146 _cachedfiles.add((path, b''))
147 147
148 148 def join(self, obj, fname):
149 149 return obj.sjoin(fname)
150 150
151 151
152 152 class changelogcache(storecache):
153 153 """filecache for the changelog"""
154 154
155 155 def __init__(self):
156 156 super(changelogcache, self).__init__()
157 157 _cachedfiles.add((b'00changelog.i', b''))
158 158 _cachedfiles.add((b'00changelog.n', b''))
159 159
160 160 def tracked_paths(self, obj):
161 161 paths = [self.join(obj, b'00changelog.i')]
162 162 if obj.store.opener.options.get(b'persistent-nodemap', False):
163 163 paths.append(self.join(obj, b'00changelog.n'))
164 164 return paths
165 165
166 166
167 167 class manifestlogcache(storecache):
168 168 """filecache for the manifestlog"""
169 169
170 170 def __init__(self):
171 171 super(manifestlogcache, self).__init__()
172 172 _cachedfiles.add((b'00manifest.i', b''))
173 173 _cachedfiles.add((b'00manifest.n', b''))
174 174
175 175 def tracked_paths(self, obj):
176 176 paths = [self.join(obj, b'00manifest.i')]
177 177 if obj.store.opener.options.get(b'persistent-nodemap', False):
178 178 paths.append(self.join(obj, b'00manifest.n'))
179 179 return paths
180 180
181 181
182 182 class mixedrepostorecache(_basefilecache):
183 183 """filecache for a mix files in .hg/store and outside"""
184 184
185 185 def __init__(self, *pathsandlocations):
186 186 # scmutil.filecache only uses the path for passing back into our
187 187 # join(), so we can safely pass a list of paths and locations
188 188 super(mixedrepostorecache, self).__init__(*pathsandlocations)
189 189 _cachedfiles.update(pathsandlocations)
190 190
191 191 def join(self, obj, fnameandlocation):
192 192 fname, location = fnameandlocation
193 193 if location == b'plain':
194 194 return obj.vfs.join(fname)
195 195 else:
196 196 if location != b'':
197 197 raise error.ProgrammingError(
198 198 b'unexpected location: %s' % location
199 199 )
200 200 return obj.sjoin(fname)
201 201
202 202
203 203 def isfilecached(repo, name):
204 204 """check if a repo has already cached "name" filecache-ed property
205 205
206 206 This returns (cachedobj-or-None, iscached) tuple.
207 207 """
208 208 cacheentry = repo.unfiltered()._filecache.get(name, None)
209 209 if not cacheentry:
210 210 return None, False
211 211 return cacheentry.obj, True
212 212
213 213
214 214 class unfilteredpropertycache(util.propertycache):
215 215 """propertycache that apply to unfiltered repo only"""
216 216
217 217 def __get__(self, repo, type=None):
218 218 unfi = repo.unfiltered()
219 219 if unfi is repo:
220 220 return super(unfilteredpropertycache, self).__get__(unfi)
221 221 return getattr(unfi, self.name)
222 222
223 223
224 224 class filteredpropertycache(util.propertycache):
225 225 """propertycache that must take filtering in account"""
226 226
227 227 def cachevalue(self, obj, value):
228 228 object.__setattr__(obj, self.name, value)
229 229
230 230
231 231 def hasunfilteredcache(repo, name):
232 232 """check if a repo has an unfilteredpropertycache value for <name>"""
233 233 return name in vars(repo.unfiltered())
234 234
235 235
236 236 def unfilteredmethod(orig):
237 237 """decorate method that always need to be run on unfiltered version"""
238 238
239 239 @functools.wraps(orig)
240 240 def wrapper(repo, *args, **kwargs):
241 241 return orig(repo.unfiltered(), *args, **kwargs)
242 242
243 243 return wrapper
244 244
245 245
246 246 moderncaps = {
247 247 b'lookup',
248 248 b'branchmap',
249 249 b'pushkey',
250 250 b'known',
251 251 b'getbundle',
252 252 b'unbundle',
253 253 }
254 254 legacycaps = moderncaps.union({b'changegroupsubset'})
255 255
256 256
257 257 @interfaceutil.implementer(repository.ipeercommandexecutor)
258 258 class localcommandexecutor:
259 259 def __init__(self, peer):
260 260 self._peer = peer
261 261 self._sent = False
262 262 self._closed = False
263 263
264 264 def __enter__(self):
265 265 return self
266 266
267 267 def __exit__(self, exctype, excvalue, exctb):
268 268 self.close()
269 269
270 270 def callcommand(self, command, args):
271 271 if self._sent:
272 272 raise error.ProgrammingError(
273 273 b'callcommand() cannot be used after sendcommands()'
274 274 )
275 275
276 276 if self._closed:
277 277 raise error.ProgrammingError(
278 278 b'callcommand() cannot be used after close()'
279 279 )
280 280
281 281 # We don't need to support anything fancy. Just call the named
282 282 # method on the peer and return a resolved future.
283 283 fn = getattr(self._peer, pycompat.sysstr(command))
284 284
285 285 f = futures.Future()
286 286
287 287 try:
288 288 result = fn(**pycompat.strkwargs(args))
289 289 except Exception:
290 290 pycompat.future_set_exception_info(f, sys.exc_info()[1:])
291 291 else:
292 292 f.set_result(result)
293 293
294 294 return f
295 295
296 296 def sendcommands(self):
297 297 self._sent = True
298 298
299 299 def close(self):
300 300 self._closed = True
301 301
302 302
303 303 @interfaceutil.implementer(repository.ipeercommands)
304 304 class localpeer(repository.peer):
305 305 '''peer for a local repo; reflects only the most recent API'''
306 306
307 307 def __init__(self, repo, caps=None, path=None, remotehidden=False):
308 308 super(localpeer, self).__init__(
309 309 repo.ui, path=path, remotehidden=remotehidden
310 310 )
311 311
312 312 if caps is None:
313 313 caps = moderncaps.copy()
314 314 if remotehidden:
315 315 self._repo = repo.filtered(b'served.hidden')
316 316 else:
317 317 self._repo = repo.filtered(b'served')
318 318 if repo._wanted_sidedata:
319 319 formatted = bundle2.format_remote_wanted_sidedata(repo)
320 320 caps.add(b'exp-wanted-sidedata=' + formatted)
321 321
322 322 self._caps = repo._restrictcapabilities(caps)
323 323
324 324 # Begin of _basepeer interface.
325 325
326 326 def url(self):
327 327 return self._repo.url()
328 328
329 329 def local(self):
330 330 return self._repo
331 331
332 332 def canpush(self):
333 333 return True
334 334
335 335 def close(self):
336 336 self._repo.close()
337 337
338 338 # End of _basepeer interface.
339 339
340 340 # Begin of _basewirecommands interface.
341 341
342 342 def branchmap(self):
343 343 return self._repo.branchmap()
344 344
345 345 def capabilities(self):
346 346 return self._caps
347 347
348 348 def get_cached_bundle_inline(self, path):
349 349 # not needed with local peer
350 350 raise NotImplementedError
351 351
352 352 def clonebundles(self):
353 353 return bundlecaches.get_manifest(self._repo)
354 354
355 355 def debugwireargs(self, one, two, three=None, four=None, five=None):
356 356 """Used to test argument passing over the wire"""
357 357 return b"%s %s %s %s %s" % (
358 358 one,
359 359 two,
360 360 pycompat.bytestr(three),
361 361 pycompat.bytestr(four),
362 362 pycompat.bytestr(five),
363 363 )
364 364
365 365 def getbundle(
366 366 self,
367 367 source,
368 368 heads=None,
369 369 common=None,
370 370 bundlecaps=None,
371 371 remote_sidedata=None,
372 372 **kwargs
373 373 ):
374 374 chunks = exchange.getbundlechunks(
375 375 self._repo,
376 376 source,
377 377 heads=heads,
378 378 common=common,
379 379 bundlecaps=bundlecaps,
380 380 remote_sidedata=remote_sidedata,
381 381 **kwargs
382 382 )[1]
383 383 cb = util.chunkbuffer(chunks)
384 384
385 385 if exchange.bundle2requested(bundlecaps):
386 386 # When requesting a bundle2, getbundle returns a stream to make the
387 387 # wire level function happier. We need to build a proper object
388 388 # from it in local peer.
389 389 return bundle2.getunbundler(self.ui, cb)
390 390 else:
391 391 return changegroup.getunbundler(b'01', cb, None)
392 392
393 393 def heads(self):
394 394 return self._repo.heads()
395 395
396 396 def known(self, nodes):
397 397 return self._repo.known(nodes)
398 398
399 399 def listkeys(self, namespace):
400 400 return self._repo.listkeys(namespace)
401 401
402 402 def lookup(self, key):
403 403 return self._repo.lookup(key)
404 404
405 405 def pushkey(self, namespace, key, old, new):
406 406 return self._repo.pushkey(namespace, key, old, new)
407 407
408 408 def stream_out(self):
409 409 raise error.Abort(_(b'cannot perform stream clone against local peer'))
410 410
411 411 def unbundle(self, bundle, heads, url):
412 412 """apply a bundle on a repo
413 413
414 414 This function handles the repo locking itself."""
415 415 try:
416 416 try:
417 417 bundle = exchange.readbundle(self.ui, bundle, None)
418 418 ret = exchange.unbundle(self._repo, bundle, heads, b'push', url)
419 419 if hasattr(ret, 'getchunks'):
420 420 # This is a bundle20 object, turn it into an unbundler.
421 421 # This little dance should be dropped eventually when the
422 422 # API is finally improved.
423 423 stream = util.chunkbuffer(ret.getchunks())
424 424 ret = bundle2.getunbundler(self.ui, stream)
425 425 return ret
426 426 except Exception as exc:
427 427 # If the exception contains output salvaged from a bundle2
428 428 # reply, we need to make sure it is printed before continuing
429 429 # to fail. So we build a bundle2 with such output and consume
430 430 # it directly.
431 431 #
432 432 # This is not very elegant but allows a "simple" solution for
433 433 # issue4594
434 434 output = getattr(exc, '_bundle2salvagedoutput', ())
435 435 if output:
436 436 bundler = bundle2.bundle20(self._repo.ui)
437 437 for out in output:
438 438 bundler.addpart(out)
439 439 stream = util.chunkbuffer(bundler.getchunks())
440 440 b = bundle2.getunbundler(self.ui, stream)
441 441 bundle2.processbundle(self._repo, b)
442 442 raise
443 443 except error.PushRaced as exc:
444 444 raise error.ResponseError(
445 445 _(b'push failed:'), stringutil.forcebytestr(exc)
446 446 )
447 447
448 448 # End of _basewirecommands interface.
449 449
450 450 # Begin of peer interface.
451 451
452 452 def commandexecutor(self):
453 453 return localcommandexecutor(self)
454 454
455 455 # End of peer interface.
456 456
457 457
458 458 @interfaceutil.implementer(repository.ipeerlegacycommands)
459 459 class locallegacypeer(localpeer):
460 460 """peer extension which implements legacy methods too; used for tests with
461 461 restricted capabilities"""
462 462
463 463 def __init__(self, repo, path=None, remotehidden=False):
464 464 super(locallegacypeer, self).__init__(
465 465 repo, caps=legacycaps, path=path, remotehidden=remotehidden
466 466 )
467 467
468 468 # Begin of baselegacywirecommands interface.
469 469
470 470 def between(self, pairs):
471 471 return self._repo.between(pairs)
472 472
473 473 def branches(self, nodes):
474 474 return self._repo.branches(nodes)
475 475
476 476 def changegroup(self, nodes, source):
477 477 outgoing = discovery.outgoing(
478 478 self._repo, missingroots=nodes, ancestorsof=self._repo.heads()
479 479 )
480 480 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
481 481
482 482 def changegroupsubset(self, bases, heads, source):
483 483 outgoing = discovery.outgoing(
484 484 self._repo, missingroots=bases, ancestorsof=heads
485 485 )
486 486 return changegroup.makechangegroup(self._repo, outgoing, b'01', source)
487 487
488 488 # End of baselegacywirecommands interface.
489 489
490 490
491 491 # Functions receiving (ui, features) that extensions can register to impact
492 492 # the ability to load repositories with custom requirements. Only
493 493 # functions defined in loaded extensions are called.
494 494 #
495 495 # The function receives a set of requirement strings that the repository
496 496 # is capable of opening. Functions will typically add elements to the
497 497 # set to reflect that the extension knows how to handle those requirements.
498 498 featuresetupfuncs = set()
499 499
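# Editor's note -- illustrative sketch, not part of localrepo.py: an extension
# would typically register one of these functions from its own module, roughly
# as below; the requirement string and function names are hypothetical.
#
#     from mercurial import localrepo
#
#     def featuresetup(ui, supported):
#         # declare that this extension knows how to open repositories that
#         # carry this (hypothetical) requirement
#         supported.add(b'exp-fancyfeature-requirement')
#
#     def extsetup(ui):
#         localrepo.featuresetupfuncs.add(featuresetup)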
500 500
501 501 def _getsharedvfs(hgvfs, requirements):
502 502 """returns the vfs object pointing to root of shared source
503 503 repo for a shared repository
504 504
505 505 hgvfs is vfs pointing at .hg/ of current repo (shared one)
506 506 requirements is a set of requirements of current repo (shared one)
507 507 """
508 508 # The ``shared`` or ``relshared`` requirements indicate the
509 509 # store lives in the path contained in the ``.hg/sharedpath`` file.
510 510 # This is an absolute path for ``shared`` and relative to
511 511 # ``.hg/`` for ``relshared``.
512 512 sharedpath = hgvfs.read(b'sharedpath').rstrip(b'\n')
513 513 if requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements:
514 514 sharedpath = util.normpath(hgvfs.join(sharedpath))
515 515
516 516 sharedvfs = vfsmod.vfs(sharedpath, realpath=True)
517 517
518 518 if not sharedvfs.exists():
519 519 raise error.RepoError(
520 520 _(b'.hg/sharedpath points to nonexistent directory %s')
521 521 % sharedvfs.base
522 522 )
523 523 return sharedvfs
524 524
525 525
526 526 def _readrequires(vfs, allowmissing):
527 527 """reads the require file present at root of this vfs
528 528 and return a set of requirements
529 529
530 530 If allowmissing is True, we suppress FileNotFoundError if raised"""
531 531 # requires file contains a newline-delimited list of
532 532 # features/capabilities the opener (us) must have in order to use
533 533 # the repository. This file was introduced in Mercurial 0.9.2,
534 534 # which means very old repositories may not have one. We assume
535 535 # a missing file translates to no requirements.
536 536 read = vfs.tryread if allowmissing else vfs.read
537 537 return set(read(b'requires').splitlines())
538 538
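# Editor's note -- illustrative: the requires file read above is simply a
# newline-delimited list of requirement strings; a typical modern repository
# might contain, for example:
#
#     dotencode
#     fncache
#     generaldelta
#     revlogv1
#     sparserevlog
#     store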
539 539
540 540 def makelocalrepository(baseui, path: bytes, intents=None):
541 541 """Create a local repository object.
542 542
543 543 Given arguments needed to construct a local repository, this function
544 544 performs various early repository loading functionality (such as
545 545 reading the ``.hg/requires`` and ``.hg/hgrc`` files), validates that
546 546 the repository can be opened, derives a type suitable for representing
547 547 that repository, and returns an instance of it.
548 548
549 549 The returned object conforms to the ``repository.completelocalrepository``
550 550 interface.
551 551
552 552 The repository type is derived by calling a series of factory functions
553 553 for each aspect/interface of the final repository. These are defined by
554 554 ``REPO_INTERFACES``.
555 555
556 556 Each factory function is called to produce a type implementing a specific
557 557 interface. The cumulative list of returned types will be combined into a
558 558 new type and that type will be instantiated to represent the local
559 559 repository.
560 560
561 561 The factory functions each receive various state that may be consulted
562 562 as part of deriving a type.
563 563
564 564 Extensions should wrap these factory functions to customize repository type
565 565 creation. Note that an extension's wrapped function may be called even if
566 566 that extension is not loaded for the repo being constructed. Extensions
567 567 should check if their ``__name__`` appears in the
568 568 ``extensionmodulenames`` set passed to the factory function and no-op if
569 569 not.
570 570 """
571 571 ui = baseui.copy()
572 572 # Prevent copying repo configuration.
573 573 ui.copy = baseui.copy
574 574
575 575 # Working directory VFS rooted at repository root.
576 576 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
577 577
578 578 # Main VFS for .hg/ directory.
579 579 hgpath = wdirvfs.join(b'.hg')
580 580 hgvfs = vfsmod.vfs(hgpath, cacheaudited=True)
581 581 # Whether this repository is a shared one or not
582 582 shared = False
583 583 # If this repository is shared, vfs pointing to shared repo
584 584 sharedvfs = None
585 585
586 586 # The .hg/ path should exist and should be a directory. All other
587 587 # cases are errors.
588 588 if not hgvfs.isdir():
589 589 try:
590 590 hgvfs.stat()
591 591 except FileNotFoundError:
592 592 pass
593 593 except ValueError as e:
594 594 # Can be raised on Python 3.8 when path is invalid.
595 595 raise error.Abort(
596 596 _(b'invalid path %s: %s') % (path, stringutil.forcebytestr(e))
597 597 )
598 598
599 599 raise error.RepoError(_(b'repository %s not found') % path)
600 600
601 601 requirements = _readrequires(hgvfs, True)
602 602 shared = (
603 603 requirementsmod.SHARED_REQUIREMENT in requirements
604 604 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
605 605 )
606 606 storevfs = None
607 607 if shared:
608 608 # This is a shared repo
609 609 sharedvfs = _getsharedvfs(hgvfs, requirements)
610 610 storevfs = vfsmod.vfs(sharedvfs.join(b'store'))
611 611 else:
612 612 storevfs = vfsmod.vfs(hgvfs.join(b'store'))
613 613
614 614 # if .hg/requires contains the sharesafe requirement, it means
615 615 # there exists a `.hg/store/requires` too and we should read it
616 616 # NOTE: presence of SHARESAFE_REQUIREMENT implies that store requirement
617 617 # is present. We never write SHARESAFE_REQUIREMENT for a repo if store
618 618 # is not present, refer to checkrequirementscompat() for that
619 619 #
620 620 # However, if SHARESAFE_REQUIREMENT is not present, it means that the
621 621 # repository was shared the old way. We check the share source .hg/requires
622 622 # for SHARESAFE_REQUIREMENT to detect whether the current repository needs
623 623 # to be reshared
624 624 hint = _(b"see `hg help config.format.use-share-safe` for more information")
625 625 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
626 626 if (
627 627 shared
628 628 and requirementsmod.SHARESAFE_REQUIREMENT
629 629 not in _readrequires(sharedvfs, True)
630 630 ):
631 631 mismatch_warn = ui.configbool(
632 632 b'share', b'safe-mismatch.source-not-safe.warn'
633 633 )
634 634 mismatch_config = ui.config(
635 635 b'share', b'safe-mismatch.source-not-safe'
636 636 )
637 637 mismatch_verbose_upgrade = ui.configbool(
638 638 b'share', b'safe-mismatch.source-not-safe:verbose-upgrade'
639 639 )
640 640 if mismatch_config in (
641 641 b'downgrade-allow',
642 642 b'allow',
643 643 b'downgrade-abort',
644 644 ):
645 645 # prevent cyclic import localrepo -> upgrade -> localrepo
646 646 from . import upgrade
647 647
648 648 upgrade.downgrade_share_to_non_safe(
649 649 ui,
650 650 hgvfs,
651 651 sharedvfs,
652 652 requirements,
653 653 mismatch_config,
654 654 mismatch_warn,
655 655 mismatch_verbose_upgrade,
656 656 )
657 657 elif mismatch_config == b'abort':
658 658 raise error.Abort(
659 659 _(b"share source does not support share-safe requirement"),
660 660 hint=hint,
661 661 )
662 662 else:
663 663 raise error.Abort(
664 664 _(
665 665 b"share-safe mismatch with source.\nUnrecognized"
666 666 b" value '%s' of `share.safe-mismatch.source-not-safe`"
667 667 b" set."
668 668 )
669 669 % mismatch_config,
670 670 hint=hint,
671 671 )
672 672 else:
673 673 requirements |= _readrequires(storevfs, False)
674 674 elif shared:
675 675 sourcerequires = _readrequires(sharedvfs, False)
676 676 if requirementsmod.SHARESAFE_REQUIREMENT in sourcerequires:
677 677 mismatch_config = ui.config(b'share', b'safe-mismatch.source-safe')
678 678 mismatch_warn = ui.configbool(
679 679 b'share', b'safe-mismatch.source-safe.warn'
680 680 )
681 681 mismatch_verbose_upgrade = ui.configbool(
682 682 b'share', b'safe-mismatch.source-safe:verbose-upgrade'
683 683 )
684 684 if mismatch_config in (
685 685 b'upgrade-allow',
686 686 b'allow',
687 687 b'upgrade-abort',
688 688 ):
689 689 # prevent cyclic import localrepo -> upgrade -> localrepo
690 690 from . import upgrade
691 691
692 692 upgrade.upgrade_share_to_safe(
693 693 ui,
694 694 hgvfs,
695 695 storevfs,
696 696 requirements,
697 697 mismatch_config,
698 698 mismatch_warn,
699 699 mismatch_verbose_upgrade,
700 700 )
701 701 elif mismatch_config == b'abort':
702 702 raise error.Abort(
703 703 _(
704 704 b'version mismatch: source uses share-safe'
705 705 b' functionality while the current share does not'
706 706 ),
707 707 hint=hint,
708 708 )
709 709 else:
710 710 raise error.Abort(
711 711 _(
712 712 b"share-safe mismatch with source.\nUnrecognized"
713 713 b" value '%s' of `share.safe-mismatch.source-safe` set."
714 714 )
715 715 % mismatch_config,
716 716 hint=hint,
717 717 )
718 718
719 719 # The .hg/hgrc file may load extensions or contain config options
720 720 # that influence repository construction. Attempt to load it and
721 721 # process any new extensions that it may have pulled in.
722 722 if loadhgrc(ui, wdirvfs, hgvfs, requirements, sharedvfs):
723 723 afterhgrcload(ui, wdirvfs, hgvfs, requirements)
724 724 extensions.loadall(ui)
725 725 extensions.populateui(ui)
726 726
727 727 # Set of module names of extensions loaded for this repository.
728 728 extensionmodulenames = {m.__name__ for n, m in extensions.extensions(ui)}
729 729
730 730 supportedrequirements = gathersupportedrequirements(ui)
731 731
732 732 # We first validate the requirements are known.
733 733 ensurerequirementsrecognized(requirements, supportedrequirements)
734 734
735 735 # Then we validate that the known set is reasonable to use together.
736 736 ensurerequirementscompatible(ui, requirements)
737 737
738 738 # TODO there are unhandled edge cases related to opening repositories with
739 739 # shared storage. If storage is shared, we should also test for requirements
740 740 # compatibility in the pointed-to repo. This entails loading the .hg/hgrc in
741 741 # that repo, as that repo may load extensions needed to open it. This is a
742 742 # bit complicated because we don't want the other hgrc to overwrite settings
743 743 # in this hgrc.
744 744 #
745 745 # This bug is somewhat mitigated by the fact that we copy the .hg/requires
746 746 # file when sharing repos. But if a requirement is added after the share is
747 747 # performed, thereby introducing a new requirement for the opener, we may
748 748 # not see that and could encounter a run-time error interacting with
749 749 # that shared store since it has an unknown-to-us requirement.
750 750
751 751 # At this point, we know we should be capable of opening the repository.
752 752 # Now get on with doing that.
753 753
754 754 features = set()
755 755
756 756 # The "store" part of the repository holds versioned data. How it is
757 757 # accessed is determined by various requirements. If `shared` or
758 758 # `relshared` requirements are present, this indicates the current repository
759 759 # is a share and the store exists at the path mentioned in `.hg/sharedpath`
760 760 if shared:
761 761 storebasepath = sharedvfs.base
762 762 cachepath = sharedvfs.join(b'cache')
763 763 features.add(repository.REPO_FEATURE_SHARED_STORAGE)
764 764 else:
765 765 storebasepath = hgvfs.base
766 766 cachepath = hgvfs.join(b'cache')
767 767 wcachepath = hgvfs.join(b'wcache')
768 768
769 769 # The store has changed over time and the exact layout is dictated by
770 770 # requirements. The store interface abstracts differences across all
771 771 # of them.
772 772 store = makestore(
773 773 requirements,
774 774 storebasepath,
775 775 lambda base: vfsmod.vfs(base, cacheaudited=True),
776 776 )
777 777 hgvfs.createmode = store.createmode
778 778
779 779 storevfs = store.vfs
780 780 storevfs.options = resolvestorevfsoptions(ui, requirements, features)
781 781
782 782 if (
783 783 requirementsmod.REVLOGV2_REQUIREMENT in requirements
784 784 or requirementsmod.CHANGELOGV2_REQUIREMENT in requirements
785 785 ):
786 786 features.add(repository.REPO_FEATURE_SIDE_DATA)
787 787 # the revlogv2 docket introduced race condition that we need to fix
788 788 features.discard(repository.REPO_FEATURE_STREAM_CLONE)
789 789
790 790 # The cache vfs is used to manage cache files.
791 791 cachevfs = vfsmod.vfs(cachepath, cacheaudited=True)
792 792 cachevfs.createmode = store.createmode
793 793 # The cache vfs is used to manage cache files related to the working copy
794 794 wcachevfs = vfsmod.vfs(wcachepath, cacheaudited=True)
795 795 wcachevfs.createmode = store.createmode
796 796
797 797 # Now resolve the type for the repository object. We do this by repeatedly
798 798 # calling a factory function to produce types for specific aspects of the
799 799 # repo's operation. The aggregate returned types are used as base classes
800 800 # for a dynamically-derived type, which will represent our new repository.
801 801
802 802 bases = []
803 803 extrastate = {}
804 804
805 805 for iface, fn in REPO_INTERFACES:
806 806 # We pass all potentially useful state to give extensions tons of
807 807 # flexibility.
808 808 typ = fn()(
809 809 ui=ui,
810 810 intents=intents,
811 811 requirements=requirements,
812 812 features=features,
813 813 wdirvfs=wdirvfs,
814 814 hgvfs=hgvfs,
815 815 store=store,
816 816 storevfs=storevfs,
817 817 storeoptions=storevfs.options,
818 818 cachevfs=cachevfs,
819 819 wcachevfs=wcachevfs,
820 820 extensionmodulenames=extensionmodulenames,
821 821 extrastate=extrastate,
822 822 baseclasses=bases,
823 823 )
824 824
825 825 if not isinstance(typ, type):
826 826 raise error.ProgrammingError(
827 827 b'unable to construct type for %s' % iface
828 828 )
829 829
830 830 bases.append(typ)
831 831
832 832 # type() allows you to use characters in type names that wouldn't be
833 833 # recognized as Python symbols in source code. We abuse that to add
834 834 # rich information about our constructed repo.
835 835 name = pycompat.sysstr(
836 836 b'derivedrepo:%s<%s>' % (wdirvfs.base, b','.join(sorted(requirements)))
837 837 )
838 838
839 839 cls = type(name, tuple(bases), {})
840 840
841 841 return cls(
842 842 baseui=baseui,
843 843 ui=ui,
844 844 origroot=path,
845 845 wdirvfs=wdirvfs,
846 846 hgvfs=hgvfs,
847 847 requirements=requirements,
848 848 supportedrequirements=supportedrequirements,
849 849 sharedpath=storebasepath,
850 850 store=store,
851 851 cachevfs=cachevfs,
852 852 wcachevfs=wcachevfs,
853 853 features=features,
854 854 intents=intents,
855 855 )
856 856
857 857
858 858 def loadhgrc(
859 859 ui,
860 860 wdirvfs: vfsmod.vfs,
861 861 hgvfs: vfsmod.vfs,
862 862 requirements,
863 863 sharedvfs: Optional[vfsmod.vfs] = None,
864 864 ):
865 865 """Load hgrc files/content into a ui instance.
866 866
867 867 This is called during repository opening to load any additional
868 868 config files or settings relevant to the current repository.
869 869
870 870 Returns a bool indicating whether any additional configs were loaded.
871 871
872 872 Extensions should monkeypatch this function to modify how per-repo
873 873 configs are loaded. For example, an extension may wish to pull in
874 874 configs from alternate files or sources.
875 875
876 876 sharedvfs is a vfs object pointing to the source repo if the current one is a
877 877 shared one
878 878 """
879 879 if not rcutil.use_repo_hgrc():
880 880 return False
881 881
882 882 ret = False
883 883 # first load config from shared source if we have to
884 884 if requirementsmod.SHARESAFE_REQUIREMENT in requirements and sharedvfs:
885 885 try:
886 886 ui.readconfig(sharedvfs.join(b'hgrc'), root=sharedvfs.base)
887 887 ret = True
888 888 except IOError:
889 889 pass
890 890
891 891 try:
892 892 ui.readconfig(hgvfs.join(b'hgrc'), root=wdirvfs.base)
893 893 ret = True
894 894 except IOError:
895 895 pass
896 896
897 897 try:
898 898 ui.readconfig(hgvfs.join(b'hgrc-not-shared'), root=wdirvfs.base)
899 899 ret = True
900 900 except IOError:
901 901 pass
902 902
903 903 return ret
904 904
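# Editor's note -- illustrative sketch, not part of localrepo.py: per the
# docstring above, an extension that wants to pull per-repo config from an
# additional file would typically wrap loadhgrc from its own module, roughly
# as below; the extension and the 'hgrc-extra' filename are hypothetical.
#
#     from mercurial import extensions, localrepo
#
#     def _loadhgrc(orig, ui, wdirvfs, hgvfs, requirements, sharedvfs=None):
#         loaded = orig(ui, wdirvfs, hgvfs, requirements, sharedvfs)
#         try:
#             ui.readconfig(hgvfs.join(b'hgrc-extra'), root=wdirvfs.base)
#             loaded = True
#         except IOError:
#             pass
#         return loaded
#
#     def uisetup(ui):
#         extensions.wrapfunction(localrepo, 'loadhgrc', _loadhgrc)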
905 905
906 906 def afterhgrcload(ui, wdirvfs, hgvfs, requirements):
907 907 """Perform additional actions after .hg/hgrc is loaded.
908 908
909 909 This function is called during repository loading immediately after
910 910 the .hg/hgrc file is loaded and before per-repo extensions are loaded.
911 911
912 912 The function can be used to validate configs, automatically add
913 913 options (including extensions) based on requirements, etc.
914 914 """
915 915
916 916 # Map of requirements to list of extensions to load automatically when
917 917 # requirement is present.
918 918 autoextensions = {
919 919 b'git': [b'git'],
920 920 b'largefiles': [b'largefiles'],
921 921 b'lfs': [b'lfs'],
922 922 }
923 923
924 924 for requirement, names in sorted(autoextensions.items()):
925 925 if requirement not in requirements:
926 926 continue
927 927
928 928 for name in names:
929 929 if not ui.hasconfig(b'extensions', name):
930 930 ui.setconfig(b'extensions', name, b'', source=b'autoload')
931 931
932 932
933 933 def gathersupportedrequirements(ui):
934 934 """Determine the complete set of recognized requirements."""
935 935 # Start with all requirements supported by this file.
936 936 supported = set(localrepository._basesupported)
937 937
938 938 # Execute ``featuresetupfuncs`` entries if they belong to an extension
939 939 # relevant to this ui instance.
940 940 modules = {m.__name__ for n, m in extensions.extensions(ui)}
941 941
942 942 for fn in featuresetupfuncs:
943 943 if fn.__module__ in modules:
944 944 fn(ui, supported)
945 945
946 946 # Add derived requirements from registered compression engines.
947 947 for name in util.compengines:
948 948 engine = util.compengines[name]
949 949 if engine.available() and engine.revlogheader():
950 950 supported.add(b'exp-compression-%s' % name)
951 951 if engine.name() == b'zstd':
952 952 supported.add(requirementsmod.REVLOG_COMPRESSION_ZSTD)
953 953
954 954 return supported
955 955
956 956
957 957 def ensurerequirementsrecognized(requirements, supported):
958 958 """Validate that a set of local requirements is recognized.
959 959
960 960 Receives a set of requirements. Raises an ``error.RepoError`` if there
961 961 exists any requirement in that set that currently loaded code doesn't
962 962 recognize.
963 963
964 964 Returns a set of supported requirements.
965 965 """
966 966 missing = set()
967 967
968 968 for requirement in requirements:
969 969 if requirement in supported:
970 970 continue
971 971
972 972 if not requirement or not requirement[0:1].isalnum():
973 973 raise error.RequirementError(_(b'.hg/requires file is corrupt'))
974 974
975 975 missing.add(requirement)
976 976
977 977 if missing:
978 978 raise error.RequirementError(
979 979 _(b'repository requires features unknown to this Mercurial: %s')
980 980 % b' '.join(sorted(missing)),
981 981 hint=_(
982 982 b'see https://mercurial-scm.org/wiki/MissingRequirement '
983 983 b'for more information'
984 984 ),
985 985 )
986 986
987 987
988 988 def ensurerequirementscompatible(ui, requirements):
989 989 """Validates that a set of recognized requirements is mutually compatible.
990 990
991 991 Some requirements may not be compatible with others or require
992 992 config options that aren't enabled. This function is called during
993 993 repository opening to ensure that the set of requirements needed
994 994 to open a repository is sane and compatible with config options.
995 995
996 996 Extensions can monkeypatch this function to perform additional
997 997 checking.
998 998
999 999 ``error.RepoError`` should be raised on failure.
1000 1000 """
1001 1001 if (
1002 1002 requirementsmod.SPARSE_REQUIREMENT in requirements
1003 1003 and not sparse.enabled
1004 1004 ):
1005 1005 raise error.RepoError(
1006 1006 _(
1007 1007 b'repository is using sparse feature but '
1008 1008 b'sparse is not enabled; enable the '
1009 1009 b'"sparse" extensions to access'
1010 1010 )
1011 1011 )
1012 1012
1013 1013
1014 1014 def makestore(requirements, path, vfstype):
1015 1015 """Construct a storage object for a repository."""
1016 1016 if requirementsmod.STORE_REQUIREMENT in requirements:
1017 1017 if requirementsmod.FNCACHE_REQUIREMENT in requirements:
1018 1018 dotencode = requirementsmod.DOTENCODE_REQUIREMENT in requirements
1019 1019 return storemod.fncachestore(path, vfstype, dotencode)
1020 1020
1021 1021 return storemod.encodedstore(path, vfstype)
1022 1022
1023 1023 return storemod.basicstore(path, vfstype)
1024 1024
1025 1025
1026 1026 def resolvestorevfsoptions(ui, requirements, features):
1027 1027 """Resolve the options to pass to the store vfs opener.
1028 1028
1029 1029 The returned dict is used to influence behavior of the storage layer.
1030 1030 """
1031 1031 options = {}
1032 1032
1033 1033 if requirementsmod.TREEMANIFEST_REQUIREMENT in requirements:
1034 1034 options[b'treemanifest'] = True
1035 1035
1036 1036 # experimental config: format.manifestcachesize
1037 1037 manifestcachesize = ui.configint(b'format', b'manifestcachesize')
1038 1038 if manifestcachesize is not None:
1039 1039 options[b'manifestcachesize'] = manifestcachesize
1040 1040
1041 1041 # In the absence of another requirement superseding a revlog-related
1042 1042 # requirement, we have to assume the repo is using revlog version 0.
1043 1043 # This revlog format is super old and we don't bother trying to parse
1044 1044 # opener options for it because those options wouldn't do anything
1045 1045 # meaningful on such old repos.
1046 1046 if (
1047 1047 requirementsmod.REVLOGV1_REQUIREMENT in requirements
1048 1048 or requirementsmod.REVLOGV2_REQUIREMENT in requirements
1049 1049 ):
1050 1050 options.update(resolverevlogstorevfsoptions(ui, requirements, features))
1051 1051 else: # explicitly mark repo as using revlogv0
1052 1052 options[b'revlogv0'] = True
1053 1053
1054 1054 if requirementsmod.COPIESSDC_REQUIREMENT in requirements:
1055 1055 options[b'copies-storage'] = b'changeset-sidedata'
1056 1056 else:
1057 1057 writecopiesto = ui.config(b'experimental', b'copies.write-to')
1058 1058 copiesextramode = (b'changeset-only', b'compatibility')
1059 1059 if writecopiesto in copiesextramode:
1060 1060 options[b'copies-storage'] = b'extra'
1061 1061
1062 1062 return options
1063 1063
1064 1064
1065 1065 def resolverevlogstorevfsoptions(ui, requirements, features):
1066 1066 """Resolve opener options specific to revlogs."""
1067 1067
1068 1068 options = {}
1069 1069 options[b'flagprocessors'] = {}
1070 1070
1071 1071 feature_config = options[b'feature-config'] = revlog.FeatureConfig()
1072 1072 data_config = options[b'data-config'] = revlog.DataConfig()
1073 1073 delta_config = options[b'delta-config'] = revlog.DeltaConfig()
1074 1074
1075 1075 if requirementsmod.REVLOGV1_REQUIREMENT in requirements:
1076 1076 options[b'revlogv1'] = True
1077 1077 if requirementsmod.REVLOGV2_REQUIREMENT in requirements:
1078 1078 options[b'revlogv2'] = True
1079 1079 if requirementsmod.CHANGELOGV2_REQUIREMENT in requirements:
1080 1080 options[b'changelogv2'] = True
1081 1081 cmp_rank = ui.configbool(b'experimental', b'changelog-v2.compute-rank')
1082 1082 options[b'changelogv2.compute-rank'] = cmp_rank
1083 1083
1084 1084 if requirementsmod.GENERALDELTA_REQUIREMENT in requirements:
1085 1085 options[b'generaldelta'] = True
1086 1086
1087 1087 # experimental config: format.chunkcachesize
1088 1088 chunkcachesize = ui.configint(b'format', b'chunkcachesize')
1089 1089 if chunkcachesize is not None:
1090 1090 data_config.chunk_cache_size = chunkcachesize
1091 1091
1092 deltabothparents = ui.configbool(
1092 delta_config.delta_both_parents = ui.configbool(
1093 1093 b'storage', b'revlog.optimize-delta-parent-choice'
1094 1094 )
1095 options[b'deltabothparents'] = deltabothparents
1096 1095 dps_cgds = ui.configint(
1097 1096 b'storage',
1098 1097 b'revlog.delta-parent-search.candidate-group-chunk-size',
1099 1098 )
1100 1099 options[b'delta-parent-search.candidate-group-chunk-size'] = dps_cgds
1101 1100 options[b'debug-delta'] = ui.configbool(b'debug', b'revlog.debug-delta')
1102 1101
1103 1102 issue6528 = ui.configbool(b'storage', b'revlog.issue6528.fix-incoming')
1104 1103 options[b'issue6528.fix-incoming'] = issue6528
1105 1104
1106 1105 lazydelta = ui.configbool(b'storage', b'revlog.reuse-external-delta')
1107 1106 lazydeltabase = False
1108 1107 if lazydelta:
1109 1108 lazydeltabase = ui.configbool(
1110 1109 b'storage', b'revlog.reuse-external-delta-parent'
1111 1110 )
1112 1111 if lazydeltabase is None:
1113 1112 lazydeltabase = not scmutil.gddeltaconfig(ui)
1114 1113 options[b'lazydelta'] = lazydelta
1115 1114 options[b'lazydeltabase'] = lazydeltabase
1116 1115
1117 1116 chainspan = ui.configbytes(b'experimental', b'maxdeltachainspan')
1118 1117 if 0 <= chainspan:
1119 1118 options[b'maxdeltachainspan'] = chainspan
1120 1119
1121 1120 mmapindexthreshold = ui.configbytes(b'experimental', b'mmapindexthreshold')
1122 1121 if mmapindexthreshold is not None:
1123 1122 options[b'mmapindexthreshold'] = mmapindexthreshold
1124 1123
1125 1124 withsparseread = ui.configbool(b'experimental', b'sparse-read')
1126 1125 srdensitythres = float(
1127 1126 ui.config(b'experimental', b'sparse-read.density-threshold')
1128 1127 )
1129 1128 srmingapsize = ui.configbytes(b'experimental', b'sparse-read.min-gap-size')
1130 1129 options[b'with-sparse-read'] = withsparseread
1131 1130 options[b'sparse-read-density-threshold'] = srdensitythres
1132 1131 options[b'sparse-read-min-gap-size'] = srmingapsize
1133 1132
1134 1133 sparserevlog = requirementsmod.SPARSEREVLOG_REQUIREMENT in requirements
1135 1134 options[b'sparse-revlog'] = sparserevlog
1136 1135 if sparserevlog:
1137 1136 options[b'generaldelta'] = True
1138 1137
1139 1138 maxchainlen = None
1140 1139 if sparserevlog:
1141 1140 maxchainlen = revlogconst.SPARSE_REVLOG_MAX_CHAIN_LENGTH
1142 1141 # experimental config: format.maxchainlen
1143 1142 maxchainlen = ui.configint(b'format', b'maxchainlen', maxchainlen)
1144 1143 if maxchainlen is not None:
1145 1144 options[b'maxchainlen'] = maxchainlen
1146 1145
1147 1146 for r in requirements:
1148 1147 # we allow multiple compression engine requirements to co-exist because
1149 1148 # strictly speaking, revlog seems to support mixed compression style.
1150 1149 #
1151 1150 # The compression used for new entries will be "the last one"
1152 1151 prefix = r.startswith
1153 1152 if prefix(b'revlog-compression-') or prefix(b'exp-compression-'):
1154 1153 options[b'compengine'] = r.split(b'-', 2)[2]
1155 1154
1156 1155 options[b'zlib.level'] = ui.configint(b'storage', b'revlog.zlib.level')
1157 1156 if options[b'zlib.level'] is not None:
1158 1157 if not (0 <= options[b'zlib.level'] <= 9):
1159 1158 msg = _(b'invalid value for `storage.revlog.zlib.level` config: %d')
1160 1159 raise error.Abort(msg % options[b'zlib.level'])
1161 1160 options[b'zstd.level'] = ui.configint(b'storage', b'revlog.zstd.level')
1162 1161 if options[b'zstd.level'] is not None:
1163 1162 if not (0 <= options[b'zstd.level'] <= 22):
1164 1163 msg = _(b'invalid value for `storage.revlog.zstd.level` config: %d')
1165 1164 raise error.Abort(msg % options[b'zstd.level'])
1166 1165
1167 1166 if requirementsmod.NARROW_REQUIREMENT in requirements:
1168 1167 options[b'enableellipsis'] = True
1169 1168
1170 1169 if ui.configbool(b'experimental', b'rust.index'):
1171 1170 options[b'rust.index'] = True
1172 1171 if requirementsmod.NODEMAP_REQUIREMENT in requirements:
1173 1172 slow_path = ui.config(
1174 1173 b'storage', b'revlog.persistent-nodemap.slow-path'
1175 1174 )
1176 1175 if slow_path not in (b'allow', b'warn', b'abort'):
1177 1176 default = ui.config_default(
1178 1177 b'storage', b'revlog.persistent-nodemap.slow-path'
1179 1178 )
1180 1179 msg = _(
1181 1180 b'unknown value for config '
1182 1181 b'"storage.revlog.persistent-nodemap.slow-path": "%s"\n'
1183 1182 )
1184 1183 ui.warn(msg % slow_path)
1185 1184 if not ui.quiet:
1186 1185 ui.warn(_(b'falling back to default value: %s\n') % default)
1187 1186 slow_path = default
1188 1187
1189 1188 msg = _(
1190 1189 b"accessing `persistent-nodemap` repository without associated "
1191 1190 b"fast implementation."
1192 1191 )
1193 1192 hint = _(
1194 1193 b"check `hg help config.format.use-persistent-nodemap` "
1195 1194 b"for details"
1196 1195 )
1197 1196 if not revlog.HAS_FAST_PERSISTENT_NODEMAP:
1198 1197 if slow_path == b'warn':
1199 1198 msg = b"warning: " + msg + b'\n'
1200 1199 ui.warn(msg)
1201 1200 if not ui.quiet:
1202 1201 hint = b'(' + hint + b')\n'
1203 1202 ui.warn(hint)
1204 1203 if slow_path == b'abort':
1205 1204 raise error.Abort(msg, hint=hint)
1206 1205 options[b'persistent-nodemap'] = True
1207 1206 if requirementsmod.DIRSTATE_V2_REQUIREMENT in requirements:
1208 1207 slow_path = ui.config(b'storage', b'dirstate-v2.slow-path')
1209 1208 if slow_path not in (b'allow', b'warn', b'abort'):
1210 1209 default = ui.config_default(b'storage', b'dirstate-v2.slow-path')
1211 1210 msg = _(b'unknown value for config "dirstate-v2.slow-path": "%s"\n')
1212 1211 ui.warn(msg % slow_path)
1213 1212 if not ui.quiet:
1214 1213 ui.warn(_(b'falling back to default value: %s\n') % default)
1215 1214 slow_path = default
1216 1215
1217 1216 msg = _(
1218 1217 b"accessing `dirstate-v2` repository without associated "
1219 1218 b"fast implementation."
1220 1219 )
1221 1220 hint = _(
1222 1221 b"check `hg help config.format.use-dirstate-v2` " b"for details"
1223 1222 )
1224 1223 if not dirstate.HAS_FAST_DIRSTATE_V2:
1225 1224 if slow_path == b'warn':
1226 1225 msg = b"warning: " + msg + b'\n'
1227 1226 ui.warn(msg)
1228 1227 if not ui.quiet:
1229 1228 hint = b'(' + hint + b')\n'
1230 1229 ui.warn(hint)
1231 1230 if slow_path == b'abort':
1232 1231 raise error.Abort(msg, hint=hint)
1233 1232 if ui.configbool(b'storage', b'revlog.persistent-nodemap.mmap'):
1234 1233 options[b'persistent-nodemap.mmap'] = True
1235 1234 if ui.configbool(b'devel', b'persistent-nodemap'):
1236 1235 options[b'devel-force-nodemap'] = True
1237 1236
1238 1237 return options
1239 1238
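# Editor's note -- illustrative: the revlog options resolved above come from
# ordinary hgrc configuration; for example, the delta-related knobs read in
# this function correspond to entries such as (example values):
#
#     [storage]
#     revlog.optimize-delta-parent-choice = yes
#     revlog.reuse-external-delta = yes
#
#     [format]
#     maxchainlen = 1000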
1240 1239
1241 1240 def makemain(**kwargs):
1242 1241 """Produce a type conforming to ``ilocalrepositorymain``."""
1243 1242 return localrepository
1244 1243
1245 1244
1246 1245 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1247 1246 class revlogfilestorage:
1248 1247 """File storage when using revlogs."""
1249 1248
1250 1249 def file(self, path):
1251 1250 if path.startswith(b'/'):
1252 1251 path = path[1:]
1253 1252
1254 1253 try_split = (
1255 1254 self.currenttransaction() is not None
1256 1255 or txnutil.mayhavepending(self.root)
1257 1256 )
1258 1257
1259 1258 return filelog.filelog(self.svfs, path, try_split=try_split)
1260 1259
1261 1260
1262 1261 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1263 1262 class revlognarrowfilestorage:
1264 1263 """File storage when using revlogs and narrow files."""
1265 1264
1266 1265 def file(self, path):
1267 1266 if path.startswith(b'/'):
1268 1267 path = path[1:]
1269 1268
1270 1269 try_split = (
1271 1270 self.currenttransaction() is not None
1272 1271 or txnutil.mayhavepending(self.root)
1273 1272 )
1274 1273 return filelog.narrowfilelog(
1275 1274 self.svfs, path, self._storenarrowmatch, try_split=try_split
1276 1275 )
1277 1276
1278 1277
1279 1278 def makefilestorage(requirements, features, **kwargs):
1280 1279 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1281 1280 features.add(repository.REPO_FEATURE_REVLOG_FILE_STORAGE)
1282 1281 features.add(repository.REPO_FEATURE_STREAM_CLONE)
1283 1282
1284 1283 if requirementsmod.NARROW_REQUIREMENT in requirements:
1285 1284 return revlognarrowfilestorage
1286 1285 else:
1287 1286 return revlogfilestorage
1288 1287
1289 1288
1290 1289 # List of repository interfaces and factory functions for them. Each
1291 1290 # will be called in order during ``makelocalrepository()`` to iteratively
1292 1291 # derive the final type for a local repository instance. We capture the
1293 1292 # function as a lambda so we don't hold a reference and the module-level
1294 1293 # functions can be wrapped.
1295 1294 REPO_INTERFACES = [
1296 1295 (repository.ilocalrepositorymain, lambda: makemain),
1297 1296 (repository.ilocalrepositoryfilestorage, lambda: makefilestorage),
1298 1297 ]
1299 1298
1300 1299
1301 1300 @interfaceutil.implementer(repository.ilocalrepositorymain)
1302 1301 class localrepository:
1303 1302 """Main class for representing local repositories.
1304 1303
1305 1304 All local repositories are instances of this class.
1306 1305
1307 1306 Constructed on its own, instances of this class are not usable as
1308 1307 repository objects. To obtain a usable repository object, call
1309 1308 ``hg.repository()``, ``localrepo.instance()``, or
1310 1309 ``localrepo.makelocalrepository()``. The latter is the lowest-level.
1311 1310 ``instance()`` adds support for creating new repositories.
1312 1311 ``hg.repository()`` adds more extension integration, including calling
1313 1312 ``reposetup()``. Generally speaking, ``hg.repository()`` should be
1314 1313 used.
1315 1314 """
1316 1315
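# Editor's note -- illustrative: per the docstring above, callers normally go
# through the higher-level helpers instead of instantiating this class, e.g.:
#
#     from mercurial import hg, ui as uimod
#     repo = hg.repository(uimod.ui.load(), b'/path/to/repo')
#     unfi = repo.unfiltered()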
1317 1316 _basesupported = {
1318 1317 requirementsmod.ARCHIVED_PHASE_REQUIREMENT,
1319 1318 requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT,
1320 1319 requirementsmod.CHANGELOGV2_REQUIREMENT,
1321 1320 requirementsmod.COPIESSDC_REQUIREMENT,
1322 1321 requirementsmod.DIRSTATE_TRACKED_HINT_V1,
1323 1322 requirementsmod.DIRSTATE_V2_REQUIREMENT,
1324 1323 requirementsmod.DOTENCODE_REQUIREMENT,
1325 1324 requirementsmod.FNCACHE_REQUIREMENT,
1326 1325 requirementsmod.GENERALDELTA_REQUIREMENT,
1327 1326 requirementsmod.INTERNAL_PHASE_REQUIREMENT,
1328 1327 requirementsmod.NODEMAP_REQUIREMENT,
1329 1328 requirementsmod.RELATIVE_SHARED_REQUIREMENT,
1330 1329 requirementsmod.REVLOGV1_REQUIREMENT,
1331 1330 requirementsmod.REVLOGV2_REQUIREMENT,
1332 1331 requirementsmod.SHARED_REQUIREMENT,
1333 1332 requirementsmod.SHARESAFE_REQUIREMENT,
1334 1333 requirementsmod.SPARSE_REQUIREMENT,
1335 1334 requirementsmod.SPARSEREVLOG_REQUIREMENT,
1336 1335 requirementsmod.STORE_REQUIREMENT,
1337 1336 requirementsmod.TREEMANIFEST_REQUIREMENT,
1338 1337 }
1339 1338
1340 1339 # list of prefixes for files which can be written without 'wlock'
1341 1340 # Extensions should extend this list when needed
1342 1341 _wlockfreeprefix = {
1343 1342 # We might consider requiring 'wlock' for the next
1344 1343 # two, but pretty much all the existing code assume
1345 1344 # wlock is not needed so we keep them excluded for
1346 1345 # now.
1347 1346 b'hgrc',
1348 1347 b'requires',
1349 1348 # XXX cache is a complicated business; someone
1350 1349 # should investigate this in depth at some point
1351 1350 b'cache/',
1352 1351 # XXX bisect was still a bit too messy at the time
1353 1352 # this changeset was introduced. Someone should fix
1354 1353 # the remaining bit and drop this line
1355 1354 b'bisect.state',
1356 1355 }
1357 1356
1358 1357 def __init__(
1359 1358 self,
1360 1359 baseui,
1361 1360 ui,
1362 1361 origroot: bytes,
1363 1362 wdirvfs: vfsmod.vfs,
1364 1363 hgvfs: vfsmod.vfs,
1365 1364 requirements,
1366 1365 supportedrequirements,
1367 1366 sharedpath: bytes,
1368 1367 store,
1369 1368 cachevfs: vfsmod.vfs,
1370 1369 wcachevfs: vfsmod.vfs,
1371 1370 features,
1372 1371 intents=None,
1373 1372 ):
1374 1373 """Create a new local repository instance.
1375 1374
1376 1375 Most callers should use ``hg.repository()``, ``localrepo.instance()``,
1377 1376 or ``localrepo.makelocalrepository()`` for obtaining a new repository
1378 1377 object.
1379 1378
1380 1379 Arguments:
1381 1380
1382 1381 baseui
1383 1382 ``ui.ui`` instance that ``ui`` argument was based off of.
1384 1383
1385 1384 ui
1386 1385 ``ui.ui`` instance for use by the repository.
1387 1386
1388 1387 origroot
1389 1388 ``bytes`` path to working directory root of this repository.
1390 1389
1391 1390 wdirvfs
1392 1391 ``vfs.vfs`` rooted at the working directory.
1393 1392
1394 1393 hgvfs
1395 1394 ``vfs.vfs`` rooted at .hg/
1396 1395
1397 1396 requirements
1398 1397 ``set`` of bytestrings representing repository opening requirements.
1399 1398
1400 1399 supportedrequirements
1401 1400 ``set`` of bytestrings representing repository requirements that we
1402 1401 know how to open. May be a superset of ``requirements``.
1403 1402
1404 1403 sharedpath
1405 1404 ``bytes`` defining the path to the storage base directory. Points to a
1406 1405 ``.hg/`` directory somewhere.
1407 1406
1408 1407 store
1409 1408 ``store.basicstore`` (or derived) instance providing access to
1410 1409 versioned storage.
1411 1410
1412 1411 cachevfs
1413 1412 ``vfs.vfs`` used for cache files.
1414 1413
1415 1414 wcachevfs
1416 1415 ``vfs.vfs`` used for cache files related to the working copy.
1417 1416
1418 1417 features
1419 1418 ``set`` of bytestrings defining features/capabilities of this
1420 1419 instance.
1421 1420
1422 1421 intents
1423 1422 ``set`` of system strings indicating what this repo will be used
1424 1423 for.
1425 1424 """
1426 1425 self.baseui = baseui
1427 1426 self.ui = ui
1428 1427 self.origroot = origroot
1429 1428 # vfs rooted at working directory.
1430 1429 self.wvfs = wdirvfs
1431 1430 self.root = wdirvfs.base
1432 1431 # vfs rooted at .hg/. Used to access most non-store paths.
1433 1432 self.vfs = hgvfs
1434 1433 self.path = hgvfs.base
1435 1434 self.requirements = requirements
1436 1435 self.nodeconstants = sha1nodeconstants
1437 1436 self.nullid = self.nodeconstants.nullid
1438 1437 self.supported = supportedrequirements
1439 1438 self.sharedpath = sharedpath
1440 1439 self.store = store
1441 1440 self.cachevfs = cachevfs
1442 1441 self.wcachevfs = wcachevfs
1443 1442 self.features = features
1444 1443
1445 1444 self.filtername = None
1446 1445
1447 1446 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1448 1447 b'devel', b'check-locks'
1449 1448 ):
1450 1449 self.vfs.audit = self._getvfsward(self.vfs.audit)
1451 1450 # A list of callbacks to shape the phase if no data were found.
1452 1451 # Callbacks are in the form: func(repo, roots) --> processed root.
1453 1452 # This list is to be filled by extensions during repo setup
1454 1453 self._phasedefaults = []
1455 1454
1456 1455 color.setup(self.ui)
1457 1456
1458 1457 self.spath = self.store.path
1459 1458 self.svfs = self.store.vfs
1460 1459 self.sjoin = self.store.join
1461 1460 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
1462 1461 b'devel', b'check-locks'
1463 1462 ):
1464 1463 if hasattr(self.svfs, 'vfs'): # this is filtervfs
1465 1464 self.svfs.vfs.audit = self._getsvfsward(self.svfs.vfs.audit)
1466 1465 else: # standard vfs
1467 1466 self.svfs.audit = self._getsvfsward(self.svfs.audit)
1468 1467
1469 1468 self._dirstatevalidatewarned = False
1470 1469
1471 1470 self._branchcaches = branchmap.BranchMapCache()
1472 1471 self._revbranchcache = None
1473 1472 self._filterpats = {}
1474 1473 self._datafilters = {}
1475 1474 self._transref = self._lockref = self._wlockref = None
1476 1475
1477 1476 # A cache for various files under .hg/ that tracks file changes,
1478 1477 # (used by the filecache decorator)
1479 1478 #
1480 1479 # Maps a property name to its util.filecacheentry
1481 1480 self._filecache = {}
1482 1481
1483 1482 # hold sets of revision to be filtered
1484 1483 # should be cleared when something might have changed the filter value:
1485 1484 # - new changesets,
1486 1485 # - phase change,
1487 1486 # - new obsolescence marker,
1488 1487 # - working directory parent change,
1489 1488 # - bookmark changes
1490 1489 self.filteredrevcache = {}
1491 1490
1492 1491 self._dirstate = None
1493 1492 # post-dirstate-status hooks
1494 1493 self._postdsstatus = []
1495 1494
1496 1495 self._pending_narrow_pats = None
1497 1496 self._pending_narrow_pats_dirstate = None
1498 1497
1499 1498 # generic mapping between names and nodes
1500 1499 self.names = namespaces.namespaces()
1501 1500
1502 1501 # Key to signature value.
1503 1502 self._sparsesignaturecache = {}
1504 1503 # Signature to cached matcher instance.
1505 1504 self._sparsematchercache = {}
1506 1505
1507 1506 self._extrafilterid = repoview.extrafilter(ui)
1508 1507
1509 1508 self.filecopiesmode = None
1510 1509 if requirementsmod.COPIESSDC_REQUIREMENT in self.requirements:
1511 1510 self.filecopiesmode = b'changeset-sidedata'
1512 1511
1513 1512 self._wanted_sidedata = set()
1514 1513 self._sidedata_computers = {}
1515 1514 sidedatamod.set_sidedata_spec_for_repo(self)
1516 1515
1517 1516 def _getvfsward(self, origfunc):
1518 1517 """build a ward for self.vfs"""
1519 1518 rref = weakref.ref(self)
1520 1519
1521 1520 def checkvfs(path, mode=None):
1522 1521 ret = origfunc(path, mode=mode)
1523 1522 repo = rref()
1524 1523 if (
1525 1524 repo is None
1526 1525 or not hasattr(repo, '_wlockref')
1527 1526 or not hasattr(repo, '_lockref')
1528 1527 ):
1529 1528 return
1530 1529 if mode in (None, b'r', b'rb'):
1531 1530 return
1532 1531 if path.startswith(repo.path):
1533 1532 # truncate name relative to the repository (.hg)
1534 1533 path = path[len(repo.path) + 1 :]
1535 1534 if path.startswith(b'cache/'):
1536 1535 msg = b'accessing cache with vfs instead of cachevfs: "%s"'
1537 1536 repo.ui.develwarn(msg % path, stacklevel=3, config=b"cache-vfs")
1538 1537 # path prefixes covered by 'lock'
1539 1538 vfs_path_prefixes = (
1540 1539 b'journal.',
1541 1540 b'undo.',
1542 1541 b'strip-backup/',
1543 1542 b'cache/',
1544 1543 )
1545 1544 if any(path.startswith(prefix) for prefix in vfs_path_prefixes):
1546 1545 if repo._currentlock(repo._lockref) is None:
1547 1546 repo.ui.develwarn(
1548 1547 b'write with no lock: "%s"' % path,
1549 1548 stacklevel=3,
1550 1549 config=b'check-locks',
1551 1550 )
1552 1551 elif repo._currentlock(repo._wlockref) is None:
1553 1552 # rest of vfs files are covered by 'wlock'
1554 1553 #
1555 1554 # exclude special files
1556 1555 for prefix in self._wlockfreeprefix:
1557 1556 if path.startswith(prefix):
1558 1557 return
1559 1558 repo.ui.develwarn(
1560 1559 b'write with no wlock: "%s"' % path,
1561 1560 stacklevel=3,
1562 1561 config=b'check-locks',
1563 1562 )
1564 1563 return ret
1565 1564
1566 1565 return checkvfs
1567 1566
1568 1567 def _getsvfsward(self, origfunc):
1569 1568 """build a ward for self.svfs"""
1570 1569 rref = weakref.ref(self)
1571 1570
1572 1571 def checksvfs(path, mode=None):
1573 1572 ret = origfunc(path, mode=mode)
1574 1573 repo = rref()
1575 1574 if repo is None or not hasattr(repo, '_lockref'):
1576 1575 return
1577 1576 if mode in (None, b'r', b'rb'):
1578 1577 return
1579 1578 if path.startswith(repo.sharedpath):
1580 1579 # truncate name relative to the repository (.hg)
1581 1580 path = path[len(repo.sharedpath) + 1 :]
1582 1581 if repo._currentlock(repo._lockref) is None:
1583 1582 repo.ui.develwarn(
1584 1583 b'write with no lock: "%s"' % path, stacklevel=4
1585 1584 )
1586 1585 return ret
1587 1586
1588 1587 return checksvfs
1589 1588
1590 1589 @property
1591 1590 def vfs_map(self):
1592 1591 return {
1593 1592 b'': self.svfs,
1594 1593 b'plain': self.vfs,
1595 1594 b'store': self.svfs,
1596 1595 }
1597 1596
1598 1597 def close(self):
1599 1598 self._writecaches()
1600 1599
1601 1600 def _writecaches(self):
1602 1601 if self._revbranchcache:
1603 1602 self._revbranchcache.write()
1604 1603
1605 1604 def _restrictcapabilities(self, caps):
1606 1605 if self.ui.configbool(b'experimental', b'bundle2-advertise'):
1607 1606 caps = set(caps)
1608 1607 capsblob = bundle2.encodecaps(
1609 1608 bundle2.getrepocaps(self, role=b'client')
1610 1609 )
1611 1610 caps.add(b'bundle2=' + urlreq.quote(capsblob))
1612 1611 if self.ui.configbool(b'experimental', b'narrow'):
1613 1612 caps.add(wireprototypes.NARROWCAP)
1614 1613 return caps
1615 1614
1616 1615 # Don't cache auditor/nofsauditor, or you'll end up with reference cycle:
1617 1616 # self -> auditor -> self._checknested -> self
1618 1617
1619 1618 @property
1620 1619 def auditor(self):
1621 1620 # This is only used by context.workingctx.match in order to
1622 1621 # detect files in subrepos.
1623 1622 return pathutil.pathauditor(self.root, callback=self._checknested)
1624 1623
1625 1624 @property
1626 1625 def nofsauditor(self):
1627 1626 # This is only used by context.basectx.match in order to detect
1628 1627 # files in subrepos.
1629 1628 return pathutil.pathauditor(
1630 1629 self.root, callback=self._checknested, realfs=False, cached=True
1631 1630 )
1632 1631
1633 1632 def _checknested(self, path):
1634 1633 """Determine if path is a legal nested repository."""
1635 1634 if not path.startswith(self.root):
1636 1635 return False
1637 1636 subpath = path[len(self.root) + 1 :]
1638 1637 normsubpath = util.pconvert(subpath)
1639 1638
1640 1639 # XXX: Checking against the current working copy is wrong in
1641 1640 # the sense that it can reject things like
1642 1641 #
1643 1642 # $ hg cat -r 10 sub/x.txt
1644 1643 #
1645 1644 # if sub/ is no longer a subrepository in the working copy
1646 1645 # parent revision.
1647 1646 #
1648 1647 # However, it can of course also allow things that would have
1649 1648 # been rejected before, such as the above cat command if sub/
1650 1649 # is a subrepository now, but was a normal directory before.
1651 1650 # The old path auditor would have rejected by mistake since it
1652 1651 # panics when it sees sub/.hg/.
1653 1652 #
1654 1653 # All in all, checking against the working copy seems sensible
1655 1654 # since we want to prevent access to nested repositories on
1656 1655 # the filesystem *now*.
1657 1656 ctx = self[None]
1658 1657 parts = util.splitpath(subpath)
1659 1658 while parts:
1660 1659 prefix = b'/'.join(parts)
1661 1660 if prefix in ctx.substate:
1662 1661 if prefix == normsubpath:
1663 1662 return True
1664 1663 else:
1665 1664 sub = ctx.sub(prefix)
1666 1665 return sub.checknested(subpath[len(prefix) + 1 :])
1667 1666 else:
1668 1667 parts.pop()
1669 1668 return False
1670 1669
1671 1670 def peer(self, path=None, remotehidden=False):
1672 1671 return localpeer(
1673 1672 self, path=path, remotehidden=remotehidden
1674 1673 ) # not cached to avoid reference cycle
1675 1674
1676 1675 def unfiltered(self):
1677 1676 """Return unfiltered version of the repository
1678 1677
1679 1678 Intended to be overwritten by filtered repo."""
1680 1679 return self
1681 1680
1682 1681 def filtered(self, name, visibilityexceptions=None):
1683 1682 """Return a filtered version of a repository
1684 1683
1685 1684 The `name` parameter is the identifier of the requested view. This
1686 1685 will return a repoview object set "exactly" to the specified view.
1687 1686
1688 1687 This function does not apply recursive filtering to a repository. For
1689 1688 example calling `repo.filtered("served")` will return a repoview using
1690 1689 the "served" view, regardless of the initial view used by `repo`.
1691 1690
1692 1691 In other words, there is always only one level of `repoview` "filtering".
1693 1692 """
1694 1693 if self._extrafilterid is not None and b'%' not in name:
1695 1694 name = name + b'%' + self._extrafilterid
1696 1695
1697 1696 cls = repoview.newtype(self.unfiltered().__class__)
1698 1697 return cls(self, name, visibilityexceptions)
1699 1698
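# A hedged usage sketch of the view filtering above (illustrative only;
# `repo` stands for any local repository object):
#
#   served = repo.filtered(b'served')     # view without secret/hidden changesets
#   visible = repo.filtered(b'visible')   # view without hidden changesets only
#   assert served.unfiltered() is repo.unfiltered()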
1700 1699 @mixedrepostorecache(
1701 1700 (b'bookmarks', b'plain'),
1702 1701 (b'bookmarks.current', b'plain'),
1703 1702 (b'bookmarks', b''),
1704 1703 (b'00changelog.i', b''),
1705 1704 )
1706 1705 def _bookmarks(self):
1707 1706 # Since the multiple files involved in the transaction cannot be
1708 1707 # written atomically (with current repository format), there is a race
1709 1708 # condition here.
1710 1709 #
1711 1710 # 1) changelog content A is read
1712 1711 # 2) outside transaction update changelog to content B
1713 1712 # 3) outside transaction update bookmark file referring to content B
1714 1713 # 4) bookmarks file content is read and filtered against changelog-A
1715 1714 #
1716 1715 # When this happens, bookmarks against nodes missing from A are dropped.
1717 1716 #
1718 1717 # Having this happen during a read is not great, but it becomes worse
1719 1718 # when it happens during a write, because the bookmarks pointing to the
1720 1719 # "unknown" nodes will be dropped for good. However, writes happen within
1721 1720 # locks. This locking makes it possible to have a race-free consistent read.
1722 1721 # For this purpose data read from disk before locking are
1723 1722 # "invalidated" right after the locks are taken. These invalidations are
1724 1723 # "light": the `filecache` mechanism keeps the data in memory and will
1725 1724 # reuse it if the underlying files did not change. Not parsing the
1726 1725 # same data multiple times helps performance.
1727 1726 #
1728 1727 # Unfortunately in the case described above, the files tracked by the
1729 1728 # bookmarks file cache might not have changed, but the in-memory
1730 1729 # content is still "wrong" because we used an older changelog content
1731 1730 # to process the on-disk data. So after locking, the changelog would be
1732 1731 # refreshed but `_bookmarks` would be preserved.
1733 1732 # Adding `00changelog.i` to the list of tracked files is not
1734 1733 # enough, because at the time we build the content for `_bookmarks` in
1735 1734 # (4), the changelog file has already diverged from the content used
1736 1735 # for loading `changelog` in (1)
1737 1736 #
1738 1737 # To prevent the issue, we force the changelog to be explicitly
1739 1738 # reloaded while computing `_bookmarks`. The data race can still happen
1740 1739 # without the lock (with a narrower window), but it would no longer go
1741 1740 # undetected during the lock time refresh.
1742 1741 #
1743 1742 # The new schedule is as follows:
1744 1743 #
1745 1744 # 1) filecache logic detects that `_bookmarks` needs to be computed
1746 1745 # 2) cachestat for `bookmarks` and `changelog` are captured (for book)
1747 1746 # 3) We force `changelog` filecache to be tested
1748 1747 # 4) cachestat for `changelog` are captured (for changelog)
1749 1748 # 5) `_bookmarks` is computed and cached
1750 1749 #
1751 1750 # The step in (3) ensures we have a changelog at least as recent as the
1752 1751 # cache stat computed in (1). As a result, at locking time:
1753 1752 # * if the changelog did not change since (1) -> we can reuse the data
1754 1753 # * otherwise -> the bookmarks get refreshed.
1755 1754 self._refreshchangelog()
1756 1755 return bookmarks.bmstore(self)
1757 1756
1758 1757 def _refreshchangelog(self):
1759 1758 """make sure the in memory changelog match the on-disk one"""
1760 1759 if 'changelog' in vars(self) and self.currenttransaction() is None:
1761 1760 del self.changelog
1762 1761
1763 1762 @property
1764 1763 def _activebookmark(self):
1765 1764 return self._bookmarks.active
1766 1765
1767 1766 # _phasesets depend on changelog. what we need is to call
1768 1767 # _phasecache.invalidate() if '00changelog.i' was changed, but it
1769 1768 # can't be easily expressed in filecache mechanism.
1770 1769 @storecache(b'phaseroots', b'00changelog.i')
1771 1770 def _phasecache(self):
1772 1771 return phases.phasecache(self, self._phasedefaults)
1773 1772
1774 1773 @storecache(b'obsstore')
1775 1774 def obsstore(self):
1776 1775 return obsolete.makestore(self.ui, self)
1777 1776
1778 1777 @changelogcache()
1779 1778 def changelog(repo):
1780 1779 # load dirstate before changelog to avoid a race, see issue6303
1781 1780 repo.dirstate.prefetch_parents()
1782 1781 return repo.store.changelog(
1783 1782 txnutil.mayhavepending(repo.root),
1784 1783 concurrencychecker=revlogchecker.get_checker(repo.ui, b'changelog'),
1785 1784 )
1786 1785
1787 1786 @manifestlogcache()
1788 1787 def manifestlog(self):
1789 1788 return self.store.manifestlog(self, self._storenarrowmatch)
1790 1789
1791 1790 @unfilteredpropertycache
1792 1791 def dirstate(self):
1793 1792 if self._dirstate is None:
1794 1793 self._dirstate = self._makedirstate()
1795 1794 else:
1796 1795 self._dirstate.refresh()
1797 1796 return self._dirstate
1798 1797
1799 1798 def _makedirstate(self):
1800 1799 """Extension point for wrapping the dirstate per-repo."""
1801 1800 sparsematchfn = None
1802 1801 if sparse.use_sparse(self):
1803 1802 sparsematchfn = lambda: sparse.matcher(self)
1804 1803 v2_req = requirementsmod.DIRSTATE_V2_REQUIREMENT
1805 1804 th = requirementsmod.DIRSTATE_TRACKED_HINT_V1
1806 1805 use_dirstate_v2 = v2_req in self.requirements
1807 1806 use_tracked_hint = th in self.requirements
1808 1807
1809 1808 return dirstate.dirstate(
1810 1809 self.vfs,
1811 1810 self.ui,
1812 1811 self.root,
1813 1812 self._dirstatevalidate,
1814 1813 sparsematchfn,
1815 1814 self.nodeconstants,
1816 1815 use_dirstate_v2,
1817 1816 use_tracked_hint=use_tracked_hint,
1818 1817 )
1819 1818
1820 1819 def _dirstatevalidate(self, node):
1821 1820 okay = True
1822 1821 try:
1823 1822 self.changelog.rev(node)
1824 1823 except error.LookupError:
1825 1824 # If the parents are unknown, it might just be because the changelog
1826 1825 # in memory is lagging behind the dirstate in memory. So try to
1827 1826 # refresh the changelog first.
1828 1827 #
1829 1828 # We only do so if we don't hold the lock, if we do hold the lock
1830 1829 # the invalidation at that time should have taken care of this and
1831 1830 # something is very fishy.
1832 1831 if self.currentlock() is None:
1833 1832 self.invalidate()
1834 1833 try:
1835 1834 self.changelog.rev(node)
1836 1835 except error.LookupError:
1837 1836 okay = False
1838 1837 else:
1839 1838 # XXX we should consider raising an error here.
1840 1839 okay = False
1841 1840 if okay:
1842 1841 return node
1843 1842 else:
1844 1843 if not self._dirstatevalidatewarned:
1845 1844 self._dirstatevalidatewarned = True
1846 1845 self.ui.warn(
1847 1846 _(b"warning: ignoring unknown working parent %s!\n")
1848 1847 % short(node)
1849 1848 )
1850 1849 return self.nullid
1851 1850
1852 1851 @storecache(narrowspec.FILENAME)
1853 1852 def narrowpats(self):
1854 1853 """matcher patterns for this repository's narrowspec
1855 1854
1856 1855 A tuple of (includes, excludes).
1857 1856 """
1858 1857 # the narrow management should probably move into its own object
1859 1858 val = self._pending_narrow_pats
1860 1859 if val is None:
1861 1860 val = narrowspec.load(self)
1862 1861 return val
1863 1862
1864 1863 @storecache(narrowspec.FILENAME)
1865 1864 def _storenarrowmatch(self):
1866 1865 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1867 1866 return matchmod.always()
1868 1867 include, exclude = self.narrowpats
1869 1868 return narrowspec.match(self.root, include=include, exclude=exclude)
1870 1869
1871 1870 @storecache(narrowspec.FILENAME)
1872 1871 def _narrowmatch(self):
1873 1872 if requirementsmod.NARROW_REQUIREMENT not in self.requirements:
1874 1873 return matchmod.always()
1875 1874 narrowspec.checkworkingcopynarrowspec(self)
1876 1875 include, exclude = self.narrowpats
1877 1876 return narrowspec.match(self.root, include=include, exclude=exclude)
1878 1877
1879 1878 def narrowmatch(self, match=None, includeexact=False):
1880 1879 """matcher corresponding the the repo's narrowspec
1881 1880
1882 1881 If `match` is given, then that will be intersected with the narrow
1883 1882 matcher.
1884 1883
1885 1884 If `includeexact` is True, then any exact matches from `match` will
1886 1885 be included even if they're outside the narrowspec.
1887 1886 """
1888 1887 if match:
1889 1888 if includeexact and not self._narrowmatch.always():
1890 1889 # do not exclude explicitly-specified paths so that they can
1891 1890 # be warned later on
1892 1891 em = matchmod.exact(match.files())
1893 1892 nm = matchmod.unionmatcher([self._narrowmatch, em])
1894 1893 return matchmod.intersectmatchers(match, nm)
1895 1894 return matchmod.intersectmatchers(match, self._narrowmatch)
1896 1895 return self._narrowmatch
1897 1896
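# A hedged sketch of the matcher intersection described above (the
# pattern and path are illustrative assumptions):
#
#   m = matchmod.match(repo.root, b'', [b'glob:src/**'])
#   nm = repo.narrowmatch(m, includeexact=True)
#   nm(b'src/main.py')  # True only if the path matches both `m` and the narrowspec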
1898 1897 def setnarrowpats(self, newincludes, newexcludes):
1899 1898 narrowspec.save(self, newincludes, newexcludes)
1900 1899 self.invalidate(clearfilecache=True)
1901 1900
1902 1901 @unfilteredpropertycache
1903 1902 def _quick_access_changeid_null(self):
1904 1903 return {
1905 1904 b'null': (nullrev, self.nodeconstants.nullid),
1906 1905 nullrev: (nullrev, self.nodeconstants.nullid),
1907 1906 self.nullid: (nullrev, self.nullid),
1908 1907 }
1909 1908
1910 1909 @unfilteredpropertycache
1911 1910 def _quick_access_changeid_wc(self):
1912 1911 # also fast path access to the working copy parents
1913 1912 # however, only do it for filters that ensure the wc is visible.
1914 1913 quick = self._quick_access_changeid_null.copy()
1915 1914 cl = self.unfiltered().changelog
1916 1915 for node in self.dirstate.parents():
1917 1916 if node == self.nullid:
1918 1917 continue
1919 1918 rev = cl.index.get_rev(node)
1920 1919 if rev is None:
1921 1920 # unknown working copy parent case:
1922 1921 #
1923 1922 # skip the fast path and let higher code deal with it
1924 1923 continue
1925 1924 pair = (rev, node)
1926 1925 quick[rev] = pair
1927 1926 quick[node] = pair
1928 1927 # also add the parents of the parents
1929 1928 for r in cl.parentrevs(rev):
1930 1929 if r == nullrev:
1931 1930 continue
1932 1931 n = cl.node(r)
1933 1932 pair = (r, n)
1934 1933 quick[r] = pair
1935 1934 quick[n] = pair
1936 1935 p1node = self.dirstate.p1()
1937 1936 if p1node != self.nullid:
1938 1937 quick[b'.'] = quick[p1node]
1939 1938 return quick
1940 1939
1941 1940 @unfilteredmethod
1942 1941 def _quick_access_changeid_invalidate(self):
1943 1942 if '_quick_access_changeid_wc' in vars(self):
1944 1943 del self.__dict__['_quick_access_changeid_wc']
1945 1944
1946 1945 @property
1947 1946 def _quick_access_changeid(self):
1948 1947 """an helper dictionnary for __getitem__ calls
1949 1948
1950 1949 This contains a list of symbol we can recognise right away without
1951 1950 further processing.
1952 1951 """
1953 1952 if self.filtername in repoview.filter_has_wc:
1954 1953 return self._quick_access_changeid_wc
1955 1954 return self._quick_access_changeid_null
1956 1955
1957 1956 def __getitem__(self, changeid):
1958 1957 # dealing with special cases
1959 1958 if changeid is None:
1960 1959 return context.workingctx(self)
1961 1960 if isinstance(changeid, context.basectx):
1962 1961 return changeid
1963 1962
1964 1963 # dealing with multiple revisions
1965 1964 if isinstance(changeid, slice):
1966 1965 # wdirrev isn't contiguous so the slice shouldn't include it
1967 1966 return [
1968 1967 self[i]
1969 1968 for i in range(*changeid.indices(len(self)))
1970 1969 if i not in self.changelog.filteredrevs
1971 1970 ]
1972 1971
1973 1972 # dealing with some special values
1974 1973 quick_access = self._quick_access_changeid.get(changeid)
1975 1974 if quick_access is not None:
1976 1975 rev, node = quick_access
1977 1976 return context.changectx(self, rev, node, maybe_filtered=False)
1978 1977 if changeid == b'tip':
1979 1978 node = self.changelog.tip()
1980 1979 rev = self.changelog.rev(node)
1981 1980 return context.changectx(self, rev, node)
1982 1981
1983 1982 # dealing with arbitrary values
1984 1983 try:
1985 1984 if isinstance(changeid, int):
1986 1985 node = self.changelog.node(changeid)
1987 1986 rev = changeid
1988 1987 elif changeid == b'.':
1989 1988 # this is a hack to delay/avoid loading obsmarkers
1990 1989 # when we know that '.' won't be hidden
1991 1990 node = self.dirstate.p1()
1992 1991 rev = self.unfiltered().changelog.rev(node)
1993 1992 elif len(changeid) == self.nodeconstants.nodelen:
1994 1993 try:
1995 1994 node = changeid
1996 1995 rev = self.changelog.rev(changeid)
1997 1996 except error.FilteredLookupError:
1998 1997 changeid = hex(changeid) # for the error message
1999 1998 raise
2000 1999 except LookupError:
2001 2000 # check if it might have come from damaged dirstate
2002 2001 #
2003 2002 # XXX we could avoid the unfiltered if we had a recognizable
2004 2003 # exception for filtered changeset access
2005 2004 if (
2006 2005 self.local()
2007 2006 and changeid in self.unfiltered().dirstate.parents()
2008 2007 ):
2009 2008 msg = _(b"working directory has unknown parent '%s'!")
2010 2009 raise error.Abort(msg % short(changeid))
2011 2010 changeid = hex(changeid) # for the error message
2012 2011 raise
2013 2012
2014 2013 elif len(changeid) == 2 * self.nodeconstants.nodelen:
2015 2014 node = bin(changeid)
2016 2015 rev = self.changelog.rev(node)
2017 2016 else:
2018 2017 raise error.ProgrammingError(
2019 2018 b"unsupported changeid '%s' of type %s"
2020 2019 % (changeid, pycompat.bytestr(type(changeid)))
2021 2020 )
2022 2021
2023 2022 return context.changectx(self, rev, node)
2024 2023
2025 2024 except (error.FilteredIndexError, error.FilteredLookupError):
2026 2025 raise error.FilteredRepoLookupError(
2027 2026 _(b"filtered revision '%s'") % pycompat.bytestr(changeid)
2028 2027 )
2029 2028 except (IndexError, LookupError):
2030 2029 raise error.RepoLookupError(
2031 2030 _(b"unknown revision '%s'") % pycompat.bytestr(changeid)
2032 2031 )
2033 2032 except error.WdirUnsupported:
2034 2033 return context.workingctx(self)
2035 2034
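# Summary of the lookup forms handled by __getitem__ above (a hedged,
# illustrative sketch; `node` stands for a 20-byte binary node):
#
#   repo[None]       -> workingctx for the working directory
#   repo[0]          -> changectx for revision 0
#   repo[b'tip']     -> changectx for the tip changeset
#   repo[node]       -> changectx for that node
#   repo[0:5]        -> list of changectx, skipping filtered revisions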
2036 2035 def __contains__(self, changeid):
2037 2036 """True if the given changeid exists"""
2038 2037 try:
2039 2038 self[changeid]
2040 2039 return True
2041 2040 except error.RepoLookupError:
2042 2041 return False
2043 2042
2044 2043 def __nonzero__(self):
2045 2044 return True
2046 2045
2047 2046 __bool__ = __nonzero__
2048 2047
2049 2048 def __len__(self):
2050 2049 # no need to pay the cost of repoview.changelog
2051 2050 unfi = self.unfiltered()
2052 2051 return len(unfi.changelog)
2053 2052
2054 2053 def __iter__(self):
2055 2054 return iter(self.changelog)
2056 2055
2057 2056 def revs(self, expr: bytes, *args):
2058 2057 """Find revisions matching a revset.
2059 2058
2060 2059 The revset is specified as a string ``expr`` that may contain
2061 2060 %-formatting to escape certain types. See ``revsetlang.formatspec``.
2062 2061
2063 2062 Revset aliases from the configuration are not expanded. To expand
2064 2063 user aliases, consider calling ``scmutil.revrange()`` or
2065 2064 ``repo.anyrevs([expr], user=True)``.
2066 2065
2067 2066 Returns a smartset.abstractsmartset, which is a list-like interface
2068 2067 that contains integer revisions.
2069 2068 """
2070 2069 tree = revsetlang.spectree(expr, *args)
2071 2070 return revset.makematcher(tree)(self)
2072 2071
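# A hedged usage sketch (the revset and revision number are illustrative):
#
#   revs = repo.revs(b'ancestors(%d) and not public()', 42)
#   for r in revs:       # smartset of integer revisions
#       ctx = repo[r]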
2073 2072 def set(self, expr: bytes, *args):
2074 2073 """Find revisions matching a revset and emit changectx instances.
2075 2074
2076 2075 This is a convenience wrapper around ``revs()`` that iterates the
2077 2076 result and is a generator of changectx instances.
2078 2077
2079 2078 Revset aliases from the configuration are not expanded. To expand
2080 2079 user aliases, consider calling ``scmutil.revrange()``.
2081 2080 """
2082 2081 for r in self.revs(expr, *args):
2083 2082 yield self[r]
2084 2083
2085 2084 def anyrevs(self, specs: bytes, user=False, localalias=None):
2086 2085 """Find revisions matching one of the given revsets.
2087 2086
2088 2087 Revset aliases from the configuration are not expanded by default. To
2089 2088 expand user aliases, specify ``user=True``. To provide some local
2090 2089 definitions overriding user aliases, set ``localalias`` to
2091 2090 ``{name: definitionstring}``.
2092 2091 """
2093 2092 if specs == [b'null']:
2094 2093 return revset.baseset([nullrev])
2095 2094 if specs == [b'.']:
2096 2095 quick_data = self._quick_access_changeid.get(b'.')
2097 2096 if quick_data is not None:
2098 2097 return revset.baseset([quick_data[0]])
2099 2098 if user:
2100 2099 m = revset.matchany(
2101 2100 self.ui,
2102 2101 specs,
2103 2102 lookup=revset.lookupfn(self),
2104 2103 localalias=localalias,
2105 2104 )
2106 2105 else:
2107 2106 m = revset.matchany(None, specs, localalias=localalias)
2108 2107 return m(self)
2109 2108
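# A hedged sketch of the localalias mechanism (the alias name and
# definition are illustrative assumptions):
#
#   m = repo.anyrevs(
#       [b'mine()'],
#       user=True,
#       localalias={b'mine': b'draft() and user("alice")'},
#   )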
2110 2109 def url(self) -> bytes:
2111 2110 return b'file:' + self.root
2112 2111
2113 2112 def hook(self, name, throw=False, **args):
2114 2113 """Call a hook, passing this repo instance.
2115 2114
2116 2115 This is a convenience method to aid invoking hooks. Extensions likely
2117 2116 won't call this unless they have registered a custom hook or are
2118 2117 replacing code that is expected to call a hook.
2119 2118 """
2120 2119 return hook.hook(self.ui, self, name, throw, **args)
2121 2120
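# A hedged sketch of an extension firing its own hook through this
# helper (the hook name and keyword argument are illustrative
# assumptions):
#
#   repo.hook(b'myext-after-sync', throw=False, source=b'pull')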
2122 2121 @filteredpropertycache
2123 2122 def _tagscache(self):
2124 2123 """Returns a tagscache object that contains various tags related
2125 2124 caches."""
2126 2125
2127 2126 # This simplifies its cache management by having one decorated
2128 2127 # function (this one) and the rest simply fetch things from it.
2129 2128 class tagscache:
2130 2129 def __init__(self):
2131 2130 # These two define the set of tags for this repository. tags
2132 2131 # maps tag name to node; tagtypes maps tag name to 'global' or
2133 2132 # 'local'. (Global tags are defined by .hgtags across all
2134 2133 # heads, and local tags are defined in .hg/localtags.)
2135 2134 # They constitute the in-memory cache of tags.
2136 2135 self.tags = self.tagtypes = None
2137 2136
2138 2137 self.nodetagscache = self.tagslist = None
2139 2138
2140 2139 cache = tagscache()
2141 2140 cache.tags, cache.tagtypes = self._findtags()
2142 2141
2143 2142 return cache
2144 2143
2145 2144 def tags(self):
2146 2145 '''return a mapping of tag to node'''
2147 2146 t = {}
2148 2147 if self.changelog.filteredrevs:
2149 2148 tags, tt = self._findtags()
2150 2149 else:
2151 2150 tags = self._tagscache.tags
2152 2151 rev = self.changelog.rev
2153 2152 for k, v in tags.items():
2154 2153 try:
2155 2154 # ignore tags to unknown nodes
2156 2155 rev(v)
2157 2156 t[k] = v
2158 2157 except (error.LookupError, ValueError):
2159 2158 pass
2160 2159 return t
2161 2160
2162 2161 def _findtags(self):
2163 2162 """Do the hard work of finding tags. Return a pair of dicts
2164 2163 (tags, tagtypes) where tags maps tag name to node, and tagtypes
2165 2164 maps tag name to a string like \'global\' or \'local\'.
2166 2165 Subclasses or extensions are free to add their own tags, but
2167 2166 should be aware that the returned dicts will be retained for the
2168 2167 duration of the localrepo object."""
2169 2168
2170 2169 # XXX what tagtype should subclasses/extensions use? Currently
2171 2170 # mq and bookmarks add tags, but do not set the tagtype at all.
2172 2171 # Should each extension invent its own tag type? Should there
2173 2172 # be one tagtype for all such "virtual" tags? Or is the status
2174 2173 # quo fine?
2175 2174
2176 2175 # map tag name to (node, hist)
2177 2176 alltags = tagsmod.findglobaltags(self.ui, self)
2178 2177 # map tag name to tag type
2179 2178 tagtypes = {tag: b'global' for tag in alltags}
2180 2179
2181 2180 tagsmod.readlocaltags(self.ui, self, alltags, tagtypes)
2182 2181
2183 2182 # Build the return dicts. Have to re-encode tag names because
2184 2183 # the tags module always uses UTF-8 (in order not to lose info
2185 2184 # writing to the cache), but the rest of Mercurial wants them in
2186 2185 # local encoding.
2187 2186 tags = {}
2188 2187 for name, (node, hist) in alltags.items():
2189 2188 if node != self.nullid:
2190 2189 tags[encoding.tolocal(name)] = node
2191 2190 tags[b'tip'] = self.changelog.tip()
2192 2191 tagtypes = {
2193 2192 encoding.tolocal(name): value for (name, value) in tagtypes.items()
2194 2193 }
2195 2194 return (tags, tagtypes)
2196 2195
2197 2196 def tagtype(self, tagname):
2198 2197 """
2199 2198 return the type of the given tag. result can be:
2200 2199
2201 2200 'local' : a local tag
2202 2201 'global' : a global tag
2203 2202 None : tag does not exist
2204 2203 """
2205 2204
2206 2205 return self._tagscache.tagtypes.get(tagname)
2207 2206
2208 2207 def tagslist(self):
2209 2208 '''return a list of tags ordered by revision'''
2210 2209 if not self._tagscache.tagslist:
2211 2210 l = []
2212 2211 for t, n in self.tags().items():
2213 2212 l.append((self.changelog.rev(n), t, n))
2214 2213 self._tagscache.tagslist = [(t, n) for r, t, n in sorted(l)]
2215 2214
2216 2215 return self._tagscache.tagslist
2217 2216
2218 2217 def nodetags(self, node):
2219 2218 '''return the tags associated with a node'''
2220 2219 if not self._tagscache.nodetagscache:
2221 2220 nodetagscache = {}
2222 2221 for t, n in self._tagscache.tags.items():
2223 2222 nodetagscache.setdefault(n, []).append(t)
2224 2223 for tags in nodetagscache.values():
2225 2224 tags.sort()
2226 2225 self._tagscache.nodetagscache = nodetagscache
2227 2226 return self._tagscache.nodetagscache.get(node, [])
2228 2227
2229 2228 def nodebookmarks(self, node):
2230 2229 """return the list of bookmarks pointing to the specified node"""
2231 2230 return self._bookmarks.names(node)
2232 2231
2233 2232 def branchmap(self):
2234 2233 """returns a dictionary {branch: [branchheads]} with branchheads
2235 2234 ordered by increasing revision number"""
2236 2235 return self._branchcaches[self]
2237 2236
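# A hedged usage sketch of the mapping described above:
#
#   heads = repo.branchmap()[b'default']  # head nodes, oldest revision first
#   newest_head = heads[-1]               # head with the highest revision number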
2238 2237 @unfilteredmethod
2239 2238 def revbranchcache(self):
2240 2239 if not self._revbranchcache:
2241 2240 self._revbranchcache = branchmap.revbranchcache(self.unfiltered())
2242 2241 return self._revbranchcache
2243 2242
2244 2243 def register_changeset(self, rev, changelogrevision):
2245 2244 self.revbranchcache().setdata(rev, changelogrevision)
2246 2245
2247 2246 def branchtip(self, branch, ignoremissing=False):
2248 2247 """return the tip node for a given branch
2249 2248
2250 2249 If ignoremissing is True, then this method will not raise an error.
2251 2250 This is helpful for callers that only expect None for a missing branch
2252 2251 (e.g. namespace).
2253 2252
2254 2253 """
2255 2254 try:
2256 2255 return self.branchmap().branchtip(branch)
2257 2256 except KeyError:
2258 2257 if not ignoremissing:
2259 2258 raise error.RepoLookupError(_(b"unknown branch '%s'") % branch)
2260 2259 else:
2261 2260 pass
2262 2261
2263 2262 def lookup(self, key):
2264 2263 node = scmutil.revsymbol(self, key).node()
2265 2264 if node is None:
2266 2265 raise error.RepoLookupError(_(b"unknown revision '%s'") % key)
2267 2266 return node
2268 2267
2269 2268 def lookupbranch(self, key):
2270 2269 if self.branchmap().hasbranch(key):
2271 2270 return key
2272 2271
2273 2272 return scmutil.revsymbol(self, key).branch()
2274 2273
2275 2274 def known(self, nodes):
2276 2275 cl = self.changelog
2277 2276 get_rev = cl.index.get_rev
2278 2277 filtered = cl.filteredrevs
2279 2278 result = []
2280 2279 for n in nodes:
2281 2280 r = get_rev(n)
2282 2281 resp = not (r is None or r in filtered)
2283 2282 result.append(resp)
2284 2283 return result
2285 2284
2286 2285 def local(self):
2287 2286 return self
2288 2287
2289 2288 def publishing(self):
2290 2289 # it's safe (and desirable) to trust the publish flag unconditionally
2291 2290 # so that we don't finalize changes shared between users via ssh or nfs
2292 2291 return self.ui.configbool(b'phases', b'publish', untrusted=True)
2293 2292
2294 2293 def cancopy(self):
2295 2294 # so statichttprepo's override of local() works
2296 2295 if not self.local():
2297 2296 return False
2298 2297 if not self.publishing():
2299 2298 return True
2300 2299 # if publishing we can't copy if there is filtered content
2301 2300 return not self.filtered(b'visible').changelog.filteredrevs
2302 2301
2303 2302 def shared(self):
2304 2303 '''the type of shared repository (None if not shared)'''
2305 2304 if self.sharedpath != self.path:
2306 2305 return b'store'
2307 2306 return None
2308 2307
2309 2308 def wjoin(self, f: bytes, *insidef: bytes) -> bytes:
2310 2309 return self.vfs.reljoin(self.root, f, *insidef)
2311 2310
2312 2311 def setparents(self, p1, p2=None):
2313 2312 if p2 is None:
2314 2313 p2 = self.nullid
2315 2314 self[None].setparents(p1, p2)
2316 2315 self._quick_access_changeid_invalidate()
2317 2316
2318 2317 def filectx(self, path: bytes, changeid=None, fileid=None, changectx=None):
2319 2318 """changeid must be a changeset revision, if specified.
2320 2319 fileid can be a file revision or node."""
2321 2320 return context.filectx(
2322 2321 self, path, changeid, fileid, changectx=changectx
2323 2322 )
2324 2323
2325 2324 def getcwd(self) -> bytes:
2326 2325 return self.dirstate.getcwd()
2327 2326
2328 2327 def pathto(self, f: bytes, cwd: Optional[bytes] = None) -> bytes:
2329 2328 return self.dirstate.pathto(f, cwd)
2330 2329
2331 2330 def _loadfilter(self, filter):
2332 2331 if filter not in self._filterpats:
2333 2332 l = []
2334 2333 for pat, cmd in self.ui.configitems(filter):
2335 2334 if cmd == b'!':
2336 2335 continue
2337 2336 mf = matchmod.match(self.root, b'', [pat])
2338 2337 fn = None
2339 2338 params = cmd
2340 2339 for name, filterfn in self._datafilters.items():
2341 2340 if cmd.startswith(name):
2342 2341 fn = filterfn
2343 2342 params = cmd[len(name) :].lstrip()
2344 2343 break
2345 2344 if not fn:
2346 2345 fn = lambda s, c, **kwargs: procutil.filter(s, c)
2347 2346 fn.__name__ = 'commandfilter'
2348 2347 # Wrap old filters not supporting keyword arguments
2349 2348 if not pycompat.getargspec(fn)[2]:
2350 2349 oldfn = fn
2351 2350 fn = lambda s, c, oldfn=oldfn, **kwargs: oldfn(s, c)
2352 2351 fn.__name__ = 'compat-' + oldfn.__name__
2353 2352 l.append((mf, fn, params))
2354 2353 self._filterpats[filter] = l
2355 2354 return self._filterpats[filter]
2356 2355
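# The filter patterns loaded above come from configuration sections such
# as [encode] and [decode]. A hedged hgrc sketch (patterns and commands
# are illustrative; "pipe:" is the default command filter prefix):
#
#   [encode]
#   *.gz = pipe: gunzip
#
#   [decode]
#   *.gz = gzip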
2357 2356 def _filter(self, filterpats, filename, data):
2358 2357 for mf, fn, cmd in filterpats:
2359 2358 if mf(filename):
2360 2359 self.ui.debug(
2361 2360 b"filtering %s through %s\n"
2362 2361 % (filename, cmd or pycompat.sysbytes(fn.__name__))
2363 2362 )
2364 2363 data = fn(data, cmd, ui=self.ui, repo=self, filename=filename)
2365 2364 break
2366 2365
2367 2366 return data
2368 2367
2369 2368 @unfilteredpropertycache
2370 2369 def _encodefilterpats(self):
2371 2370 return self._loadfilter(b'encode')
2372 2371
2373 2372 @unfilteredpropertycache
2374 2373 def _decodefilterpats(self):
2375 2374 return self._loadfilter(b'decode')
2376 2375
2377 2376 def adddatafilter(self, name, filter):
2378 2377 self._datafilters[name] = filter
2379 2378
2380 2379 def wread(self, filename: bytes) -> bytes:
2381 2380 if self.wvfs.islink(filename):
2382 2381 data = self.wvfs.readlink(filename)
2383 2382 else:
2384 2383 data = self.wvfs.read(filename)
2385 2384 return self._filter(self._encodefilterpats, filename, data)
2386 2385
2387 2386 def wwrite(
2388 2387 self,
2389 2388 filename: bytes,
2390 2389 data: bytes,
2391 2390 flags: bytes,
2392 2391 backgroundclose=False,
2393 2392 **kwargs
2394 2393 ) -> int:
2395 2394 """write ``data`` into ``filename`` in the working directory
2396 2395
2397 2396 This returns the length of the written (maybe decoded) data.
2398 2397 """
2399 2398 data = self._filter(self._decodefilterpats, filename, data)
2400 2399 if b'l' in flags:
2401 2400 self.wvfs.symlink(data, filename)
2402 2401 else:
2403 2402 self.wvfs.write(
2404 2403 filename, data, backgroundclose=backgroundclose, **kwargs
2405 2404 )
2406 2405 if b'x' in flags:
2407 2406 self.wvfs.setflags(filename, False, True)
2408 2407 else:
2409 2408 self.wvfs.setflags(filename, False, False)
2410 2409 return len(data)
2411 2410
2412 2411 def wwritedata(self, filename: bytes, data: bytes) -> bytes:
2413 2412 return self._filter(self._decodefilterpats, filename, data)
2414 2413
2415 2414 def currenttransaction(self):
2416 2415 """return the current transaction or None if non exists"""
2417 2416 if self._transref:
2418 2417 tr = self._transref()
2419 2418 else:
2420 2419 tr = None
2421 2420
2422 2421 if tr and tr.running():
2423 2422 return tr
2424 2423 return None
2425 2424
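# A hedged sketch of the typical call pattern for the transaction
# machinery defined below (the description string is an illustrative
# assumption):
#
#   with repo.wlock(), repo.lock(), repo.transaction(b'my-operation') as tr:
#       ...   # writes done here are rolled back if an error escapes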
2426 2425 def transaction(self, desc, report=None):
2427 2426 if self.ui.configbool(b'devel', b'all-warnings') or self.ui.configbool(
2428 2427 b'devel', b'check-locks'
2429 2428 ):
2430 2429 if self._currentlock(self._lockref) is None:
2431 2430 raise error.ProgrammingError(b'transaction requires locking')
2432 2431 tr = self.currenttransaction()
2433 2432 if tr is not None:
2434 2433 return tr.nest(name=desc)
2435 2434
2436 2435 # abort here if the journal already exists
2437 2436 if self.svfs.exists(b"journal"):
2438 2437 raise error.RepoError(
2439 2438 _(b"abandoned transaction found"),
2440 2439 hint=_(b"run 'hg recover' to clean up transaction"),
2441 2440 )
2442 2441
2443 2442 # At that point your dirstate should be clean:
2444 2443 #
2445 2444 # - If you don't have the wlock, why would you still have a dirty
2446 2445 # dirstate ?
2447 2446 #
2448 2447 # - If you hold the wlock, you should not be opening a transaction in
2449 2448 # the middle of a `dirstate.changing_*` block. The transaction needs to
2450 2449 # be open before that and wrap the change-context.
2451 2450 #
2452 2451 # - If you are not within a `dirstate.changing_*` context, why is our
2453 2452 # dirstate dirty?
2454 2453 if self.dirstate._dirty:
2455 2454 m = "cannot open a transaction with a dirty dirstate"
2456 2455 raise error.ProgrammingError(m)
2457 2456
2458 2457 idbase = b"%.40f#%f" % (random.random(), time.time())
2459 2458 ha = hex(hashutil.sha1(idbase).digest())
2460 2459 txnid = b'TXN:' + ha
2461 2460 self.hook(b'pretxnopen', throw=True, txnname=desc, txnid=txnid)
2462 2461
2463 2462 self._writejournal(desc)
2464 2463 if report:
2465 2464 rp = report
2466 2465 else:
2467 2466 rp = self.ui.warn
2468 2467 vfsmap = self.vfs_map
2469 2468 # we must avoid cyclic reference between repo and transaction.
2470 2469 reporef = weakref.ref(self)
2471 2470 # Code to track tag movement
2472 2471 #
2473 2472 # Since tags are all handled as file content, it is actually quite hard
2474 2473 # to track these movements from a code perspective. So we fall back to
2475 2474 # tracking at the repository level. One could envision tracking changes
2476 2475 # to the '.hgtags' file through changegroup apply, but that fails to
2477 2476 # cope with cases where a transaction exposes new heads without a
2478 2477 # changegroup being involved (e.g. phase movement).
2479 2478 #
2480 2479 # For now, we gate the feature behind a flag since this likely comes
2481 2480 # with performance impacts. The current code runs more often than needed
2482 2481 # and does not use caches as much as it could. The current focus is on
2483 2482 # the behavior of the feature so we disable it by default. The flag
2484 2483 # will be removed when we are happy with the performance impact.
2485 2484 #
2486 2485 # Once this feature is no longer experimental move the following
2487 2486 # documentation to the appropriate help section:
2488 2487 #
2489 2488 # The ``HG_TAG_MOVED`` variable will be set if the transaction touched
2490 2489 # tags (new or changed or deleted tags). In addition the details of
2491 2490 # these changes are made available in a file at:
2492 2491 # ``REPOROOT/.hg/changes/tags.changes``.
2493 2492 # Make sure you check for HG_TAG_MOVED before reading that file as it
2494 2493 # might exist from a previous transaction even if no tags were touched
2495 2494 # in this one. Changes are recorded in a line-based format::
2496 2495 #
2497 2496 # <action> <hex-node> <tag-name>\n
2498 2497 #
2499 2498 # Actions are defined as follows:
2500 2499 # "-R": tag is removed,
2501 2500 # "+A": tag is added,
2502 2501 # "-M": tag is moved (old value),
2503 2502 # "+M": tag is moved (new value),
2504 2503 tracktags = lambda x: None
2505 2504 # experimental config: experimental.hook-track-tags
2506 2505 shouldtracktags = self.ui.configbool(
2507 2506 b'experimental', b'hook-track-tags'
2508 2507 )
2509 2508 if desc != b'strip' and shouldtracktags:
2510 2509 oldheads = self.changelog.headrevs()
2511 2510
2512 2511 def tracktags(tr2):
2513 2512 repo = reporef()
2514 2513 assert repo is not None # help pytype
2515 2514 oldfnodes = tagsmod.fnoderevs(repo.ui, repo, oldheads)
2516 2515 newheads = repo.changelog.headrevs()
2517 2516 newfnodes = tagsmod.fnoderevs(repo.ui, repo, newheads)
2518 2517 # note: we compare lists here.
2519 2518 # As we do it only once, building a set would not be cheaper.
2520 2519 changes = tagsmod.difftags(repo.ui, repo, oldfnodes, newfnodes)
2521 2520 if changes:
2522 2521 tr2.hookargs[b'tag_moved'] = b'1'
2523 2522 with repo.vfs(
2524 2523 b'changes/tags.changes', b'w', atomictemp=True
2525 2524 ) as changesfile:
2526 2525 # note: we do not register the file to the transaction
2527 2526 # because we need it to still exist when the transaction
2528 2527 # is closed (for txnclose hooks)
2529 2528 tagsmod.writediff(changesfile, changes)
2530 2529
2531 2530 def validate(tr2):
2532 2531 """will run pre-closing hooks"""
2533 2532 # XXX the transaction API is a bit lacking here so we take a hacky
2534 2533 # path for now
2535 2534 #
2536 2535 # We cannot add this as a "pending" hook since the 'tr.hookargs'
2537 2536 # dict is copied before these run. In addition, we need the data
2538 2537 # available to in-memory hooks too.
2539 2538 #
2540 2539 # Moreover, we also need to make sure this runs before txnclose
2541 2540 # hooks and there is no "pending" mechanism that would execute
2542 2541 # logic only if hooks are about to run.
2543 2542 #
2544 2543 # Fixing this limitation of the transaction is also needed to track
2545 2544 # other families of changes (bookmarks, phases, obsolescence).
2546 2545 #
2547 2546 # This will have to be fixed before we remove the experimental
2548 2547 # gating.
2549 2548 tracktags(tr2)
2550 2549 repo = reporef()
2551 2550 assert repo is not None # help pytype
2552 2551
2553 2552 singleheadopt = (b'experimental', b'single-head-per-branch')
2554 2553 singlehead = repo.ui.configbool(*singleheadopt)
2555 2554 if singlehead:
2556 2555 singleheadsub = repo.ui.configsuboptions(*singleheadopt)[1]
2557 2556 accountclosed = singleheadsub.get(
2558 2557 b"account-closed-heads", False
2559 2558 )
2560 2559 if singleheadsub.get(b"public-changes-only", False):
2561 2560 filtername = b"immutable"
2562 2561 else:
2563 2562 filtername = b"visible"
2564 2563 scmutil.enforcesinglehead(
2565 2564 repo, tr2, desc, accountclosed, filtername
2566 2565 )
2567 2566 if hook.hashook(repo.ui, b'pretxnclose-bookmark'):
2568 2567 for name, (old, new) in sorted(
2569 2568 tr.changes[b'bookmarks'].items()
2570 2569 ):
2571 2570 args = tr.hookargs.copy()
2572 2571 args.update(bookmarks.preparehookargs(name, old, new))
2573 2572 repo.hook(
2574 2573 b'pretxnclose-bookmark',
2575 2574 throw=True,
2576 2575 **pycompat.strkwargs(args)
2577 2576 )
2578 2577 if hook.hashook(repo.ui, b'pretxnclose-phase'):
2579 2578 cl = repo.unfiltered().changelog
2580 2579 for revs, (old, new) in tr.changes[b'phases']:
2581 2580 for rev in revs:
2582 2581 args = tr.hookargs.copy()
2583 2582 node = hex(cl.node(rev))
2584 2583 args.update(phases.preparehookargs(node, old, new))
2585 2584 repo.hook(
2586 2585 b'pretxnclose-phase',
2587 2586 throw=True,
2588 2587 **pycompat.strkwargs(args)
2589 2588 )
2590 2589
2591 2590 repo.hook(
2592 2591 b'pretxnclose', throw=True, **pycompat.strkwargs(tr.hookargs)
2593 2592 )
2594 2593
2595 2594 def releasefn(tr, success):
2596 2595 repo = reporef()
2597 2596 if repo is None:
2598 2597 # If the repo has been GC'd (and this release function is being
2599 2598 # called from transaction.__del__), there's not much we can do,
2600 2599 # so just leave the unfinished transaction there and let the
2601 2600 # user run `hg recover`.
2602 2601 return
2603 2602 if success:
2604 2603 # this should be explicitly invoked here, because
2605 2604 # in-memory changes aren't written out at closing
2606 2605 # transaction, if tr.addfilegenerator (via
2607 2606 # dirstate.write or so) isn't invoked while
2608 2607 # transaction running
2609 2608 repo.dirstate.write(None)
2610 2609 else:
2611 2610 # discard all changes (including ones already written
2612 2611 # out) in this transaction
2613 2612 repo.invalidate(clearfilecache=True)
2614 2613
2615 2614 tr = transaction.transaction(
2616 2615 rp,
2617 2616 self.svfs,
2618 2617 vfsmap,
2619 2618 b"journal",
2620 2619 b"undo",
2621 2620 lambda: None,
2622 2621 self.store.createmode,
2623 2622 validator=validate,
2624 2623 releasefn=releasefn,
2625 2624 checkambigfiles=_cachedfiles,
2626 2625 name=desc,
2627 2626 )
2628 2627 for vfs_id, path in self._journalfiles():
2629 2628 tr.add_journal(vfs_id, path)
2630 2629 tr.changes[b'origrepolen'] = len(self)
2631 2630 tr.changes[b'obsmarkers'] = set()
2632 2631 tr.changes[b'phases'] = []
2633 2632 tr.changes[b'bookmarks'] = {}
2634 2633
2635 2634 tr.hookargs[b'txnid'] = txnid
2636 2635 tr.hookargs[b'txnname'] = desc
2637 2636 tr.hookargs[b'changes'] = tr.changes
2638 2637 # note: writing the fncache only during finalize means that the file is
2639 2638 # outdated when running hooks. As fncache is used for streaming clone,
2640 2639 # this is not expected to break anything that happens during the hooks.
2641 2640 tr.addfinalize(b'flush-fncache', self.store.write)
2642 2641
2643 2642 def txnclosehook(tr2):
2644 2643 """To be run if transaction is successful, will schedule a hook run"""
2645 2644 # Don't reference tr2 in hook() so we don't hold a reference.
2646 2645 # This reduces memory consumption when there are multiple
2647 2646 # transactions per lock. This can likely go away if issue5045
2648 2647 # fixes the function accumulation.
2649 2648 hookargs = tr2.hookargs
2650 2649
2651 2650 def hookfunc(unused_success):
2652 2651 repo = reporef()
2653 2652 assert repo is not None # help pytype
2654 2653
2655 2654 if hook.hashook(repo.ui, b'txnclose-bookmark'):
2656 2655 bmchanges = sorted(tr.changes[b'bookmarks'].items())
2657 2656 for name, (old, new) in bmchanges:
2658 2657 args = tr.hookargs.copy()
2659 2658 args.update(bookmarks.preparehookargs(name, old, new))
2660 2659 repo.hook(
2661 2660 b'txnclose-bookmark',
2662 2661 throw=False,
2663 2662 **pycompat.strkwargs(args)
2664 2663 )
2665 2664
2666 2665 if hook.hashook(repo.ui, b'txnclose-phase'):
2667 2666 cl = repo.unfiltered().changelog
2668 2667 phasemv = sorted(
2669 2668 tr.changes[b'phases'], key=lambda r: r[0][0]
2670 2669 )
2671 2670 for revs, (old, new) in phasemv:
2672 2671 for rev in revs:
2673 2672 args = tr.hookargs.copy()
2674 2673 node = hex(cl.node(rev))
2675 2674 args.update(phases.preparehookargs(node, old, new))
2676 2675 repo.hook(
2677 2676 b'txnclose-phase',
2678 2677 throw=False,
2679 2678 **pycompat.strkwargs(args)
2680 2679 )
2681 2680
2682 2681 repo.hook(
2683 2682 b'txnclose', throw=False, **pycompat.strkwargs(hookargs)
2684 2683 )
2685 2684
2686 2685 repo = reporef()
2687 2686 assert repo is not None # help pytype
2688 2687 repo._afterlock(hookfunc)
2689 2688
2690 2689 tr.addfinalize(b'txnclose-hook', txnclosehook)
2691 2690 # Include a leading "-" to make it happen before the transaction summary
2692 2691 # reports registered via scmutil.registersummarycallback() whose names
2693 2692 # are 00-txnreport etc. That way, the caches will be warm when the
2694 2693 # callbacks run.
2695 2694 tr.addpostclose(b'-warm-cache', self._buildcacheupdater(tr))
2696 2695
2697 2696 def txnaborthook(tr2):
2698 2697 """To be run if transaction is aborted"""
2699 2698 repo = reporef()
2700 2699 assert repo is not None # help pytype
2701 2700 repo.hook(
2702 2701 b'txnabort', throw=False, **pycompat.strkwargs(tr2.hookargs)
2703 2702 )
2704 2703
2705 2704 tr.addabort(b'txnabort-hook', txnaborthook)
2706 2705 # avoid eager cache invalidation. in-memory data should be identical
2707 2706 # to stored data if transaction has no error.
2708 2707 tr.addpostclose(b'refresh-filecachestats', self._refreshfilecachestats)
2709 2708 self._transref = weakref.ref(tr)
2710 2709 scmutil.registersummarycallback(self, tr, desc)
2711 2710 # This only exists to deal with rollback's need to have viable
2712 2711 # parents at the end of the operation. So back up viable parents at the
2713 2712 # time of this operation.
2714 2713 #
2715 2714 # We only do it when the `wlock` is taken, otherwise others might be
2716 2715 # altering the dirstate under us.
2717 2716 #
2718 2717 # This is really not a great way to do this (first, because we cannot
2719 2718 # always do it). More viable alternatives exist:
2720 2719 #
2721 2720 # - backing up only the working copy parents in dedicated files and doing
2722 2721 # a clean "keep-update" to them on `hg rollback`.
2723 2722 #
2724 2723 # - slightly changing the behavior and applying logic similar to "hg
2725 2724 # strip" to pick a working copy destination on `hg rollback`
2726 2725 if self.currentwlock() is not None:
2727 2726 ds = self.dirstate
2728 2727 if not self.vfs.exists(b'branch'):
2729 2728 # force a file to be written if none exists
2730 2729 ds.setbranch(b'default', None)
2731 2730
2732 2731 def backup_dirstate(tr):
2733 2732 for f in ds.all_file_names():
2734 2733 # hardlink backup is okay because `dirstate` is always
2735 2734 # atomically written and possible data files are append-only
2736 2735 # and resistant to trailing data.
2737 2736 tr.addbackup(f, hardlink=True, location=b'plain')
2738 2737
2739 2738 tr.addvalidator(b'dirstate-backup', backup_dirstate)
2740 2739 return tr
2741 2740
2742 2741 def _journalfiles(self):
2743 2742 return (
2744 2743 (self.svfs, b'journal'),
2745 2744 (self.vfs, b'journal.desc'),
2746 2745 )
2747 2746
2748 2747 def undofiles(self):
2749 2748 return [(vfs, undoname(x)) for vfs, x in self._journalfiles()]
2750 2749
2751 2750 @unfilteredmethod
2752 2751 def _writejournal(self, desc):
2753 2752 self.vfs.write(b"journal.desc", b"%d\n%s\n" % (len(self), desc))
2754 2753
2755 2754 def recover(self):
2756 2755 with self.lock():
2757 2756 if self.svfs.exists(b"journal"):
2758 2757 self.ui.status(_(b"rolling back interrupted transaction\n"))
2759 2758 vfsmap = self.vfs_map
2760 2759 transaction.rollback(
2761 2760 self.svfs,
2762 2761 vfsmap,
2763 2762 b"journal",
2764 2763 self.ui.warn,
2765 2764 checkambigfiles=_cachedfiles,
2766 2765 )
2767 2766 self.invalidate()
2768 2767 return True
2769 2768 else:
2770 2769 self.ui.warn(_(b"no interrupted transaction available\n"))
2771 2770 return False
2772 2771
2773 2772 def rollback(self, dryrun=False, force=False):
2774 2773 wlock = lock = None
2775 2774 try:
2776 2775 wlock = self.wlock()
2777 2776 lock = self.lock()
2778 2777 if self.svfs.exists(b"undo"):
2779 2778 return self._rollback(dryrun, force)
2780 2779 else:
2781 2780 self.ui.warn(_(b"no rollback information available\n"))
2782 2781 return 1
2783 2782 finally:
2784 2783 release(lock, wlock)
2785 2784
2786 2785 @unfilteredmethod # Until we get smarter cache management
2787 2786 def _rollback(self, dryrun, force):
2788 2787 ui = self.ui
2789 2788
2790 2789 parents = self.dirstate.parents()
2791 2790 try:
2792 2791 args = self.vfs.read(b'undo.desc').splitlines()
2793 2792 (oldlen, desc, detail) = (int(args[0]), args[1], None)
2794 2793 if len(args) >= 3:
2795 2794 detail = args[2]
2796 2795 oldtip = oldlen - 1
2797 2796
2798 2797 if detail and ui.verbose:
2799 2798 msg = _(
2800 2799 b'repository tip rolled back to revision %d'
2801 2800 b' (undo %s: %s)\n'
2802 2801 ) % (oldtip, desc, detail)
2803 2802 else:
2804 2803 msg = _(
2805 2804 b'repository tip rolled back to revision %d (undo %s)\n'
2806 2805 ) % (oldtip, desc)
2807 2806 parentgone = any(self[p].rev() > oldtip for p in parents)
2808 2807 except IOError:
2809 2808 msg = _(b'rolling back unknown transaction\n')
2810 2809 desc = None
2811 2810 parentgone = True
2812 2811
2813 2812 if not force and self[b'.'] != self[b'tip'] and desc == b'commit':
2814 2813 raise error.Abort(
2815 2814 _(
2816 2815 b'rollback of last commit while not checked out '
2817 2816 b'may lose data'
2818 2817 ),
2819 2818 hint=_(b'use -f to force'),
2820 2819 )
2821 2820
2822 2821 ui.status(msg)
2823 2822 if dryrun:
2824 2823 return 0
2825 2824
2826 2825 self.destroying()
2827 2826 vfsmap = self.vfs_map
2828 2827 skip_journal_pattern = None
2829 2828 if not parentgone:
2830 2829 skip_journal_pattern = RE_SKIP_DIRSTATE_ROLLBACK
2831 2830 transaction.rollback(
2832 2831 self.svfs,
2833 2832 vfsmap,
2834 2833 b'undo',
2835 2834 ui.warn,
2836 2835 checkambigfiles=_cachedfiles,
2837 2836 skip_journal_pattern=skip_journal_pattern,
2838 2837 )
2839 2838 self.invalidate()
2840 2839 self.dirstate.invalidate()
2841 2840
2842 2841 if parentgone:
2843 2842 # replace this with some explicit parent update in the future.
2844 2843 has_node = self.changelog.index.has_node
2845 2844 if not all(has_node(p) for p in self.dirstate._pl):
2846 2845 # There was no dirstate to backup initially, we need to drop
2847 2846 # the existing one.
2848 2847 with self.dirstate.changing_parents(self):
2849 2848 self.dirstate.setparents(self.nullid)
2850 2849 self.dirstate.clear()
2851 2850
2852 2851 parents = tuple([p.rev() for p in self[None].parents()])
2853 2852 if len(parents) > 1:
2854 2853 ui.status(
2855 2854 _(
2856 2855 b'working directory now based on '
2857 2856 b'revisions %d and %d\n'
2858 2857 )
2859 2858 % parents
2860 2859 )
2861 2860 else:
2862 2861 ui.status(
2863 2862 _(b'working directory now based on revision %d\n') % parents
2864 2863 )
2865 2864 mergestatemod.mergestate.clean(self)
2866 2865
2867 2866 # TODO: if we know which new heads may result from this rollback, pass
2868 2867 # them to destroy(), which will prevent the branchhead cache from being
2869 2868 # invalidated.
2870 2869 self.destroyed()
2871 2870 return 0
2872 2871
2873 2872 def _buildcacheupdater(self, newtransaction):
2874 2873 """called during transaction to build the callback updating cache
2875 2874
2876 2875 Lives on the repository to help extensions that might want to augment
2877 2876 this logic. For this purpose, the created transaction is passed to the
2878 2877 method.
2879 2878 """
2880 2879 # we must avoid cyclic reference between repo and transaction.
2881 2880 reporef = weakref.ref(self)
2882 2881
2883 2882 def updater(tr):
2884 2883 repo = reporef()
2885 2884 assert repo is not None # help pytype
2886 2885 repo.updatecaches(tr)
2887 2886
2888 2887 return updater
2889 2888
2890 2889 @unfilteredmethod
2891 2890 def updatecaches(self, tr=None, full=False, caches=None):
2892 2891 """warm appropriate caches
2893 2892
2894 2893 If this function is called after a transaction closed, the transaction
2895 2894 will be available in the 'tr' argument. This can be used to selectively
2896 2895 update caches relevant to the changes in that transaction.
2897 2896
2898 2897 If 'full' is set, make sure all caches the function knows about have
2899 2898 up-to-date data. Even the ones usually loaded more lazily.
2900 2899
2901 2900 The `full` argument can take a special "post-clone" value. In this case
2902 2901 the cache warming is done after a clone and some of the slower caches might
2903 2902 be skipped, namely the `.fnodetags` one. This argument is 5.8 specific
2904 2903 as we plan for a cleaner way to deal with this for 5.9.
2905 2904 """
2906 2905 if tr is not None and tr.hookargs.get(b'source') == b'strip':
2907 2906 # During strip, many caches are invalid but
2908 2907 # later call to `destroyed` will refresh them.
2909 2908 return
2910 2909
2911 2910 unfi = self.unfiltered()
2912 2911
2913 2912 if full:
2914 2913 msg = (
2915 2914 "`full` argument for `repo.updatecaches` is deprecated\n"
2916 2915 "(use `caches=repository.CACHE_ALL` instead)"
2917 2916 )
2918 2917 self.ui.deprecwarn(msg, b"5.9")
2919 2918 caches = repository.CACHES_ALL
2920 2919 if full == b"post-clone":
2921 2920 caches = repository.CACHES_POST_CLONE
2922 2921 caches = repository.CACHES_ALL
2923 2922 elif caches is None:
2924 2923 caches = repository.CACHES_DEFAULT
2925 2924
2926 2925 if repository.CACHE_BRANCHMAP_SERVED in caches:
2927 2926 if tr is None or tr.changes[b'origrepolen'] < len(self):
2928 2927 # accessing the 'served' branchmap should refresh all the others,
2929 2928 self.ui.debug(b'updating the branch cache\n')
2930 2929 self.filtered(b'served').branchmap()
2931 2930 self.filtered(b'served.hidden').branchmap()
2932 2931 # flush all possibly delayed writes.
2933 2932 self._branchcaches.write_delayed(self)
2934 2933
2935 2934 if repository.CACHE_CHANGELOG_CACHE in caches:
2936 2935 self.changelog.update_caches(transaction=tr)
2937 2936
2938 2937 if repository.CACHE_MANIFESTLOG_CACHE in caches:
2939 2938 self.manifestlog.update_caches(transaction=tr)
2940 2939 for entry in self.store.walk():
2941 2940 if not entry.is_revlog:
2942 2941 continue
2943 2942 if not entry.is_manifestlog:
2944 2943 continue
2945 2944 manifestrevlog = entry.get_revlog_instance(self).get_revlog()
2946 2945 if manifestrevlog is not None:
2947 2946 manifestrevlog.update_caches(transaction=tr)
2948 2947
2949 2948 if repository.CACHE_REV_BRANCH in caches:
2950 2949 rbc = unfi.revbranchcache()
2951 2950 for r in unfi.changelog:
2952 2951 rbc.branchinfo(r)
2953 2952 rbc.write()
2954 2953
2955 2954 if repository.CACHE_FULL_MANIFEST in caches:
2956 2955 # ensure the working copy parents are in the manifestfulltextcache
2957 2956 for ctx in self[b'.'].parents():
2958 2957 ctx.manifest() # accessing the manifest is enough
2959 2958
2960 2959 if repository.CACHE_FILE_NODE_TAGS in caches:
2961 2960 # accessing fnode cache warms the cache
2962 2961 tagsmod.fnoderevs(self.ui, unfi, unfi.changelog.revs())
2963 2962
2964 2963 if repository.CACHE_TAGS_DEFAULT in caches:
2965 2964 # accessing tags warms the cache
2966 2965 self.tags()
2967 2966 if repository.CACHE_TAGS_SERVED in caches:
2968 2967 self.filtered(b'served').tags()
2969 2968
2970 2969 if repository.CACHE_BRANCHMAP_ALL in caches:
2971 2970 # The CACHE_BRANCHMAP_ALL updates lazily-loaded caches immediately,
2972 2971 # so we're forcing a write to cause these caches to be warmed up
2973 2972 # even if they haven't explicitly been requested yet (if they've
2974 2973 # never been used by hg, they won't ever have been written, even if
2975 2974 # they're a subset of another kind of cache that *has* been used).
2976 2975 for filt in repoview.filtertable.keys():
2977 2976 filtered = self.filtered(filt)
2978 2977 filtered.branchmap().write(filtered)
2979 2978
2980 2979 def invalidatecaches(self):
2981 2980 if '_tagscache' in vars(self):
2982 2981 # can't use delattr on proxy
2983 2982 del self.__dict__['_tagscache']
2984 2983
2985 2984 self._branchcaches.clear()
2986 2985 self.invalidatevolatilesets()
2987 2986 self._sparsesignaturecache.clear()
2988 2987
2989 2988 def invalidatevolatilesets(self):
2990 2989 self.filteredrevcache.clear()
2991 2990 obsolete.clearobscaches(self)
2992 2991 self._quick_access_changeid_invalidate()
2993 2992
2994 2993 def invalidatedirstate(self):
2995 2994 """Invalidates the dirstate, causing the next call to dirstate
2996 2995 to check if it was modified since the last time it was read,
2997 2996 rereading it if it has.
2998 2997
2999 2998 This is different from dirstate.invalidate() in that it doesn't always
3000 2999 reread the dirstate. Use dirstate.invalidate() if you want to
3001 3000 explicitly read the dirstate again (i.e. restoring it to a previous
3002 3001 known good state)."""
3003 3002 unfi = self.unfiltered()
3004 3003 if 'dirstate' in unfi.__dict__:
3005 3004 assert not self.dirstate.is_changing_any
3006 3005 del unfi.__dict__['dirstate']
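    # A minimal illustration (hypothetical caller code, not part of this file's
    # API surface) of the distinction documented above:
    #
    #     repo.invalidatedirstate()    # lazily re-read on the next access, and
    #                                  # only if the on-disk file changed
    #     repo.dirstate.invalidate()   # unconditionally drop the in-memory
    #                                  # dirstate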
3007 3006
3008 3007 def invalidate(self, clearfilecache=False):
3009 3008 """Invalidates both store and non-store parts other than dirstate
3010 3009
3011 3010 If a transaction is running, invalidation of store is omitted,
3012 3011 because discarding in-memory changes might cause inconsistency
3013 3012 (e.g. incomplete fncache causes unintentional failure, but
3014 3013 redundant one doesn't).
3015 3014 """
3016 3015 unfiltered = self.unfiltered() # all file caches are stored unfiltered
3017 3016 for k in list(self._filecache.keys()):
3018 3017 if (
3019 3018 k == b'changelog'
3020 3019 and self.currenttransaction()
3021 3020 and self.changelog._delayed
3022 3021 ):
3023 3022 # The changelog object may store unwritten revisions. We don't
3024 3023 # want to lose them.
3025 3024 # TODO: Solve the problem instead of working around it.
3026 3025 continue
3027 3026
3028 3027 if clearfilecache:
3029 3028 del self._filecache[k]
3030 3029 try:
3031 3030 # XXX ideally, the key would be a unicode string to match the
3032 3031                 # fact it refers to an attribute name. However, changing this was
3033 3032                 # a bit of scope creep compared to the series cleaning up
3034 3033                 # del/set/getattr, so we kept things simple here.
3035 3034 delattr(unfiltered, pycompat.sysstr(k))
3036 3035 except AttributeError:
3037 3036 pass
3038 3037 self.invalidatecaches()
3039 3038 if not self.currenttransaction():
3040 3039 # TODO: Changing contents of store outside transaction
3041 3040 # causes inconsistency. We should make in-memory store
3042 3041 # changes detectable, and abort if changed.
3043 3042 self.store.invalidatecaches()
3044 3043
3045 3044 def invalidateall(self):
3046 3045 """Fully invalidates both store and non-store parts, causing the
3047 3046 subsequent operation to reread any outside changes."""
3048 3047 # extension should hook this to invalidate its caches
3049 3048 self.invalidate()
3050 3049 self.invalidatedirstate()
3051 3050
3052 3051 @unfilteredmethod
3053 3052 def _refreshfilecachestats(self, tr):
3054 3053 """Reload stats of cached files so that they are flagged as valid"""
3055 3054 for k, ce in self._filecache.items():
3056 3055 k = pycompat.sysstr(k)
3057 3056 if k == 'dirstate' or k not in self.__dict__:
3058 3057 continue
3059 3058 ce.refresh()
3060 3059
3061 3060 def _lock(
3062 3061 self,
3063 3062 vfs,
3064 3063 lockname,
3065 3064 wait,
3066 3065 releasefn,
3067 3066 acquirefn,
3068 3067 desc,
3069 3068 ):
3070 3069 timeout = 0
3071 3070 warntimeout = 0
3072 3071 if wait:
3073 3072 timeout = self.ui.configint(b"ui", b"timeout")
3074 3073 warntimeout = self.ui.configint(b"ui", b"timeout.warn")
3075 3074 # internal config: ui.signal-safe-lock
3076 3075 signalsafe = self.ui.configbool(b'ui', b'signal-safe-lock')
3077 3076
3078 3077 l = lockmod.trylock(
3079 3078 self.ui,
3080 3079 vfs,
3081 3080 lockname,
3082 3081 timeout,
3083 3082 warntimeout,
3084 3083 releasefn=releasefn,
3085 3084 acquirefn=acquirefn,
3086 3085 desc=desc,
3087 3086 signalsafe=signalsafe,
3088 3087 )
3089 3088 return l
3090 3089
3091 3090 def _afterlock(self, callback):
3092 3091 """add a callback to be run when the repository is fully unlocked
3093 3092
3094 3093 The callback will be executed when the outermost lock is released
3095 3094 (with wlock being higher level than 'lock')."""
3096 3095 for ref in (self._wlockref, self._lockref):
3097 3096 l = ref and ref()
3098 3097 if l and l.held:
3099 3098 l.postrelease.append(callback)
3100 3099 break
3101 3100         else:  # no lock has been found.
3102 3101 callback(True)
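    # A hedged usage sketch (names below are illustrative): an extension can
    # use _afterlock() to defer work until every lock is released; as with the
    # call above, the callback receives a boolean success flag.
    #
    #     def _announce(success):
    #         if success:
    #             repo.ui.note(b'repository fully unlocked\n')
    #
    #     repo._afterlock(_announce)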
3103 3102
3104 3103 def lock(self, wait=True):
3105 3104 """Lock the repository store (.hg/store) and return a weak reference
3106 3105 to the lock. Use this before modifying the store (e.g. committing or
3107 3106         stripping). If you are opening a transaction, get a lock as well.
3108 3107
3109 3108         If both 'lock' and 'wlock' must be acquired, ensure you always acquire
3110 3109 'wlock' first to avoid a dead-lock hazard."""
3111 3110 l = self._currentlock(self._lockref)
3112 3111 if l is not None:
3113 3112 l.lock()
3114 3113 return l
3115 3114
3116 3115 l = self._lock(
3117 3116 vfs=self.svfs,
3118 3117 lockname=b"lock",
3119 3118 wait=wait,
3120 3119 releasefn=None,
3121 3120 acquirefn=self.invalidate,
3122 3121 desc=_(b'repository %s') % self.origroot,
3123 3122 )
3124 3123 self._lockref = weakref.ref(l)
3125 3124 return l
3126 3125
3127 3126 def wlock(self, wait=True):
3128 3127 """Lock the non-store parts of the repository (everything under
3129 3128 .hg except .hg/store) and return a weak reference to the lock.
3130 3129
3131 3130 Use this before modifying files in .hg.
3132 3131
3133 3132         If both 'lock' and 'wlock' must be acquired, ensure you always acquire
3134 3133 'wlock' first to avoid a dead-lock hazard."""
3135 3134 l = self._wlockref() if self._wlockref else None
3136 3135 if l is not None and l.held:
3137 3136 l.lock()
3138 3137 return l
3139 3138
3140 3139         # We do not need to check for non-waiting lock acquisition. Such an
3141 3140         # acquisition would not cause a dead-lock, as it would just fail.
3142 3141 if wait and (
3143 3142 self.ui.configbool(b'devel', b'all-warnings')
3144 3143 or self.ui.configbool(b'devel', b'check-locks')
3145 3144 ):
3146 3145 if self._currentlock(self._lockref) is not None:
3147 3146 self.ui.develwarn(b'"wlock" acquired after "lock"')
3148 3147
3149 3148 def unlock():
3150 3149 if self.dirstate.is_changing_any:
3151 3150 msg = b"wlock release in the middle of a changing parents"
3152 3151 self.ui.develwarn(msg)
3153 3152 self.dirstate.invalidate()
3154 3153 else:
3155 3154 if self.dirstate._dirty:
3156 3155 msg = b"dirty dirstate on wlock release"
3157 3156 self.ui.develwarn(msg)
3158 3157 self.dirstate.write(None)
3159 3158
3160 3159 unfi = self.unfiltered()
3161 3160 if 'dirstate' in unfi.__dict__:
3162 3161 del unfi.__dict__['dirstate']
3163 3162
3164 3163 l = self._lock(
3165 3164 self.vfs,
3166 3165 b"wlock",
3167 3166 wait,
3168 3167 unlock,
3169 3168 self.invalidatedirstate,
3170 3169 _(b'working directory of %s') % self.origroot,
3171 3170 )
3172 3171 self._wlockref = weakref.ref(l)
3173 3172 return l
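    # Lock-ordering sketch (hypothetical caller code): per the docstrings of
    # lock() and wlock() above, always take 'wlock' before 'lock' to avoid the
    # dead-lock hazard.
    #
    #     with repo.wlock(), repo.lock():
    #         ...  # safe to touch both the working copy and the store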
3174 3173
3175 3174 def _currentlock(self, lockref):
3176 3175 """Returns the lock if it's held, or None if it's not."""
3177 3176 if lockref is None:
3178 3177 return None
3179 3178 l = lockref()
3180 3179 if l is None or not l.held:
3181 3180 return None
3182 3181 return l
3183 3182
3184 3183 def currentwlock(self):
3185 3184 """Returns the wlock if it's held, or None if it's not."""
3186 3185 return self._currentlock(self._wlockref)
3187 3186
3188 3187 def currentlock(self):
3189 3188 """Returns the lock if it's held, or None if it's not."""
3190 3189 return self._currentlock(self._lockref)
3191 3190
3192 3191 def checkcommitpatterns(self, wctx, match, status, fail):
3193 3192 """check for commit arguments that aren't committable"""
3194 3193 if match.isexact() or match.prefix():
3195 3194 matched = set(status.modified + status.added + status.removed)
3196 3195
3197 3196 for f in match.files():
3198 3197 f = self.dirstate.normalize(f)
3199 3198 if f == b'.' or f in matched or f in wctx.substate:
3200 3199 continue
3201 3200 if f in status.deleted:
3202 3201 fail(f, _(b'file not found!'))
3203 3202 # Is it a directory that exists or used to exist?
3204 3203 if self.wvfs.isdir(f) or wctx.p1().hasdir(f):
3205 3204 d = f + b'/'
3206 3205 for mf in matched:
3207 3206 if mf.startswith(d):
3208 3207 break
3209 3208 else:
3210 3209 fail(f, _(b"no match under directory!"))
3211 3210 elif f not in self.dirstate:
3212 3211 fail(f, _(b"file not tracked!"))
3213 3212
3214 3213 @unfilteredmethod
3215 3214 def commit(
3216 3215 self,
3217 3216 text=b"",
3218 3217 user=None,
3219 3218 date=None,
3220 3219 match=None,
3221 3220 force=False,
3222 3221 editor=None,
3223 3222 extra=None,
3224 3223 ):
3225 3224 """Add a new revision to current repository.
3226 3225
3227 3226 Revision information is gathered from the working directory,
3228 3227 match can be used to filter the committed files. If editor is
3229 3228 supplied, it is called to get a commit message.
3230 3229 """
3231 3230 if extra is None:
3232 3231 extra = {}
3233 3232
3234 3233 def fail(f, msg):
3235 3234 raise error.InputError(b'%s: %s' % (f, msg))
3236 3235
3237 3236 if not match:
3238 3237 match = matchmod.always()
3239 3238
3240 3239 if not force:
3241 3240 match.bad = fail
3242 3241
3243 3242 # lock() for recent changelog (see issue4368)
3244 3243 with self.wlock(), self.lock():
3245 3244 wctx = self[None]
3246 3245 merge = len(wctx.parents()) > 1
3247 3246
3248 3247 if not force and merge and not match.always():
3249 3248 raise error.Abort(
3250 3249 _(
3251 3250 b'cannot partially commit a merge '
3252 3251 b'(do not specify files or patterns)'
3253 3252 )
3254 3253 )
3255 3254
3256 3255 status = self.status(match=match, clean=force)
3257 3256 if force:
3258 3257 status.modified.extend(
3259 3258 status.clean
3260 3259 ) # mq may commit clean files
3261 3260
3262 3261 # check subrepos
3263 3262 subs, commitsubs, newstate = subrepoutil.precommit(
3264 3263 self.ui, wctx, status, match, force=force
3265 3264 )
3266 3265
3267 3266 # make sure all explicit patterns are matched
3268 3267 if not force:
3269 3268 self.checkcommitpatterns(wctx, match, status, fail)
3270 3269
3271 3270 cctx = context.workingcommitctx(
3272 3271 self, status, text, user, date, extra
3273 3272 )
3274 3273
3275 3274 ms = mergestatemod.mergestate.read(self)
3276 3275 mergeutil.checkunresolved(ms)
3277 3276
3278 3277 # internal config: ui.allowemptycommit
3279 3278 if cctx.isempty() and not self.ui.configbool(
3280 3279 b'ui', b'allowemptycommit'
3281 3280 ):
3282 3281 self.ui.debug(b'nothing to commit, clearing merge state\n')
3283 3282 ms.reset()
3284 3283 return None
3285 3284
3286 3285 if merge and cctx.deleted():
3287 3286 raise error.Abort(_(b"cannot commit merge with missing files"))
3288 3287
3289 3288 if editor:
3290 3289 cctx._text = editor(self, cctx, subs)
3291 3290 edited = text != cctx._text
3292 3291
3293 3292 # Save commit message in case this transaction gets rolled back
3294 3293 # (e.g. by a pretxncommit hook). Leave the content alone on
3295 3294 # the assumption that the user will use the same editor again.
3296 3295 msg_path = self.savecommitmessage(cctx._text)
3297 3296
3298 3297 # commit subs and write new state
3299 3298 if subs:
3300 3299 uipathfn = scmutil.getuipathfn(self)
3301 3300 for s in sorted(commitsubs):
3302 3301 sub = wctx.sub(s)
3303 3302 self.ui.status(
3304 3303 _(b'committing subrepository %s\n')
3305 3304 % uipathfn(subrepoutil.subrelpath(sub))
3306 3305 )
3307 3306 sr = sub.commit(cctx._text, user, date)
3308 3307 newstate[s] = (newstate[s][0], sr)
3309 3308 subrepoutil.writestate(self, newstate)
3310 3309
3311 3310 p1, p2 = self.dirstate.parents()
3312 3311 hookp1, hookp2 = hex(p1), (p2 != self.nullid and hex(p2) or b'')
3313 3312 try:
3314 3313 self.hook(
3315 3314 b"precommit", throw=True, parent1=hookp1, parent2=hookp2
3316 3315 )
3317 3316 with self.transaction(b'commit'):
3318 3317 ret = self.commitctx(cctx, True)
3319 3318 # update bookmarks, dirstate and mergestate
3320 3319 bookmarks.update(self, [p1, p2], ret)
3321 3320 cctx.markcommitted(ret)
3322 3321 ms.reset()
3323 3322 except: # re-raises
3324 3323 if edited:
3325 3324 self.ui.write(
3326 3325 _(b'note: commit message saved in %s\n') % msg_path
3327 3326 )
3328 3327 self.ui.write(
3329 3328 _(
3330 3329 b"note: use 'hg commit --logfile "
3331 3330 b"%s --edit' to reuse it\n"
3332 3331 )
3333 3332 % msg_path
3334 3333 )
3335 3334 raise
3336 3335
3337 3336 def commithook(unused_success):
3338 3337             # hack for commands that use a temporary commit (e.g. histedit):
3339 3338             # the temporary commit may have been stripped before the hook runs
3340 3339 if self.changelog.hasnode(ret):
3341 3340 self.hook(
3342 3341 b"commit", node=hex(ret), parent1=hookp1, parent2=hookp2
3343 3342 )
3344 3343
3345 3344 self._afterlock(commithook)
3346 3345 return ret
3347 3346
3348 3347 @unfilteredmethod
3349 3348 def commitctx(self, ctx, error=False, origctx=None):
3350 3349 return commit.commitctx(self, ctx, error=error, origctx=origctx)
3351 3350
3352 3351 @unfilteredmethod
3353 3352 def destroying(self):
3354 3353 """Inform the repository that nodes are about to be destroyed.
3355 3354 Intended for use by strip and rollback, so there's a common
3356 3355 place for anything that has to be done before destroying history.
3357 3356
3358 3357 This is mostly useful for saving state that is in memory and waiting
3359 3358 to be flushed when the current lock is released. Because a call to
3360 3359 destroyed is imminent, the repo will be invalidated causing those
3361 3360 changes to stay in memory (waiting for the next unlock), or vanish
3362 3361 completely.
3363 3362 """
3364 3363 # When using the same lock to commit and strip, the phasecache is left
3365 3364 # dirty after committing. Then when we strip, the repo is invalidated,
3366 3365 # causing those changes to disappear.
3367 3366 if '_phasecache' in vars(self):
3368 3367 self._phasecache.write()
3369 3368
3370 3369 @unfilteredmethod
3371 3370 def destroyed(self):
3372 3371 """Inform the repository that nodes have been destroyed.
3373 3372 Intended for use by strip and rollback, so there's a common
3374 3373 place for anything that has to be done after destroying history.
3375 3374 """
3376 3375 # When one tries to:
3377 3376 # 1) destroy nodes thus calling this method (e.g. strip)
3378 3377 # 2) use phasecache somewhere (e.g. commit)
3379 3378 #
3380 3379 # then 2) will fail because the phasecache contains nodes that were
3381 3380 # removed. We can either remove phasecache from the filecache,
3382 3381 # causing it to reload next time it is accessed, or simply filter
3383 3382 # the removed nodes now and write the updated cache.
3384 3383 self._phasecache.filterunknown(self)
3385 3384 self._phasecache.write()
3386 3385
3387 3386 # refresh all repository caches
3388 3387 self.updatecaches()
3389 3388
3390 3389 # Ensure the persistent tag cache is updated. Doing it now
3391 3390 # means that the tag cache only has to worry about destroyed
3392 3391 # heads immediately after a strip/rollback. That in turn
3393 3392 # guarantees that "cachetip == currenttip" (comparing both rev
3394 3393 # and node) always means no nodes have been added or destroyed.
3395 3394
3396 3395 # XXX this is suboptimal when qrefresh'ing: we strip the current
3397 3396 # head, refresh the tag cache, then immediately add a new head.
3398 3397 # But I think doing it this way is necessary for the "instant
3399 3398 # tag cache retrieval" case to work.
3400 3399 self.invalidate()
3401 3400
3402 3401 def status(
3403 3402 self,
3404 3403 node1=b'.',
3405 3404 node2=None,
3406 3405 match=None,
3407 3406 ignored=False,
3408 3407 clean=False,
3409 3408 unknown=False,
3410 3409 listsubrepos=False,
3411 3410 ):
3412 3411 '''a convenience method that calls node1.status(node2)'''
3413 3412 return self[node1].status(
3414 3413 node2, match, ignored, clean, unknown, listsubrepos
3415 3414 )
3416 3415
3417 3416 def addpostdsstatus(self, ps):
3418 3417 """Add a callback to run within the wlock, at the point at which status
3419 3418 fixups happen.
3420 3419
3421 3420 On status completion, callback(wctx, status) will be called with the
3422 3421 wlock held, unless the dirstate has changed from underneath or the wlock
3423 3422 couldn't be grabbed.
3424 3423
3425 3424 Callbacks should not capture and use a cached copy of the dirstate --
3426 3425         it might change in the meantime. Instead, they should access the
3427 3426 dirstate via wctx.repo().dirstate.
3428 3427
3429 3428 This list is emptied out after each status run -- extensions should
3430 3429 make sure it adds to this list each time dirstate.status is called.
3431 3430         make sure they add to this list each time dirstate.status is called.
3432 3431 that don't involve the dirstate.
3433 3432 """
3434 3433
3435 3434 # The list is located here for uniqueness reasons -- it is actually
3436 3435 # managed by the workingctx, but that isn't unique per-repo.
3437 3436 self._postdsstatus.append(ps)
3438 3437
3439 3438 def postdsstatus(self):
3440 3439 """Used by workingctx to get the list of post-dirstate-status hooks."""
3441 3440 return self._postdsstatus
3442 3441
3443 3442 def clearpostdsstatus(self):
3444 3443 """Used by workingctx to clear post-dirstate-status hooks."""
3445 3444 del self._postdsstatus[:]
3446 3445
3447 3446 def heads(self, start=None):
3448 3447 if start is None:
3449 3448 cl = self.changelog
3450 3449 headrevs = reversed(cl.headrevs())
3451 3450 return [cl.node(rev) for rev in headrevs]
3452 3451
3453 3452 heads = self.changelog.heads(start)
3454 3453 # sort the output in rev descending order
3455 3454 return sorted(heads, key=self.changelog.rev, reverse=True)
3456 3455
3457 3456 def branchheads(self, branch=None, start=None, closed=False):
3458 3457 """return a (possibly filtered) list of heads for the given branch
3459 3458
3460 3459 Heads are returned in topological order, from newest to oldest.
3461 3460 If branch is None, use the dirstate branch.
3462 3461 If start is not None, return only heads reachable from start.
3463 3462 If closed is True, return heads that are marked as closed as well.
3464 3463 """
3465 3464 if branch is None:
3466 3465 branch = self[None].branch()
3467 3466 branches = self.branchmap()
3468 3467 if not branches.hasbranch(branch):
3469 3468 return []
3470 3469 # the cache returns heads ordered lowest to highest
3471 3470 bheads = list(reversed(branches.branchheads(branch, closed=closed)))
3472 3471 if start is not None:
3473 3472 # filter out the heads that cannot be reached from startrev
3474 3473 fbheads = set(self.changelog.nodesbetween([start], bheads)[2])
3475 3474 bheads = [h for h in bheads if h in fbheads]
3476 3475 return bheads
3477 3476
3478 3477 def branches(self, nodes):
3479 3478 if not nodes:
3480 3479 nodes = [self.changelog.tip()]
3481 3480 b = []
3482 3481 for n in nodes:
3483 3482 t = n
3484 3483 while True:
3485 3484 p = self.changelog.parents(n)
3486 3485 if p[1] != self.nullid or p[0] == self.nullid:
3487 3486 b.append((t, n, p[0], p[1]))
3488 3487 break
3489 3488 n = p[0]
3490 3489 return b
3491 3490
3492 3491 def between(self, pairs):
3493 3492 r = []
3494 3493
3495 3494 for top, bottom in pairs:
3496 3495 n, l, i = top, [], 0
3497 3496 f = 1
3498 3497
3499 3498 while n != bottom and n != self.nullid:
3500 3499 p = self.changelog.parents(n)[0]
3501 3500 if i == f:
3502 3501 l.append(n)
3503 3502 f = f * 2
3504 3503 n = p
3505 3504 i += 1
3506 3505
3507 3506 r.append(l)
3508 3507
3509 3508 return r
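    # The loop above walks first parents from 'top' towards 'bottom' and
    # records the nodes at exponentially growing distances (1, 2, 4, 8, ...),
    # keeping the reply to the legacy 'between' wire command logarithmic in
    # the length of the chain. For a hypothetical linear history
    # a0 -> ... -> a9 with top=a9 and bottom=a0, the returned list for that
    # pair would be [a8, a7, a5, a1].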
3510 3509
3511 3510 def checkpush(self, pushop):
3512 3511 """Extensions can override this function if additional checks have
3513 3512 to be performed before pushing, or call it if they override push
3514 3513 command.
3515 3514 """
3516 3515
3517 3516 @unfilteredpropertycache
3518 3517 def prepushoutgoinghooks(self):
3519 3518         """Return a util.hooks object whose hooks are called with a pushop
3520 3519         carrying repo, remote, and outgoing attributes, before changesets are pushed.
3521 3520 """
3522 3521 return util.hooks()
3523 3522
3524 3523 def pushkey(self, namespace, key, old, new):
3525 3524 try:
3526 3525 tr = self.currenttransaction()
3527 3526 hookargs = {}
3528 3527 if tr is not None:
3529 3528 hookargs.update(tr.hookargs)
3530 3529 hookargs = pycompat.strkwargs(hookargs)
3531 3530 hookargs['namespace'] = namespace
3532 3531 hookargs['key'] = key
3533 3532 hookargs['old'] = old
3534 3533 hookargs['new'] = new
3535 3534 self.hook(b'prepushkey', throw=True, **hookargs)
3536 3535 except error.HookAbort as exc:
3537 3536 self.ui.write_err(_(b"pushkey-abort: %s\n") % exc)
3538 3537 if exc.hint:
3539 3538 self.ui.write_err(_(b"(%s)\n") % exc.hint)
3540 3539 return False
3541 3540 self.ui.debug(b'pushing key for "%s:%s"\n' % (namespace, key))
3542 3541 ret = pushkey.push(self, namespace, key, old, new)
3543 3542
3544 3543 def runhook(unused_success):
3545 3544 self.hook(
3546 3545 b'pushkey',
3547 3546 namespace=namespace,
3548 3547 key=key,
3549 3548 old=old,
3550 3549 new=new,
3551 3550 ret=ret,
3552 3551 )
3553 3552
3554 3553 self._afterlock(runhook)
3555 3554 return ret
3556 3555
3557 3556 def listkeys(self, namespace):
3558 3557 self.hook(b'prelistkeys', throw=True, namespace=namespace)
3559 3558 self.ui.debug(b'listing keys for "%s"\n' % namespace)
3560 3559 values = pushkey.list(self, namespace)
3561 3560 self.hook(b'listkeys', namespace=namespace, values=values)
3562 3561 return values
3563 3562
3564 3563 def debugwireargs(self, one, two, three=None, four=None, five=None):
3565 3564 '''used to test argument passing over the wire'''
3566 3565 return b"%s %s %s %s %s" % (
3567 3566 one,
3568 3567 two,
3569 3568 pycompat.bytestr(three),
3570 3569 pycompat.bytestr(four),
3571 3570 pycompat.bytestr(five),
3572 3571 )
3573 3572
3574 3573 def savecommitmessage(self, text):
3575 3574 fp = self.vfs(b'last-message.txt', b'wb')
3576 3575 try:
3577 3576 fp.write(text)
3578 3577 finally:
3579 3578 fp.close()
3580 3579 return self.pathto(fp.name[len(self.root) + 1 :])
3581 3580
3582 3581 def register_wanted_sidedata(self, category):
3583 3582 if repository.REPO_FEATURE_SIDE_DATA not in self.features:
3584 3583 # Only revlogv2 repos can want sidedata.
3585 3584 return
3586 3585 self._wanted_sidedata.add(pycompat.bytestr(category))
3587 3586
3588 3587 def register_sidedata_computer(
3589 3588 self, kind, category, keys, computer, flags, replace=False
3590 3589 ):
3591 3590 if kind not in revlogconst.ALL_KINDS:
3592 3591 msg = _(b"unexpected revlog kind '%s'.")
3593 3592 raise error.ProgrammingError(msg % kind)
3594 3593 category = pycompat.bytestr(category)
3595 3594 already_registered = category in self._sidedata_computers.get(kind, [])
3596 3595 if already_registered and not replace:
3597 3596 msg = _(
3598 3597 b"cannot register a sidedata computer twice for category '%s'."
3599 3598 )
3600 3599 raise error.ProgrammingError(msg % category)
3601 3600 if replace and not already_registered:
3602 3601 msg = _(
3603 3602 b"cannot replace a sidedata computer that isn't registered "
3604 3603 b"for category '%s'."
3605 3604 )
3606 3605 raise error.ProgrammingError(msg % category)
3607 3606 self._sidedata_computers.setdefault(kind, {})
3608 3607 self._sidedata_computers[kind][category] = (keys, computer, flags)
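    # A hedged sketch of how an extension might use the sidedata registration
    # API above; the category, keys, and computer are made up for illustration,
    # and the computer signature is an assumption following the convention used
    # by core computers (return a sidedata dict plus flags to add/remove).
    #
    #     def compute_example(repo, store, rev, previous_sidedata):
    #         return {}, (0, 0)
    #
    #     repo.register_wanted_sidedata(b'exp-example')
    #     repo.register_sidedata_computer(
    #         revlogconst.KIND_CHANGELOG,
    #         b'exp-example',
    #         (b'exp-example',),
    #         compute_example,
    #         0,
    #     )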
3609 3608
3610 3609
3611 3610 def undoname(fn: bytes) -> bytes:
3612 3611 base, name = os.path.split(fn)
3613 3612 assert name.startswith(b'journal')
3614 3613 return os.path.join(base, name.replace(b'journal', b'undo', 1))
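# For example (illustrative path), undoname(b'/repo/.hg/store/journal.phaseroots')
# returns b'/repo/.hg/store/undo.phaseroots': only the leading 'journal' part of
# the file name is rewritten to 'undo'.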
3615 3614
3616 3615
3617 3616 def instance(ui, path: bytes, create, intents=None, createopts=None):
3618 3617 # prevent cyclic import localrepo -> upgrade -> localrepo
3619 3618 from . import upgrade
3620 3619
3621 3620 localpath = urlutil.urllocalpath(path)
3622 3621 if create:
3623 3622 createrepository(ui, localpath, createopts=createopts)
3624 3623
3625 3624 def repo_maker():
3626 3625 return makelocalrepository(ui, localpath, intents=intents)
3627 3626
3628 3627 repo = repo_maker()
3629 3628 repo = upgrade.may_auto_upgrade(repo, repo_maker)
3630 3629 return repo
3631 3630
3632 3631
3633 3632 def islocal(path: bytes) -> bool:
3634 3633 return True
3635 3634
3636 3635
3637 3636 def defaultcreateopts(ui, createopts=None):
3638 3637 """Populate the default creation options for a repository.
3639 3638
3640 3639 A dictionary of explicitly requested creation options can be passed
3641 3640 in. Missing keys will be populated.
3642 3641 """
3643 3642 createopts = dict(createopts or {})
3644 3643
3645 3644 if b'backend' not in createopts:
3646 3645 # experimental config: storage.new-repo-backend
3647 3646 createopts[b'backend'] = ui.config(b'storage', b'new-repo-backend')
3648 3647
3649 3648 return createopts
3650 3649
3651 3650
3652 3651 def clone_requirements(ui, createopts, srcrepo):
3653 3652 """clone the requirements of a local repo for a local clone
3654 3653
3655 3654 The store requirements are unchanged while the working copy requirements
3656 3655     depend on the configuration
3657 3656 """
3658 3657 target_requirements = set()
3659 3658 if not srcrepo.requirements:
3660 3659 # this is a legacy revlog "v0" repository, we cannot do anything fancy
3661 3660 # with it.
3662 3661 return target_requirements
3663 3662 createopts = defaultcreateopts(ui, createopts=createopts)
3664 3663 for r in newreporequirements(ui, createopts):
3665 3664 if r in requirementsmod.WORKING_DIR_REQUIREMENTS:
3666 3665 target_requirements.add(r)
3667 3666
3668 3667 for r in srcrepo.requirements:
3669 3668 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS:
3670 3669 target_requirements.add(r)
3671 3670 return target_requirements
3672 3671
3673 3672
3674 3673 def newreporequirements(ui, createopts):
3675 3674 """Determine the set of requirements for a new local repository.
3676 3675
3677 3676 Extensions can wrap this function to specify custom requirements for
3678 3677 new repositories.
3679 3678 """
3680 3679
3681 3680 if b'backend' not in createopts:
3682 3681 raise error.ProgrammingError(
3683 3682 b'backend key not present in createopts; '
3684 3683 b'was defaultcreateopts() called?'
3685 3684 )
3686 3685
3687 3686 if createopts[b'backend'] != b'revlogv1':
3688 3687 raise error.Abort(
3689 3688 _(
3690 3689 b'unable to determine repository requirements for '
3691 3690 b'storage backend: %s'
3692 3691 )
3693 3692 % createopts[b'backend']
3694 3693 )
3695 3694
3696 3695 requirements = {requirementsmod.REVLOGV1_REQUIREMENT}
3697 3696 if ui.configbool(b'format', b'usestore'):
3698 3697 requirements.add(requirementsmod.STORE_REQUIREMENT)
3699 3698 if ui.configbool(b'format', b'usefncache'):
3700 3699 requirements.add(requirementsmod.FNCACHE_REQUIREMENT)
3701 3700 if ui.configbool(b'format', b'dotencode'):
3702 3701 requirements.add(requirementsmod.DOTENCODE_REQUIREMENT)
3703 3702
3704 3703 compengines = ui.configlist(b'format', b'revlog-compression')
3705 3704 for compengine in compengines:
3706 3705 if compengine in util.compengines:
3707 3706 engine = util.compengines[compengine]
3708 3707 if engine.available() and engine.revlogheader():
3709 3708 break
3710 3709 else:
3711 3710 raise error.Abort(
3712 3711 _(
3713 3712 b'compression engines %s defined by '
3714 3713 b'format.revlog-compression not available'
3715 3714 )
3716 3715 % b', '.join(b'"%s"' % e for e in compengines),
3717 3716 hint=_(
3718 3717 b'run "hg debuginstall" to list available '
3719 3718 b'compression engines'
3720 3719 ),
3721 3720 )
3722 3721
3723 3722 # zlib is the historical default and doesn't need an explicit requirement.
3724 3723 if compengine == b'zstd':
3725 3724 requirements.add(b'revlog-compression-zstd')
3726 3725 elif compengine != b'zlib':
3727 3726 requirements.add(b'exp-compression-%s' % compengine)
3728 3727
3729 3728 if scmutil.gdinitconfig(ui):
3730 3729 requirements.add(requirementsmod.GENERALDELTA_REQUIREMENT)
3731 3730 if ui.configbool(b'format', b'sparse-revlog'):
3732 3731 requirements.add(requirementsmod.SPARSEREVLOG_REQUIREMENT)
3733 3732
3734 3733 # experimental config: format.use-dirstate-v2
3735 3734 # Keep this logic in sync with `has_dirstate_v2()` in `tests/hghave.py`
3736 3735 if ui.configbool(b'format', b'use-dirstate-v2'):
3737 3736 requirements.add(requirementsmod.DIRSTATE_V2_REQUIREMENT)
3738 3737
3739 3738 # experimental config: format.exp-use-copies-side-data-changeset
3740 3739 if ui.configbool(b'format', b'exp-use-copies-side-data-changeset'):
3741 3740 requirements.add(requirementsmod.CHANGELOGV2_REQUIREMENT)
3742 3741 requirements.add(requirementsmod.COPIESSDC_REQUIREMENT)
3743 3742 if ui.configbool(b'experimental', b'treemanifest'):
3744 3743 requirements.add(requirementsmod.TREEMANIFEST_REQUIREMENT)
3745 3744
3746 3745 changelogv2 = ui.config(b'format', b'exp-use-changelog-v2')
3747 3746 if changelogv2 == b'enable-unstable-format-and-corrupt-my-data':
3748 3747 requirements.add(requirementsmod.CHANGELOGV2_REQUIREMENT)
3749 3748
3750 3749 revlogv2 = ui.config(b'experimental', b'revlogv2')
3751 3750 if revlogv2 == b'enable-unstable-format-and-corrupt-my-data':
3752 3751 requirements.discard(requirementsmod.REVLOGV1_REQUIREMENT)
3753 3752 requirements.add(requirementsmod.REVLOGV2_REQUIREMENT)
3754 3753 # experimental config: format.internal-phase
3755 3754 if ui.configbool(b'format', b'use-internal-phase'):
3756 3755 requirements.add(requirementsmod.INTERNAL_PHASE_REQUIREMENT)
3757 3756
3758 3757 # experimental config: format.exp-archived-phase
3759 3758 if ui.configbool(b'format', b'exp-archived-phase'):
3760 3759 requirements.add(requirementsmod.ARCHIVED_PHASE_REQUIREMENT)
3761 3760
3762 3761 if createopts.get(b'narrowfiles'):
3763 3762 requirements.add(requirementsmod.NARROW_REQUIREMENT)
3764 3763
3765 3764 if createopts.get(b'lfs'):
3766 3765 requirements.add(b'lfs')
3767 3766
3768 3767 if ui.configbool(b'format', b'bookmarks-in-store'):
3769 3768 requirements.add(requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT)
3770 3769
3771 3770 # The feature is disabled unless a fast implementation is available.
3772 3771 persistent_nodemap_default = policy.importrust('revlog') is not None
3773 3772 if ui.configbool(
3774 3773 b'format', b'use-persistent-nodemap', persistent_nodemap_default
3775 3774 ):
3776 3775 requirements.add(requirementsmod.NODEMAP_REQUIREMENT)
3777 3776
3778 3777 # if share-safe is enabled, let's create the new repository with the new
3779 3778 # requirement
3780 3779 if ui.configbool(b'format', b'use-share-safe'):
3781 3780 requirements.add(requirementsmod.SHARESAFE_REQUIREMENT)
3782 3781
3783 3782     # if we are creating a share-repo¹ we have to handle requirements
3784 3783 # differently.
3785 3784 #
3786 3785 # [1] (i.e. reusing the store from another repository, just having a
3787 3786 # working copy)
3788 3787 if b'sharedrepo' in createopts:
3789 3788 source_requirements = set(createopts[b'sharedrepo'].requirements)
3790 3789
3791 3790 if requirementsmod.SHARESAFE_REQUIREMENT not in source_requirements:
3792 3791 # share to an old school repository, we have to copy the
3793 3792 # requirements and hope for the best.
3794 3793 requirements = source_requirements
3795 3794 else:
3796 3795             # We have control over the working copy only, so "copy" the non
3797 3796 # working copy part over, ignoring previous logic.
3798 3797 to_drop = set()
3799 3798 for req in requirements:
3800 3799 if req in requirementsmod.WORKING_DIR_REQUIREMENTS:
3801 3800 continue
3802 3801 if req in source_requirements:
3803 3802 continue
3804 3803 to_drop.add(req)
3805 3804 requirements -= to_drop
3806 3805 requirements |= source_requirements
3807 3806
3808 3807 if createopts.get(b'sharedrelative'):
3809 3808 requirements.add(requirementsmod.RELATIVE_SHARED_REQUIREMENT)
3810 3809 else:
3811 3810 requirements.add(requirementsmod.SHARED_REQUIREMENT)
3812 3811
3813 3812 if ui.configbool(b'format', b'use-dirstate-tracked-hint'):
3814 3813 version = ui.configint(b'format', b'use-dirstate-tracked-hint.version')
3815 3814 msg = _(b"ignoring unknown tracked key version: %d\n")
3816 3815 hint = _(
3817 3816 b"see `hg help config.format.use-dirstate-tracked-hint-version"
3818 3817 )
3819 3818 if version != 1:
3820 3819 ui.warn(msg % version, hint=hint)
3821 3820 else:
3822 3821 requirements.add(requirementsmod.DIRSTATE_TRACKED_HINT_V1)
3823 3822
3824 3823 return requirements
3825 3824
3826 3825
3827 3826 def checkrequirementscompat(ui, requirements):
3828 3827 """Checks compatibility of repository requirements enabled and disabled.
3829 3828
3830 3829     Returns a set of requirements which need to be dropped because dependent
3831 3830     requirements are not enabled. Also warns users about it."""
3832 3831
3833 3832 dropped = set()
3834 3833
3835 3834 if requirementsmod.STORE_REQUIREMENT not in requirements:
3836 3835 if requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT in requirements:
3837 3836 ui.warn(
3838 3837 _(
3839 3838 b'ignoring enabled \'format.bookmarks-in-store\' config '
3840 3839                     b'because it is incompatible with disabled '
3841 3840 b'\'format.usestore\' config\n'
3842 3841 )
3843 3842 )
3844 3843 dropped.add(requirementsmod.BOOKMARKS_IN_STORE_REQUIREMENT)
3845 3844
3846 3845 if (
3847 3846 requirementsmod.SHARED_REQUIREMENT in requirements
3848 3847 or requirementsmod.RELATIVE_SHARED_REQUIREMENT in requirements
3849 3848 ):
3850 3849 raise error.Abort(
3851 3850 _(
3852 3851 b"cannot create shared repository as source was created"
3853 3852 b" with 'format.usestore' config disabled"
3854 3853 )
3855 3854 )
3856 3855
3857 3856 if requirementsmod.SHARESAFE_REQUIREMENT in requirements:
3858 3857 if ui.hasconfig(b'format', b'use-share-safe'):
3859 3858 msg = _(
3860 3859 b"ignoring enabled 'format.use-share-safe' config because "
3861 3860 b"it is incompatible with disabled 'format.usestore'"
3862 3861 b" config\n"
3863 3862 )
3864 3863 ui.warn(msg)
3865 3864 dropped.add(requirementsmod.SHARESAFE_REQUIREMENT)
3866 3865
3867 3866 return dropped
3868 3867
3869 3868
3870 3869 def filterknowncreateopts(ui, createopts):
3871 3870 """Filters a dict of repo creation options against options that are known.
3872 3871
3873 3872 Receives a dict of repo creation options and returns a dict of those
3874 3873 options that we don't know how to handle.
3875 3874
3876 3875 This function is called as part of repository creation. If the
3877 3876 returned dict contains any items, repository creation will not
3878 3877 be allowed, as it means there was a request to create a repository
3879 3878 with options not recognized by loaded code.
3880 3879
3881 3880 Extensions can wrap this function to filter out creation options
3882 3881 they know how to handle.
3883 3882 """
3884 3883 known = {
3885 3884 b'backend',
3886 3885 b'lfs',
3887 3886 b'narrowfiles',
3888 3887 b'sharedrepo',
3889 3888 b'sharedrelative',
3890 3889 b'shareditems',
3891 3890 b'shallowfilestore',
3892 3891 }
3893 3892
3894 3893 return {k: v for k, v in createopts.items() if k not in known}
3895 3894
3896 3895
3897 3896 def createrepository(ui, path: bytes, createopts=None, requirements=None):
3898 3897 """Create a new repository in a vfs.
3899 3898
3900 3899 ``path`` path to the new repo's working directory.
3901 3900 ``createopts`` options for the new repository.
3902 3901     ``requirements`` predefined set of requirements.
3903 3902 (incompatible with ``createopts``)
3904 3903
3905 3904 The following keys for ``createopts`` are recognized:
3906 3905
3907 3906 backend
3908 3907 The storage backend to use.
3909 3908 lfs
3910 3909 Repository will be created with ``lfs`` requirement. The lfs extension
3911 3910 will automatically be loaded when the repository is accessed.
3912 3911 narrowfiles
3913 3912 Set up repository to support narrow file storage.
3914 3913 sharedrepo
3915 3914 Repository object from which storage should be shared.
3916 3915 sharedrelative
3917 3916 Boolean indicating if the path to the shared repo should be
3918 3917 stored as relative. By default, the pointer to the "parent" repo
3919 3918 is stored as an absolute path.
3920 3919 shareditems
3921 3920 Set of items to share to the new repository (in addition to storage).
3922 3921 shallowfilestore
3923 3922 Indicates that storage for files should be shallow (not all ancestor
3924 3923 revisions are known).
3925 3924 """
3926 3925
3927 3926 if requirements is not None:
3928 3927 if createopts is not None:
3929 3928 msg = b'cannot specify both createopts and requirements'
3930 3929 raise error.ProgrammingError(msg)
3931 3930 createopts = {}
3932 3931 else:
3933 3932 createopts = defaultcreateopts(ui, createopts=createopts)
3934 3933
3935 3934 unknownopts = filterknowncreateopts(ui, createopts)
3936 3935
3937 3936 if not isinstance(unknownopts, dict):
3938 3937 raise error.ProgrammingError(
3939 3938 b'filterknowncreateopts() did not return a dict'
3940 3939 )
3941 3940
3942 3941 if unknownopts:
3943 3942 raise error.Abort(
3944 3943 _(
3945 3944 b'unable to create repository because of unknown '
3946 3945 b'creation option: %s'
3947 3946 )
3948 3947 % b', '.join(sorted(unknownopts)),
3949 3948 hint=_(b'is a required extension not loaded?'),
3950 3949 )
3951 3950
3952 3951 requirements = newreporequirements(ui, createopts=createopts)
3953 3952 requirements -= checkrequirementscompat(ui, requirements)
3954 3953
3955 3954 wdirvfs = vfsmod.vfs(path, expandpath=True, realpath=True)
3956 3955
3957 3956 hgvfs = vfsmod.vfs(wdirvfs.join(b'.hg'))
3958 3957 if hgvfs.exists():
3959 3958 raise error.RepoError(_(b'repository %s already exists') % path)
3960 3959
3961 3960 if b'sharedrepo' in createopts:
3962 3961 sharedpath = createopts[b'sharedrepo'].sharedpath
3963 3962
3964 3963 if createopts.get(b'sharedrelative'):
3965 3964 try:
3966 3965 sharedpath = os.path.relpath(sharedpath, hgvfs.base)
3967 3966 sharedpath = util.pconvert(sharedpath)
3968 3967 except (IOError, ValueError) as e:
3969 3968 # ValueError is raised on Windows if the drive letters differ
3970 3969 # on each path.
3971 3970 raise error.Abort(
3972 3971 _(b'cannot calculate relative path'),
3973 3972 hint=stringutil.forcebytestr(e),
3974 3973 )
3975 3974
3976 3975 if not wdirvfs.exists():
3977 3976 wdirvfs.makedirs()
3978 3977
3979 3978 hgvfs.makedir(notindexed=True)
3980 3979 if b'sharedrepo' not in createopts:
3981 3980 hgvfs.mkdir(b'cache')
3982 3981 hgvfs.mkdir(b'wcache')
3983 3982
3984 3983 has_store = requirementsmod.STORE_REQUIREMENT in requirements
3985 3984 if has_store and b'sharedrepo' not in createopts:
3986 3985 hgvfs.mkdir(b'store')
3987 3986
3988 3987 # We create an invalid changelog outside the store so very old
3989 3988 # Mercurial versions (which didn't know about the requirements
3990 3989 # file) encounter an error on reading the changelog. This
3991 3990 # effectively locks out old clients and prevents them from
3992 3991 # mucking with a repo in an unknown format.
3993 3992 #
3994 3993 # The revlog header has version 65535, which won't be recognized by
3995 3994 # such old clients.
3996 3995 hgvfs.append(
3997 3996 b'00changelog.i',
3998 3997 b'\0\0\xFF\xFF dummy changelog to prevent using the old repo '
3999 3998 b'layout',
4000 3999 )
4001 4000
4002 4001 # Filter the requirements into working copy and store ones
4003 4002 wcreq, storereq = scmutil.filterrequirements(requirements)
4004 4003 # write working copy ones
4005 4004 scmutil.writerequires(hgvfs, wcreq)
4006 4005 # If there are store requirements and the current repository
4007 4006 # is not a shared one, write stored requirements
4008 4007 # For new shared repository, we don't need to write the store
4009 4008 # requirements as they are already present in store requires
4010 4009 if storereq and b'sharedrepo' not in createopts:
4011 4010 storevfs = vfsmod.vfs(hgvfs.join(b'store'), cacheaudited=True)
4012 4011 scmutil.writerequires(storevfs, storereq)
4013 4012
4014 4013 # Write out file telling readers where to find the shared store.
4015 4014 if b'sharedrepo' in createopts:
4016 4015 hgvfs.write(b'sharedpath', sharedpath)
4017 4016
4018 4017 if createopts.get(b'shareditems'):
4019 4018 shared = b'\n'.join(sorted(createopts[b'shareditems'])) + b'\n'
4020 4019 hgvfs.write(b'shared', shared)
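# A hedged usage sketch of createrepository() (paths and options below are
# purely illustrative):
#
#     from mercurial import ui as uimod
#
#     u = uimod.ui.load()
#     createrepository(u, b'/tmp/example-repo', createopts={b'narrowfiles': True})
#     repo = instance(u, b'/tmp/example-repo', create=False)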
4021 4020
4022 4021
4023 4022 def poisonrepository(repo):
4024 4023 """Poison a repository instance so it can no longer be used."""
4025 4024 # Perform any cleanup on the instance.
4026 4025 repo.close()
4027 4026
4028 4027 # Our strategy is to replace the type of the object with one that
4029 4028 # has all attribute lookups result in error.
4030 4029 #
4031 4030 # But we have to allow the close() method because some constructors
4032 4031 # of repos call close() on repo references.
4033 4032 class poisonedrepository:
4034 4033 def __getattribute__(self, item):
4035 4034 if item == 'close':
4036 4035 return object.__getattribute__(self, item)
4037 4036
4038 4037 raise error.ProgrammingError(
4039 4038 b'repo instances should not be used after unshare'
4040 4039 )
4041 4040
4042 4041 def close(self):
4043 4042 pass
4044 4043
4045 4044 # We may have a repoview, which intercepts __setattr__. So be sure
4046 4045 # we operate at the lowest level possible.
4047 4046 object.__setattr__(repo, '__class__', poisonedrepository)
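# Behaviour sketch (hypothetical caller code): once poisoned, only close() is
# still callable; any other attribute access raises a ProgrammingError.
#
#     poisonrepository(repo)
#     repo.close()        # allowed
#     repo.changelog      # raises error.ProgrammingError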
@@ -1,3745 +1,3743 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes constraints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267     # filelog abuses the parent order as a flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285     # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305     # are deltas encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313     Some attributes are duplicated from DataConfig to help keep each object
314 314     self-contained.
315 315 """
316 316
317 317     # can deltas be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328     # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate group by chunk of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
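# A hedged illustration of how these config objects typically reach a revlog:
# the store opener can carry pre-built instances under b'*-config' options,
# which revlog.__init__ below copies instead of reading one scalar opener
# option per setting (the values here are made up):
#
#     opener.options[b'delta-config'] = DeltaConfig(
#         general_delta=True,
#         sparse_revlog=True,
#         delta_both_parents=False,
#     )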
339 339
340 340 class revlog:
341 341 """
342 342 the underlying revision storage object
343 343
344 344 A revlog consists of two parts, an index and the revision data.
345 345
346 346 The index is a file with a fixed record size containing
347 347 information on each revision, including its nodeid (hash), the
348 348 nodeids of its parents, the position and offset of its data within
349 349 the data file, and the revision it's based on. Finally, each entry
350 350 contains a linkrev entry that can serve as a pointer to external
351 351 data.
352 352
353 353 The revision data itself is a linear collection of data chunks.
354 354 Each chunk represents a revision and is usually represented as a
355 355 delta against the previous chunk. To bound lookup time, runs of
356 356 deltas are limited to about 2 times the length of the original
357 357 version data. This makes retrieval of a version proportional to
358 358 its size, or O(1) relative to the number of revisions.
359 359
360 360 Both pieces of the revlog are written to in an append-only
361 361 fashion, which means we never need to rewrite a file to insert or
362 362 remove data, and can use some simple techniques to avoid the need
363 363 for locking while reading.
364 364
365 365 If checkambig, indexfile is opened with checkambig=True at
366 366 writing, to avoid file stat ambiguity.
367 367
368 368 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 369 index will be mmapped rather than read if it is larger than the
370 370 configured threshold.
371 371
372 372 If censorable is True, the revlog can have censored revisions.
373 373
374 374 If `upperboundcomp` is not None, this is the expected maximal gain from
375 375 compression for the data content.
376 376
377 377 `concurrencychecker` is an optional function that receives 3 arguments: a
378 378 file handle, a filename, and an expected position. It should check whether
379 379 the current position in the file handle is valid, and log/warn/fail (by
380 380 raising).
381 381
382 382     See mercurial/revlogutils/constants.py for details about the content of an
383 383 index entry.
384 384 """
385 385
386 386 _flagserrorclass = error.RevlogError
387 387
388 388 @staticmethod
389 389 def is_inline_index(header_bytes):
390 390 """Determine if a revlog is inline from the initial bytes of the index"""
391 391 header = INDEX_HEADER.unpack(header_bytes)[0]
392 392
393 393 _format_flags = header & ~0xFFFF
394 394 _format_version = header & 0xFFFF
395 395
396 396 features = FEATURES_BY_VERSION[_format_version]
397 397 return features[b'inline'](_format_flags)
398 398
399 399 def __init__(
400 400 self,
401 401 opener,
402 402 target,
403 403 radix,
404 404 postfix=None, # only exist for `tmpcensored` now
405 405 checkambig=False,
406 406 mmaplargeindex=False,
407 407 censorable=False,
408 408 upperboundcomp=None,
409 409 persistentnodemap=False,
410 410 concurrencychecker=None,
411 411 trypending=False,
412 412 try_split=False,
413 413 canonical_parent_order=True,
414 414 ):
415 415 """
416 416 create a revlog object
417 417
418 418 opener is a function that abstracts the file opening operation
419 419 and can be used to implement COW semantics or the like.
420 420
421 421         `target`: a (KIND, ID) tuple that identifies the content stored in
422 422         this revlog. It helps the rest of the code to understand what the revlog
423 423         is about without having to resort to heuristics and index filename
424 424         analysis. Note that this must reliably be set by normal code, but
425 425         that test, debug, or performance measurement code might not set this to
426 426         an accurate value.
427 427 """
428 428 self.upperboundcomp = upperboundcomp
429 429
430 430 self.radix = radix
431 431
432 432 self._docket_file = None
433 433 self._indexfile = None
434 434 self._datafile = None
435 435 self._sidedatafile = None
436 436 self._nodemap_file = None
437 437 self.postfix = postfix
438 438 self._trypending = trypending
439 439 self._try_split = try_split
440 440 self.opener = opener
441 441 if persistentnodemap:
442 442 self._nodemap_file = nodemaputil.get_nodemap_file(self)
443 443
444 444 assert target[0] in ALL_KINDS
445 445 assert len(target) == 2
446 446 self.target = target
447 447 if b'feature-config' in self.opener.options:
448 448 self.feature_config = self.opener.options[b'feature-config'].copy()
449 449 else:
450 450 self.feature_config = FeatureConfig()
451 451 self.feature_config.censorable = censorable
452 452 self.feature_config.canonical_parent_order = canonical_parent_order
453 453 if b'data-config' in self.opener.options:
454 454 self.data_config = self.opener.options[b'data-config'].copy()
455 455 else:
456 456 self.data_config = DataConfig()
457 457 self.data_config.check_ambig = checkambig
458 458 self.data_config.mmap_large_index = mmaplargeindex
459 459 if b'delta-config' in self.opener.options:
460 460 self.delta_config = self.opener.options[b'delta-config'].copy()
461 461 else:
462 462 self.delta_config = DeltaConfig()
463 463
464 464 # 3-tuple of (node, rev, text) for a raw revision.
465 465 self._revisioncache = None
466 466 # Maps rev to chain base rev.
467 467 self._chainbasecache = util.lrucachedict(100)
468 468 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
469 469 self._chunkcache = (0, b'')
470 470
471 471 self.index = None
472 472 self._docket = None
473 473 self._nodemap_docket = None
474 474 # Mapping of partial identifiers to full nodes.
475 475 self._pcache = {}
476 476
477 477 # other optional features
478 478
479 479 # Make copy of flag processors so each revlog instance can support
480 480 # custom flags.
481 481 self._flagprocessors = dict(flagutil.flagprocessors)
482 482
483 483 # 3-tuple of file handles being used for active writing.
484 484 self._writinghandles = None
485 485 # prevent nesting of addgroup
486 486 self._adding_group = None
487 487
488 488 self._loadindex()
489 489
490 490 self._concurrencychecker = concurrencychecker
491 491
492 492 @property
493 493 def _generaldelta(self):
494 494 """temporary compatibility proxy"""
495 495 return self.delta_config.general_delta
496 496
497 497 @property
498 498 def _checkambig(self):
499 499 """temporary compatibility proxy"""
500 500 return self.data_config.check_ambig
501 501
502 502 @property
503 503 def _mmaplargeindex(self):
504 504 """temporary compatibility proxy"""
505 505 return self.data_config.mmap_large_index
506 506
507 507 @property
508 508 def _censorable(self):
509 509 """temporary compatibility proxy"""
510 510 return self.feature_config.censorable
511 511
512 512 @property
513 513 def _chunkcachesize(self):
514 514 """temporary compatibility proxy"""
515 515 return self.data_config.chunk_cache_size
516 516
517 517 @property
518 518 def _maxchainlen(self):
519 519 """temporary compatibility proxy"""
520 520 return self.delta_config.max_chain_len
521 521
522 522 @property
523 523 def _deltabothparents(self):
524 524 """temporary compatibility proxy"""
525 525 return self.delta_config.delta_both_parents
526 526
527 527 @property
528 528 def _candidate_group_chunk_size(self):
529 529 """temporary compatibility proxy"""
530 530 return self.delta_config.candidate_group_chunk_size
531 531
532 532 @property
533 533 def _debug_delta(self):
534 534 """temporary compatibility proxy"""
535 535 return self.delta_config.debug_delta
536 536
537 537 @property
538 538 def _compengine(self):
539 539 """temporary compatibility proxy"""
540 540 return self.feature_config.compression_engine
541 541
542 542 @property
543 543 def _compengineopts(self):
544 544 """temporary compatibility proxy"""
545 545 return self.feature_config.compression_engine_options
546 546
547 547 @property
548 548 def _maxdeltachainspan(self):
549 549 """temporary compatibility proxy"""
550 550 return self.delta_config.max_deltachain_span
551 551
552 552 @property
553 553 def _withsparseread(self):
554 554 """temporary compatibility proxy"""
555 555 return self.data_config.with_sparse_read
556 556
557 557 @property
558 558 def _sparserevlog(self):
559 559 """temporary compatibility proxy"""
560 560 return self.delta_config.sparse_revlog
561 561
562 562 @property
563 563 def hassidedata(self):
564 564 """temporary compatibility proxy"""
565 565 return self.feature_config.has_side_data
566 566
567 567 @property
568 568 def _srdensitythreshold(self):
569 569 """temporary compatibility proxy"""
570 570 return self.data_config.sr_density_threshold
571 571
572 572 @property
573 573 def _srmingapsize(self):
574 574 """temporary compatibility proxy"""
575 575 return self.data_config.sr_min_gap_size
576 576
577 577 @property
578 578 def _compute_rank(self):
579 579 """temporary compatibility proxy"""
580 580 return self.feature_config.compute_rank
581 581
582 582 @property
583 583 def canonical_parent_order(self):
584 584 """temporary compatibility proxy"""
585 585 return self.feature_config.canonical_parent_order
586 586
587 587 @property
588 588 def _lazydelta(self):
589 589 """temporary compatibility proxy"""
590 590 return self.delta_config.lazy_delta
591 591
592 592 @property
593 593 def _lazydeltabase(self):
594 594 """temporary compatibility proxy"""
595 595 return self.delta_config.lazy_delta_base
596 596
597 597 def _init_opts(self):
598 598 """process options (from above/config) to setup associated default revlog mode
599 599
600 600 These values might be affected when actually reading on-disk information.
601 601
602 602 The relevant values are returned for use in _loadindex().
603 603
604 604 * newversionflags:
605 605 version header to use if we need to create a new revlog
606 606
607 607 * mmapindexthreshold:
608 608 minimal index size at which to start using mmap
609 609
610 610 * force_nodemap:
611 611 force the usage of a "development" version of the nodemap code
612 612 """
613 613 mmapindexthreshold = None
614 614 opts = self.opener.options
615 615
616 616 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
617 617 new_header = CHANGELOGV2
618 618 compute_rank = opts.get(b'changelogv2.compute-rank', True)
619 619 self.feature_config.compute_rank = compute_rank
620 620 elif b'revlogv2' in opts:
621 621 new_header = REVLOGV2
622 622 elif b'revlogv1' in opts:
623 623 new_header = REVLOGV1 | FLAG_INLINE_DATA
624 624 if b'generaldelta' in opts:
625 625 new_header |= FLAG_GENERALDELTA
626 626 elif b'revlogv0' in self.opener.options:
627 627 new_header = REVLOGV0
628 628 else:
629 629 new_header = REVLOG_DEFAULT_VERSION
630 630
631 631 if b'maxchainlen' in opts:
632 632 self.delta_config.max_chain_len = opts[b'maxchainlen']
633 if b'deltabothparents' in opts:
634 self.delta_config.delta_both_parents = opts[b'deltabothparents']
635 633 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
636 634 if dps_cgds:
637 635 self.delta_config.candidate_group_chunk_size = dps_cgds
638 636 if b'lazydelta' in opts:
639 637 self.delta_config.lazy_delta = bool(opts[b'lazydelta'])
640 638 if self._lazydelta and b'lazydeltabase' in opts:
641 639 self.delta_config.lazy_delta_base = opts[b'lazydeltabase']
642 640 if b'debug-delta' in opts:
643 641 self.delta_config.debug_delta = opts[b'debug-delta']
644 642 if b'compengine' in opts:
645 643 self.feature_config.compression_engine = opts[b'compengine']
646 644 comp_engine_opts = self.feature_config.compression_engine_options
647 645 if b'zlib.level' in opts:
648 646 comp_engine_opts[b'zlib.level'] = opts[b'zlib.level']
649 647 if b'zstd.level' in opts:
650 648 comp_engine_opts[b'zstd.level'] = opts[b'zstd.level']
651 649 if b'maxdeltachainspan' in opts:
652 650 self.delta_config.max_deltachain_span = opts[b'maxdeltachainspan']
653 651 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
654 652 mmapindexthreshold = opts[b'mmapindexthreshold']
655 653 self.data_config.mmap_index_threshold = mmapindexthreshold
656 654 if b'sparse-revlog' in opts:
657 655 self.delta_config.sparse_revlog = bool(opts[b'sparse-revlog'])
658 656 if self.delta_config.sparse_revlog:
659 657 # sparse-revlog forces sparse-read
660 658 self.data_config.with_sparse_read = True
661 659 elif b'with-sparse-read' in opts:
662 660 self.data_config.with_sparse_read = bool(opts[b'with-sparse-read'])
663 661 if b'sparse-read-density-threshold' in opts:
664 662 self.data_config.sr_density_threshold = opts[
665 663 b'sparse-read-density-threshold'
666 664 ]
667 665 if b'sparse-read-min-gap-size' in opts:
668 666 self.data_config.sr_min_gap_size = opts[b'sparse-read-min-gap-size']
669 667 if opts.get(b'enableellipsis'):
670 668 self.feature_config.enable_ellipsis = True
671 669 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
672 670
673 671 # revlog v0 doesn't have flag processors
674 672 for flag, processor in opts.get(b'flagprocessors', {}).items():
675 673 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
676 674
677 675 chunk_cache_size = self.data_config.chunk_cache_size
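# The chunk cache size must be a positive power of two; the
# `x & (x - 1)` trick below is the usual branch-free power-of-two test
# (it is zero only when a single bit is set), presumably so cached
# chunk offsets can be computed with simple masking.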
678 676 if chunk_cache_size <= 0:
679 677 raise error.RevlogError(
680 678 _(b'revlog chunk cache size %r is not greater than 0')
681 679 % chunk_cache_size
682 680 )
683 681 elif chunk_cache_size & (chunk_cache_size - 1):
684 682 raise error.RevlogError(
685 683 _(b'revlog chunk cache size %r is not a power of 2')
686 684 % chunk_cache_size
687 685 )
688 686 force_nodemap = opts.get(b'devel-force-nodemap', False)
689 687 return new_header, mmapindexthreshold, force_nodemap
690 688
691 689 def _get_data(self, filepath, mmap_threshold, size=None):
692 690 """return a file content with or without mmap
693 691
694 692 If the file is missing, return the empty string"""
695 693 try:
696 694 with self.opener(filepath) as fp:
697 695 if mmap_threshold is not None:
698 696 file_size = self.opener.fstat(fp).st_size
699 697 if file_size >= mmap_threshold:
700 698 if size is not None:
701 699 # avoid potential mmap crash
702 700 size = min(file_size, size)
703 701 # TODO: should .close() to release resources without
704 702 # relying on Python GC
705 703 if size is None:
706 704 return util.buffer(util.mmapread(fp))
707 705 else:
708 706 return util.buffer(util.mmapread(fp, size))
709 707 if size is None:
710 708 return fp.read()
711 709 else:
712 710 return fp.read(size)
713 711 except FileNotFoundError:
714 712 return b''
715 713
716 714 def get_streams(self, max_linkrev, force_inline=False):
717 715 """return a list of streams that represent this revlog
718 716
719 717 This is used by stream-clone to do bytes to bytes copies of a repository.
720 718
721 719 This streams data for all revisions that refer to a changelog revision up
722 720 to `max_linkrev`.
723 721
724 722 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
725 723
726 724 It returns a list of three-tuples:
727 725
728 726 [
729 727 (filename, bytes_stream, stream_size),
730 728 …
731 729 ]
732 730 """
733 731 n = len(self)
734 732 index = self.index
735 733 while n > 0:
736 734 linkrev = index[n - 1][4]
737 735 if linkrev < max_linkrev:
738 736 break
739 737 # note: this loop will rarely go through multiple iterations, since
740 738 # it only traverses commits created during the current streaming
741 739 # pull operation.
742 740 #
743 741 # If this becomes a problem, using a binary search should cap the
744 742 # runtime of this.
745 743 n = n - 1
746 744 if n == 0:
747 745 # no data to send
748 746 return []
749 747 index_size = n * index.entry_size
750 748 data_size = self.end(n - 1)
751 749
752 750 # XXX we might have been split (or stripped) since the object
753 751 # initialization. We need to close this race too, by having a way to
754 752 # pre-open the files we feed to the revlog and never closing them before
755 753 # we are done streaming.
756 754
757 755 if self._inline:
758 756
759 757 def get_stream():
760 758 with self._indexfp() as fp:
761 759 yield None
762 760 size = index_size + data_size
763 761 if size <= 65536:
764 762 yield fp.read(size)
765 763 else:
766 764 yield from util.filechunkiter(fp, limit=size)
767 765
768 766 inline_stream = get_stream()
769 767 next(inline_stream)
770 768 return [
771 769 (self._indexfile, inline_stream, index_size + data_size),
772 770 ]
773 771 elif force_inline:
774 772
775 773 def get_stream():
776 774 with self.reading():
777 775 yield None
778 776
779 777 for rev in range(n):
780 778 idx = self.index.entry_binary(rev)
781 779 if rev == 0 and self._docket is None:
782 780 # re-inject the inline flag
783 781 header = self._format_flags
784 782 header |= self._format_version
785 783 header |= FLAG_INLINE_DATA
786 784 header = self.index.pack_header(header)
787 785 idx = header + idx
788 786 yield idx
789 787 yield self._getsegmentforrevs(rev, rev)[1]
790 788
791 789 inline_stream = get_stream()
792 790 next(inline_stream)
793 791 return [
794 792 (self._indexfile, inline_stream, index_size + data_size),
795 793 ]
796 794 else:
797 795
798 796 def get_index_stream():
799 797 with self._indexfp() as fp:
800 798 yield None
801 799 if index_size <= 65536:
802 800 yield fp.read(index_size)
803 801 else:
804 802 yield from util.filechunkiter(fp, limit=index_size)
805 803
806 804 def get_data_stream():
807 805 with self._datafp() as fp:
808 806 yield None
809 807 if data_size <= 65536:
810 808 yield fp.read(data_size)
811 809 else:
812 810 yield from util.filechunkiter(fp, limit=data_size)
813 811
814 812 index_stream = get_index_stream()
815 813 next(index_stream)
816 814 data_stream = get_data_stream()
817 815 next(data_stream)
818 816 return [
819 817 (self._datafile, data_stream, data_size),
820 818 (self._indexfile, index_stream, index_size),
821 819 ]
822 820
823 821 def _loadindex(self, docket=None):
824 822
825 823 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
826 824
827 825 if self.postfix is not None:
828 826 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
829 827 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
830 828 entry_point = b'%s.i.a' % self.radix
831 829 elif self._try_split and self.opener.exists(self._split_index_file):
832 830 entry_point = self._split_index_file
833 831 else:
834 832 entry_point = b'%s.i' % self.radix
835 833
836 834 if docket is not None:
837 835 self._docket = docket
838 836 self._docket_file = entry_point
839 837 else:
840 838 self._initempty = True
841 839 entry_data = self._get_data(entry_point, mmapindexthreshold)
842 840 if len(entry_data) > 0:
843 841 header = INDEX_HEADER.unpack(entry_data[:4])[0]
844 842 self._initempty = False
845 843 else:
846 844 header = new_header
847 845
848 846 self._format_flags = header & ~0xFFFF
849 847 self._format_version = header & 0xFFFF
850 848
851 849 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
852 850 if supported_flags is None:
853 851 msg = _(b'unknown version (%d) in revlog %s')
854 852 msg %= (self._format_version, self.display_id)
855 853 raise error.RevlogError(msg)
856 854 elif self._format_flags & ~supported_flags:
857 855 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
858 856 display_flag = self._format_flags >> 16
859 857 msg %= (display_flag, self._format_version, self.display_id)
860 858 raise error.RevlogError(msg)
861 859
862 860 features = FEATURES_BY_VERSION[self._format_version]
863 861 self._inline = features[b'inline'](self._format_flags)
864 862 self.delta_config.general_delta = features[b'generaldelta'](
865 863 self._format_flags
866 864 )
867 865 self.feature_config.has_side_data = features[b'sidedata']
868 866
869 867 if not features[b'docket']:
870 868 self._indexfile = entry_point
871 869 index_data = entry_data
872 870 else:
873 871 self._docket_file = entry_point
874 872 if self._initempty:
875 873 self._docket = docketutil.default_docket(self, header)
876 874 else:
877 875 self._docket = docketutil.parse_docket(
878 876 self, entry_data, use_pending=self._trypending
879 877 )
880 878
881 879 if self._docket is not None:
882 880 self._indexfile = self._docket.index_filepath()
883 881 index_data = b''
884 882 index_size = self._docket.index_end
885 883 if index_size > 0:
886 884 index_data = self._get_data(
887 885 self._indexfile, mmapindexthreshold, size=index_size
888 886 )
889 887 if len(index_data) < index_size:
890 888 msg = _(b'too few index data for %s: got %d, expected %d')
891 889 msg %= (self.display_id, len(index_data), index_size)
892 890 raise error.RevlogError(msg)
893 891
894 892 self._inline = False
895 893 # generaldelta implied by version 2 revlogs.
896 894 self.delta_config.general_delta = True
897 895 # the logic for persistent nodemap will be dealt with within the
898 896 # main docket, so disable it for now.
899 897 self._nodemap_file = None
900 898
901 899 if self._docket is not None:
902 900 self._datafile = self._docket.data_filepath()
903 901 self._sidedatafile = self._docket.sidedata_filepath()
904 902 elif self.postfix is None:
905 903 self._datafile = b'%s.d' % self.radix
906 904 else:
907 905 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
908 906
909 907 self.nodeconstants = sha1nodeconstants
910 908 self.nullid = self.nodeconstants.nullid
911 909
912 910 # sparse-revlog can't be on without general-delta (issue6056)
913 911 if not self._generaldelta:
914 912 self.delta_config.sparse_revlog = False
915 913
916 914 self._storedeltachains = True
917 915
918 916 devel_nodemap = (
919 917 self._nodemap_file
920 918 and force_nodemap
921 919 and parse_index_v1_nodemap is not None
922 920 )
923 921
924 922 use_rust_index = False
925 923 if rustrevlog is not None:
926 924 if self._nodemap_file is not None:
927 925 use_rust_index = True
928 926 else:
929 927 use_rust_index = self.opener.options.get(b'rust.index')
930 928
931 929 self._parse_index = parse_index_v1
932 930 if self._format_version == REVLOGV0:
933 931 self._parse_index = revlogv0.parse_index_v0
934 932 elif self._format_version == REVLOGV2:
935 933 self._parse_index = parse_index_v2
936 934 elif self._format_version == CHANGELOGV2:
937 935 self._parse_index = parse_index_cl_v2
938 936 elif devel_nodemap:
939 937 self._parse_index = parse_index_v1_nodemap
940 938 elif use_rust_index:
941 939 self._parse_index = parse_index_v1_mixed
942 940 try:
943 941 d = self._parse_index(index_data, self._inline)
944 942 index, chunkcache = d
945 943 use_nodemap = (
946 944 not self._inline
947 945 and self._nodemap_file is not None
948 946 and hasattr(index, 'update_nodemap_data')
949 947 )
950 948 if use_nodemap:
951 949 nodemap_data = nodemaputil.persisted_data(self)
952 950 if nodemap_data is not None:
953 951 docket = nodemap_data[0]
954 952 if (
955 953 len(d[0]) > docket.tip_rev
956 954 and d[0][docket.tip_rev][7] == docket.tip_node
957 955 ):
958 956 # no changelog tampering
959 957 self._nodemap_docket = docket
960 958 index.update_nodemap_data(*nodemap_data)
961 959 except (ValueError, IndexError):
962 960 raise error.RevlogError(
963 961 _(b"index %s is corrupted") % self.display_id
964 962 )
965 963 self.index = index
966 964 self._segmentfile = randomaccessfile.randomaccessfile(
967 965 self.opener,
968 966 (self._indexfile if self._inline else self._datafile),
969 967 self._chunkcachesize,
970 968 chunkcache,
971 969 )
972 970 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
973 971 self.opener,
974 972 self._sidedatafile,
975 973 self._chunkcachesize,
976 974 )
977 975 # revnum -> (chain-length, sum-delta-length)
978 976 self._chaininfocache = util.lrucachedict(500)
979 977 # revlog header -> revlog compressor
980 978 self._decompressors = {}
981 979
982 980 def get_revlog(self):
983 981 """simple function to mirror API of other not-really-revlog API"""
984 982 return self
985 983
986 984 @util.propertycache
987 985 def revlog_kind(self):
988 986 return self.target[0]
989 987
990 988 @util.propertycache
991 989 def display_id(self):
992 990 """The public facing "ID" of the revlog that we use in message"""
993 991 if self.revlog_kind == KIND_FILELOG:
994 992 # Reference the file without the "data/" prefix, so it is familiar
995 993 # to the user.
996 994 return self.target[1]
997 995 else:
998 996 return self.radix
999 997
1000 998 def _get_decompressor(self, t):
1001 999 try:
1002 1000 compressor = self._decompressors[t]
1003 1001 except KeyError:
1004 1002 try:
1005 1003 engine = util.compengines.forrevlogheader(t)
1006 1004 compressor = engine.revlogcompressor(self._compengineopts)
1007 1005 self._decompressors[t] = compressor
1008 1006 except KeyError:
1009 1007 raise error.RevlogError(
1010 1008 _(b'unknown compression type %s') % binascii.hexlify(t)
1011 1009 )
1012 1010 return compressor
1013 1011
1014 1012 @util.propertycache
1015 1013 def _compressor(self):
1016 1014 engine = util.compengines[self._compengine]
1017 1015 return engine.revlogcompressor(self._compengineopts)
1018 1016
1019 1017 @util.propertycache
1020 1018 def _decompressor(self):
1021 1019 """the default decompressor"""
1022 1020 if self._docket is None:
1023 1021 return None
1024 1022 t = self._docket.default_compression_header
1025 1023 c = self._get_decompressor(t)
1026 1024 return c.decompress
1027 1025
1028 1026 def _indexfp(self):
1029 1027 """file object for the revlog's index file"""
1030 1028 return self.opener(self._indexfile, mode=b"r")
1031 1029
1032 1030 def __index_write_fp(self):
1033 1031 # You should not use this directly; use `_writing` instead
1034 1032 try:
1035 1033 f = self.opener(
1036 1034 self._indexfile, mode=b"r+", checkambig=self._checkambig
1037 1035 )
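# Without a docket, new index entries are simply appended at the end
# of the file. With a docket, the docket's `index_end` records the
# authoritative end of the index, so seek there instead.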
1038 1036 if self._docket is None:
1039 1037 f.seek(0, os.SEEK_END)
1040 1038 else:
1041 1039 f.seek(self._docket.index_end, os.SEEK_SET)
1042 1040 return f
1043 1041 except FileNotFoundError:
1044 1042 return self.opener(
1045 1043 self._indexfile, mode=b"w+", checkambig=self._checkambig
1046 1044 )
1047 1045
1048 1046 def __index_new_fp(self):
1049 1047 # You should not use this unless you are upgrading from an inline revlog
1050 1048 return self.opener(
1051 1049 self._indexfile,
1052 1050 mode=b"w",
1053 1051 checkambig=self._checkambig,
1054 1052 atomictemp=True,
1055 1053 )
1056 1054
1057 1055 def _datafp(self, mode=b'r'):
1058 1056 """file object for the revlog's data file"""
1059 1057 return self.opener(self._datafile, mode=mode)
1060 1058
1061 1059 @contextlib.contextmanager
1062 1060 def _sidedatareadfp(self):
1063 1061 """file object suitable to read sidedata"""
1064 1062 if self._writinghandles:
1065 1063 yield self._writinghandles[2]
1066 1064 else:
1067 1065 with self.opener(self._sidedatafile) as fp:
1068 1066 yield fp
1069 1067
1070 1068 def tiprev(self):
1071 1069 return len(self.index) - 1
1072 1070
1073 1071 def tip(self):
1074 1072 return self.node(self.tiprev())
1075 1073
1076 1074 def __contains__(self, rev):
1077 1075 return 0 <= rev < len(self)
1078 1076
1079 1077 def __len__(self):
1080 1078 return len(self.index)
1081 1079
1082 1080 def __iter__(self):
1083 1081 return iter(range(len(self)))
1084 1082
1085 1083 def revs(self, start=0, stop=None):
1086 1084 """iterate over all rev in this revlog (from start to stop)"""
1087 1085 return storageutil.iterrevs(len(self), start=start, stop=stop)
1088 1086
1089 1087 def hasnode(self, node):
1090 1088 try:
1091 1089 self.rev(node)
1092 1090 return True
1093 1091 except KeyError:
1094 1092 return False
1095 1093
1096 1094 def _candelta(self, baserev, rev):
1097 1095 """whether two revisions (baserev, rev) can be delta-ed or not"""
1098 1096 # Disable delta if either rev requires a content-changing flag
1099 1097 # processor (ex. LFS). This is because such a flag processor can alter
1100 1098 # the rawtext content that the delta will be based on, and two clients
1101 1099 # could have the same revlog node with different flags (i.e. different
1102 1100 # rawtext contents) and the delta could be incompatible.
1103 1101 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1104 1102 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1105 1103 ):
1106 1104 return False
1107 1105 return True
1108 1106
1109 1107 def update_caches(self, transaction):
1110 1108 """update on disk cache
1111 1109
1112 1110 If a transaction is passed, the update may be delayed to transaction
1113 1111 commit."""
1114 1112 if self._nodemap_file is not None:
1115 1113 if transaction is None:
1116 1114 nodemaputil.update_persistent_nodemap(self)
1117 1115 else:
1118 1116 nodemaputil.setup_persistent_nodemap(transaction, self)
1119 1117
1120 1118 def clearcaches(self):
1121 1119 """Clear in-memory caches"""
1122 1120 self._revisioncache = None
1123 1121 self._chainbasecache.clear()
1124 1122 self._segmentfile.clear_cache()
1125 1123 self._segmentfile_sidedata.clear_cache()
1126 1124 self._pcache = {}
1127 1125 self._nodemap_docket = None
1128 1126 self.index.clearcaches()
1129 1127 # The python code is the one responsible for validating the docket, so we
1130 1128 # end up having to refresh it here.
1131 1129 use_nodemap = (
1132 1130 not self._inline
1133 1131 and self._nodemap_file is not None
1134 1132 and hasattr(self.index, 'update_nodemap_data')
1135 1133 )
1136 1134 if use_nodemap:
1137 1135 nodemap_data = nodemaputil.persisted_data(self)
1138 1136 if nodemap_data is not None:
1139 1137 self._nodemap_docket = nodemap_data[0]
1140 1138 self.index.update_nodemap_data(*nodemap_data)
1141 1139
1142 1140 def rev(self, node):
1143 1141 """return the revision number associated with a <nodeid>"""
1144 1142 try:
1145 1143 return self.index.rev(node)
1146 1144 except TypeError:
1147 1145 raise
1148 1146 except error.RevlogError:
1149 1147 # parsers.c radix tree lookup failed
1150 1148 if (
1151 1149 node == self.nodeconstants.wdirid
1152 1150 or node in self.nodeconstants.wdirfilenodeids
1153 1151 ):
1154 1152 raise error.WdirUnsupported
1155 1153 raise error.LookupError(node, self.display_id, _(b'no node'))
1156 1154
1157 1155 # Accessors for index entries.
1158 1156
1159 1157 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1160 1158 # are flags.
1161 1159 def start(self, rev):
1162 1160 return int(self.index[rev][0] >> 16)
1163 1161
1164 1162 def sidedata_cut_off(self, rev):
1165 1163 sd_cut_off = self.index[rev][8]
1166 1164 if sd_cut_off != 0:
1167 1165 return sd_cut_off
1168 1166 # This is some annoying dance, because entries without sidedata
1169 1167 # currently use 0 as their offset. (instead of previous-offset +
1170 1168 # previous-size)
1171 1169 #
1172 1170 # We should reconsider this sidedata → 0 sidedata_offset policy.
1173 1171 # In the meantime, we need this.
1174 1172 while 0 <= rev:
1175 1173 e = self.index[rev]
1176 1174 if e[9] != 0:
1177 1175 return e[8] + e[9]
1178 1176 rev -= 1
1179 1177 return 0
1180 1178
1181 1179 def flags(self, rev):
1182 1180 return self.index[rev][0] & 0xFFFF
1183 1181
1184 1182 def length(self, rev):
1185 1183 return self.index[rev][1]
1186 1184
1187 1185 def sidedata_length(self, rev):
1188 1186 if not self.hassidedata:
1189 1187 return 0
1190 1188 return self.index[rev][9]
1191 1189
1192 1190 def rawsize(self, rev):
1193 1191 """return the length of the uncompressed text for a given revision"""
1194 1192 l = self.index[rev][2]
1195 1193 if l >= 0:
1196 1194 return l
1197 1195
1198 1196 t = self.rawdata(rev)
1199 1197 return len(t)
1200 1198
1201 1199 def size(self, rev):
1202 1200 """length of non-raw text (processed by a "read" flag processor)"""
1203 1201 # fast path: if no "read" flag processor could change the content,
1204 1202 # size is rawsize. note: ELLIPSIS is known to not change the content.
1205 1203 flags = self.flags(rev)
1206 1204 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1207 1205 return self.rawsize(rev)
1208 1206
1209 1207 return len(self.revision(rev))
1210 1208
1211 1209 def fast_rank(self, rev):
1212 1210 """Return the rank of a revision if already known, or None otherwise.
1213 1211
1214 1212 The rank of a revision is the size of the sub-graph it defines as a
1215 1213 head. Equivalently, the rank of a revision `r` is the size of the set
1216 1214 `ancestors(r)`, `r` included.
1217 1215
1218 1216 This method returns the rank retrieved from the revlog in constant
1219 1217 time. It makes no attempt at computing unknown values for versions of
1220 1218 the revlog which do not persist the rank.
1221 1219 """
1222 1220 rank = self.index[rev][ENTRY_RANK]
1223 1221 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1224 1222 return None
1225 1223 if rev == nullrev:
1226 1224 return 0 # convention
1227 1225 return rank
1228 1226
1229 1227 def chainbase(self, rev):
1230 1228 base = self._chainbasecache.get(rev)
1231 1229 if base is not None:
1232 1230 return base
1233 1231
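# entry[3] is the revision this one is a delta against; follow it
# until we reach a revision that is its own base, i.e. the start of
# the delta chain.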
1234 1232 index = self.index
1235 1233 iterrev = rev
1236 1234 base = index[iterrev][3]
1237 1235 while base != iterrev:
1238 1236 iterrev = base
1239 1237 base = index[iterrev][3]
1240 1238
1241 1239 self._chainbasecache[rev] = base
1242 1240 return base
1243 1241
1244 1242 def linkrev(self, rev):
1245 1243 return self.index[rev][4]
1246 1244
1247 1245 def parentrevs(self, rev):
1248 1246 try:
1249 1247 entry = self.index[rev]
1250 1248 except IndexError:
1251 1249 if rev == wdirrev:
1252 1250 raise error.WdirUnsupported
1253 1251 raise
1254 1252
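# With canonical parent order, a null first parent is swapped with the
# second parent so that the null parent is always reported last.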
1255 1253 if self.canonical_parent_order and entry[5] == nullrev:
1256 1254 return entry[6], entry[5]
1257 1255 else:
1258 1256 return entry[5], entry[6]
1259 1257
1260 1258 # fast parentrevs(rev) where rev isn't filtered
1261 1259 _uncheckedparentrevs = parentrevs
1262 1260
1263 1261 def node(self, rev):
1264 1262 try:
1265 1263 return self.index[rev][7]
1266 1264 except IndexError:
1267 1265 if rev == wdirrev:
1268 1266 raise error.WdirUnsupported
1269 1267 raise
1270 1268
1271 1269 # Derived from index values.
1272 1270
1273 1271 def end(self, rev):
1274 1272 return self.start(rev) + self.length(rev)
1275 1273
1276 1274 def parents(self, node):
1277 1275 i = self.index
1278 1276 d = i[self.rev(node)]
1279 1277 # inline node() to avoid function call overhead
1280 1278 if self.canonical_parent_order and d[5] == self.nullid:
1281 1279 return i[d[6]][7], i[d[5]][7]
1282 1280 else:
1283 1281 return i[d[5]][7], i[d[6]][7]
1284 1282
1285 1283 def chainlen(self, rev):
1286 1284 return self._chaininfo(rev)[0]
1287 1285
1288 1286 def _chaininfo(self, rev):
1289 1287 chaininfocache = self._chaininfocache
1290 1288 if rev in chaininfocache:
1291 1289 return chaininfocache[rev]
1292 1290 index = self.index
1293 1291 generaldelta = self._generaldelta
1294 1292 iterrev = rev
1295 1293 e = index[iterrev]
1296 1294 clen = 0
1297 1295 compresseddeltalen = 0
1298 1296 while iterrev != e[3]:
1299 1297 clen += 1
1300 1298 compresseddeltalen += e[1]
1301 1299 if generaldelta:
1302 1300 iterrev = e[3]
1303 1301 else:
1304 1302 iterrev -= 1
1305 1303 if iterrev in chaininfocache:
1306 1304 t = chaininfocache[iterrev]
1307 1305 clen += t[0]
1308 1306 compresseddeltalen += t[1]
1309 1307 break
1310 1308 e = index[iterrev]
1311 1309 else:
1312 1310 # Add text length of base since decompressing that also takes
1313 1311 # work. For cache hits the length is already included.
1314 1312 compresseddeltalen += e[1]
1315 1313 r = (clen, compresseddeltalen)
1316 1314 chaininfocache[rev] = r
1317 1315 return r
1318 1316
1319 1317 def _deltachain(self, rev, stoprev=None):
1320 1318 """Obtain the delta chain for a revision.
1321 1319
1322 1320 ``stoprev`` specifies a revision to stop at. If not specified, we
1323 1321 stop at the base of the chain.
1324 1322
1325 1323 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1326 1324 revs in ascending order and ``stopped`` is a bool indicating whether
1327 1325 ``stoprev`` was hit.
1328 1326 """
1329 1327 # Try C implementation.
1330 1328 try:
1331 1329 return self.index.deltachain(rev, stoprev, self._generaldelta)
1332 1330 except AttributeError:
1333 1331 pass
1334 1332
1335 1333 chain = []
1336 1334
1337 1335 # Alias to prevent attribute lookup in tight loop.
1338 1336 index = self.index
1339 1337 generaldelta = self._generaldelta
1340 1338
1341 1339 iterrev = rev
1342 1340 e = index[iterrev]
1343 1341 while iterrev != e[3] and iterrev != stoprev:
1344 1342 chain.append(iterrev)
1345 1343 if generaldelta:
1346 1344 iterrev = e[3]
1347 1345 else:
1348 1346 iterrev -= 1
1349 1347 e = index[iterrev]
1350 1348
1351 1349 if iterrev == stoprev:
1352 1350 stopped = True
1353 1351 else:
1354 1352 chain.append(iterrev)
1355 1353 stopped = False
1356 1354
1357 1355 chain.reverse()
1358 1356 return chain, stopped
1359 1357
1360 1358 def ancestors(self, revs, stoprev=0, inclusive=False):
1361 1359 """Generate the ancestors of 'revs' in reverse revision order.
1362 1360 Does not generate revs lower than stoprev.
1363 1361
1364 1362 See the documentation for ancestor.lazyancestors for more details."""
1365 1363
1366 1364 # first, make sure start revisions aren't filtered
1367 1365 revs = list(revs)
1368 1366 checkrev = self.node
1369 1367 for r in revs:
1370 1368 checkrev(r)
1371 1369 # and we're sure ancestors aren't filtered as well
1372 1370
1373 1371 if rustancestor is not None and self.index.rust_ext_compat:
1374 1372 lazyancestors = rustancestor.LazyAncestors
1375 1373 arg = self.index
1376 1374 else:
1377 1375 lazyancestors = ancestor.lazyancestors
1378 1376 arg = self._uncheckedparentrevs
1379 1377 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1380 1378
1381 1379 def descendants(self, revs):
1382 1380 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1383 1381
1384 1382 def findcommonmissing(self, common=None, heads=None):
1385 1383 """Return a tuple of the ancestors of common and the ancestors of heads
1386 1384 that are not ancestors of common. In revset terminology, we return the
1387 1385 tuple:
1388 1386
1389 1387 ::common, (::heads) - (::common)
1390 1388
1391 1389 The list is sorted by revision number, meaning it is
1392 1390 topologically sorted.
1393 1391
1394 1392 'heads' and 'common' are both lists of node IDs. If heads is
1395 1393 not supplied, uses all of the revlog's heads. If common is not
1396 1394 supplied, uses nullid."""
1397 1395 if common is None:
1398 1396 common = [self.nullid]
1399 1397 if heads is None:
1400 1398 heads = self.heads()
1401 1399
1402 1400 common = [self.rev(n) for n in common]
1403 1401 heads = [self.rev(n) for n in heads]
1404 1402
1405 1403 # we want the ancestors, but inclusive
1406 1404 class lazyset:
1407 1405 def __init__(self, lazyvalues):
1408 1406 self.addedvalues = set()
1409 1407 self.lazyvalues = lazyvalues
1410 1408
1411 1409 def __contains__(self, value):
1412 1410 return value in self.addedvalues or value in self.lazyvalues
1413 1411
1414 1412 def __iter__(self):
1415 1413 added = self.addedvalues
1416 1414 for r in added:
1417 1415 yield r
1418 1416 for r in self.lazyvalues:
1419 1417 if r not in added:
1420 1418 yield r
1421 1419
1422 1420 def add(self, value):
1423 1421 self.addedvalues.add(value)
1424 1422
1425 1423 def update(self, values):
1426 1424 self.addedvalues.update(values)
1427 1425
1428 1426 has = lazyset(self.ancestors(common))
1429 1427 has.add(nullrev)
1430 1428 has.update(common)
1431 1429
1432 1430 # take all ancestors from heads that aren't in has
1433 1431 missing = set()
1434 1432 visit = collections.deque(r for r in heads if r not in has)
1435 1433 while visit:
1436 1434 r = visit.popleft()
1437 1435 if r in missing:
1438 1436 continue
1439 1437 else:
1440 1438 missing.add(r)
1441 1439 for p in self.parentrevs(r):
1442 1440 if p not in has:
1443 1441 visit.append(p)
1444 1442 missing = list(missing)
1445 1443 missing.sort()
1446 1444 return has, [self.node(miss) for miss in missing]
1447 1445
1448 1446 def incrementalmissingrevs(self, common=None):
1449 1447 """Return an object that can be used to incrementally compute the
1450 1448 revision numbers of the ancestors of arbitrary sets that are not
1451 1449 ancestors of common. This is an ancestor.incrementalmissingancestors
1452 1450 object.
1453 1451
1454 1452 'common' is a list of revision numbers. If common is not supplied, uses
1455 1453 nullrev.
1456 1454 """
1457 1455 if common is None:
1458 1456 common = [nullrev]
1459 1457
1460 1458 if rustancestor is not None and self.index.rust_ext_compat:
1461 1459 return rustancestor.MissingAncestors(self.index, common)
1462 1460 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1463 1461
1464 1462 def findmissingrevs(self, common=None, heads=None):
1465 1463 """Return the revision numbers of the ancestors of heads that
1466 1464 are not ancestors of common.
1467 1465
1468 1466 More specifically, return a list of revision numbers corresponding to
1469 1467 nodes N such that every N satisfies the following constraints:
1470 1468
1471 1469 1. N is an ancestor of some node in 'heads'
1472 1470 2. N is not an ancestor of any node in 'common'
1473 1471
1474 1472 The list is sorted by revision number, meaning it is
1475 1473 topologically sorted.
1476 1474
1477 1475 'heads' and 'common' are both lists of revision numbers. If heads is
1478 1476 not supplied, uses all of the revlog's heads. If common is not
1479 1477 supplied, uses nullid."""
1480 1478 if common is None:
1481 1479 common = [nullrev]
1482 1480 if heads is None:
1483 1481 heads = self.headrevs()
1484 1482
1485 1483 inc = self.incrementalmissingrevs(common=common)
1486 1484 return inc.missingancestors(heads)
1487 1485
1488 1486 def findmissing(self, common=None, heads=None):
1489 1487 """Return the ancestors of heads that are not ancestors of common.
1490 1488
1491 1489 More specifically, return a list of nodes N such that every N
1492 1490 satisfies the following constraints:
1493 1491
1494 1492 1. N is an ancestor of some node in 'heads'
1495 1493 2. N is not an ancestor of any node in 'common'
1496 1494
1497 1495 The list is sorted by revision number, meaning it is
1498 1496 topologically sorted.
1499 1497
1500 1498 'heads' and 'common' are both lists of node IDs. If heads is
1501 1499 not supplied, uses all of the revlog's heads. If common is not
1502 1500 supplied, uses nullid."""
1503 1501 if common is None:
1504 1502 common = [self.nullid]
1505 1503 if heads is None:
1506 1504 heads = self.heads()
1507 1505
1508 1506 common = [self.rev(n) for n in common]
1509 1507 heads = [self.rev(n) for n in heads]
1510 1508
1511 1509 inc = self.incrementalmissingrevs(common=common)
1512 1510 return [self.node(r) for r in inc.missingancestors(heads)]
1513 1511
1514 1512 def nodesbetween(self, roots=None, heads=None):
1515 1513 """Return a topological path from 'roots' to 'heads'.
1516 1514
1517 1515 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1518 1516 topologically sorted list of all nodes N that satisfy both of
1519 1517 these constraints:
1520 1518
1521 1519 1. N is a descendant of some node in 'roots'
1522 1520 2. N is an ancestor of some node in 'heads'
1523 1521
1524 1522 Every node is considered to be both a descendant and an ancestor
1525 1523 of itself, so every reachable node in 'roots' and 'heads' will be
1526 1524 included in 'nodes'.
1527 1525
1528 1526 'outroots' is the list of reachable nodes in 'roots', i.e., the
1529 1527 subset of 'roots' that is returned in 'nodes'. Likewise,
1530 1528 'outheads' is the subset of 'heads' that is also in 'nodes'.
1531 1529
1532 1530 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1533 1531 unspecified, uses nullid as the only root. If 'heads' is
1534 1532 unspecified, uses list of all of the revlog's heads."""
1535 1533 nonodes = ([], [], [])
1536 1534 if roots is not None:
1537 1535 roots = list(roots)
1538 1536 if not roots:
1539 1537 return nonodes
1540 1538 lowestrev = min([self.rev(n) for n in roots])
1541 1539 else:
1542 1540 roots = [self.nullid] # Everybody's a descendant of nullid
1543 1541 lowestrev = nullrev
1544 1542 if (lowestrev == nullrev) and (heads is None):
1545 1543 # We want _all_ the nodes!
1546 1544 return (
1547 1545 [self.node(r) for r in self],
1548 1546 [self.nullid],
1549 1547 list(self.heads()),
1550 1548 )
1551 1549 if heads is None:
1552 1550 # All nodes are ancestors, so the latest ancestor is the last
1553 1551 # node.
1554 1552 highestrev = len(self) - 1
1555 1553 # Set ancestors to None to signal that every node is an ancestor.
1556 1554 ancestors = None
1557 1555 # Set heads to an empty dictionary for later discovery of heads
1558 1556 heads = {}
1559 1557 else:
1560 1558 heads = list(heads)
1561 1559 if not heads:
1562 1560 return nonodes
1563 1561 ancestors = set()
1564 1562 # Turn heads into a dictionary so we can remove 'fake' heads.
1565 1563 # Also, later we will be using it to filter out the heads we can't
1566 1564 # find from roots.
1567 1565 heads = dict.fromkeys(heads, False)
1568 1566 # Start at the top and keep marking parents until we're done.
1569 1567 nodestotag = set(heads)
1570 1568 # Remember where the top was so we can use it as a limit later.
1571 1569 highestrev = max([self.rev(n) for n in nodestotag])
1572 1570 while nodestotag:
1573 1571 # grab a node to tag
1574 1572 n = nodestotag.pop()
1575 1573 # Never tag nullid
1576 1574 if n == self.nullid:
1577 1575 continue
1578 1576 # A node's revision number represents its place in a
1579 1577 # topologically sorted list of nodes.
1580 1578 r = self.rev(n)
1581 1579 if r >= lowestrev:
1582 1580 if n not in ancestors:
1583 1581 # If we are possibly a descendant of one of the roots
1584 1582 # and we haven't already been marked as an ancestor
1585 1583 ancestors.add(n) # Mark as ancestor
1586 1584 # Add non-nullid parents to list of nodes to tag.
1587 1585 nodestotag.update(
1588 1586 [p for p in self.parents(n) if p != self.nullid]
1589 1587 )
1590 1588 elif n in heads: # We've seen it before, is it a fake head?
1591 1589 # So it is, real heads should not be the ancestors of
1592 1590 # any other heads.
1593 1591 heads.pop(n)
1594 1592 if not ancestors:
1595 1593 return nonodes
1596 1594 # Now that we have our set of ancestors, we want to remove any
1597 1595 # roots that are not ancestors.
1598 1596
1599 1597 # If one of the roots was nullid, everything is included anyway.
1600 1598 if lowestrev > nullrev:
1601 1599 # But, since we weren't, let's recompute the lowest rev to not
1602 1600 # include roots that aren't ancestors.
1603 1601
1604 1602 # Filter out roots that aren't ancestors of heads
1605 1603 roots = [root for root in roots if root in ancestors]
1606 1604 # Recompute the lowest revision
1607 1605 if roots:
1608 1606 lowestrev = min([self.rev(root) for root in roots])
1609 1607 else:
1610 1608 # No more roots? Return empty list
1611 1609 return nonodes
1612 1610 else:
1613 1611 # We are descending from nullid, and don't need to care about
1614 1612 # any other roots.
1615 1613 lowestrev = nullrev
1616 1614 roots = [self.nullid]
1617 1615 # Transform our roots list into a set.
1618 1616 descendants = set(roots)
1619 1617 # Also, keep the original roots so we can filter out roots that aren't
1620 1618 # 'real' roots (i.e. are descended from other roots).
1621 1619 roots = descendants.copy()
1622 1620 # Our topologically sorted list of output nodes.
1623 1621 orderedout = []
1624 1622 # Don't start at nullid since we don't want nullid in our output list,
1625 1623 # and if nullid shows up in descendants, empty parents will look like
1626 1624 # they're descendants.
1627 1625 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1628 1626 n = self.node(r)
1629 1627 isdescendant = False
1630 1628 if lowestrev == nullrev: # Everybody is a descendant of nullid
1631 1629 isdescendant = True
1632 1630 elif n in descendants:
1633 1631 # n is already a descendant
1634 1632 isdescendant = True
1635 1633 # This check only needs to be done here because all the roots
1636 1634 # will start being marked as descendants before the loop.
1637 1635 if n in roots:
1638 1636 # If n was a root, check if it's a 'real' root.
1639 1637 p = tuple(self.parents(n))
1640 1638 # If any of its parents are descendants, it's not a root.
1641 1639 if (p[0] in descendants) or (p[1] in descendants):
1642 1640 roots.remove(n)
1643 1641 else:
1644 1642 p = tuple(self.parents(n))
1645 1643 # A node is a descendant if either of its parents is a
1646 1644 # descendant. (We seeded the descendants set with the roots
1647 1645 # up there, remember?)
1648 1646 if (p[0] in descendants) or (p[1] in descendants):
1649 1647 descendants.add(n)
1650 1648 isdescendant = True
1651 1649 if isdescendant and ((ancestors is None) or (n in ancestors)):
1652 1650 # Only include nodes that are both descendants and ancestors.
1653 1651 orderedout.append(n)
1654 1652 if (ancestors is not None) and (n in heads):
1655 1653 # We're trying to figure out which heads are reachable
1656 1654 # from roots.
1657 1655 # Mark this head as having been reached
1658 1656 heads[n] = True
1659 1657 elif ancestors is None:
1660 1658 # Otherwise, we're trying to discover the heads.
1661 1659 # Assume this is a head because if it isn't, the next step
1662 1660 # will eventually remove it.
1663 1661 heads[n] = True
1664 1662 # But, obviously its parents aren't.
1665 1663 for p in self.parents(n):
1666 1664 heads.pop(p, None)
1667 1665 heads = [head for head, flag in heads.items() if flag]
1668 1666 roots = list(roots)
1669 1667 assert orderedout
1670 1668 assert roots
1671 1669 assert heads
1672 1670 return (orderedout, roots, heads)
1673 1671
1674 1672 def headrevs(self, revs=None):
1675 1673 if revs is None:
1676 1674 try:
1677 1675 return self.index.headrevs()
1678 1676 except AttributeError:
1679 1677 return self._headrevs()
1680 1678 if rustdagop is not None and self.index.rust_ext_compat:
1681 1679 return rustdagop.headrevs(self.index, revs)
1682 1680 return dagop.headrevs(revs, self._uncheckedparentrevs)
1683 1681
1684 1682 def computephases(self, roots):
1685 1683 return self.index.computephasesmapsets(roots)
1686 1684
1687 1685 def _headrevs(self):
1688 1686 count = len(self)
1689 1687 if not count:
1690 1688 return [nullrev]
1691 1689 # we won't iterate over filtered revs, so nobody is a head at start
1692 1690 ishead = [0] * (count + 1)
1693 1691 index = self.index
1694 1692 for r in self:
1695 1693 ishead[r] = 1 # I may be a head
1696 1694 e = index[r]
1697 1695 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1698 1696 return [r for r, val in enumerate(ishead) if val]
1699 1697
1700 1698 def heads(self, start=None, stop=None):
1701 1699 """return the list of all nodes that have no children
1702 1700
1703 1701 if start is specified, only heads that are descendants of
1704 1702 start will be returned
1705 1703 if stop is specified, it will consider all the revs from stop
1706 1704 as if they had no children
1707 1705 """
1708 1706 if start is None and stop is None:
1709 1707 if not len(self):
1710 1708 return [self.nullid]
1711 1709 return [self.node(r) for r in self.headrevs()]
1712 1710
1713 1711 if start is None:
1714 1712 start = nullrev
1715 1713 else:
1716 1714 start = self.rev(start)
1717 1715
1718 1716 stoprevs = {self.rev(n) for n in stop or []}
1719 1717
1720 1718 revs = dagop.headrevssubset(
1721 1719 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1722 1720 )
1723 1721
1724 1722 return [self.node(rev) for rev in revs]
1725 1723
1726 1724 def children(self, node):
1727 1725 """find the children of a given node"""
1728 1726 c = []
1729 1727 p = self.rev(node)
1730 1728 for r in self.revs(start=p + 1):
1731 1729 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1732 1730 if prevs:
1733 1731 for pr in prevs:
1734 1732 if pr == p:
1735 1733 c.append(self.node(r))
1736 1734 elif p == nullrev:
1737 1735 c.append(self.node(r))
1738 1736 return c
1739 1737
1740 1738 def commonancestorsheads(self, a, b):
1741 1739 """calculate all the heads of the common ancestors of nodes a and b"""
1742 1740 a, b = self.rev(a), self.rev(b)
1743 1741 ancs = self._commonancestorsheads(a, b)
1744 1742 return pycompat.maplist(self.node, ancs)
1745 1743
1746 1744 def _commonancestorsheads(self, *revs):
1747 1745 """calculate all the heads of the common ancestors of revs"""
1748 1746 try:
1749 1747 ancs = self.index.commonancestorsheads(*revs)
1750 1748 except (AttributeError, OverflowError): # C implementation failed
1751 1749 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1752 1750 return ancs
1753 1751
1754 1752 def isancestor(self, a, b):
1755 1753 """return True if node a is an ancestor of node b
1756 1754
1757 1755 A revision is considered an ancestor of itself."""
1758 1756 a, b = self.rev(a), self.rev(b)
1759 1757 return self.isancestorrev(a, b)
1760 1758
1761 1759 def isancestorrev(self, a, b):
1762 1760 """return True if revision a is an ancestor of revision b
1763 1761
1764 1762 A revision is considered an ancestor of itself.
1765 1763
1766 1764 The implementation of this is trivial but the use of
1767 1765 reachableroots is not."""
1768 1766 if a == nullrev:
1769 1767 return True
1770 1768 elif a == b:
1771 1769 return True
1772 1770 elif a > b:
1773 1771 return False
1774 1772 return bool(self.reachableroots(a, [b], [a], includepath=False))
1775 1773
1776 1774 def reachableroots(self, minroot, heads, roots, includepath=False):
1777 1775 """return (heads(::(<roots> and <roots>::<heads>)))
1778 1776
1779 1777 If includepath is True, return (<roots>::<heads>)."""
1780 1778 try:
1781 1779 return self.index.reachableroots2(
1782 1780 minroot, heads, roots, includepath
1783 1781 )
1784 1782 except AttributeError:
1785 1783 return dagop._reachablerootspure(
1786 1784 self.parentrevs, minroot, roots, heads, includepath
1787 1785 )
1788 1786
1789 1787 def ancestor(self, a, b):
1790 1788 """calculate the "best" common ancestor of nodes a and b"""
1791 1789
1792 1790 a, b = self.rev(a), self.rev(b)
1793 1791 try:
1794 1792 ancs = self.index.ancestors(a, b)
1795 1793 except (AttributeError, OverflowError):
1796 1794 ancs = ancestor.ancestors(self.parentrevs, a, b)
1797 1795 if ancs:
1798 1796 # choose a consistent winner when there's a tie
1799 1797 return min(map(self.node, ancs))
1800 1798 return self.nullid
1801 1799
1802 1800 def _match(self, id):
1803 1801 if isinstance(id, int):
1804 1802 # rev
1805 1803 return self.node(id)
1806 1804 if len(id) == self.nodeconstants.nodelen:
1807 1805 # possibly a binary node
1808 1806 # odds of a binary node being all hex in ASCII are 1 in 10**25
1809 1807 try:
1810 1808 node = id
1811 1809 self.rev(node) # quick search the index
1812 1810 return node
1813 1811 except error.LookupError:
1814 1812 pass # may be partial hex id
1815 1813 try:
1816 1814 # str(rev)
1817 1815 rev = int(id)
1818 1816 if b"%d" % rev != id:
1819 1817 raise ValueError
1820 1818 if rev < 0:
1821 1819 rev = len(self) + rev
1822 1820 if rev < 0 or rev >= len(self):
1823 1821 raise ValueError
1824 1822 return self.node(rev)
1825 1823 except (ValueError, OverflowError):
1826 1824 pass
1827 1825 if len(id) == 2 * self.nodeconstants.nodelen:
1828 1826 try:
1829 1827 # a full hex nodeid?
1830 1828 node = bin(id)
1831 1829 self.rev(node)
1832 1830 return node
1833 1831 except (binascii.Error, error.LookupError):
1834 1832 pass
1835 1833
1836 1834 def _partialmatch(self, id):
1837 1835 # we don't care about wdirfilenodeids as they should always be full hashes
1838 1836 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1839 1837 ambiguous = False
1840 1838 try:
1841 1839 partial = self.index.partialmatch(id)
1842 1840 if partial and self.hasnode(partial):
1843 1841 if maybewdir:
1844 1842 # single 'ff...' match in radix tree, ambiguous with wdir
1845 1843 ambiguous = True
1846 1844 else:
1847 1845 return partial
1848 1846 elif maybewdir:
1849 1847 # no 'ff...' match in radix tree, wdir identified
1850 1848 raise error.WdirUnsupported
1851 1849 else:
1852 1850 return None
1853 1851 except error.RevlogError:
1854 1852 # parsers.c radix tree lookup gave multiple matches
1855 1853 # fast path: for unfiltered changelog, radix tree is accurate
1856 1854 if not getattr(self, 'filteredrevs', None):
1857 1855 ambiguous = True
1858 1856 # fall through to slow path that filters hidden revisions
1859 1857 except (AttributeError, ValueError):
1860 1858 # we are pure python, or key is not hex
1861 1859 pass
1862 1860 if ambiguous:
1863 1861 raise error.AmbiguousPrefixLookupError(
1864 1862 id, self.display_id, _(b'ambiguous identifier')
1865 1863 )
1866 1864
1867 1865 if id in self._pcache:
1868 1866 return self._pcache[id]
1869 1867
1870 1868 if len(id) <= 40:
1871 1869 # hex(node)[:...]
1872 1870 l = len(id) // 2 * 2 # grab an even number of digits
1873 1871 try:
1874 1872 # we're dropping the last digit, so let's check that it's hex,
1875 1873 # to avoid the expensive computation below if it's not
1876 1874 if len(id) % 2 > 0:
1877 1875 if not (id[-1] in hexdigits):
1878 1876 return None
1879 1877 prefix = bin(id[:l])
1880 1878 except binascii.Error:
1881 1879 pass
1882 1880 else:
1883 1881 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1884 1882 nl = [
1885 1883 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1886 1884 ]
1887 1885 if self.nodeconstants.nullhex.startswith(id):
1888 1886 nl.append(self.nullid)
1889 1887 if len(nl) > 0:
1890 1888 if len(nl) == 1 and not maybewdir:
1891 1889 self._pcache[id] = nl[0]
1892 1890 return nl[0]
1893 1891 raise error.AmbiguousPrefixLookupError(
1894 1892 id, self.display_id, _(b'ambiguous identifier')
1895 1893 )
1896 1894 if maybewdir:
1897 1895 raise error.WdirUnsupported
1898 1896 return None
1899 1897
1900 1898 def lookup(self, id):
1901 1899 """locate a node based on:
1902 1900 - revision number or str(revision number)
1903 1901 - nodeid or subset of hex nodeid
1904 1902 """
1905 1903 n = self._match(id)
1906 1904 if n is not None:
1907 1905 return n
1908 1906 n = self._partialmatch(id)
1909 1907 if n:
1910 1908 return n
1911 1909
1912 1910 raise error.LookupError(id, self.display_id, _(b'no match found'))
1913 1911
1914 1912 def shortest(self, node, minlength=1):
1915 1913 """Find the shortest unambiguous prefix that matches node."""
1916 1914
1917 1915 def isvalid(prefix):
1918 1916 try:
1919 1917 matchednode = self._partialmatch(prefix)
1920 1918 except error.AmbiguousPrefixLookupError:
1921 1919 return False
1922 1920 except error.WdirUnsupported:
1923 1921 # single 'ff...' match
1924 1922 return True
1925 1923 if matchednode is None:
1926 1924 raise error.LookupError(node, self.display_id, _(b'no node'))
1927 1925 return True
1928 1926
1929 1927 def maybewdir(prefix):
1930 1928 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1931 1929
1932 1930 hexnode = hex(node)
1933 1931
1934 1932 def disambiguate(hexnode, minlength):
1935 1933 """Disambiguate against wdirid."""
1936 1934 for length in range(minlength, len(hexnode) + 1):
1937 1935 prefix = hexnode[:length]
1938 1936 if not maybewdir(prefix):
1939 1937 return prefix
1940 1938
1941 1939 if not getattr(self, 'filteredrevs', None):
1942 1940 try:
1943 1941 length = max(self.index.shortest(node), minlength)
1944 1942 return disambiguate(hexnode, length)
1945 1943 except error.RevlogError:
1946 1944 if node != self.nodeconstants.wdirid:
1947 1945 raise error.LookupError(
1948 1946 node, self.display_id, _(b'no node')
1949 1947 )
1950 1948 except AttributeError:
1951 1949 # Fall through to pure code
1952 1950 pass
1953 1951
1954 1952 if node == self.nodeconstants.wdirid:
1955 1953 for length in range(minlength, len(hexnode) + 1):
1956 1954 prefix = hexnode[:length]
1957 1955 if isvalid(prefix):
1958 1956 return prefix
1959 1957
1960 1958 for length in range(minlength, len(hexnode) + 1):
1961 1959 prefix = hexnode[:length]
1962 1960 if isvalid(prefix):
1963 1961 return disambiguate(hexnode, length)
1964 1962
1965 1963 def cmp(self, node, text):
1966 1964 """compare text with a given file revision
1967 1965
1968 1966 returns True if text is different from what is stored.
1969 1967 """
1970 1968 p1, p2 = self.parents(node)
1971 1969 return storageutil.hashrevisionsha1(text, p1, p2) != node
1972 1970
1973 1971 def _getsegmentforrevs(self, startrev, endrev):
1974 1972 """Obtain a segment of raw data corresponding to a range of revisions.
1975 1973
1976 1974 Accepts the start and end revisions.
1979 1977
1980 1978 Requests for data may be satisfied by a cache.
1981 1979
1982 1980 Returns a 2-tuple of (offset, data) for the requested range of
1983 1981 revisions. Offset is the integer offset from the beginning of the
1984 1982 revlog and data is a str or buffer of the raw byte data.
1985 1983
1986 1984 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1987 1985 to determine where each revision's data begins and ends.
1988 1986 """
1989 1987 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1990 1988 # (functions are expensive).
1991 1989 index = self.index
1992 1990 istart = index[startrev]
1993 1991 start = int(istart[0] >> 16)
1994 1992 if startrev == endrev:
1995 1993 end = start + istart[1]
1996 1994 else:
1997 1995 iend = index[endrev]
1998 1996 end = int(iend[0] >> 16) + iend[1]
1999 1997
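# Inline revlogs interleave each revision's index entry with its data,
# so the data of revision `r` is preceded by `r + 1` index entries;
# shift the raw offsets accordingly.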
2000 1998 if self._inline:
2001 1999 start += (startrev + 1) * self.index.entry_size
2002 2000 end += (endrev + 1) * self.index.entry_size
2003 2001 length = end - start
2004 2002
2005 2003 return start, self._segmentfile.read_chunk(start, length)
2006 2004
2007 2005 def _chunk(self, rev):
2008 2006 """Obtain a single decompressed chunk for a revision.
2009 2007
2010 2008 Accepts an integer revision.
2013 2011
2014 2012 Returns a str holding uncompressed data for the requested revision.
2015 2013 """
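# The per-revision compression mode says how the chunk is stored:
# COMP_MODE_PLAIN is uncompressed, COMP_MODE_DEFAULT uses the docket's
# default compression engine, and COMP_MODE_INLINE embeds the
# compression header in the chunk itself.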
2016 2014 compression_mode = self.index[rev][10]
2017 2015 data = self._getsegmentforrevs(rev, rev)[1]
2018 2016 if compression_mode == COMP_MODE_PLAIN:
2019 2017 return data
2020 2018 elif compression_mode == COMP_MODE_DEFAULT:
2021 2019 return self._decompressor(data)
2022 2020 elif compression_mode == COMP_MODE_INLINE:
2023 2021 return self.decompress(data)
2024 2022 else:
2025 2023 msg = b'unknown compression mode %d'
2026 2024 msg %= compression_mode
2027 2025 raise error.RevlogError(msg)
2028 2026
2029 2027 def _chunks(self, revs, targetsize=None):
2030 2028 """Obtain decompressed chunks for the specified revisions.
2031 2029
2032 2030         Accepts an iterable of numeric revisions that are assumed to be in
2033 2031         ascending order.
2036 2034
2037 2035 This function is similar to calling ``self._chunk()`` multiple times,
2038 2036 but is faster.
2039 2037
2040 2038 Returns a list with decompressed data for each requested revision.
2041 2039 """
2042 2040 if not revs:
2043 2041 return []
2044 2042 start = self.start
2045 2043 length = self.length
2046 2044 inline = self._inline
2047 2045 iosize = self.index.entry_size
2048 2046 buffer = util.buffer
2049 2047
2050 2048 l = []
2051 2049 ladd = l.append
2052 2050
2053 2051 if not self._withsparseread:
2054 2052 slicedchunks = (revs,)
2055 2053 else:
2056 2054 slicedchunks = deltautil.slicechunk(
2057 2055 self, revs, targetsize=targetsize
2058 2056 )
2059 2057
2060 2058 for revschunk in slicedchunks:
2061 2059 firstrev = revschunk[0]
2062 2060 # Skip trailing revisions with empty diff
2063 2061 for lastrev in revschunk[::-1]:
2064 2062 if length(lastrev) != 0:
2065 2063 break
2066 2064
2067 2065 try:
2068 2066 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2069 2067 except OverflowError:
2070 2068 # issue4215 - we can't cache a run of chunks greater than
2071 2069 # 2G on Windows
2072 2070 return [self._chunk(rev) for rev in revschunk]
2073 2071
2074 2072 decomp = self.decompress
2075 2073 # self._decompressor might be None, but will not be used in that case
2076 2074 def_decomp = self._decompressor
2077 2075 for rev in revschunk:
2078 2076 chunkstart = start(rev)
2079 2077 if inline:
2080 2078 chunkstart += (rev + 1) * iosize
2081 2079 chunklength = length(rev)
2082 2080 comp_mode = self.index[rev][10]
2083 2081 c = buffer(data, chunkstart - offset, chunklength)
2084 2082 if comp_mode == COMP_MODE_PLAIN:
2085 2083 ladd(c)
2086 2084 elif comp_mode == COMP_MODE_INLINE:
2087 2085 ladd(decomp(c))
2088 2086 elif comp_mode == COMP_MODE_DEFAULT:
2089 2087 ladd(def_decomp(c))
2090 2088 else:
2091 2089 msg = b'unknown compression mode %d'
2092 2090 msg %= comp_mode
2093 2091 raise error.RevlogError(msg)
2094 2092
2095 2093 return l
2096 2094
2097 2095 def deltaparent(self, rev):
2098 2096 """return deltaparent of the given revision"""
2099 2097 base = self.index[rev][3]
2100 2098 if base == rev:
2101 2099 return nullrev
2102 2100 elif self._generaldelta:
2103 2101 return base
2104 2102 else:
2105 2103 return rev - 1
2106 2104
2107 2105 def issnapshot(self, rev):
2108 2106 """tells whether rev is a snapshot"""
2109 2107 if not self._sparserevlog:
2110 2108 return self.deltaparent(rev) == nullrev
2111 2109 elif hasattr(self.index, 'issnapshot'):
2112 2110 # directly assign the method to cache the testing and access
2113 2111 self.issnapshot = self.index.issnapshot
2114 2112 return self.issnapshot(rev)
2115 2113 if rev == nullrev:
2116 2114 return True
2117 2115 entry = self.index[rev]
2118 2116 base = entry[3]
2119 2117 if base == rev:
2120 2118 return True
2121 2119 if base == nullrev:
2122 2120 return True
2123 2121 p1 = entry[5]
2124 2122 while self.length(p1) == 0:
2125 2123 b = self.deltaparent(p1)
2126 2124 if b == p1:
2127 2125 break
2128 2126 p1 = b
2129 2127 p2 = entry[6]
2130 2128 while self.length(p2) == 0:
2131 2129 b = self.deltaparent(p2)
2132 2130 if b == p2:
2133 2131 break
2134 2132 p2 = b
2135 2133 if base == p1 or base == p2:
2136 2134 return False
2137 2135 return self.issnapshot(base)
2138 2136
2139 2137 def snapshotdepth(self, rev):
2140 2138         """number of snapshots in the chain before this one"""
2141 2139         if not self.issnapshot(rev):
2142 2140             raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2143 2141 return len(self._deltachain(rev)[0]) - 1
2144 2142
2145 2143 def revdiff(self, rev1, rev2):
2146 2144 """return or calculate a delta between two revisions
2147 2145
2148 2146 The delta calculated is in binary form and is intended to be written to
2149 2147 revlog data directly. So this function needs raw revision data.
2150 2148 """
2151 2149 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2152 2150 return bytes(self._chunk(rev2))
2153 2151
2154 2152 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2155 2153
2156 2154 def revision(self, nodeorrev):
2157 2155 """return an uncompressed revision of a given node or revision
2158 2156 number.
2159 2157 """
2160 2158 return self._revisiondata(nodeorrev)
2161 2159
2162 2160 def sidedata(self, nodeorrev):
2163 2161 """a map of extra data related to the changeset but not part of the hash
2164 2162
2165 2163         This function currently returns a dictionary. However, a more advanced
2166 2164         mapping object will likely be used in the future for more
2167 2165         efficient/lazy code.
2168 2166 """
2169 2167 # deal with <nodeorrev> argument type
2170 2168 if isinstance(nodeorrev, int):
2171 2169 rev = nodeorrev
2172 2170 else:
2173 2171 rev = self.rev(nodeorrev)
2174 2172 return self._sidedata(rev)
2175 2173
2176 2174 def _revisiondata(self, nodeorrev, raw=False):
2177 2175 # deal with <nodeorrev> argument type
2178 2176 if isinstance(nodeorrev, int):
2179 2177 rev = nodeorrev
2180 2178 node = self.node(rev)
2181 2179 else:
2182 2180 node = nodeorrev
2183 2181 rev = None
2184 2182
2185 2183 # fast path the special `nullid` rev
2186 2184 if node == self.nullid:
2187 2185 return b""
2188 2186
2189 2187 # ``rawtext`` is the text as stored inside the revlog. Might be the
2190 2188 # revision or might need to be processed to retrieve the revision.
2191 2189 rev, rawtext, validated = self._rawtext(node, rev)
2192 2190
2193 2191 if raw and validated:
2194 2192             # if we don't want to process the raw text and the raw
2195 2193 # text is cached, we can exit early.
2196 2194 return rawtext
2197 2195 if rev is None:
2198 2196 rev = self.rev(node)
2199 2197         # the revlog's flags for this revision
2200 2198         # (they usually alter its state or content)
2201 2199 flags = self.flags(rev)
2202 2200
2203 2201 if validated and flags == REVIDX_DEFAULT_FLAGS:
2204 2202 # no extra flags set, no flag processor runs, text = rawtext
2205 2203 return rawtext
2206 2204
2207 2205 if raw:
2208 2206 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2209 2207 text = rawtext
2210 2208 else:
2211 2209 r = flagutil.processflagsread(self, rawtext, flags)
2212 2210 text, validatehash = r
2213 2211 if validatehash:
2214 2212 self.checkhash(text, node, rev=rev)
2215 2213 if not validated:
2216 2214 self._revisioncache = (node, rev, rawtext)
2217 2215
2218 2216 return text
2219 2217
2220 2218 def _rawtext(self, node, rev):
2221 2219 """return the possibly unvalidated rawtext for a revision
2222 2220
2223 2221 returns (rev, rawtext, validated)
2224 2222 """
2225 2223
2226 2224 # revision in the cache (could be useful to apply delta)
2227 2225 cachedrev = None
2228 2226 # An intermediate text to apply deltas to
2229 2227 basetext = None
2230 2228
2231 2229 # Check if we have the entry in cache
2232 2230 # The cache entry looks like (node, rev, rawtext)
2233 2231 if self._revisioncache:
2234 2232 if self._revisioncache[0] == node:
2235 2233 return (rev, self._revisioncache[2], True)
2236 2234 cachedrev = self._revisioncache[1]
2237 2235
2238 2236 if rev is None:
2239 2237 rev = self.rev(node)
2240 2238
2241 2239 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2242 2240 if stopped:
2243 2241 basetext = self._revisioncache[2]
2244 2242
2245 2243 # drop cache to save memory, the caller is expected to
2246 2244 # update self._revisioncache after validating the text
2247 2245 self._revisioncache = None
2248 2246
2249 2247 targetsize = None
2250 2248 rawsize = self.index[rev][2]
2251 2249 if 0 <= rawsize:
2252 2250 targetsize = 4 * rawsize
2253 2251
2254 2252 bins = self._chunks(chain, targetsize=targetsize)
2255 2253 if basetext is None:
2256 2254 basetext = bytes(bins[0])
2257 2255 bins = bins[1:]
2258 2256
2259 2257 rawtext = mdiff.patches(basetext, bins)
2260 2258 del basetext # let us have a chance to free memory early
2261 2259 return (rev, rawtext, False)
2262 2260
2263 2261 def _sidedata(self, rev):
2264 2262 """Return the sidedata for a given revision number."""
2265 2263 index_entry = self.index[rev]
2266 2264 sidedata_offset = index_entry[8]
2267 2265 sidedata_size = index_entry[9]
2268 2266
2269 2267 if self._inline:
2270 2268 sidedata_offset += self.index.entry_size * (1 + rev)
2271 2269 if sidedata_size == 0:
2272 2270 return {}
2273 2271
2274 2272 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2275 2273 filename = self._sidedatafile
2276 2274 end = self._docket.sidedata_end
2277 2275 offset = sidedata_offset
2278 2276 length = sidedata_size
2279 2277 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2280 2278 raise error.RevlogError(m)
2281 2279
2282 2280 comp_segment = self._segmentfile_sidedata.read_chunk(
2283 2281 sidedata_offset, sidedata_size
2284 2282 )
2285 2283
2286 2284 comp = self.index[rev][11]
2287 2285 if comp == COMP_MODE_PLAIN:
2288 2286 segment = comp_segment
2289 2287 elif comp == COMP_MODE_DEFAULT:
2290 2288 segment = self._decompressor(comp_segment)
2291 2289 elif comp == COMP_MODE_INLINE:
2292 2290 segment = self.decompress(comp_segment)
2293 2291 else:
2294 2292 msg = b'unknown compression mode %d'
2295 2293 msg %= comp
2296 2294 raise error.RevlogError(msg)
2297 2295
2298 2296 sidedata = sidedatautil.deserialize_sidedata(segment)
2299 2297 return sidedata
2300 2298
2301 2299 def rawdata(self, nodeorrev):
2302 2300 """return an uncompressed raw data of a given node or revision number."""
2303 2301 return self._revisiondata(nodeorrev, raw=True)
2304 2302
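    # Clarifying note: ``revision()`` runs flag processors on the stored text
    # while ``rawdata()`` returns the bytes exactly as stored, so the two only
    # differ when flags are set.  An illustration (``rl`` is hypothetical):
    #
    #     if rl.flags(rev) == REVIDX_DEFAULT_FLAGS:
    #         assert rl.revision(rev) == rl.rawdata(rev)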
2305 2303 def hash(self, text, p1, p2):
2306 2304 """Compute a node hash.
2307 2305
2308 2306 Available as a function so that subclasses can replace the hash
2309 2307 as needed.
2310 2308 """
2311 2309 return storageutil.hashrevisionsha1(text, p1, p2)
2312 2310
2313 2311 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2314 2312 """Check node hash integrity.
2315 2313
2316 2314 Available as a function so that subclasses can extend hash mismatch
2317 2315 behaviors as needed.
2318 2316 """
2319 2317 try:
2320 2318 if p1 is None and p2 is None:
2321 2319 p1, p2 = self.parents(node)
2322 2320 if node != self.hash(text, p1, p2):
2323 2321 # Clear the revision cache on hash failure. The revision cache
2324 2322 # only stores the raw revision and clearing the cache does have
2325 2323 # the side-effect that we won't have a cache hit when the raw
2326 2324 # revision data is accessed. But this case should be rare and
2327 2325 # it is extra work to teach the cache about the hash
2328 2326 # verification state.
2329 2327 if self._revisioncache and self._revisioncache[0] == node:
2330 2328 self._revisioncache = None
2331 2329
2332 2330 revornode = rev
2333 2331 if revornode is None:
2334 2332 revornode = templatefilters.short(hex(node))
2335 2333 raise error.RevlogError(
2336 2334 _(b"integrity check failed on %s:%s")
2337 2335 % (self.display_id, pycompat.bytestr(revornode))
2338 2336 )
2339 2337 except error.RevlogError:
2340 2338 if self._censorable and storageutil.iscensoredtext(text):
2341 2339 raise error.CensoredNodeError(self.display_id, node, text)
2342 2340 raise
2343 2341
2344 2342 @property
2345 2343 def _split_index_file(self):
2346 2344 """the path where to expect the index of an ongoing splitting operation
2347 2345
2348 2346 The file will only exist if a splitting operation is in progress, but
2349 2347 it is always expected at the same location."""
2350 2348 parts = self.radix.split(b'/')
2351 2349 if len(parts) > 1:
2352 2350             # adds a '-s' suffix to the ``data/`` or ``meta/`` base
2353 2351 head = parts[0] + b'-s'
2354 2352 mids = parts[1:-1]
2355 2353 tail = parts[-1] + b'.i'
2356 2354 pieces = [head] + mids + [tail]
2357 2355 return b'/'.join(pieces)
2358 2356 else:
2359 2357 # the revlog is stored at the root of the store (changelog or
2360 2358 # manifest), no risk of collision.
2361 2359 return self.radix + b'.i.s'
2362 2360
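    # Illustrative examples of the transform implemented above:
    #
    #     radix b'data/foo/bar' -> split index at b'data-s/foo/bar.i'
    #     radix b'00manifest'   -> split index at b'00manifest.i.s'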
2363 2361 def _enforceinlinesize(self, tr, side_write=True):
2364 2362 """Check if the revlog is too big for inline and convert if so.
2365 2363
2366 2364 This should be called after revisions are added to the revlog. If the
2367 2365 revlog has grown too large to be an inline revlog, it will convert it
2368 2366 to use multiple index and data files.
2369 2367 """
2370 2368 tiprev = len(self) - 1
2371 2369 total_size = self.start(tiprev) + self.length(tiprev)
2372 2370 if not self._inline or total_size < _maxinline:
2373 2371 return
2374 2372
2375 2373 troffset = tr.findoffset(self._indexfile)
2376 2374 if troffset is None:
2377 2375 raise error.RevlogError(
2378 2376 _(b"%s not found in the transaction") % self._indexfile
2379 2377 )
2380 2378 if troffset:
2381 2379 tr.addbackup(self._indexfile, for_offset=True)
2382 2380 tr.add(self._datafile, 0)
2383 2381
2384 2382 existing_handles = False
2385 2383 if self._writinghandles is not None:
2386 2384 existing_handles = True
2387 2385 fp = self._writinghandles[0]
2388 2386 fp.flush()
2389 2387 fp.close()
2390 2388 # We can't use the cached file handle after close(). So prevent
2391 2389 # its usage.
2392 2390 self._writinghandles = None
2393 2391 self._segmentfile.writing_handle = None
2394 2392 # No need to deal with sidedata writing handle as it is only
2395 2393 # relevant with revlog-v2 which is never inline, not reaching
2396 2394 # this code
2397 2395 if side_write:
2398 2396 old_index_file_path = self._indexfile
2399 2397 new_index_file_path = self._split_index_file
2400 2398 opener = self.opener
2401 2399 weak_self = weakref.ref(self)
2402 2400
2403 2401             # the "split" index replaces the real index when the transaction is finalized
2404 2402 def finalize_callback(tr):
2405 2403 opener.rename(
2406 2404 new_index_file_path,
2407 2405 old_index_file_path,
2408 2406 checkambig=True,
2409 2407 )
2410 2408 maybe_self = weak_self()
2411 2409 if maybe_self is not None:
2412 2410 maybe_self._indexfile = old_index_file_path
2413 2411
2414 2412 def abort_callback(tr):
2415 2413 maybe_self = weak_self()
2416 2414 if maybe_self is not None:
2417 2415 maybe_self._indexfile = old_index_file_path
2418 2416
2419 2417 tr.registertmp(new_index_file_path)
2420 2418 if self.target[1] is not None:
2421 2419 callback_id = b'000-revlog-split-%d-%s' % self.target
2422 2420 else:
2423 2421 callback_id = b'000-revlog-split-%d' % self.target[0]
2424 2422 tr.addfinalize(callback_id, finalize_callback)
2425 2423 tr.addabort(callback_id, abort_callback)
2426 2424
2427 2425 new_dfh = self._datafp(b'w+')
2428 2426 new_dfh.truncate(0) # drop any potentially existing data
2429 2427 try:
2430 2428 with self.reading():
2431 2429 for r in self:
2432 2430 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2433 2431 new_dfh.flush()
2434 2432
2435 2433 if side_write:
2436 2434 self._indexfile = new_index_file_path
2437 2435 with self.__index_new_fp() as fp:
2438 2436 self._format_flags &= ~FLAG_INLINE_DATA
2439 2437 self._inline = False
2440 2438 for i in self:
2441 2439 e = self.index.entry_binary(i)
2442 2440 if i == 0 and self._docket is None:
2443 2441 header = self._format_flags | self._format_version
2444 2442 header = self.index.pack_header(header)
2445 2443 e = header + e
2446 2444 fp.write(e)
2447 2445 if self._docket is not None:
2448 2446 self._docket.index_end = fp.tell()
2449 2447
2450 2448             # If we don't use side-write, the temp file replaces the real
2451 2449 # index when we exit the context manager
2452 2450
2453 2451 nodemaputil.setup_persistent_nodemap(tr, self)
2454 2452 self._segmentfile = randomaccessfile.randomaccessfile(
2455 2453 self.opener,
2456 2454 self._datafile,
2457 2455 self._chunkcachesize,
2458 2456 )
2459 2457
2460 2458 if existing_handles:
2461 2459             # switched from inline to conventional; reopen the index
2462 2460 ifh = self.__index_write_fp()
2463 2461 self._writinghandles = (ifh, new_dfh, None)
2464 2462 self._segmentfile.writing_handle = new_dfh
2465 2463 new_dfh = None
2466 2464 # No need to deal with sidedata writing handle as it is only
2467 2465 # relevant with revlog-v2 which is never inline, not reaching
2468 2466 # this code
2469 2467 finally:
2470 2468 if new_dfh is not None:
2471 2469 new_dfh.close()
2472 2470
2473 2471 def _nodeduplicatecallback(self, transaction, node):
2474 2472 """called when trying to add a node already stored."""
2475 2473
2476 2474 @contextlib.contextmanager
2477 2475 def reading(self):
2478 2476 """Context manager that keeps data and sidedata files open for reading"""
2479 2477 if len(self.index) == 0:
2480 2478 yield # nothing to be read
2481 2479 else:
2482 2480 with self._segmentfile.reading():
2483 2481 with self._segmentfile_sidedata.reading():
2484 2482 yield
2485 2483
2486 2484 @contextlib.contextmanager
2487 2485 def _writing(self, transaction):
2488 2486 if self._trypending:
2489 2487 msg = b'try to write in a `trypending` revlog: %s'
2490 2488 msg %= self.display_id
2491 2489 raise error.ProgrammingError(msg)
2492 2490 if self._writinghandles is not None:
2493 2491 yield
2494 2492 else:
2495 2493 ifh = dfh = sdfh = None
2496 2494 try:
2497 2495 r = len(self)
2498 2496 # opening the data file.
2499 2497 dsize = 0
2500 2498 if r:
2501 2499 dsize = self.end(r - 1)
2502 2500 dfh = None
2503 2501 if not self._inline:
2504 2502 try:
2505 2503 dfh = self._datafp(b"r+")
2506 2504 if self._docket is None:
2507 2505 dfh.seek(0, os.SEEK_END)
2508 2506 else:
2509 2507 dfh.seek(self._docket.data_end, os.SEEK_SET)
2510 2508 except FileNotFoundError:
2511 2509 dfh = self._datafp(b"w+")
2512 2510 transaction.add(self._datafile, dsize)
2513 2511 if self._sidedatafile is not None:
2514 2512 # revlog-v2 does not inline, help Pytype
2515 2513 assert dfh is not None
2516 2514 try:
2517 2515 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2518 2516 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2519 2517 except FileNotFoundError:
2520 2518 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2521 2519 transaction.add(
2522 2520 self._sidedatafile, self._docket.sidedata_end
2523 2521 )
2524 2522
2525 2523 # opening the index file.
2526 2524 isize = r * self.index.entry_size
2527 2525 ifh = self.__index_write_fp()
2528 2526 if self._inline:
2529 2527 transaction.add(self._indexfile, dsize + isize)
2530 2528 else:
2531 2529 transaction.add(self._indexfile, isize)
2532 2530 # exposing all file handle for writing.
2533 2531 self._writinghandles = (ifh, dfh, sdfh)
2534 2532 self._segmentfile.writing_handle = ifh if self._inline else dfh
2535 2533 self._segmentfile_sidedata.writing_handle = sdfh
2536 2534 yield
2537 2535 if self._docket is not None:
2538 2536 self._write_docket(transaction)
2539 2537 finally:
2540 2538 self._writinghandles = None
2541 2539 self._segmentfile.writing_handle = None
2542 2540 self._segmentfile_sidedata.writing_handle = None
2543 2541 if dfh is not None:
2544 2542 dfh.close()
2545 2543 if sdfh is not None:
2546 2544 sdfh.close()
2547 2545                 # closing the index file last to avoid exposing a reference to
2548 2546                 # potentially unflushed data content.
2549 2547 if ifh is not None:
2550 2548 ifh.close()
2551 2549
2552 2550 def _write_docket(self, transaction):
2553 2551 """write the current docket on disk
2554 2552
2555 2553         Exists as a method to help the changelog implement its transaction logic
2556 2554 
2557 2555         We could also imagine using the same transaction logic for all revlogs
2558 2556         since dockets are cheap."""
2559 2557 self._docket.write(transaction)
2560 2558
2561 2559 def addrevision(
2562 2560 self,
2563 2561 text,
2564 2562 transaction,
2565 2563 link,
2566 2564 p1,
2567 2565 p2,
2568 2566 cachedelta=None,
2569 2567 node=None,
2570 2568 flags=REVIDX_DEFAULT_FLAGS,
2571 2569 deltacomputer=None,
2572 2570 sidedata=None,
2573 2571 ):
2574 2572 """add a revision to the log
2575 2573
2576 2574 text - the revision data to add
2577 2575 transaction - the transaction object used for rollback
2578 2576 link - the linkrev data to add
2579 2577 p1, p2 - the parent nodeids of the revision
2580 2578 cachedelta - an optional precomputed delta
2581 2579 node - nodeid of revision; typically node is not specified, and it is
2582 2580 computed by default as hash(text, p1, p2), however subclasses might
2583 2581 use different hashing method (and override checkhash() in such case)
2584 2582 flags - the known flags to set on the revision
2585 2583 deltacomputer - an optional deltacomputer instance shared between
2586 2584 multiple calls
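
        Example (an illustrative sketch only; ``rl``, ``tr`` and the parent
        nodes are hypothetical)::

            rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
            node = rl.node(rev)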
2587 2585 """
2588 2586 if link == nullrev:
2589 2587 raise error.RevlogError(
2590 2588 _(b"attempted to add linkrev -1 to %s") % self.display_id
2591 2589 )
2592 2590
2593 2591 if sidedata is None:
2594 2592 sidedata = {}
2595 2593 elif sidedata and not self.hassidedata:
2596 2594 raise error.ProgrammingError(
2597 2595                 _(b"trying to add sidedata to a revlog that does not support them")
2598 2596 )
2599 2597
2600 2598 if flags:
2601 2599 node = node or self.hash(text, p1, p2)
2602 2600
2603 2601 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2604 2602
2605 2603 # If the flag processor modifies the revision data, ignore any provided
2606 2604 # cachedelta.
2607 2605 if rawtext != text:
2608 2606 cachedelta = None
2609 2607
2610 2608 if len(rawtext) > _maxentrysize:
2611 2609 raise error.RevlogError(
2612 2610 _(
2613 2611 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2614 2612 )
2615 2613 % (self.display_id, len(rawtext))
2616 2614 )
2617 2615
2618 2616 node = node or self.hash(rawtext, p1, p2)
2619 2617 rev = self.index.get_rev(node)
2620 2618 if rev is not None:
2621 2619 return rev
2622 2620
2623 2621 if validatehash:
2624 2622 self.checkhash(rawtext, node, p1=p1, p2=p2)
2625 2623
2626 2624 return self.addrawrevision(
2627 2625 rawtext,
2628 2626 transaction,
2629 2627 link,
2630 2628 p1,
2631 2629 p2,
2632 2630 node,
2633 2631 flags,
2634 2632 cachedelta=cachedelta,
2635 2633 deltacomputer=deltacomputer,
2636 2634 sidedata=sidedata,
2637 2635 )
2638 2636
2639 2637 def addrawrevision(
2640 2638 self,
2641 2639 rawtext,
2642 2640 transaction,
2643 2641 link,
2644 2642 p1,
2645 2643 p2,
2646 2644 node,
2647 2645 flags,
2648 2646 cachedelta=None,
2649 2647 deltacomputer=None,
2650 2648 sidedata=None,
2651 2649 ):
2652 2650 """add a raw revision with known flags, node and parents
2653 2651         useful when reusing a revision not stored in this revlog (e.g. received
2654 2652         over the wire, or read from an external bundle).
2655 2653 """
2656 2654 with self._writing(transaction):
2657 2655 return self._addrevision(
2658 2656 node,
2659 2657 rawtext,
2660 2658 transaction,
2661 2659 link,
2662 2660 p1,
2663 2661 p2,
2664 2662 flags,
2665 2663 cachedelta,
2666 2664 deltacomputer=deltacomputer,
2667 2665 sidedata=sidedata,
2668 2666 )
2669 2667
2670 2668 def compress(self, data):
2671 2669 """Generate a possibly-compressed representation of data."""
2672 2670 if not data:
2673 2671 return b'', data
2674 2672
2675 2673 compressed = self._compressor.compress(data)
2676 2674
2677 2675 if compressed:
2678 2676 # The revlog compressor added the header in the returned data.
2679 2677 return b'', compressed
2680 2678
2681 2679 if data[0:1] == b'\0':
2682 2680 return b'', data
2683 2681 return b'u', data
2684 2682
2685 2683 def decompress(self, data):
2686 2684 """Decompress a revlog chunk.
2687 2685
2688 2686 The chunk is expected to begin with a header identifying the
2689 2687 format type so it can be routed to an appropriate decompressor.
2690 2688 """
2691 2689 if not data:
2692 2690 return data
2693 2691
2694 2692 # Revlogs are read much more frequently than they are written and many
2695 2693 # chunks only take microseconds to decompress, so performance is
2696 2694 # important here.
2697 2695 #
2698 2696 # We can make a few assumptions about revlogs:
2699 2697 #
2700 2698 # 1) the majority of chunks will be compressed (as opposed to inline
2701 2699 # raw data).
2702 2700         # 2) decompressing *any* data will likely be at least 10x slower than
2703 2701 # returning raw inline data.
2704 2702 # 3) we want to prioritize common and officially supported compression
2705 2703 # engines
2706 2704 #
2707 2705 # It follows that we want to optimize for "decompress compressed data
2708 2706 # when encoded with common and officially supported compression engines"
2709 2707 # case over "raw data" and "data encoded by less common or non-official
2710 2708 # compression engines." That is why we have the inline lookup first
2711 2709 # followed by the compengines lookup.
2712 2710 #
2713 2711 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2714 2712 # compressed chunks. And this matters for changelog and manifest reads.
2715 2713 t = data[0:1]
2716 2714
2717 2715 if t == b'x':
2718 2716 try:
2719 2717 return _zlibdecompress(data)
2720 2718 except zlib.error as e:
2721 2719 raise error.RevlogError(
2722 2720 _(b'revlog decompress error: %s')
2723 2721 % stringutil.forcebytestr(e)
2724 2722 )
2725 2723 # '\0' is more common than 'u' so it goes first.
2726 2724 elif t == b'\0':
2727 2725 return data
2728 2726 elif t == b'u':
2729 2727 return util.buffer(data, 1)
2730 2728
2731 2729 compressor = self._get_decompressor(t)
2732 2730
2733 2731 return compressor.decompress(data)
2734 2732
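    # Illustrative summary of the chunk header convention relied upon by
    # ``compress``/``decompress`` above (a sketch, not an exhaustive list of
    # supported engines):
    #
    #     def _describe_chunk(chunk):
    #         if not chunk or chunk[0:1] == b'\0':
    #             return 'raw data, stored as-is'
    #         if chunk[0:1] == b'u':
    #             return 'explicitly uncompressed (marker stripped on read)'
    #         if chunk[0:1] == b'x':
    #             return 'zlib-compressed'
    #         return 'other engine, routed via _get_decompressor()'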
2735 2733 def _addrevision(
2736 2734 self,
2737 2735 node,
2738 2736 rawtext,
2739 2737 transaction,
2740 2738 link,
2741 2739 p1,
2742 2740 p2,
2743 2741 flags,
2744 2742 cachedelta,
2745 2743 alwayscache=False,
2746 2744 deltacomputer=None,
2747 2745 sidedata=None,
2748 2746 ):
2749 2747 """internal function to add revisions to the log
2750 2748
2751 2749 see addrevision for argument descriptions.
2752 2750
2753 2751 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2754 2752
2755 2753 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2756 2754 be used.
2757 2755
2758 2756 invariants:
2759 2757 - rawtext is optional (can be None); if not set, cachedelta must be set.
2760 2758 if both are set, they must correspond to each other.
2761 2759 """
2762 2760 if node == self.nullid:
2763 2761 raise error.RevlogError(
2764 2762 _(b"%s: attempt to add null revision") % self.display_id
2765 2763 )
2766 2764 if (
2767 2765 node == self.nodeconstants.wdirid
2768 2766 or node in self.nodeconstants.wdirfilenodeids
2769 2767 ):
2770 2768 raise error.RevlogError(
2771 2769 _(b"%s: attempt to add wdir revision") % self.display_id
2772 2770 )
2773 2771 if self._writinghandles is None:
2774 2772 msg = b'adding revision outside `revlog._writing` context'
2775 2773 raise error.ProgrammingError(msg)
2776 2774
2777 2775 btext = [rawtext]
2778 2776
2779 2777 curr = len(self)
2780 2778 prev = curr - 1
2781 2779
2782 2780 offset = self._get_data_offset(prev)
2783 2781
2784 2782 if self._concurrencychecker:
2785 2783 ifh, dfh, sdfh = self._writinghandles
2786 2784 # XXX no checking for the sidedata file
2787 2785 if self._inline:
2788 2786 # offset is "as if" it were in the .d file, so we need to add on
2789 2787 # the size of the entry metadata.
2790 2788 self._concurrencychecker(
2791 2789 ifh, self._indexfile, offset + curr * self.index.entry_size
2792 2790 )
2793 2791 else:
2794 2792 # Entries in the .i are a consistent size.
2795 2793 self._concurrencychecker(
2796 2794 ifh, self._indexfile, curr * self.index.entry_size
2797 2795 )
2798 2796 self._concurrencychecker(dfh, self._datafile, offset)
2799 2797
2800 2798 p1r, p2r = self.rev(p1), self.rev(p2)
2801 2799
2802 2800 # full versions are inserted when the needed deltas
2803 2801 # become comparable to the uncompressed text
2804 2802 if rawtext is None:
2805 2803 # need rawtext size, before changed by flag processors, which is
2806 2804 # the non-raw size. use revlog explicitly to avoid filelog's extra
2807 2805 # logic that might remove metadata size.
2808 2806 textlen = mdiff.patchedsize(
2809 2807 revlog.size(self, cachedelta[0]), cachedelta[1]
2810 2808 )
2811 2809 else:
2812 2810 textlen = len(rawtext)
2813 2811
2814 2812 if deltacomputer is None:
2815 2813 write_debug = None
2816 2814 if self._debug_delta:
2817 2815 write_debug = transaction._report
2818 2816 deltacomputer = deltautil.deltacomputer(
2819 2817 self, write_debug=write_debug
2820 2818 )
2821 2819
2822 2820 if cachedelta is not None and len(cachedelta) == 2:
2823 2821 # If the cached delta has no information about how it should be
2824 2822 # reused, add the default reuse instruction according to the
2825 2823 # revlog's configuration.
2826 2824 if self._generaldelta and self._lazydeltabase:
2827 2825 delta_base_reuse = DELTA_BASE_REUSE_TRY
2828 2826 else:
2829 2827 delta_base_reuse = DELTA_BASE_REUSE_NO
2830 2828 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2831 2829
2832 2830 revinfo = revlogutils.revisioninfo(
2833 2831 node,
2834 2832 p1,
2835 2833 p2,
2836 2834 btext,
2837 2835 textlen,
2838 2836 cachedelta,
2839 2837 flags,
2840 2838 )
2841 2839
2842 2840 deltainfo = deltacomputer.finddeltainfo(revinfo)
2843 2841
2844 2842 compression_mode = COMP_MODE_INLINE
2845 2843 if self._docket is not None:
2846 2844 default_comp = self._docket.default_compression_header
2847 2845 r = deltautil.delta_compression(default_comp, deltainfo)
2848 2846 compression_mode, deltainfo = r
2849 2847
2850 2848 sidedata_compression_mode = COMP_MODE_INLINE
2851 2849 if sidedata and self.hassidedata:
2852 2850 sidedata_compression_mode = COMP_MODE_PLAIN
2853 2851 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2854 2852 sidedata_offset = self._docket.sidedata_end
2855 2853 h, comp_sidedata = self.compress(serialized_sidedata)
2856 2854 if (
2857 2855 h != b'u'
2858 2856 and comp_sidedata[0:1] != b'\0'
2859 2857 and len(comp_sidedata) < len(serialized_sidedata)
2860 2858 ):
2861 2859 assert not h
2862 2860 if (
2863 2861 comp_sidedata[0:1]
2864 2862 == self._docket.default_compression_header
2865 2863 ):
2866 2864 sidedata_compression_mode = COMP_MODE_DEFAULT
2867 2865 serialized_sidedata = comp_sidedata
2868 2866 else:
2869 2867 sidedata_compression_mode = COMP_MODE_INLINE
2870 2868 serialized_sidedata = comp_sidedata
2871 2869 else:
2872 2870 serialized_sidedata = b""
2873 2871 # Don't store the offset if the sidedata is empty, that way
2874 2872 # we can easily detect empty sidedata and they will be no different
2875 2873 # than ones we manually add.
2876 2874 sidedata_offset = 0
2877 2875
2878 2876 rank = RANK_UNKNOWN
2879 2877 if self._compute_rank:
2880 2878 if (p1r, p2r) == (nullrev, nullrev):
2881 2879 rank = 1
2882 2880 elif p1r != nullrev and p2r == nullrev:
2883 2881 rank = 1 + self.fast_rank(p1r)
2884 2882 elif p1r == nullrev and p2r != nullrev:
2885 2883 rank = 1 + self.fast_rank(p2r)
2886 2884 else: # merge node
2887 2885 if rustdagop is not None and self.index.rust_ext_compat:
2888 2886 rank = rustdagop.rank(self.index, p1r, p2r)
2889 2887 else:
2890 2888 pmin, pmax = sorted((p1r, p2r))
2891 2889 rank = 1 + self.fast_rank(pmax)
2892 2890 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2893 2891
2894 2892 e = revlogutils.entry(
2895 2893 flags=flags,
2896 2894 data_offset=offset,
2897 2895 data_compressed_length=deltainfo.deltalen,
2898 2896 data_uncompressed_length=textlen,
2899 2897 data_compression_mode=compression_mode,
2900 2898 data_delta_base=deltainfo.base,
2901 2899 link_rev=link,
2902 2900 parent_rev_1=p1r,
2903 2901 parent_rev_2=p2r,
2904 2902 node_id=node,
2905 2903 sidedata_offset=sidedata_offset,
2906 2904 sidedata_compressed_length=len(serialized_sidedata),
2907 2905 sidedata_compression_mode=sidedata_compression_mode,
2908 2906 rank=rank,
2909 2907 )
2910 2908
2911 2909 self.index.append(e)
2912 2910 entry = self.index.entry_binary(curr)
2913 2911 if curr == 0 and self._docket is None:
2914 2912 header = self._format_flags | self._format_version
2915 2913 header = self.index.pack_header(header)
2916 2914 entry = header + entry
2917 2915 self._writeentry(
2918 2916 transaction,
2919 2917 entry,
2920 2918 deltainfo.data,
2921 2919 link,
2922 2920 offset,
2923 2921 serialized_sidedata,
2924 2922 sidedata_offset,
2925 2923 )
2926 2924
2927 2925 rawtext = btext[0]
2928 2926
2929 2927 if alwayscache and rawtext is None:
2930 2928 rawtext = deltacomputer.buildtext(revinfo)
2931 2929
2932 2930 if type(rawtext) == bytes: # only accept immutable objects
2933 2931 self._revisioncache = (node, curr, rawtext)
2934 2932 self._chainbasecache[curr] = deltainfo.chainbase
2935 2933 return curr
2936 2934
2937 2935 def _get_data_offset(self, prev):
2938 2936 """Returns the current offset in the (in-transaction) data file.
2939 2937         Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
2940 2938 file to store that information: since sidedata can be rewritten to the
2941 2939 end of the data file within a transaction, you can have cases where, for
2942 2940 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2943 2941 to `n - 1`'s sidedata being written after `n`'s data.
2944 2942
2945 2943 TODO cache this in a docket file before getting out of experimental."""
2946 2944 if self._docket is None:
2947 2945 return self.end(prev)
2948 2946 else:
2949 2947 return self._docket.data_end
2950 2948
2951 2949 def _writeentry(
2952 2950 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2953 2951 ):
2954 2952 # Files opened in a+ mode have inconsistent behavior on various
2955 2953 # platforms. Windows requires that a file positioning call be made
2956 2954 # when the file handle transitions between reads and writes. See
2957 2955 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2958 2956 # platforms, Python or the platform itself can be buggy. Some versions
2959 2957 # of Solaris have been observed to not append at the end of the file
2960 2958 # if the file was seeked to before the end. See issue4943 for more.
2961 2959 #
2962 2960 # We work around this issue by inserting a seek() before writing.
2963 2961 # Note: This is likely not necessary on Python 3. However, because
2964 2962 # the file handle is reused for reads and may be seeked there, we need
2965 2963 # to be careful before changing this.
2966 2964 if self._writinghandles is None:
2967 2965 msg = b'adding revision outside `revlog._writing` context'
2968 2966 raise error.ProgrammingError(msg)
2969 2967 ifh, dfh, sdfh = self._writinghandles
2970 2968 if self._docket is None:
2971 2969 ifh.seek(0, os.SEEK_END)
2972 2970 else:
2973 2971 ifh.seek(self._docket.index_end, os.SEEK_SET)
2974 2972 if dfh:
2975 2973 if self._docket is None:
2976 2974 dfh.seek(0, os.SEEK_END)
2977 2975 else:
2978 2976 dfh.seek(self._docket.data_end, os.SEEK_SET)
2979 2977 if sdfh:
2980 2978 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2981 2979
2982 2980 curr = len(self) - 1
2983 2981 if not self._inline:
2984 2982 transaction.add(self._datafile, offset)
2985 2983 if self._sidedatafile:
2986 2984 transaction.add(self._sidedatafile, sidedata_offset)
2987 2985 transaction.add(self._indexfile, curr * len(entry))
2988 2986 if data[0]:
2989 2987 dfh.write(data[0])
2990 2988 dfh.write(data[1])
2991 2989 if sidedata:
2992 2990 sdfh.write(sidedata)
2993 2991 ifh.write(entry)
2994 2992 else:
2995 2993 offset += curr * self.index.entry_size
2996 2994 transaction.add(self._indexfile, offset)
2997 2995 ifh.write(entry)
2998 2996 ifh.write(data[0])
2999 2997 ifh.write(data[1])
3000 2998 assert not sidedata
3001 2999 self._enforceinlinesize(transaction)
3002 3000 if self._docket is not None:
3003 3001 # revlog-v2 always has 3 writing handles, help Pytype
3004 3002 wh1 = self._writinghandles[0]
3005 3003 wh2 = self._writinghandles[1]
3006 3004 wh3 = self._writinghandles[2]
3007 3005 assert wh1 is not None
3008 3006 assert wh2 is not None
3009 3007 assert wh3 is not None
3010 3008 self._docket.index_end = wh1.tell()
3011 3009 self._docket.data_end = wh2.tell()
3012 3010 self._docket.sidedata_end = wh3.tell()
3013 3011
3014 3012 nodemaputil.setup_persistent_nodemap(transaction, self)
3015 3013
3016 3014 def addgroup(
3017 3015 self,
3018 3016 deltas,
3019 3017 linkmapper,
3020 3018 transaction,
3021 3019 alwayscache=False,
3022 3020 addrevisioncb=None,
3023 3021 duplicaterevisioncb=None,
3024 3022 debug_info=None,
3025 3023 delta_base_reuse_policy=None,
3026 3024 ):
3027 3025 """
3028 3026 add a delta group
3029 3027
3030 3028 given a set of deltas, add them to the revision log. the
3031 3029 first delta is against its parent, which should be in our
3032 3030 log, the rest are against the previous delta.
3033 3031
3034 3032         If ``addrevisioncb`` is defined, it will be called with arguments of
3035 3033         this revlog and the revision number that was added.
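
        Each item in ``deltas`` is an 8-tuple of
        ``(node, p1, p2, linknode, deltabase, delta, flags, sidedata)``,
        matching the unpacking performed in the loop below.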
3036 3034 """
3037 3035
3038 3036 if self._adding_group:
3039 3037 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3040 3038
3041 3039 # read the default delta-base reuse policy from revlog config if the
3042 3040 # group did not specify one.
3043 3041 if delta_base_reuse_policy is None:
3044 3042 if self._generaldelta and self._lazydeltabase:
3045 3043 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3046 3044 else:
3047 3045 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3048 3046
3049 3047 self._adding_group = True
3050 3048 empty = True
3051 3049 try:
3052 3050 with self._writing(transaction):
3053 3051 write_debug = None
3054 3052 if self._debug_delta:
3055 3053 write_debug = transaction._report
3056 3054 deltacomputer = deltautil.deltacomputer(
3057 3055 self,
3058 3056 write_debug=write_debug,
3059 3057 debug_info=debug_info,
3060 3058 )
3061 3059 # loop through our set of deltas
3062 3060 for data in deltas:
3063 3061 (
3064 3062 node,
3065 3063 p1,
3066 3064 p2,
3067 3065 linknode,
3068 3066 deltabase,
3069 3067 delta,
3070 3068 flags,
3071 3069 sidedata,
3072 3070 ) = data
3073 3071 link = linkmapper(linknode)
3074 3072 flags = flags or REVIDX_DEFAULT_FLAGS
3075 3073
3076 3074 rev = self.index.get_rev(node)
3077 3075 if rev is not None:
3078 3076 # this can happen if two branches make the same change
3079 3077 self._nodeduplicatecallback(transaction, rev)
3080 3078 if duplicaterevisioncb:
3081 3079 duplicaterevisioncb(self, rev)
3082 3080 empty = False
3083 3081 continue
3084 3082
3085 3083 for p in (p1, p2):
3086 3084 if not self.index.has_node(p):
3087 3085 raise error.LookupError(
3088 3086 p, self.radix, _(b'unknown parent')
3089 3087 )
3090 3088
3091 3089 if not self.index.has_node(deltabase):
3092 3090 raise error.LookupError(
3093 3091 deltabase, self.display_id, _(b'unknown delta base')
3094 3092 )
3095 3093
3096 3094 baserev = self.rev(deltabase)
3097 3095
3098 3096 if baserev != nullrev and self.iscensored(baserev):
3099 3097 # if base is censored, delta must be full replacement in a
3100 3098 # single patch operation
3101 3099 hlen = struct.calcsize(b">lll")
3102 3100 oldlen = self.rawsize(baserev)
3103 3101 newlen = len(delta) - hlen
3104 3102 if delta[:hlen] != mdiff.replacediffheader(
3105 3103 oldlen, newlen
3106 3104 ):
3107 3105 raise error.CensoredBaseError(
3108 3106 self.display_id, self.node(baserev)
3109 3107 )
3110 3108
3111 3109 if not flags and self._peek_iscensored(baserev, delta):
3112 3110 flags |= REVIDX_ISCENSORED
3113 3111
3114 3112 # We assume consumers of addrevisioncb will want to retrieve
3115 3113 # the added revision, which will require a call to
3116 3114 # revision(). revision() will fast path if there is a cache
3117 3115 # hit. So, we tell _addrevision() to always cache in this case.
3118 3116 # We're only using addgroup() in the context of changegroup
3119 3117 # generation so the revision data can always be handled as raw
3120 3118 # by the flagprocessor.
3121 3119 rev = self._addrevision(
3122 3120 node,
3123 3121 None,
3124 3122 transaction,
3125 3123 link,
3126 3124 p1,
3127 3125 p2,
3128 3126 flags,
3129 3127 (baserev, delta, delta_base_reuse_policy),
3130 3128 alwayscache=alwayscache,
3131 3129 deltacomputer=deltacomputer,
3132 3130 sidedata=sidedata,
3133 3131 )
3134 3132
3135 3133 if addrevisioncb:
3136 3134 addrevisioncb(self, rev)
3137 3135 empty = False
3138 3136 finally:
3139 3137 self._adding_group = False
3140 3138 return not empty
3141 3139
3142 3140 def iscensored(self, rev):
3143 3141 """Check if a file revision is censored."""
3144 3142 if not self._censorable:
3145 3143 return False
3146 3144
3147 3145 return self.flags(rev) & REVIDX_ISCENSORED
3148 3146
3149 3147 def _peek_iscensored(self, baserev, delta):
3150 3148 """Quickly check if a delta produces a censored revision."""
3151 3149 if not self._censorable:
3152 3150 return False
3153 3151
3154 3152 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3155 3153
3156 3154 def getstrippoint(self, minlink):
3157 3155 """find the minimum rev that must be stripped to strip the linkrev
3158 3156
3159 3157 Returns a tuple containing the minimum rev and a set of all revs that
3160 3158 have linkrevs that will be broken by this strip.
3161 3159 """
3162 3160 return storageutil.resolvestripinfo(
3163 3161 minlink,
3164 3162 len(self) - 1,
3165 3163 self.headrevs(),
3166 3164 self.linkrev,
3167 3165 self.parentrevs,
3168 3166 )
3169 3167
3170 3168 def strip(self, minlink, transaction):
3171 3169 """truncate the revlog on the first revision with a linkrev >= minlink
3172 3170
3173 3171 This function is called when we're stripping revision minlink and
3174 3172 its descendants from the repository.
3175 3173
3176 3174 We have to remove all revisions with linkrev >= minlink, because
3177 3175 the equivalent changelog revisions will be renumbered after the
3178 3176 strip.
3179 3177
3180 3178 So we truncate the revlog on the first of these revisions, and
3181 3179 trust that the caller has saved the revisions that shouldn't be
3182 3180 removed and that it'll re-add them after this truncation.
3183 3181 """
3184 3182 if len(self) == 0:
3185 3183 return
3186 3184
3187 3185 rev, _ = self.getstrippoint(minlink)
3188 3186 if rev == len(self):
3189 3187 return
3190 3188
3191 3189 # first truncate the files on disk
3192 3190 data_end = self.start(rev)
3193 3191 if not self._inline:
3194 3192 transaction.add(self._datafile, data_end)
3195 3193 end = rev * self.index.entry_size
3196 3194 else:
3197 3195 end = data_end + (rev * self.index.entry_size)
3198 3196
3199 3197 if self._sidedatafile:
3200 3198 sidedata_end = self.sidedata_cut_off(rev)
3201 3199 transaction.add(self._sidedatafile, sidedata_end)
3202 3200
3203 3201 transaction.add(self._indexfile, end)
3204 3202 if self._docket is not None:
3205 3203             # XXX we could leverage the docket while stripping. However it is
3206 3204             # not powerful enough at the time of this comment
3207 3205 self._docket.index_end = end
3208 3206 self._docket.data_end = data_end
3209 3207 self._docket.sidedata_end = sidedata_end
3210 3208 self._docket.write(transaction, stripping=True)
3211 3209
3212 3210 # then reset internal state in memory to forget those revisions
3213 3211 self._revisioncache = None
3214 3212 self._chaininfocache = util.lrucachedict(500)
3215 3213 self._segmentfile.clear_cache()
3216 3214 self._segmentfile_sidedata.clear_cache()
3217 3215
3218 3216 del self.index[rev:-1]
3219 3217
3220 3218 def checksize(self):
3221 3219 """Check size of index and data files
3222 3220
3223 3221 return a (dd, di) tuple.
3224 3222 - dd: extra bytes for the "data" file
3225 3223 - di: extra bytes for the "index" file
3226 3224
3227 3225 A healthy revlog will return (0, 0).
3228 3226 """
3229 3227 expected = 0
3230 3228 if len(self):
3231 3229 expected = max(0, self.end(len(self) - 1))
3232 3230
3233 3231 try:
3234 3232 with self._datafp() as f:
3235 3233 f.seek(0, io.SEEK_END)
3236 3234 actual = f.tell()
3237 3235 dd = actual - expected
3238 3236 except FileNotFoundError:
3239 3237 dd = 0
3240 3238
3241 3239 try:
3242 3240 f = self.opener(self._indexfile)
3243 3241 f.seek(0, io.SEEK_END)
3244 3242 actual = f.tell()
3245 3243 f.close()
3246 3244 s = self.index.entry_size
3247 3245 i = max(0, actual // s)
3248 3246 di = actual - (i * s)
3249 3247 if self._inline:
3250 3248 databytes = 0
3251 3249 for r in self:
3252 3250 databytes += max(0, self.length(r))
3253 3251 dd = 0
3254 3252 di = actual - len(self) * s - databytes
3255 3253 except FileNotFoundError:
3256 3254 di = 0
3257 3255
3258 3256 return (dd, di)
3259 3257
3260 3258 def files(self):
3261 3259 res = [self._indexfile]
3262 3260 if self._docket_file is None:
3263 3261 if not self._inline:
3264 3262 res.append(self._datafile)
3265 3263 else:
3266 3264 res.append(self._docket_file)
3267 3265 res.extend(self._docket.old_index_filepaths(include_empty=False))
3268 3266 if self._docket.data_end:
3269 3267 res.append(self._datafile)
3270 3268 res.extend(self._docket.old_data_filepaths(include_empty=False))
3271 3269 if self._docket.sidedata_end:
3272 3270 res.append(self._sidedatafile)
3273 3271 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3274 3272 return res
3275 3273
3276 3274 def emitrevisions(
3277 3275 self,
3278 3276 nodes,
3279 3277 nodesorder=None,
3280 3278 revisiondata=False,
3281 3279 assumehaveparentrevisions=False,
3282 3280 deltamode=repository.CG_DELTAMODE_STD,
3283 3281 sidedata_helpers=None,
3284 3282 debug_info=None,
3285 3283 ):
3286 3284 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3287 3285 raise error.ProgrammingError(
3288 3286 b'unhandled value for nodesorder: %s' % nodesorder
3289 3287 )
3290 3288
3291 3289 if nodesorder is None and not self._generaldelta:
3292 3290 nodesorder = b'storage'
3293 3291
3294 3292 if (
3295 3293 not self._storedeltachains
3296 3294 and deltamode != repository.CG_DELTAMODE_PREV
3297 3295 ):
3298 3296 deltamode = repository.CG_DELTAMODE_FULL
3299 3297
3300 3298 return storageutil.emitrevisions(
3301 3299 self,
3302 3300 nodes,
3303 3301 nodesorder,
3304 3302 revlogrevisiondelta,
3305 3303 deltaparentfn=self.deltaparent,
3306 3304 candeltafn=self._candelta,
3307 3305 rawsizefn=self.rawsize,
3308 3306 revdifffn=self.revdiff,
3309 3307 flagsfn=self.flags,
3310 3308 deltamode=deltamode,
3311 3309 revisiondata=revisiondata,
3312 3310 assumehaveparentrevisions=assumehaveparentrevisions,
3313 3311 sidedata_helpers=sidedata_helpers,
3314 3312 debug_info=debug_info,
3315 3313 )
3316 3314
3317 3315 DELTAREUSEALWAYS = b'always'
3318 3316 DELTAREUSESAMEREVS = b'samerevs'
3319 3317 DELTAREUSENEVER = b'never'
3320 3318
3321 3319 DELTAREUSEFULLADD = b'fulladd'
3322 3320
3323 3321 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3324 3322
3325 3323 def clone(
3326 3324 self,
3327 3325 tr,
3328 3326 destrevlog,
3329 3327 addrevisioncb=None,
3330 3328 deltareuse=DELTAREUSESAMEREVS,
3331 3329 forcedeltabothparents=None,
3332 3330 sidedata_helpers=None,
3333 3331 ):
3334 3332 """Copy this revlog to another, possibly with format changes.
3335 3333
3336 3334 The destination revlog will contain the same revisions and nodes.
3337 3335 However, it may not be bit-for-bit identical due to e.g. delta encoding
3338 3336 differences.
3339 3337
3340 3338         The ``deltareuse`` argument controls how deltas from the existing revlog
3341 3339 are preserved in the destination revlog. The argument can have the
3342 3340 following values:
3343 3341
3344 3342 DELTAREUSEALWAYS
3345 3343 Deltas will always be reused (if possible), even if the destination
3346 3344 revlog would not select the same revisions for the delta. This is the
3347 3345 fastest mode of operation.
3348 3346 DELTAREUSESAMEREVS
3349 3347 Deltas will be reused if the destination revlog would pick the same
3350 3348 revisions for the delta. This mode strikes a balance between speed
3351 3349 and optimization.
3352 3350 DELTAREUSENEVER
3353 3351 Deltas will never be reused. This is the slowest mode of execution.
3354 3352 This mode can be used to recompute deltas (e.g. if the diff/delta
3355 3353 algorithm changes).
3356 3354 DELTAREUSEFULLADD
3357 3355         Revisions will be re-added as if they were new content. This is
3358 3356         slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3359 3357         e.g. large file detection and handling.
3360 3358
3361 3359 Delta computation can be slow, so the choice of delta reuse policy can
3362 3360 significantly affect run time.
3363 3361
3364 3362 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3365 3363         two extremes. Deltas will be reused if they are appropriate. But if a
3366 3364         better delta base could be chosen, it will be. This means if you
3367 3365 are converting a non-generaldelta revlog to a generaldelta revlog,
3368 3366 deltas will be recomputed if the delta's parent isn't a parent of the
3369 3367 revision.
3370 3368
3371 3369 In addition to the delta policy, the ``forcedeltabothparents``
3372 3370         argument controls whether to force computing deltas against both parents
3373 3371         for merges. When unset, the destination revlog's current setting is used.
3374 3372
3375 3373 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3376 3374 `sidedata_helpers`.
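
        Example (an illustrative sketch; ``src``, ``dest`` and ``tr`` are
        hypothetical objects)::

            src.clone(tr, dest, deltareuse=src.DELTAREUSESAMEREVS)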
3377 3375 """
3378 3376 if deltareuse not in self.DELTAREUSEALL:
3379 3377 raise ValueError(
3380 3378 _(b'value for deltareuse invalid: %s') % deltareuse
3381 3379 )
3382 3380
3383 3381 if len(destrevlog):
3384 3382 raise ValueError(_(b'destination revlog is not empty'))
3385 3383
3386 3384 if getattr(self, 'filteredrevs', None):
3387 3385 raise ValueError(_(b'source revlog has filtered revisions'))
3388 3386 if getattr(destrevlog, 'filteredrevs', None):
3389 3387 raise ValueError(_(b'destination revlog has filtered revisions'))
3390 3388
3391 3389         # lazydelta and lazydeltabase control whether to reuse a cached delta,
3392 3390 # if possible.
3393 3391 old_delta_config = destrevlog.delta_config
3394 3392 destrevlog.delta_config = destrevlog.delta_config.copy()
3395 3393
3396 3394 try:
3397 3395 if deltareuse == self.DELTAREUSEALWAYS:
3398 3396 destrevlog.delta_config.lazy_delta_base = True
3399 3397 destrevlog.delta_config.lazy_delta = True
3400 3398 elif deltareuse == self.DELTAREUSESAMEREVS:
3401 3399 destrevlog.delta_config.lazy_delta_base = False
3402 3400 destrevlog.delta_config.lazy_delta = True
3403 3401 elif deltareuse == self.DELTAREUSENEVER:
3404 3402 destrevlog.delta_config.lazy_delta_base = False
3405 3403 destrevlog.delta_config.lazy_delta = False
3406 3404
3407 3405 delta_both_parents = (
3408 3406 forcedeltabothparents or old_delta_config.delta_both_parents
3409 3407 )
3410 3408 destrevlog.delta_config.delta_both_parents = delta_both_parents
3411 3409
3412 3410 with self.reading():
3413 3411 self._clone(
3414 3412 tr,
3415 3413 destrevlog,
3416 3414 addrevisioncb,
3417 3415 deltareuse,
3418 3416 forcedeltabothparents,
3419 3417 sidedata_helpers,
3420 3418 )
3421 3419
3422 3420 finally:
3423 3421 destrevlog.delta_config = old_delta_config
3424 3422
3425 3423 def _clone(
3426 3424 self,
3427 3425 tr,
3428 3426 destrevlog,
3429 3427 addrevisioncb,
3430 3428 deltareuse,
3431 3429 forcedeltabothparents,
3432 3430 sidedata_helpers,
3433 3431 ):
3434 3432 """perform the core duty of `revlog.clone` after parameter processing"""
3435 3433 write_debug = None
3436 3434 if self._debug_delta:
3437 3435 write_debug = tr._report
3438 3436 deltacomputer = deltautil.deltacomputer(
3439 3437 destrevlog,
3440 3438 write_debug=write_debug,
3441 3439 )
3442 3440 index = self.index
3443 3441 for rev in self:
3444 3442 entry = index[rev]
3445 3443
3446 3444 # Some classes override linkrev to take filtered revs into
3447 3445 # account. Use raw entry from index.
3448 3446 flags = entry[0] & 0xFFFF
3449 3447 linkrev = entry[4]
3450 3448 p1 = index[entry[5]][7]
3451 3449 p2 = index[entry[6]][7]
3452 3450 node = entry[7]
3453 3451
3454 3452 # (Possibly) reuse the delta from the revlog if allowed and
3455 3453 # the revlog chunk is a delta.
3456 3454 cachedelta = None
3457 3455 rawtext = None
3458 3456 if deltareuse == self.DELTAREUSEFULLADD:
3459 3457 text = self._revisiondata(rev)
3460 3458 sidedata = self.sidedata(rev)
3461 3459
3462 3460 if sidedata_helpers is not None:
3463 3461 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3464 3462 self, sidedata_helpers, sidedata, rev
3465 3463 )
3466 3464 flags = flags | new_flags[0] & ~new_flags[1]
3467 3465
3468 3466 destrevlog.addrevision(
3469 3467 text,
3470 3468 tr,
3471 3469 linkrev,
3472 3470 p1,
3473 3471 p2,
3474 3472 cachedelta=cachedelta,
3475 3473 node=node,
3476 3474 flags=flags,
3477 3475 deltacomputer=deltacomputer,
3478 3476 sidedata=sidedata,
3479 3477 )
3480 3478 else:
3481 3479 if destrevlog._lazydelta:
3482 3480 dp = self.deltaparent(rev)
3483 3481 if dp != nullrev:
3484 3482 cachedelta = (dp, bytes(self._chunk(rev)))
3485 3483
3486 3484 sidedata = None
3487 3485 if not cachedelta:
3488 3486 rawtext = self._revisiondata(rev)
3489 3487 sidedata = self.sidedata(rev)
3490 3488 if sidedata is None:
3491 3489 sidedata = self.sidedata(rev)
3492 3490
3493 3491 if sidedata_helpers is not None:
3494 3492 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3495 3493 self, sidedata_helpers, sidedata, rev
3496 3494 )
3497 3495 flags = flags | new_flags[0] & ~new_flags[1]
3498 3496
3499 3497 with destrevlog._writing(tr):
3500 3498 destrevlog._addrevision(
3501 3499 node,
3502 3500 rawtext,
3503 3501 tr,
3504 3502 linkrev,
3505 3503 p1,
3506 3504 p2,
3507 3505 flags,
3508 3506 cachedelta,
3509 3507 deltacomputer=deltacomputer,
3510 3508 sidedata=sidedata,
3511 3509 )
3512 3510
3513 3511 if addrevisioncb:
3514 3512 addrevisioncb(self, rev, node)
3515 3513
3516 3514 def censorrevision(self, tr, censornode, tombstone=b''):
3517 3515 if self._format_version == REVLOGV0:
3518 3516 raise error.RevlogError(
3519 3517 _(b'cannot censor with version %d revlogs')
3520 3518 % self._format_version
3521 3519 )
3522 3520 elif self._format_version == REVLOGV1:
3523 3521 rewrite.v1_censor(self, tr, censornode, tombstone)
3524 3522 else:
3525 3523 rewrite.v2_censor(self, tr, censornode, tombstone)
3526 3524
3527 3525 def verifyintegrity(self, state):
3528 3526 """Verifies the integrity of the revlog.
3529 3527
3530 3528 Yields ``revlogproblem`` instances describing problems that are
3531 3529 found.
3532 3530 """
3533 3531 dd, di = self.checksize()
3534 3532 if dd:
3535 3533 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3536 3534 if di:
3537 3535 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3538 3536
3539 3537 version = self._format_version
3540 3538
3541 3539 # The verifier tells us what version revlog we should be.
3542 3540 if version != state[b'expectedversion']:
3543 3541 yield revlogproblem(
3544 3542 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3545 3543 % (self.display_id, version, state[b'expectedversion'])
3546 3544 )
3547 3545
3548 3546 state[b'skipread'] = set()
3549 3547 state[b'safe_renamed'] = set()
3550 3548
3551 3549 for rev in self:
3552 3550 node = self.node(rev)
3553 3551
3554 3552 # Verify contents. 4 cases to care about:
3555 3553 #
3556 3554 # common: the most common case
3557 3555 # rename: with a rename
3558 3556 # meta: file content starts with b'\1\n', the metadata
3559 3557 # header defined in filelog.py, but without a rename
3560 3558 # ext: content stored externally
3561 3559 #
3562 3560 # More formally, their differences are shown below:
3563 3561 #
3564 3562 # | common | rename | meta | ext
3565 3563 # -------------------------------------------------------
3566 3564 # flags() | 0 | 0 | 0 | not 0
3567 3565 # renamed() | False | True | False | ?
3568 3566 # rawtext[0:2]=='\1\n'| False | True | True | ?
3569 3567 #
3570 3568 # "rawtext" means the raw text stored in revlog data, which
3571 3569 # could be retrieved by "rawdata(rev)". "text"
3572 3570 # mentioned below is "revision(rev)".
3573 3571 #
3574 3572 # There are 3 different lengths stored physically:
3575 3573 # 1. L1: rawsize, stored in revlog index
3576 3574 # 2. L2: len(rawtext), stored in revlog data
3577 3575 # 3. L3: len(text), stored in revlog data if flags==0, or
3578 3576 # possibly somewhere else if flags!=0
3579 3577 #
3580 3578 # L1 should be equal to L2. L3 could be different from them.
3581 3579 # "text" may or may not affect commit hash depending on flag
3582 3580 # processors (see flagutil.addflagprocessor).
3583 3581 #
3584 3582 # | common | rename | meta | ext
3585 3583 # -------------------------------------------------
3586 3584 # rawsize() | L1 | L1 | L1 | L1
3587 3585 # size() | L1 | L2-LM | L1(*) | L1 (?)
3588 3586 # len(rawtext) | L2 | L2 | L2 | L2
3589 3587 # len(text) | L2 | L2 | L2 | L3
3590 3588 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3591 3589 #
3592 3590 # LM: length of metadata, depending on rawtext
3593 3591 # (*): not ideal, see comment in filelog.size
3594 3592 # (?): could be "- len(meta)" if the resolved content has
3595 3593 # rename metadata
3596 3594 #
3597 3595 # Checks needed to be done:
3598 3596 # 1. length check: L1 == L2, in all cases.
3599 3597 # 2. hash check: depending on flag processor, we may need to
3600 3598 # use either "text" (external), or "rawtext" (in revlog).
3601 3599
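# Illustrative example of the "rename" case above (comment added for
# clarity, not part of this change): for a copied file the stored rawtext is
#
#   b'\1\ncopy: old/path\ncopyrev: <40 hex chars>\n\1\n' + file content
#
# so len(rawtext) (L2) includes the LM metadata bytes while read() strips
# them, which is why the table shows len(read()) == L2 - LM for renames.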
3602 3600 try:
3603 3601 skipflags = state.get(b'skipflags', 0)
3604 3602 if skipflags:
3605 3603 skipflags &= self.flags(rev)
3606 3604
3607 3605 _verify_revision(self, skipflags, state, node)
3608 3606
3609 3607 l1 = self.rawsize(rev)
3610 3608 l2 = len(self.rawdata(node))
3611 3609
3612 3610 if l1 != l2:
3613 3611 yield revlogproblem(
3614 3612 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3615 3613 node=node,
3616 3614 )
3617 3615
3618 3616 except error.CensoredNodeError:
3619 3617 if state[b'erroroncensored']:
3620 3618 yield revlogproblem(
3621 3619 error=_(b'censored file data'), node=node
3622 3620 )
3623 3621 state[b'skipread'].add(node)
3624 3622 except Exception as e:
3625 3623 yield revlogproblem(
3626 3624 error=_(b'unpacking %s: %s')
3627 3625 % (short(node), stringutil.forcebytestr(e)),
3628 3626 node=node,
3629 3627 )
3630 3628 state[b'skipread'].add(node)
3631 3629
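# Illustrative sketch only (assumption, not part of this change): consuming
# the problems reported by ``verifyintegrity``. The ``state`` mapping is
# normally assembled by the verify machinery; only the keys read above are
# shown, and ``rl``/``ui`` are hypothetical names.
#
#   state = {b'expectedversion': rl._format_version,
#            b'erroroncensored': False}
#   for problem in rl.verifyintegrity(state):
#       ui.warn((problem.error or problem.warning) + b'\n')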
3632 3630 def storageinfo(
3633 3631 self,
3634 3632 exclusivefiles=False,
3635 3633 sharedfiles=False,
3636 3634 revisionscount=False,
3637 3635 trackedsize=False,
3638 3636 storedsize=False,
3639 3637 ):
3640 3638 d = {}
3641 3639
3642 3640 if exclusivefiles:
3643 3641 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3644 3642 if not self._inline:
3645 3643 d[b'exclusivefiles'].append((self.opener, self._datafile))
3646 3644
3647 3645 if sharedfiles:
3648 3646 d[b'sharedfiles'] = []
3649 3647
3650 3648 if revisionscount:
3651 3649 d[b'revisionscount'] = len(self)
3652 3650
3653 3651 if trackedsize:
3654 3652 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3655 3653
3656 3654 if storedsize:
3657 3655 d[b'storedsize'] = sum(
3658 3656 self.opener.stat(path).st_size for path in self.files()
3659 3657 )
3660 3658
3661 3659 return d
3662 3660
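# Illustrative sketch only (not part of this change): requesting a subset of
# the storage details; ``rl`` is a hypothetical revlog instance.
#
#   info = rl.storageinfo(revisionscount=True, trackedsize=True)
#   # -> {b'revisionscount': <int>,
#   #     b'trackedsize': <sum of rawsize over all revisions>}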
3663 3661 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3664 3662 if not self.hassidedata:
3665 3663 return
3666 3664 # revlog formats with sidedata support do not support inline data
3667 3665 assert not self._inline
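# Descriptive note (added comment, not part of this change): ``helpers`` is
# expected to be the (repo, computers, removers) triple built by the sidedata
# helper machinery, hence the helpers[1]/helpers[2] checks below.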
3668 3666 if not helpers[1] and not helpers[2]:
3669 3667 # Nothing to generate or remove
3670 3668 return
3671 3669
3672 3670 new_entries = []
3673 3671 # append the new sidedata
3674 3672 with self._writing(transaction):
3675 3673 ifh, dfh, sdfh = self._writinghandles
3676 3674 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3677 3675
3678 3676 current_offset = sdfh.tell()
3679 3677 for rev in range(startrev, endrev + 1):
3680 3678 entry = self.index[rev]
3681 3679 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3682 3680 store=self,
3683 3681 sidedata_helpers=helpers,
3684 3682 sidedata={},
3685 3683 rev=rev,
3686 3684 )
3687 3685
3688 3686 serialized_sidedata = sidedatautil.serialize_sidedata(
3689 3687 new_sidedata
3690 3688 )
3691 3689
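# Descriptive note (added comment, not part of this change) on the three
# storage modes chosen below:
# - COMP_MODE_PLAIN: the chunk is kept uncompressed,
# - COMP_MODE_DEFAULT: compressed with the docket's default engine, so no
#   per-chunk compression header needs to be stored,
# - COMP_MODE_INLINE: the compression header stays inline with the chunk.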
3692 3690 sidedata_compression_mode = COMP_MODE_INLINE
3693 3691 if serialized_sidedata and self.hassidedata:
3694 3692 sidedata_compression_mode = COMP_MODE_PLAIN
3695 3693 h, comp_sidedata = self.compress(serialized_sidedata)
3696 3694 if (
3697 3695 h != b'u'
3698 3696 and comp_sidedata[0] != b'\0'
3699 3697 and len(comp_sidedata) < len(serialized_sidedata)
3700 3698 ):
3701 3699 assert not h
3702 3700 if (
3703 3701 comp_sidedata[0]
3704 3702 == self._docket.default_compression_header
3705 3703 ):
3706 3704 sidedata_compression_mode = COMP_MODE_DEFAULT
3707 3705 serialized_sidedata = comp_sidedata
3708 3706 else:
3709 3707 sidedata_compression_mode = COMP_MODE_INLINE
3710 3708 serialized_sidedata = comp_sidedata
3711 3709 if entry[8] != 0 or entry[9] != 0:
3712 3710 # rewriting entries that already have sidedata is not
3713 3711 # supported yet, because it introduces garbage data in the
3714 3712 # revlog.
3715 3713 msg = b"rewriting existing sidedata is not supported yet"
3716 3714 raise error.Abort(msg)
3717 3715
3718 3716 # Apply (potential) flags to add and to remove after running
3719 3717 # the sidedata helpers
3720 3718 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3721 3719 entry_update = (
3722 3720 current_offset,
3723 3721 len(serialized_sidedata),
3724 3722 new_offset_flags,
3725 3723 sidedata_compression_mode,
3726 3724 )
3727 3725
3728 3726 # the sidedata computation might have moved the file cursors around
3729 3727 sdfh.seek(current_offset, os.SEEK_SET)
3730 3728 sdfh.write(serialized_sidedata)
3731 3729 new_entries.append(entry_update)
3732 3730 current_offset += len(serialized_sidedata)
3733 3731 self._docket.sidedata_end = sdfh.tell()
3734 3732
3735 3733 # rewrite the new index entries
3736 3734 ifh.seek(startrev * self.index.entry_size)
3737 3735 for i, e in enumerate(new_entries):
3738 3736 rev = startrev + i
3739 3737 self.index.replace_sidedata_info(rev, *e)
3740 3738 packed = self.index.entry_binary(rev)
3741 3739 if rev == 0 and self._docket is None:
3742 3740 header = self._format_flags | self._format_version
3743 3741 header = self.index.pack_header(header)
3744 3742 packed = header + packed
3745 3743 ifh.write(packed)