##// END OF EJS Templates
exchange: improve computation of relevant markers for large repos...
exchange: improve computation of relevant markers for large repos Compute the candidate nodes with relevant markers directly from keys of the predecessors/successors/children dictionaries of obsstore. This is faster than iterating over all nodes directly. This test could be further improved for repositories with relative few markers compared to the repository size, but this is no longer hot already. With the current loop structure, the obshashrange use works as well as before as it passes lists with a single node. Adjust the interface by allowing revision lists as well as node lists. This helps cases that computes ancestors as it reduces the materialisation cost. Use this in _pushdiscoveryobsmarker and _getbundleobsmarkerpart. Improve the latter further by directly using ancestors(). Performance benchmarks show notable and welcome improvement to no-op push and pull (that would also apply to other push/pull). This apply to push and pull done without evolve. ### push/pull Benchmark parameter # bin-env-vars.hg.flavor = default # benchmark.variants.explicit-rev = none # benchmark.variants.protocol = ssh # benchmark.variants.revs = none ## benchmark.name = hg.command.pull # data-env-vars.name = mercurial-devel-2024-03-22-zstd-sparse-revlog before: 5.968537 seconds after: 5.668507 seconds (-5.03%, -0.30) # data-env-vars.name = tryton-devel-2024-03-22-zstd-sparse-revlog before: 1.446232 seconds after: 0.835553 seconds (-42.23%, -0.61) # data-env-vars.name = netbsd-src-draft-2024-09-19-zstd-sparse-revlog before: 5.777412 seconds after: 2.523454 seconds (-56.32%, -3.25) ## benchmark.name = hg.command.push # data-env-vars.name = mercurial-devel-2024-03-22-zstd-sparse-revlog before: 6.155501 seconds after: 5.885072 seconds (-4.39%, -0.27) # data-env-vars.name = tryton-devel-2024-03-22-zstd-sparse-revlog before: 1.491054 seconds after: 0.934882 seconds (-37.30%, -0.56) # data-env-vars.name = netbsd-src-draft-2024-09-19-zstd-sparse-revlog before: 5.902494 seconds after: 
2.957644 seconds (-49.89%, -2.94) There is no notable difference in these results using the "rust" flavor instead of the "default". The performance impact on the same operations when using evolve was also tested and no impact was noted.

File last commit:

r52757:1c5810ce default
r52789:8583d138 default
Show More
chainsaw.py
228 lines | 7.4 KiB | text/x-python | PythonLexer
# chainsaw.py
#
# Copyright 2022 Georges Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""chainsaw is a collection of single-minded and dangerous tools. (EXPERIMENTAL)
"Don't use a chainsaw to cut your food!"
The chainsaw is a collection of commands that are so much geared towards a
specific use case in a specific context or environment that they are totally
inappropriate and **really dangerous** in other contexts.
The help text of each command explicitly summarizes its context of application
and the wanted end result.
It is recommended to run these commands with the ``HGPLAIN`` environment
variable (see :hg:`help scripting`).
"""
from __future__ import annotations

import os
import shutil

from ..i18n import _
from .. import (
    cmdutil,
    commands,
    error,
    localrepo,
    registrar,
)
from ..utils import (
    urlutil,
)
# Command table of this module; it is handed to ``registrar.command`` right
# below to build the ``@command`` decorator.
cmdtable = {}
# Decorator used in this file to declare commands (see ``update``).
command = registrar.command(cmdtable)
@command(
    b'admin::chainsaw-update',
    [
        (
            b'',
            b'purge-unknown',
            True,
            _(
                b'Remove unversioned files before update. Disabling this can '
                b'in some cases interfere with the update. '
                b'See also :hg:`purge`.'
            ),
        ),
        (
            b'',
            b'purge-ignored',
            True,
            _(
                b'Remove ignored files before update. Disable this for '
                b'instance to reuse previous compiler object files. '
                b'See also :hg:`purge`.'
            ),
        ),
        (
            b'',
            b'rev',
            b'',
            _(b'revision to update to'),
        ),
        (
            b'',
            b'source',
            b'',
            _(b'repository to clone from'),
        ),
        (
            b'',
            b'dest',
            b'',
            _(b'repository to update to REV (possibly cloning)'),
        ),
        (
            b'',
            b'initial-clone-minimal',
            False,
            _(
                b'Pull only the prescribed revision upon initial cloning. '
                b'This has the side effect of ignoring clone-bundles, '
                b'which is often slower on the client side and stressful '
                b'to the server than applying available clone bundles.'
            ),
        ),
    ],
    _(
        b'hg admin::chainsaw-update [OPTION] --rev REV --source SOURCE --dest DEST'
    ),
    helpbasic=True,
    norepo=True,
)
def update(ui, **opts):
    """pull and update to a given revision, no matter what, (EXPERIMENTAL)

    Context of application: *some* Continuous Integration (CI) systems,
    packaging or deployment tools.

    Wanted end result: local repository at the given DEST, having the
    latest changes to the given revision and with a clean working directory
    updated at the given revision.

    chainsaw-update pulls from one source, then updates the working directory
    to the given revision, overcoming anything that would stand in the way.

    By default, it will:

    - clone if the local repo does not exist yet, **removing any directory
      at the given path** that would not be a Mercurial repository.
      The initial clone is full by default, so that clonebundles can be
      applied. Use the --initial-clone-minimal flag to avoid this.
    - break locks if needed, leading to possible corruption if there
      is a concurrent write access.
    - perform recovery actions if needed
    - revert any local modification.
    - purge unknown and ignored files.
    - go as far as to reclone if everything else failed (not implemented yet).

    DO NOT use it for anything else than performing a series
    of unattended updates, with full exclusive repository access each time
    and without any other local work than running build scripts.

    In case the local repository is a share (see :hg:`help share`), exclusive
    write access to the share source is also mandatory.

    It is recommended to run these commands with the ``HGPLAIN`` environment
    variable (see :hg:`help scripting`).

    Motivation: in Continuous Integration and Delivery systems (CI/CD), the
    occasional remnant or bogus lock are common sources of waste of time (both
    working time and calendar time). CI/CD scripts tend to grow with counter-
    measures, often done in urgency. Also, whilst it is neat to keep
    repositories from one job to the next (especially with large
    repositories), an exceptional recloning is better than missing a release
    deadline.
    """
    # Developer notes (the docstring above is the user-facing help text):
    #
    # - ``opts`` carries the command-line options declared in the decorator;
    #   ``rev``, ``source`` and ``dest`` are mandatory and their absence
    #   raises ``error.InputError``.
    # - The first truthy result of the delegated pull/purge/update commands
    #   is returned as-is; on full success the function falls off the end
    #   and returns None.
    rev = opts['rev']
    source = opts['source']
    repo_path = opts['dest']
    if not rev:
        raise error.InputError(_(b'specify a target revision with --rev'))
    if not source:
        raise error.InputError(_(b'specify a pull path with --source'))
    if not repo_path:
        raise error.InputError(_(b'specify a repo path with --dest'))
    repo_path = urlutil.urllocalpath(repo_path)

    # Note: messages are formatted *after* the ``_()`` lookup, otherwise the
    # already-interpolated string would never match a catalog entry.
    try:
        repo = localrepo.instance(ui, repo_path, create=False)
    except error.RepoError:
        # Whatever sits at ``repo_path`` is not a usable repository: wipe it
        # and start from scratch.
        try:
            try:
                shutil.rmtree(repo_path)
            except NotADirectoryError:
                # ``rmtree`` refuses plain files; the status message below
                # promises that files are removed too.
                os.unlink(repo_path)
        except FileNotFoundError:
            ui.status(_(b'no such directory: "%s"\n') % repo_path)
        else:
            ui.status(
                _(b'removed non-repository file or directory at "%s"\n')
                % repo_path
            )

        ui.status(_(b'creating repository at "%s"\n') % repo_path)
        repo = localrepo.instance(ui, repo_path, create=True)
        repo_created = True
    else:
        repo_created = False
        ui.status(_(b'loaded repository at "%s"\n') % repo_path)

    if repo.svfs.tryunlink(b'lock'):
        ui.status(_(b'had to break store lock\n'))
    if repo.vfs.tryunlink(b'wlock'):
        ui.status(_(b'had to break working copy lock\n'))
    # If another process relocks after the breaking above, the next locking
    # will have to wait.
    with repo.wlock(), repo.lock():
        ui.status(_(b'recovering after interrupted transaction, if any\n'))
        repo.recover()

        ui.status(_(b'pulling from %s\n') % source)
        # A freshly created repository gets a full pull (empty revision
        # list) unless --initial-clone-minimal was requested.
        if repo_created and not opts.get('initial_clone_minimal'):
            pull_revs = []
        else:
            pull_revs = [rev]
        overrides = {(b'ui', b'quiet'): True}
        with repo.ui.configoverride(overrides, b'chainsaw-update'):
            pull_cmd = cmdutil.findcmd(b'pull', commands.table)[1][0]
            ret = pull_cmd(
                repo.ui,
                repo,
                source,
                rev=pull_revs,
                remote_hidden=False,
            )
        if ret:
            return ret

        purge_cmd = cmdutil.findcmd(b'purge', commands.table)[1][0]
        ret = purge_cmd(
            ui,
            repo,
            dirs=True,
            all=opts.get('purge_ignored'),
            files=opts.get('purge_unknown'),
            confirm=False,
        )
        if ret:
            return ret

        ui.status(_(b'updating to revision \'%s\'\n') % rev)
        # Named ``update_cmd`` so that it does not shadow this function.
        update_cmd = cmdutil.findcmd(b'update', commands.table)[1][0]
        ret = update_cmd(ui, repo, rev=rev, clean=True)
        if ret:
            return ret

        ui.status(
            _(
                b'chainsaw-update to revision \'%s\' '
                b'for repository at \'%s\' done\n'
            )
            % (rev, repo.root)
        )