#!/usr/bin/env python3
# Search for interesting discovery instance
#
# search-discovery-case REPO [REPO]…
#
# This uses the subsetmaker extension (next to this script) to generate a
# stream of random discovery instances. When interesting cases are
# discovered, information about them is printed on stdout.
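#
# Example invocation (repository paths are hypothetical):
#
#   ./search-discovery-case ~/repos/pypy ~/repos/mozilla-central
#
# Each interesting case is printed on stdout as a single line:
#
#   <repo> <left-case> <right-case> <roundtrips> <undecided-common> <undecided-missing>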
from __future__ import print_function
import json
import os
import queue
import random
import signal
import subprocess
import sys
import threading

# This script lives in contrib/perf-utils, so the root of the Mercurial
# checkout (and its `hg` script) is two directories up.
this_script = os.path.abspath(sys.argv[0])
this_dir = os.path.dirname(this_script)
hg_dir = os.path.join(this_dir, '..', '..')
HG_REPO = os.path.normpath(hg_dir)
HG_BIN = os.path.join(HG_REPO, 'hg')

# number of worker threads (NUMBER_OF_PROCESSORS is predefined on Windows,
# otherwise default to 8)
JOB = int(os.environ.get('NUMBER_OF_PROCESSORS', 8))

# the available strategies to select a random subset of a repository
SLICING = ('scratch', 'randomantichain', 'rev')


def nb_revs(repo_path):
    """return the revision number of the tip of the repository at `repo_path`"""
    cmd = [
        HG_BIN,
        '--repository',
        repo_path,
        'log',
        '--template',
        '{rev}',
        '--rev',
        'tip',
    ]
    s = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    out, err = s.communicate()
    return int(out)


repos = []
for repo in sys.argv[1:]:
    size = nb_revs(repo)
    repos.append((repo, size))


def pick_one(repo):
    """randomly pick a way of selecting a subset of `repo`

    `repo` is a (path, number-of-revisions) pair.
    """
    pick = random.choice(SLICING)
    seed = random.randint(0, 100000)
    if pick == 'scratch':
        start = int(repo[1] * 0.3)
        end = int(repo[1] * 0.7)
        nb = random.randint(start, end)
        return ('scratch', nb, seed)
    elif pick == 'randomantichain':
        return ('randomantichain', seed)
    elif pick == 'rev':
        start = int(repo[1] * 0.3)
        end = int(repo[1])
        rev = random.randint(start, end)
        return ('rev', rev)
    else:
        assert False
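
# `pick_one` returns tuples such as ('scratch', 1200, 42),
# ('randomantichain', 42) or ('rev', 2500) (values illustrative), which
# `to_revsets` below turns into actual revsets.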

# shared state between the main thread and the worker threads
done = threading.Event()
cases = queue.Queue(maxsize=10 * JOB)
results = queue.Queue()


def worker():
    while not done.is_set():
        c = cases.get()
        if c is None:
            return
        try:
            res = process(c)
            results.put((c, res))
        except Exception as exc:
            print('processing-failed: %s %s' % (c, exc), file=sys.stderr)
        # discovery is not symmetrical, so also process the case with the
        # local and remote sides swapped
        c = (c[0], c[2], c[1])
        try:
            res = process(c)
            results.put((c, res))
        except Exception as exc:
            print('processing-failed: %s %s' % (c, exc), file=sys.stderr)


SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')

CMD_BASE = (
    HG_BIN,
    'debugdiscovery',
    '--template',
    'json',
    '--config',
    'extensions.subset=%s' % SUBSET_PATH,
)


def to_revsets(case):
    """turn a case tuple from `pick_one` into a revset string"""
    t = case[0]
    if t == 'scratch':
        return 'not scratch(all(), %d, "%d")' % (case[1], case[2])
    elif t == 'randomantichain':
        return '::randomantichain(all(), "%d")' % case[1]
    elif t == 'rev':
        return '::%d' % case[1]
    else:
        assert False
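
# For example (values illustrative):
#
#   to_revsets(('scratch', 100, 42))    -> 'not scratch(all(), 100, "42")'
#   to_revsets(('randomantichain', 42)) -> '::randomantichain(all(), "42")'
#   to_revsets(('rev', 2500))           -> '::2500'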


def process(case):
    """run `hg debugdiscovery` on a (repo, left, right) case and return the
    first entry of its JSON output"""
    (repo, left, right) = case
    cmd = list(CMD_BASE)
    cmd.append('-R')
    cmd.append(repo[0])
    cmd.append('--local-as-revs')
    cmd.append(to_revsets(left))
    cmd.append('--remote-as-revs')
    cmd.append(to_revsets(right))
    s = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    out, err = s.communicate()
    return json.loads(out)[0]
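
# The resulting invocation looks like this (revsets illustrative):
#
#   hg debugdiscovery --template json \
#       --config extensions.subset=.../subsetmaker.py \
#       -R <repo> --local-as-revs '::2500' --remote-as-revs '::3000'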


def interesting_boundary(res):
    """check if a case is interesting or not

    For now we are mostly interested in cases where we do multiple
    round-trips and where the boundary is somewhere in the middle of the
    undecided set.

    Ideally, we would make this configurable, but this is not a focus for
    now.

    return None or (round-trips, undecided-common, undecided-missing)
    """
    roundtrips = res["total-roundtrips"]
    if roundtrips <= 1:
        return None
    undecided_common = res["nb-ini_und-common"]
    undecided_missing = res["nb-ini_und-missing"]
    if undecided_common == 0:
        return None
    if undecided_missing == 0:
        return None
    return (roundtrips, undecided_common, undecided_missing)
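
# `res` is one entry of the `hg debugdiscovery --template json` output; only
# the following keys are consumed here (values illustrative):
#
#   {"total-roundtrips": 3, "nb-ini_und-common": 12, "nb-ini_und-missing": 7}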


def end(*args, **kwargs):
    done.set()


def format_case(case):
    return '-'.join(str(s) for s in case)


# stop cleanly on Ctrl-C
signal.signal(signal.SIGINT, end)

for i in range(JOB):
    threading.Thread(target=worker).start()

nb_cases = 0
while not done.is_set():
    repo = random.choice(repos)
    left = pick_one(repo)
    right = pick_one(repo)
    cases.put((repo, left, right))
    while not results.empty():
        # results has a single reader so this is fine
        c, res = results.get_nowait()
        boundary = interesting_boundary(res)
        if boundary is not None:
            print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)
            sys.stdout.flush()
        nb_cases += 1
        if not nb_cases % 100:
            print('[%d cases generated]' % nb_cases, file=sys.stderr)

# tell the workers to stop
for i in range(JOB):
    try:
        cases.put_nowait(None)
    except queue.Full:
        pass

print('[%d cases generated]' % nb_cases, file=sys.stderr)
print('[output generation is over]', file=sys.stderr)