upstream/mercurial-mirror Files · contrib/perf-utils/search-discovery-case

perf-util: add a `compare-discovery-case` script...

perf-util: add a `compare-discovery-case` script This script run the same discovery case using multiple variants of the algorithm and report differences in behavior, especially regarding the numbers of roundtrip. Differential Revision: https://phab.mercurial-scm.org/D12399

marmoute - - Load All Authors

File last commit:

r49880:a2bd6b23 default


                r49882:a78c45a2

default

Download file

             search-discovery-case
        
                    207 lines
            
             | 5.1 KiB
            
                | text/plain
            
             |
                TextLexer

/ contrib / perf-utils / search-discovery-case

History | Source | Raw |Copy content |Copy permalink

marmoute perf-helpers: add a search-discovery-case script...	r47505	#!/usr/bin/env python3
		# Search for interesting discovery instance
		#
		# search-discovery-case REPO [REPO]…
		#
		# This uses a subsetmaker extension (next to this script) to generate a stream
		# of random discovery instances. When interesting cases are discovered,
		# information about them is printed on stdout.

		import json
		import os
		import queue
		import random
		import signal
		import subprocess
		import sys
		import threading

		# Locate the Mercurial checkout containing this script (two directories
		# above the script's own directory) and the `hg` executable at its root.
		this_script = os.path.abspath(sys.argv[0])
		this_dir = os.path.dirname(this_script)
		hg_dir = os.path.join(this_dir, '..', '..')
		HG_REPO = os.path.normpath(hg_dir)
		HG_BIN = os.path.join(HG_REPO, 'hg')

		# Number of worker threads to start; taken from the NUMBER_OF_PROCESSORS
		# environment variable, defaulting to 8 when it is not set.
		JOB = int(os.environ.get('NUMBER_OF_PROCESSORS', 8))


		# The kinds of random subset a discovery case can be built from. See
		# `pick_one` for how each kind is drawn and `to_revsets` for how it is
		# rendered as a revset.
		SLICING = ('scratch', 'randomantichain', 'rev')


		def nb_revs(repo_path):
		    """Return the revision number of `tip` in the repository at `repo_path`.

		    Runs `hg log --template '{rev}' --rev tip` against the repository and
		    converts the captured standard output to an int.
		    """
		    hg_log = [HG_BIN, '--repository', repo_path, 'log']
		    hg_log += ['--template', '{rev}']
		    hg_log += ['--rev', 'tip']
		    proc = subprocess.Popen(hg_log, stdout=subprocess.PIPE)
		    # only stdout is piped, so the second item of the pair is always None
		    stdout, _ = proc.communicate()
		    return int(stdout)


		# One (path, tip revision number) pair per repository given on the command
		# line.
		repos = [(path, nb_revs(path)) for path in sys.argv[1:]]


		def pick_one(repo):
		    """Draw one random subset description for `repo`.

		    `repo` is a (path, size) pair as stored in `repos`. The result is a
		    tuple whose first item is one of the `SLICING` kinds:

		    - ('scratch', nb, seed): nb drawn between 30% and 70% of the size
		    - ('randomantichain', seed)
		    - ('rev', rev): rev drawn between 30% of the size and the size itself
		    """
		    kind = random.choice(SLICING)
		    # the seed is drawn for every kind, even though 'rev' does not use it
		    seed = random.randint(0, 100000)
		    if kind == 'scratch':
		        low = int(repo[1] * 0.3)
		        high = int(repo[1] * 0.7)
		        return ('scratch', random.randint(low, high), seed)
		    if kind == 'randomantichain':
		        return ('randomantichain', seed)
		    if kind == 'rev':
		        low = int(repo[1] * 0.3)
		        high = int(repo[1])
		        return ('rev', random.randint(low, high))
		    assert False


		# Shared state between the main thread and the worker threads.
		# `done` is set by `end` and checked by `worker` and by the main loop.
		done = threading.Event()
		# Cases waiting to be processed; bounded to 10 entries per worker thread.
		cases = queue.Queue(maxsize=10 * JOB)
		# (case, result) pairs produced by the workers; unbounded.
		results = queue.Queue()


		def worker():
		    """Thread body: consume cases until asked to stop.

		    Each case taken from `cases` is processed twice: once as given, then
		    once with its two sides swapped. Each successful run is pushed to
		    `results` as a (case, result) pair; a failing run is reported on stderr
		    and otherwise ignored. The thread stops when `done` is set or when it
		    takes `None` from the queue.
		    """
		    while not done.is_set():
		        case = cases.get()
		        if case is None:
		            return
		        for swap_sides in (False, True):
		            if swap_sides:
		                # second pass: same repository, left and right exchanged
		                case = (case[0], case[2], case[1])
		            try:
		                results.put((case, process(case)))
		            except Exception as exc:
		                print(
		                    'processing-failed: %s %s' % (case, exc),
		                    file=sys.stderr,
		                )


		# Path of the subsetmaker extension inside the Mercurial checkout.
		SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')


		# Fixed leading part of every `hg debugdiscovery` invocation: JSON output,
		# with the subsetmaker extension enabled. `process` appends the
		# per-case arguments.
		CMD_BASE = (
		HG_BIN,
		'debugdiscovery',
		'--template',
		'json',
		'--config',
		'extensions.subset=%s' % SUBSET_PATH,
		)
		# NOTE(review): the three lines below look like a leftover fragment of a
		# shell invocation; presumably they can be dropped -- confirm.
		# '--local-as-revs "$left" --local-as-revs "$right"'
		# > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt
		# )


		def to_revsets(case):
		    """Render one side of a discovery case as a revset string.

		    `case` is a tuple whose first item names its kind:

		    - ('scratch', nb, seed)      -> 'not scratch(all(), <nb>, "<seed>")'
		    - ('randomantichain', seed)  -> '::randomantichain(all(), "<seed>")'
		    - ('rev', rev)               -> '::<rev>'

		    The `scratch` and `randomantichain` revset functions are presumably
		    provided by the subsetmaker extension -- confirm.

		    Raises AssertionError for any other kind.
		    """
		    kind = case[0]
		    if kind == 'scratch':
		        return 'not scratch(all(), %d, "%d")' % (case[1], case[2])
		    elif kind == 'randomantichain':
		        return '::randomantichain(all(), "%d")' % case[1]
		    elif kind == 'rev':
		        return '::%d' % case[1]
		    # An explicit raise rather than `assert False`: assert statements are
		    # removed under `python -O`, which would let an unknown kind silently
		    # return None.
		    raise AssertionError('unknown case kind: %r' % (kind,))


		def process(case):
		    """Run `hg debugdiscovery` for one case and return its parsed result.

		    `case` is a (repo, left, right) triple: `repo` is a (path, size) pair,
		    while `left` and `right` are subset descriptions understood by
		    `to_revsets`. `left` is passed as --local-as-revs and `right` as
		    --remote-as-revs. The return value is the first element of the JSON
		    document printed by the command.
		    """
		    (repo, left, right) = case
		    cmd = [
		        *CMD_BASE,
		        '-R',
		        repo[0],
		        '--local-as-revs',
		        to_revsets(left),
		        '--remote-as-revs',
		        to_revsets(right),
		    ]
		    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
		    # only stdout is piped, so the second item of the pair is always None
		    stdout, _ = proc.communicate()
		    return json.loads(stdout)[0]


		def interesting_boundary(res):
		    """Decide whether a discovery result is worth reporting.

		    For now a result is interesting when discovery needed more than one
		    round trip and both the undecided-common and the undecided-missing
		    counts of the initial undecided set are non-zero, i.e. the boundary
		    lies somewhere in the middle of the undecided set.

		    Ideally this would be configurable, but that is not a focus for now.

		    Return None for an uninteresting result, otherwise the tuple
		    (round-trips, undecided-common, undecided-missing, total-revs,
		    common-revs, missing-revs).
		    """
		    nb_roundtrips = res["total-roundtrips"]
		    if nb_roundtrips <= 1:
		        return None
		    nb_total = res["nb-revs"]
		    nb_common = res["nb-revs-common"]
		    nb_missing = res["nb-revs-missing"]
		    und_common = res["nb-ini_und-common"]
		    und_missing = res["nb-ini_und-missing"]
		    if und_common == 0 or und_missing == 0:
		        return None
		    return (
		        nb_roundtrips,
		        und_common,
		        und_missing,
		        nb_total,
		        nb_common,
		        nb_missing,
		    )
marmoute perf-helpers: add a search-discovery-case script...	r47505

		def end(*args, **kwargs):
		    """Signal handler: ask the main loop and the workers to stop.

		    Installed for SIGINT. The arguments supplied by the signal machinery
		    are accepted and ignored; the function only sets the `done` event.
		    """
		    # The signature used to read `(args, *kwargs)`, which only worked
		    # because handlers are called positionally; `*args, **kwargs` accepts
		    # every call the old form did.
		    done.set()


		def format_case(case):
		    """Return the items of `case`, converted with str(), joined by '-'."""
		    return '-'.join(map(str, case))


		if not repos:
		    # Bail out before any worker thread is started: with no repository
		    # there is nothing to pick from, and the non-daemon workers would keep
		    # the process alive after the main thread failed.
		    print('usage: search-discovery-case REPO [REPO]...', file=sys.stderr)
		    sys.exit(2)

		# Ctrl-C only raises the `done` flag (see `end`), so that the loops below
		# and the workers can wind down by themselves.
		signal.signal(signal.SIGINT, end)

		for _ in range(JOB):
		    threading.Thread(target=worker).start()

		nb_cases = 0
		while not done.is_set():
		    repo = random.choice(repos)
		    left = pick_one(repo)
		    right = pick_one(repo)
		    case = (repo, left, right)
		    # Queue the case with a timeout and re-check `done` between attempts.
		    # A plain blocking put() could wait forever after Ctrl-C: workers
		    # stop taking cases once `done` is set, so a full queue never drains.
		    queued = False
		    while not queued and not done.is_set():
		        try:
		            cases.put(case, timeout=1)
		        except queue.Full:
		            continue
		        queued = True
		    while not results.empty():
		        # results has a single reader so this is fine
		        c, res = results.get_nowait()
		        boundary = interesting_boundary(res)
		        if boundary is not None:
		            print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)
		            sys.stdout.flush()

		    if queued:
		        nb_cases += 1
		        if not nb_cases % 100:
		            print('[%d cases generated]' % nb_cases, file=sys.stderr)

		# Wake up workers blocked on an empty queue. When the queue is full the
		# workers are busy and will notice `done` on their own.
		for _ in range(JOB):
		    try:
		        cases.put_nowait(None)
		    except queue.Full:
		        pass

		print('[%d cases generated]' % nb_cases, file=sys.stderr)
		# This message has no placeholder: applying `% nb_cases` to it raised
		# "TypeError: not all arguments converted during string formatting".
		print('[ouput generation is over]', file=sys.stderr)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages