#!/usr/bin/env python3
# Search for interesting discovery instances
#
# search-discovery-case REPO [REPO]…
#
# This uses the subsetmaker extension (next to this script) to generate a
# stream of random discovery instances. When interesting cases are discovered,
# information about them is printed on stdout.
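#
# Example invocation (the repository path is hypothetical), run from the root
# of a Mercurial checkout:
#
#   contrib/perf-utils/search-discovery-case ~/repos/some-large-repo
#
# Each interesting case is printed on one line as:
#
#   <repo> <local-subset-spec> <remote-subset-spec> <roundtrips> <undecided-common> <undecided-missing>
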
import json
import os
import queue
import random
import signal
import subprocess
import sys
import threading

this_script = os.path.abspath(sys.argv[0])
this_dir = os.path.dirname(this_script)
hg_dir = os.path.join(this_dir, '..', '..')
HG_REPO = os.path.normpath(hg_dir)
HG_BIN = os.path.join(HG_REPO, 'hg')
JOB = int(os.environ.get('NUMBER_OF_PROCESSORS', 8))
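
# Subset-selection strategies used to build random discovery cases. Each name
# maps to a revset in to_revsets() below ('scratch' and 'randomantichain' rely
# on the subsetmaker extension, 'rev' simply uses the ancestors of a revision).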
SLICING = ('scratch', 'randomantichain', 'rev')


def nb_revs(repo_path):
    cmd = [
        HG_BIN,
        '--repository',
        repo_path,
        'log',
        '--template',
        '{rev}',
        '--rev',
        'tip',
    ]
    s = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    out, err = s.communicate()
    return int(out)


repos = []
for repo in sys.argv[1:]:
    size = nb_revs(repo)
    repos.append((repo, size))


def pick_one(repo):
    pick = random.choice(SLICING)
    seed = random.randint(0, 100000)
    if pick == 'scratch':
        start = int(repo[1] * 0.3)
        end = int(repo[1] * 0.7)
        nb = random.randint(start, end)
        return ('scratch', nb, seed)
    elif pick == 'randomantichain':
        return ('randomantichain', seed)
    elif pick == 'rev':
        start = int(repo[1] * 0.3)
        end = int(repo[1])
        rev = random.randint(start, end)
        return ('rev', rev)
    else:
        assert False

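# Coordination between the main loop (producer) and the worker threads: the
# cases to evaluate go into `cases`, the parsed JSON output of
# `hg debugdiscovery` comes back through `results`.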
done = threading.Event()
cases = queue.Queue(maxsize=10 * JOB)
results = queue.Queue()


def worker():
    while not done.is_set():
        c = cases.get()
        if c is None:
            return
        try:
            res = process(c)
            results.put((c, res))
        except Exception as exc:
            print('processing-failed: %s %s' % (c, exc), file=sys.stderr)
        c = (c[0], c[2], c[1])
        try:
            res = process(c)
            results.put((c, res))
        except Exception as exc:
            print('processing-failed: %s %s' % (c, exc), file=sys.stderr)

SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')
CMD_BASE = (
    HG_BIN,
    'debugdiscovery',
    '--template',
    'json',
    '--config',
    'extensions.subset=%s' % SUBSET_PATH,
)
# '--local-as-revs "$left" --local-as-revs "$right"'
# > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt
# )
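
# Turn a case tuple produced by pick_one() into the revset passed to
# --local-as-revs / --remote-as-revs (the 'scratch' and 'randomantichain'
# revsets come from the subsetmaker extension enabled in CMD_BASE).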
def to_revsets(case):
    t = case[0]
    if t == 'scratch':
        return 'not scratch(all(), %d, "%d")' % (case[1], case[2])
    elif t == 'randomantichain':
        return '::randomantichain(all(), "%d")' % case[1]
    elif t == 'rev':
        return '::%d' % case[1]
    else:
        assert False


def process(case):
    (repo, left, right) = case
    cmd = list(CMD_BASE)
    cmd.append('-R')
    cmd.append(repo[0])
    cmd.append('--local-as-revs')
    cmd.append(to_revsets(left))
    cmd.append('--remote-as-revs')
    cmd.append(to_revsets(right))
    s = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    out, err = s.communicate()
    return json.loads(out)[0]


def interesting_boundary(res):
    """check if a case is interesting or not

    For now we are mostly interested in cases where we do multiple round-trips
    and where the boundary is somewhere in the middle of the undecided set.

    Ideally, we would make this configurable, but this is not a focus for now.

    return None or (round-trip, undecided-common, undecided-missing)
    """
    roundtrips = res["total-roundtrips"]
    if roundtrips <= 1:
        return None
    undecided_common = res["nb-ini_und-common"]
    undecided_missing = res["nb-ini_und-missing"]
    if undecided_common == 0:
        return None
    if undecided_missing == 0:
        return None
    return (roundtrips, undecided_common, undecided_missing)
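
# Example with illustrative numbers: a result where total-roundtrips is 3,
# nb-ini_und-common is 12 and nb-ini_und-missing is 4 would be reported as
# (3, 12, 4); a single-roundtrip result would be skipped.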


def end(*args, **kwargs):
    done.set()


def format_case(case):
    return '-'.join(str(s) for s in case)


signal.signal(signal.SIGINT, end)

for i in range(JOB):
    threading.Thread(target=worker).start()
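
# Main loop: keep feeding random cases to the workers and report any
# interesting result as soon as the workers hand one back. Stop on SIGINT.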
nb_cases = 0
while not done.is_set():
    repo = random.choice(repos)
    left = pick_one(repo)
    right = pick_one(repo)
    cases.put((repo, left, right))
    while not results.empty():
        # results has a single reader so this is fine
        c, res = results.get_nowait()
        boundary = interesting_boundary(res)
        if boundary is not None:
            print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)
            sys.stdout.flush()
    nb_cases += 1
    if not nb_cases % 100:
        print('[%d cases generated]' % nb_cases, file=sys.stderr)
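
# Shutdown: `done` is already set at this point, so hand a sentinel to each
# worker in case it is blocked in cases.get().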
for i in range(JOB):
    try:
        cases.put_nowait(None)
    except queue.Full:
        pass

print('[%d cases generated]' % nb_cases, file=sys.stderr)
print('[output generation is over]', file=sys.stderr)