|
|
#!/usr/bin/env python3
|
|
|
# Search for interesting discovery instances
|
|
|
#
|
|
|
# search-discovery-case REPO [REPO]…
|
|
|
#
|
|
|
# This uses a subsetmaker extension (next to this script) to generate a stream
# of random discovery instances. When interesting cases are discovered,
# information about them is printed on stdout.
|
|
|
|
|
|
import json
|
|
|
import os
|
|
|
import queue
|
|
|
import random
|
|
|
import signal
|
|
|
import subprocess
|
|
|
import sys
|
|
|
import threading
|
|
|
|
|
|
# Locate the Mercurial checkout relative to this script: the repository root
# is taken to be two directories above the one containing the script.
this_script = os.path.abspath(sys.argv[0])
this_dir = os.path.split(this_script)[0]
hg_dir = os.path.join(this_dir, os.pardir, os.pardir)
HG_REPO = os.path.normpath(hg_dir)
HG_BIN = os.path.join(HG_REPO, 'hg')

# Number of worker threads; read from NUMBER_OF_PROCESSORS when that
# environment variable is set, 8 otherwise.
JOB = int(os.getenv('NUMBER_OF_PROCESSORS', 8))

# The kinds of random subset that can be picked for each side of a case.
SLICING = ('scratch', 'randomantichain', 'rev')
|
|
|
|
|
|
|
|
|
def nb_revs(repo_path):
    """return the revision number of `tip` for the repository at `repo_path`

    The number is obtained by running `hg log --rev tip` with the `{rev}`
    template and converting the captured standard output to an integer.
    """
    hg_log_tip = [HG_BIN, '--repository', repo_path]
    hg_log_tip += ['log', '--template', '{rev}', '--rev', 'tip']
    proc = subprocess.Popen(hg_log_tip, stdout=subprocess.PIPE)
    # only stdout is piped, so the second item of the pair is always None
    stdout, _ = proc.communicate()
    return int(stdout)
|
|
|
|
|
|
|
|
|
# List of (repository path, tip revision number) pairs, one per repository
# given on the command line.
repos = []
for repo in sys.argv[1:]:
    size = nb_revs(repo)
    repos.append((repo, size))

if not repos:
    # Without any repository, picking a random one later on would raise
    # after the worker threads have been started, leaving them blocked
    # forever on an empty queue. Fail early with a usage message instead.
    print(
        'usage: %s REPO [REPO]...' % os.path.basename(sys.argv[0]),
        file=sys.stderr,
    )
    sys.exit(2)
|
|
|
|
|
|
|
|
|
def pick_one(repo):
    """return a random subset description for `repo`

    `repo` is indexed as a sequence whose second item is a revision number
    used to bound the random picks. The returned tuple is one of:

    - ('scratch', nb, seed): nb is picked between 30% and 70% of that number
    - ('randomantichain', seed)
    - ('rev', rev): rev is picked between 30% and 100% of that number
    """
    kind = random.choice(SLICING)
    # the seed is always drawn, even for the 'rev' kind that does not use it
    seed = random.randint(0, 100000)
    if kind == 'scratch':
        lower = int(repo[1] * 0.3)
        upper = int(repo[1] * 0.7)
        return (kind, random.randint(lower, upper), seed)
    if kind == 'randomantichain':
        return (kind, seed)
    if kind == 'rev':
        lower = int(repo[1] * 0.3)
        upper = int(repo[1])
        return (kind, random.randint(lower, upper))
    assert False
|
|
|
|
|
|
|
|
|
# Set once the generation must stop; checked by the workers and the main loop.
done = threading.Event()
# Cases waiting to be processed; bounded to ten pending cases per worker.
cases = queue.Queue(maxsize=JOB * 10)
# (case, result) pairs produced by the workers.
results = queue.Queue()
|
|
|
|
|
|
|
|
|
def worker():
    """process cases from the `cases` queue until asked to stop

    Each case is processed twice: once as queued, then once more with its
    two sides swapped. A failure is reported on stderr and does not stop
    the worker. The worker returns when `done` is set or when it reads
    `None` from the queue.
    """

    def attempt(case):
        # run one case and queue its result, reporting any failure
        try:
            outcome = process(case)
            results.put((case, outcome))
        except Exception as exc:
            print('processing-failed: %s %s' % (case, exc), file=sys.stderr)

    while not done.is_set():
        current = cases.get()
        if current is None:
            return
        attempt(current)
        # same repository, with the left and right sides exchanged
        attempt((current[0], current[2], current[1]))
|
|
|
|
|
|
|
|
|
# Location of the subsetmaker extension inside the Mercurial checkout.
SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')

# Arguments shared by every `hg debugdiscovery` invocation: JSON output, with
# the subsetmaker extension enabled.
CMD_BASE = (HG_BIN, 'debugdiscovery')
CMD_BASE += ('--template', 'json')
CMD_BASE += ('--config', 'extensions.subset=%s' % SUBSET_PATH)
|
|
|
# '--local-as-revs "$left" --local-as-revs "$right"'
|
|
|
# > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt
|
|
|
# )
|
|
|
|
|
|
|
|
|
def to_revsets(case):
    """return the revset expression matching a subset description

    `case` is a tuple whose first item is the kind of subset ('scratch',
    'randomantichain' or 'rev') and whose remaining items are the integer
    parameters of that kind.
    """
    kind = case[0]
    if kind == 'scratch':
        nb, seed = case[1], case[2]
        return 'not scratch(all(), %d, "%d")' % (nb, seed)
    if kind == 'randomantichain':
        seed = case[1]
        return '::randomantichain(all(), "%d")' % seed
    if kind == 'rev':
        rev = case[1]
        return '::%d' % rev
    assert False
|
|
|
|
|
|
|
|
|
def process(case):
    """run `hg debugdiscovery` for `case` and return its decoded result

    `case` is a (repo, left, right) tuple: the first item of `repo` is passed
    to `-R`, `left` and `right` are subset descriptions turned into the
    `--local-as-revs` and `--remote-as-revs` revsets. The first item of the
    JSON document printed by the command is returned.
    """
    repo, left, right = case
    cmd = [
        *CMD_BASE,
        '-R',
        repo[0],
        '--local-as-revs',
        to_revsets(left),
        '--remote-as-revs',
        to_revsets(right),
    ]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # only stdout is piped, so the second item of the pair is always None
    stdout, _ = proc.communicate()
    return json.loads(stdout)[0]
|
|
|
|
|
|
|
|
|
def interesting_boundary(res):
    """check if a case is interesting or not

    For now we are mostly interested in cases where we do multiple
    roundtrips and where the boundary is somewhere in the middle of the
    undecided set.

    Ideally, we would make this configurable, but this is not a focus for now

    return None or (round-trip, undecided-common, undecided-missing)
    """
    trips = res["total-roundtrips"]
    if trips <= 1:
        # a single roundtrip (or less) is not worth reporting
        return None
    common, missing = res["nb-ini_und-common"], res["nb-ini_und-missing"]
    if common == 0 or missing == 0:
        # one side of the undecided set is empty
        return None
    return (trips, common, missing)
|
|
|
|
|
|
|
|
|
def end(*args, **kwargs):
    """request the end of the run by setting the `done` event

    Any argument is accepted and ignored, which lets this function be
    installed as a signal handler.
    """
    done.set()
|
|
|
|
|
|
|
|
|
def format_case(case):
    """return the items of `case` as strings joined with dashes"""
    return '-'.join(map(str, case))
|
|
|
|
|
|
|
|
|
def _report_pending_results():
    """print every interesting case among the results currently available"""
    while not results.empty():
        # results has a single reader so this is fine
        c, res = results.get_nowait()
        boundary = interesting_boundary(res)
        if boundary is not None:
            print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)
            sys.stdout.flush()


# An interruption (Ctrl-C) only requests the end of the generation.
signal.signal(signal.SIGINT, end)

worker_threads = [threading.Thread(target=worker) for _ in range(JOB)]
for thread in worker_threads:
    thread.start()

nb_cases = 0
while not done.is_set():
    repo = random.choice(repos)
    left = pick_one(repo)
    right = pick_one(repo)
    case = (repo, left, right)

    # Never wait for room in the queue without a timeout: once `done` is
    # set the workers stop consuming cases, so an unbounded wait on a full
    # queue would never return.
    queued = False
    while not (queued or done.is_set()):
        try:
            cases.put(case, timeout=1)
            queued = True
        except queue.Full:
            pass
        _report_pending_results()
    if not queued:
        break

    nb_cases += 1
    if not nb_cases % 100:
        print('[%d cases generated]' % nb_cases, file=sys.stderr)

# Wake up the workers waiting on an empty queue. When the queue is full the
# workers are busy and will notice `done` on their own.
for _ in range(JOB):
    try:
        cases.put_nowait(None)
    except queue.Full:
        pass

# Let the workers finish the case they are processing, then report what they
# produced in the meantime.
for thread in worker_threads:
    thread.join()
_report_pending_results()

print('[%d cases generated]' % nb_cases, file=sys.stderr)
print('[output generation is over]', file=sys.stderr)
|
|
|
|