Show More
@@ -0,0 +1,198 b'' | |||
|
1 | #!/usr/bin/env python3 | |
|
2 | # Search for interesting discovery instances | |
|
3 | # | |
|
4 | # search-discovery-case REPO [REPO]… | |
|
5 | # | |
|
6 | # This uses a subsetmaker extension (next to this script) to generate a stream of | |
|
7 | # random discovery instances. When interesting cases are discovered, information | |
|
8 | # about them is printed on stdout. | |
|
9 | from __future__ import print_function | |
|
10 | ||
|
11 | import json | |
|
12 | import os | |
|
13 | import queue | |
|
14 | import random | |
|
15 | import signal | |
|
16 | import subprocess | |
|
17 | import sys | |
|
18 | import threading | |
|
19 | ||
|
# Locate the Mercurial checkout this script belongs to (two directories above
# the script itself) and the `hg` entry point at its root.
this_script = os.path.abspath(sys.argv[0])
this_dir = os.path.dirname(this_script)
hg_dir = os.path.join(this_dir, os.pardir, os.pardir)
HG_REPO = os.path.normpath(hg_dir)
HG_BIN = os.path.join(HG_REPO, 'hg')

# Number of worker threads: taken from the NUMBER_OF_PROCESSORS environment
# variable when it is set, 8 otherwise.
JOB = int(os.getenv('NUMBER_OF_PROCESSORS', 8))

# The kinds of subset that pick_one() can choose from.
SLICING = ('scratch', 'randomantichain', 'rev')
|
30 | ||
|
31 | ||
|
def nb_revs(repo_path):
    """Return the revision number of `tip` in the repository at `repo_path`.

    The number is read from the output of
    `hg --repository REPO log --template {rev} --rev tip`.  Callers use it
    as the size of the repository; note that it is the revision number of
    the tip, not a count of revisions.

    Raises subprocess.CalledProcessError when hg exits with a non-zero
    status (e.g. `repo_path` is not a repository), and ValueError when the
    output is not an integer.
    """
    cmd = [
        HG_BIN,
        '--repository',
        repo_path,
        'log',
        '--template',
        '{rev}',
        '--rev',
        'tip',
    ]
    # check_output() reports a failing hg explicitly instead of letting the
    # failure show up as an unrelated int() parsing error on empty output.
    out = subprocess.check_output(cmd)
    return int(out)
|
46 | ||
|
47 | ||
|
# One (path, size) pair per repository named on the command line, where size
# is whatever nb_revs() reports for that repository.
repos = []
for repo in sys.argv[1:]:
    size = nb_revs(repo)
    repos.append((repo, size))

if not repos:
    # With no repository there is nothing to sample from.  Stop here rather
    # than fail in random.choice() after the worker threads were started.
    print('usage: search-discovery-case REPO [REPO]...', file=sys.stderr)
    sys.exit(2)
|
52 | ||
|
53 | ||
|
def pick_one(repo):
    """Pick a random subset description for one side of a discovery case.

    `repo` is a `(path, size)` pair; only the size (`repo[1]`) is used.

    The kind is drawn from SLICING and the result is one of:

    * `('scratch', nb, seed)` with `nb` between 30% and 70% of the size,
    * `('randomantichain', seed)`,
    * `('rev', rev)` with `rev` between 30% of the size and the size.

    `seed` is a random integer in [0, 100000].  The tuples are meant to be
    turned into revsets by to_revsets().

    Raises ValueError if SLICING contains a kind this function does not
    know how to build.
    """
    size = repo[1]
    pick = random.choice(SLICING)
    # the seed is always drawn, even for kinds that do not use it
    seed = random.randint(0, 100000)
    if pick == 'scratch':
        nb = random.randint(int(size * 0.3), int(size * 0.7))
        return ('scratch', nb, seed)
    if pick == 'randomantichain':
        return ('randomantichain', seed)
    if pick == 'rev':
        rev = random.randint(int(size * 0.3), int(size))
        return ('rev', rev)
    # An explicit raise (rather than `assert False`) still fires under -O.
    raise ValueError('unknown slicing kind: %r' % (pick,))
|
71 | ||
|
72 | ||
|
# Set by the SIGINT handler to ask the main loop and the workers to stop.
done = threading.Event()
# Cases waiting for a worker; bounded to ten pending cases per worker.
cases = queue.Queue(10 * JOB)
# (case, result) pairs posted by the workers for the main loop to report.
results = queue.Queue()
|
76 | ||
|
77 | ||
|
def worker():
    """Thread body: process queued cases until told to stop.

    Each case taken from `cases` is run twice through process(): once as
    queued, then with its second and third elements (the two sides)
    swapped.  Every successful run is posted to `results` as
    `(case, result)`; a failing run is reported on stderr and otherwise
    ignored.

    The thread returns when it reads a `None` from the queue, or when
    `done` is found set before fetching the next case.
    """
    while not done.is_set():
        current = cases.get()
        if current is None:
            return
        for swapped in (False, True):
            if swapped:
                # second pass: exchange the two sides of the case
                current = (current[0], current[2], current[1])
            try:
                outcome = process(current)
                results.put((current, outcome))
            except Exception as exc:
                print(
                    'processing-failed: %s %s' % (current, exc),
                    file=sys.stderr,
                )
|
94 | ||
|
95 | ||
|
# Path of the subsetmaker extension inside the Mercurial checkout.
SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')

# Common prefix of every `hg debugdiscovery` invocation: JSON output, with
# the subsetmaker extension enabled through --config.  process() appends the
# repository and the revsets for both sides.
_SUBSET_CONFIG = 'extensions.subset=%s' % SUBSET_PATH
CMD_BASE = (
    HG_BIN,
    'debugdiscovery',
    '--template',
    'json',
    '--config',
    _SUBSET_CONFIG,
)
|
107 | # '--local-as-revs "$left" --local-as-revs "$right"' | |
|
108 | # > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt | |
|
109 | # ) | |
|
110 | ||
|
111 | ||
|
def to_revsets(case):
    """Turn a subset description into the revset string handed to hg.

    `case` is a tuple whose first element names the kind of subset:

    * `('scratch', nb, seed)`     -> `not scratch(all(), nb, "seed")`
    * `('randomantichain', seed)` -> `::randomantichain(all(), "seed")`
    * `('rev', rev)`              -> `::rev`

    Raises ValueError for any other kind.
    """
    kind = case[0]
    if kind == 'scratch':
        return 'not scratch(all(), %d, "%d")' % (case[1], case[2])
    if kind == 'randomantichain':
        return '::randomantichain(all(), "%d")' % case[1]
    if kind == 'rev':
        return '::%d' % case[1]
    # An explicit raise (rather than `assert False`) still fires under -O,
    # where the assert would be dropped and None returned instead.
    raise ValueError('unknown case kind: %r' % (kind,))
|
122 | ||
|
123 | ||
|
def process(case):
    """Run `hg debugdiscovery` for one case and return its parsed result.

    `case` is a `(repo, left, right)` triple: `repo` is a `(path, size)`
    pair of which only the path is used, `left` and `right` are subset
    descriptions accepted by to_revsets().  `left` is passed as
    `--local-as-revs` and `right` as `--remote-as-revs`.

    Returns the first element of the JSON document printed by hg.

    Raises subprocess.CalledProcessError when hg exits with a non-zero
    status, and whatever json.loads() raises on malformed output.
    """
    (repo, left, right) = case
    cmd = list(CMD_BASE)
    cmd += [
        '-R',
        repo[0],
        '--local-as-revs',
        to_revsets(left),
        '--remote-as-revs',
        to_revsets(right),
    ]
    # check_output() surfaces a failing hg as such, instead of as a JSON
    # decoding error on its (empty or partial) output.
    out = subprocess.check_output(cmd)
    return json.loads(out)[0]
|
136 | ||
|
137 | ||
|
def interesting_boundary(res):
    """Decide whether a discovery result is worth reporting.

    For now we mostly care about cases that needed more than one
    round-trip and whose boundary lies somewhere inside the undecided set,
    i.e. both the "nb-ini_und-common" and "nb-ini_und-missing" counters
    are non-zero.

    Ideally this would be configurable, but that is not a focus for now.

    Return None for an uninteresting result, otherwise the tuple
    (round-trips, undecided-common, undecided-missing).
    """
    roundtrips = res["total-roundtrips"]
    if roundtrips <= 1:
        return None
    undecided_common = res["nb-ini_und-common"]
    undecided_missing = res["nb-ini_und-missing"]
    if undecided_common == 0 or undecided_missing == 0:
        return None
    return (roundtrips, undecided_common, undecided_missing)
|
158 | ||
|
159 | ||
|
def end(*args, **kwargs):
    """Request shutdown by setting the `done` event.

    Any arguments are accepted and ignored, which lets this function be
    installed as a signal handler.
    """
    done.set()
|
162 | ||
|
163 | ||
|
def format_case(case):
    """Return the elements of `case` rendered with str() and joined by '-'."""
    return '-'.join(map(str, case))
|
166 | ||
|
167 | ||
|
# Ctrl-C requests an orderly stop (see end()) instead of raising
# KeyboardInterrupt in the main thread.
signal.signal(signal.SIGINT, end)

for i in range(JOB):
    threading.Thread(target=worker).start()

# Main loop: keep feeding random cases to the workers and report every
# interesting result they have produced so far.
nb_cases = 0
while not done.is_set():
    repo = random.choice(repos)
    left = pick_one(repo)
    right = pick_one(repo)
    case = (repo, left, right)

    # The queue is bounded, so put() may have to wait for a worker to make
    # room.  Wait in short slices and re-check `done`: once a stop has been
    # requested the workers no longer consume anything, and an unbounded
    # put() would block forever.
    queued = False
    while not (queued or done.is_set()):
        try:
            cases.put(case, timeout=0.5)
        except queue.Full:
            continue
        queued = True

    while not results.empty():
        # results has a single reader so this is fine
        c, res = results.get_nowait()
        boundary = interesting_boundary(res)
        if boundary is not None:
            print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)
            sys.stdout.flush()

    if queued:
        nb_cases += 1
        if not nb_cases % 100:
            print('[%d cases generated]' % nb_cases, file=sys.stderr)

# Wake up workers blocked on an empty queue.  When the queue is full there is
# no room for a sentinel, but then no worker is waiting on it either.
for i in range(JOB):
    try:
        cases.put_nowait(None)
    except queue.Full:
        pass

print('[%d cases generated]' % nb_cases, file=sys.stderr)
# This message has no placeholder, so it must not be %-formatted.
print('[ouput generation is over]', file=sys.stderr)
General Comments 0
You need to be logged in to leave comments.
Login now