Show More
@@ -0,0 +1,198 b'' | |||||
|
1 | #!/usr/bin/env python3 | |||
|
2 | # Search for interesting discovery instances | |||
|
3 | # | |||
|
4 | # search-discovery-case REPO [REPO]… | |||
|
5 | # | |||
|
6 | # This uses a subsetmaker extension (next to this script) to generate a stream of | |||
|
7 | # random discovery instances. When interesting cases are discovered, information | |||
|
8 | # about them is printed on stdout. | |||
|
9 | from __future__ import print_function | |||
|
10 | ||||
|
11 | import json | |||
|
12 | import os | |||
|
13 | import queue | |||
|
14 | import random | |||
|
15 | import signal | |||
|
16 | import subprocess | |||
|
17 | import sys | |||
|
18 | import threading | |||
|
19 | ||||
|
# Locate the Mercurial checkout this script lives in (two directory levels
# above the script's own directory) and the `hg` executable at its root.
this_script = os.path.abspath(sys.argv[0])
this_dir = os.path.dirname(this_script)
hg_dir = os.path.join(this_dir, os.pardir, os.pardir)
HG_REPO = os.path.normpath(hg_dir)
HG_BIN = os.path.join(HG_REPO, 'hg')

# Number of worker threads: the NUMBER_OF_PROCESSORS environment variable
# when it is set, 8 otherwise.
JOB = int(os.environ.get('NUMBER_OF_PROCESSORS', 8))


# The kinds of random subset description that pick_one() can produce.
SLICING = ('scratch', 'randomantichain', 'rev')
|
30 | ||||
|
31 | ||||
|
def nb_revs(repo_path):
    """Return the revision number of `tip` in the repository at repo_path.

    Runs `hg log --template {rev} --rev tip` on the repository and parses
    what hg prints on its standard output as an integer.

    Raises subprocess.CalledProcessError when hg exits with a non-zero
    status, and ValueError when the output is not an integer.
    """
    cmd = [
        HG_BIN,
        '--repository',
        repo_path,
        'log',
        '--template',
        '{rev}',
        '--rev',
        'tip',
    ]
    # check_output fails loudly (naming the command) when hg cannot read
    # the repository, instead of letting int() choke on empty output.
    out = subprocess.check_output(cmd)
    return int(out)
|
46 | ||||
|
47 | ||||
|
# Pair every repository given on the command line with the revision number
# of its tip; pick_one() uses that number to size the random subsets.
repos = []
for repo in sys.argv[1:]:
    size = nb_revs(repo)
    repos.append((repo, size))

if not repos:
    # With no repository the generation loop would fail on
    # random.choice([]) once the worker threads are already running,
    # leaving them blocked on an empty queue. Bail out before that.
    print('usage: search-discovery-case REPO [REPO]...', file=sys.stderr)
    sys.exit(2)
|
52 | ||||
|
53 | ||||
|
def pick_one(repo):
    """Draw a random subset description for one side of a discovery.

    `repo` is indexed as a sequence; only `repo[1]` (the repository size
    stored in `repos`) is used here.

    Returns one of:
      ('scratch', nb, seed)      nb drawn in [30% of size, 70% of size]
      ('randomantichain', seed)
      ('rev', rev)               rev drawn in [30% of size, size]
    """
    kind = random.choice(SLICING)
    # The seed is always drawn, even for the kind that does not use it.
    seed = random.randint(0, 100000)
    size = repo[1]
    if kind == 'scratch':
        low = int(size * 0.3)
        high = int(size * 0.7)
        return ('scratch', random.randint(low, high), seed)
    if kind == 'randomantichain':
        return ('randomantichain', seed)
    if kind == 'rev':
        low = int(size * 0.3)
        high = int(size)
        return ('rev', random.randint(low, high))
    assert False
|
71 | ||||
|
72 | ||||
|
# Set once the run must stop (see end()); checked by the generation loop
# and by every worker.
done = threading.Event()
# Cases waiting to be processed; bounded to ten pending cases per worker.
cases = queue.Queue(maxsize=JOB * 10)
# (case, result) pairs produced by the workers.
results = queue.Queue()
|
76 | ||||
|
77 | ||||
|
def worker():
    """Consume entries from the `cases` queue until told to stop.

    Each dequeued case is run through process() twice: first as queued,
    then with its second and third elements swapped. Every successful run
    is pushed to `results` as a (case, result) pair; a failing run is
    reported on stderr and skipped.

    The loop ends when `done` is set or when None is dequeued.
    """
    while not done.is_set():
        case = cases.get()
        if case is None:
            return
        for swap in (False, True):
            if swap:
                # second pass: same repository, sides exchanged
                case = (case[0], case[2], case[1])
            try:
                outcome = process(case)
                results.put((case, outcome))
            except Exception as exc:
                print('processing-failed: %s %s' % (case, exc), file=sys.stderr)
|
94 | ||||
|
95 | ||||
|
# Location of the subsetmaker extension inside the Mercurial checkout.
SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')


# Arguments shared by every `hg debugdiscovery` invocation: JSON output
# and the subsetmaker extension enabled.
CMD_BASE = (
    HG_BIN,
    'debugdiscovery',
    '--template',
    'json',
    '--config',
    'extensions.subset=' + SUBSET_PATH,
)
|
107 | # '--local-as-revs "$left" --local-as-revs "$right"' | |||
|
108 | # > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt | |||
|
109 | # ) | |||
|
110 | ||||
|
111 | ||||
|
def to_revsets(case):
    """Turn a subset description (as built by pick_one) into a revset.

    The first element of `case` selects the form:
      ('scratch', nb, seed)      -> 'not scratch(all(), nb, "seed")'
      ('randomantichain', seed)  -> '::randomantichain(all(), "seed")'
      ('rev', rev)               -> '::rev'
    """
    kind = case[0]
    if kind == 'scratch':
        nb, seed = case[1], case[2]
        return 'not scratch(all(), %d, "%d")' % (nb, seed)
    if kind == 'randomantichain':
        return '::randomantichain(all(), "%d")' % case[1]
    if kind == 'rev':
        return '::%d' % case[1]
    assert False
|
122 | ||||
|
123 | ||||
|
def process(case):
    """Run one discovery and return its decoded result.

    `case` is a (repo, left, right) triple. `repo[0]` is handed to hg
    through -R; `left` and `right` are converted with to_revsets() and
    passed as --local-as-revs and --remote-as-revs respectively.

    Returns the first element of the JSON document hg writes on stdout.
    """
    repo, left, right = case
    cmd = list(CMD_BASE) + ['-R', repo[0]]
    cmd += ['--local-as-revs', to_revsets(left)]
    cmd += ['--remote-as-revs', to_revsets(right)]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    stdout, _ = proc.communicate()
    return json.loads(stdout)[0]
|
136 | ||||
|
137 | ||||
|
def interesting_boundary(res):
    """Decide whether a discovery result is worth reporting.

    For now we mostly care about cases that need more than one roundtrip
    and whose boundary falls somewhere inside the undecided set, i.e. both
    the initially-undecided common and missing counts are non-zero.

    Ideally this would be configurable, but that is not a focus for now.

    Returns None, or (roundtrips, undecided-common, undecided-missing).
    """
    trips = res["total-roundtrips"]
    if trips <= 1:
        return None
    common = res["nb-ini_und-common"]
    missing = res["nb-ini_und-missing"]
    if common == 0 or missing == 0:
        return None
    return (trips, common, missing)
|
158 | ||||
|
159 | ||||
|
def end(*args, **kwargs):
    """Flag the run as finished by setting the `done` event.

    Accepts and ignores any arguments, so it can be installed as a signal
    handler.
    """
    done.set()
|
162 | ||||
|
163 | ||||
|
def format_case(case):
    """Render the elements of `case` as one dash-separated string."""
    return '-'.join(map(str, case))
|
166 | ||||
|
167 | ||||
|
# Ctrl-C only sets the `done` event; the loops below notice it and wind
# down on their own.
signal.signal(signal.SIGINT, end)

for _ in range(JOB):
    threading.Thread(target=worker).start()

# Generation loop: keep feeding random (repo, left, right) cases to the
# workers and report every interesting result that has come back so far.
nb_cases = 0
while not done.is_set():
    repo = random.choice(repos)
    left = pick_one(repo)
    right = pick_one(repo)
    cases.put((repo, left, right))
    while not results.empty():
        # results has a single reader so this is fine
        c, res = results.get_nowait()
        boundary = interesting_boundary(res)
        if boundary is not None:
            print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)
            sys.stdout.flush()

    nb_cases += 1
    if not nb_cases % 100:
        print('[%d cases generated]' % nb_cases, file=sys.stderr)

# Offer one None sentinel per worker. A full queue is not an error here:
# workers also leave their loop once they see `done` set.
for _ in range(JOB):
    try:
        cases.put_nowait(None)
    except queue.Full:
        pass

print('[%d cases generated]' % nb_cases, file=sys.stderr)
# This message has no conversion specifier, so it must not be %-formatted
# (doing so raises TypeError). The text itself is kept verbatim.
print('[ouput generation is over]', file=sys.stderr)
General Comments 0
You need to be logged in to leave comments.
Login now