Show More
@@ -1,197 +1,207 b'' | |||
|
1 | 1 | #!/usr/bin/env python3 |
|
2 | 2 | # Search for interesting discovery instance |
|
3 | 3 | # |
|
4 | 4 | # search-discovery-case REPO [REPO]… |
|
5 | 5 | # |
|
6 | 6 | # This uses a subsetmaker extension (next to this script) to generate a stream |

7 | 7 | # of random discovery instances. When interesting cases are discovered, |

8 | 8 | # information about them is printed on stdout. |
|
9 | 9 | |
|
10 | 10 | import json |
|
11 | 11 | import os |
|
12 | 12 | import queue |
|
13 | 13 | import random |
|
14 | 14 | import signal |
|
15 | 15 | import subprocess |
|
16 | 16 | import sys |
|
17 | 17 | import threading |
|
18 | 18 | |
|
# Locate the Mercurial checkout relative to this script: the repository root
# is two directories above the directory holding the script.
this_script = os.path.abspath(sys.argv[0])
this_dir = os.path.dirname(this_script)
hg_dir = os.path.join(this_dir, os.pardir, os.pardir)
HG_REPO = os.path.normpath(hg_dir)
# The `hg` executable sitting at the root of that checkout.
HG_BIN = os.path.join(HG_REPO, 'hg')

# Number of worker threads: taken from the NUMBER_OF_PROCESSORS environment
# variable when it is set, 8 otherwise.
JOB = int(os.environ.get('NUMBER_OF_PROCESSORS', 8))

# The kinds of subset description that pick_one() can draw.
SLICING = ('scratch', 'randomantichain', 'rev')
|
29 | 29 | |
|
30 | 30 | |
|
def nb_revs(repo_path):
    """Return the revision number of ``tip`` in the repository at ``repo_path``.

    Runs ``hg log --template {rev} --rev tip`` against the repository and
    converts what the command printed on stdout to an int.
    """
    argv = [HG_BIN, '--repository', repo_path]
    argv += ['log', '--template', '{rev}', '--rev', 'tip']
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
    # Only stdout is piped, so the second item returned by communicate() is
    # always None and is ignored.
    stdout = proc.communicate()[0]
    return int(stdout)
|
45 | 45 | |
|
46 | 46 | |
|
# One (path, tip revision number) pair per repository named on the command
# line.
repos = []
for repo in sys.argv[1:]:
    size = nb_revs(repo)
    repos.append((repo, size))

if not repos:
    # With no repository, the main loop's random.choice(repos) would raise
    # IndexError, and only after the worker threads have been started.
    # Refuse to run up front instead.
    print('usage: search-discovery-case REPO [REPO]...', file=sys.stderr)
    sys.exit(2)
|
51 | 51 | |
|
52 | 52 | |
|
def pick_one(repo):
    """Draw one random subset description for ``repo``.

    ``repo`` is a ``(path, size)`` pair; only the size (``repo[1]``) is read
    here.  The returned tuple starts with one of the SLICING kinds:

    * ``('scratch', nb, seed)`` with ``nb`` drawn between 30% and 70% of the
      size,
    * ``('randomantichain', seed)``,
    * ``('rev', rev)`` with ``rev`` drawn between 30% of the size and the
      size itself.
    """
    kind = random.choice(SLICING)
    # The seed is drawn unconditionally, even for the 'rev' kind which does
    # not use it, so the sequence of random draws stays the same.
    seed = random.randint(0, 100000)
    if kind == 'scratch':
        low = int(repo[1] * 0.3)
        high = int(repo[1] * 0.7)
        return ('scratch', random.randint(low, high), seed)
    if kind == 'randomantichain':
        return ('randomantichain', seed)
    if kind == 'rev':
        low = int(repo[1] * 0.3)
        high = int(repo[1])
        return ('rev', random.randint(low, high))
    # Every entry of SLICING is handled above.
    assert False
|
70 | 70 | |
|
71 | 71 | |
|
# Set once the search must stop; polled by the workers and the main loop.
done = threading.Event()
# Cases waiting to be processed.  Bounded, so the producer blocks instead of
# running arbitrarily far ahead of the workers.
cases = queue.Queue(10 * JOB)
# (case, result) pairs produced by the workers.
results = queue.Queue()
|
75 | 75 | |
|
76 | 76 | |
|
def worker():
    """Thread body: take cases from ``cases`` and push outcomes to ``results``.

    Each case ``(repo, left, right)`` is processed twice: once as given and
    once with ``left`` and ``right`` swapped.  The loop stops when ``done`` is
    set or when a ``None`` item is received from the queue.
    """

    def attempt(case):
        # A case that fails is reported on stderr and then dropped, so the
        # worker carries on with the next one.
        try:
            outcome = process(case)
            results.put((case, outcome))
        except Exception as exc:
            print('processing-failed: %s %s' % (case, exc), file=sys.stderr)

    while not done.is_set():
        case = cases.get()
        if case is None:
            return
        attempt(case)
        # Same repository, with the two sides exchanged.
        attempt((case[0], case[2], case[1]))
|
93 | 93 | |
|
94 | 94 | |
|
# The subsetmaker extension, loaded through --config for every discovery run.
SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')

# Arguments shared by every `hg debugdiscovery` invocation; the repository
# and the two revsets are appended for each case.
CMD_BASE = (
    HG_BIN,
    'debugdiscovery',
    *('--template', 'json'),
    *('--config', 'extensions.subset=%s' % SUBSET_PATH),
)
|
106 | 106 | # '--local-as-revs "$left" --local-as-revs "$right"' |
|
107 | 107 | # > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt |
|
108 | 108 | # ) |
|
109 | 109 | |
|
110 | 110 | |
|
def to_revsets(case):
    """Turn a subset description (as built by ``pick_one``) into a revset.

    ``case[0]`` selects the format:

    * ``('scratch', nb, seed)``      -> ``not scratch(all(), nb, "seed")``
    * ``('randomantichain', seed)``  -> ``::randomantichain(all(), "seed")``
    * ``('rev', rev)``               -> ``::rev``

    NOTE(review): ``scratch`` and ``randomantichain`` are presumably revset
    functions provided by the subsetmaker extension -- confirm there.
    """
    kind = case[0]
    if kind == 'scratch':
        nb, seed = case[1], case[2]
        return 'not scratch(all(), %d, "%d")' % (nb, seed)
    if kind == 'randomantichain':
        return '::randomantichain(all(), "%d")' % case[1]
    if kind == 'rev':
        return '::%d' % case[1]
    # Unknown kind: nothing sensible to build.
    assert False
|
121 | 121 | |
|
122 | 122 | |
|
def process(case):
    """Run ``hg debugdiscovery`` for one case and return its JSON result.

    ``case`` is ``(repo, left, right)``: ``repo[0]`` is the repository path,
    ``left`` becomes the ``--local-as-revs`` revset and ``right`` the
    ``--remote-as-revs`` one.  The command output is parsed as JSON and its
    first entry is returned.
    """
    repo, local_side, remote_side = case
    argv = list(CMD_BASE) + [
        '-R',
        repo[0],
        '--local-as-revs',
        to_revsets(local_side),
        '--remote-as-revs',
        to_revsets(remote_side),
    ]
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
    # stderr is not piped; only the captured stdout matters here.
    stdout = proc.communicate()[0]
    return json.loads(stdout)[0]
|
135 | 135 | |
|
136 | 136 | |
|
def interesting_boundary(res):
    """Decide whether a discovery result is worth reporting.

    A result is kept when discovery needed more than one round-trip and the
    initial undecided set holds both common and missing revisions.

    Ideally this would be configurable, but that is not a focus for now.

    Returns None for an uninteresting result, otherwise the tuple
    (round-trips, undecided-common, undecided-missing, total-revs,
    common-revs, missing-revs).
    """
    roundtrips = res["total-roundtrips"]
    if not roundtrips > 1:
        return None
    # Read every field before filtering on the undecided counts, so a result
    # lacking one of them fails the same way whatever the counts are.
    counts = (
        res["nb-revs"],
        res["nb-revs-common"],
        res["nb-revs-missing"],
    )
    und_common = res["nb-ini_und-common"]
    und_missing = res["nb-ini_und-missing"]
    if und_common == 0 or und_missing == 0:
        return None
    return (roundtrips, und_common, und_missing) + counts
|
157 | 167 | |
|
158 | 168 | |
|
def end(*args, **kwargs):
    """Request shutdown by setting the ``done`` event.

    Any arguments are accepted and ignored, which lets this function be
    installed directly as a signal handler.
    """
    done.set()
|
161 | 171 | |
|
162 | 172 | |
|
def format_case(case):
    """Render a subset description as its items joined by ``-``."""
    return '-'.join(map(str, case))
|
165 | 175 | |
|
166 | 176 | |
|
# Ctrl-C only sets the `done` event; the loops below notice it and wind down.
signal.signal(signal.SIGINT, end)

for i in range(JOB):
    threading.Thread(target=worker).start()

# Producer loop: keep feeding random cases to the workers and report the
# interesting results as they come back.
nb_cases = 0
while not done.is_set():
    repo = random.choice(repos)
    left = pick_one(repo)
    right = pick_one(repo)
    # Blocks while the bounded queue is full.
    cases.put((repo, left, right))
    while not results.empty():
        # results has a single reader so this is fine
        c, res = results.get_nowait()
        boundary = interesting_boundary(res)
        if boundary is not None:
            print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)
            sys.stdout.flush()

    nb_cases += 1
    if not nb_cases % 100:
        print('[%d cases generated]' % nb_cases, file=sys.stderr)

# Best-effort wake-up of workers blocked on an empty queue.  A full queue is
# not an error: workers also leave their loop once they observe `done`.
for i in range(JOB):
    try:
        cases.put_nowait(None)
    except queue.Full:
        pass

print('[%d cases generated]' % nb_cases, file=sys.stderr)
# This message has no placeholder: applying `% nb_cases` to it raised
# TypeError ("not all arguments converted") on every shutdown.
print('[ouput generation is over]', file=sys.stderr)
General Comments 0
You need to be logged in to leave comments.
Login now