Show More
@@ -1,197 +1,207 b'' | |||||
1 | #!/usr/bin/env python3 |
|
1 | #!/usr/bin/env python3 | |
2 | # Search for interesting discovery instance |
|
2 | # Search for interesting discovery instance | |
3 | # |
|
3 | # | |
4 | # search-discovery-case REPO [REPO]… |

4 | # search-discovery-case REPO [REPO]… | |
5 | # |
|
5 | # | |
6 | # This uses a subsetmaker extension (next to this script) to generate a stream of |

6 | # This uses a subsetmaker extension (next to this script) to generate a stream of | |
7 | # random discovery instances. When interesting cases are discovered, information |

7 | # random discovery instances. When interesting cases are discovered, information | |
8 | # about them is printed on stdout. |

8 | # about them is printed on stdout. | |
9 |
|
9 | |||
10 | import json |
|
10 | import json | |
11 | import os |
|
11 | import os | |
12 | import queue |
|
12 | import queue | |
13 | import random |
|
13 | import random | |
14 | import signal |
|
14 | import signal | |
15 | import subprocess |
|
15 | import subprocess | |
16 | import sys |
|
16 | import sys | |
17 | import threading |
|
17 | import threading | |
18 |
|
18 | |||
# Locate the Mercurial checkout relative to this script: the repository root
# is two directory levels above the directory containing the script.
this_script = os.path.abspath(sys.argv[0])
this_dir = os.path.dirname(this_script)
hg_dir = os.path.join(this_dir, os.pardir, os.pardir)
HG_REPO = os.path.normpath(hg_dir)
# The `hg` executable used for every subprocess call below.
HG_BIN = os.path.join(HG_REPO, 'hg')

# Number of worker threads, taken from the NUMBER_OF_PROCESSORS environment
# variable and defaulting to 8 when it is unset.
JOB = int(os.environ.get('NUMBER_OF_PROCESSORS', 8))


# The kinds of repository slice a random case can be built from.
SLICING = (
    'scratch',
    'randomantichain',
    'rev',
)
29 |
|
29 | |||
30 |
|
30 | |||
def nb_revs(repo_path):
    """Return the revision number of ``tip`` for the repository at repo_path.

    The value is obtained by running ``hg log --template '{rev}' --rev tip``
    with the `hg` executable from this checkout, and is used by the rest of
    the script as the size of the repository.

    Raises:
        subprocess.CalledProcessError: if `hg` exits with a non-zero status
            (for example when repo_path is not a repository). Without this
            check the failure would only surface as an opaque ValueError
            while parsing the empty output.
        ValueError: if the output of `hg` is not an integer.
    """
    cmd = [
        HG_BIN,
        '--repository',
        repo_path,
        'log',
        '--template',
        '{rev}',
        '--rev',
        'tip',
    ]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, check=True)
    return int(proc.stdout)
45 |
|
45 | |||
46 |
|
46 | |||
# Pair every repository path given on the command line with its size as
# reported by nb_revs(): a list of (path, size) tuples.
repos = [(path, nb_revs(path)) for path in sys.argv[1:]]
51 |
|
51 | |||
52 |
|
52 | |||
def pick_one(repo):
    """Randomly pick one way of slicing a repository.

    `repo` is a (path, size) tuple; only the size is used here.

    Returns one of:
      ('scratch', nb, seed)     nb drawn between 30% and 70% of the size
      ('randomantichain', seed)
      ('rev', rev)              rev drawn between 30% of the size and the size
    """
    kind = random.choice(SLICING)
    # The seed is drawn for every kind (even when unused) so that the
    # sequence of random numbers consumed does not depend on the kind.
    seed = random.randint(0, 100000)
    size = repo[1]
    if kind == 'rev':
        lowest = int(size * 0.3)
        highest = int(size)
        return ('rev', random.randint(lowest, highest))
    if kind == 'randomantichain':
        return ('randomantichain', seed)
    if kind == 'scratch':
        lowest = int(size * 0.3)
        highest = int(size * 0.7)
        return ('scratch', random.randint(lowest, highest), seed)
    # SLICING only contains the three kinds handled above
    assert False
70 |
|
70 | |||
71 |
|
71 | |||
# State shared between the main thread and the worker threads.
# `results` carries (case, result) pairs from the workers to the main thread.
results = queue.Queue()
# `cases` carries the cases to process; it is bounded so the main thread
# cannot run arbitrarily far ahead of the workers.
cases = queue.Queue(maxsize=JOB * 10)
# `done` is set when the script should stop.
done = threading.Event()
75 |
|
75 | |||
76 |
|
76 | |||
def worker():
    """Worker thread body: process cases until told to stop.

    Each case taken from the `cases` queue is processed twice, once as
    generated and once with its two sides swapped. Every successful run is
    pushed on the `results` queue as a (case, result) pair; failures are
    reported on stderr and otherwise ignored.

    The loop stops when the `done` event is set or when a `None` sentinel is
    received from the queue.
    """
    while not done.is_set():
        case = cases.get()
        if case is None:
            return
        swapped = (case[0], case[2], case[1])
        for current in (case, swapped):
            try:
                outcome = process(current)
                results.put((current, outcome))
            except Exception as exc:
                print(
                    'processing-failed: %s %s' % (current, exc),
                    file=sys.stderr,
                )
93 |
|
93 | |||
94 |
|
94 | |||
# The subsetmaker extension shipped in contrib/perf-utils of this checkout.
SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')


# Common prefix of every `hg debugdiscovery` invocation: JSON output, with
# the subsetmaker extension enabled under the name "subset".
CMD_BASE = (
    HG_BIN,
    'debugdiscovery',
    '--template',
    'json',
    '--config',
    'extensions.subset=' + SUBSET_PATH,
)
|
105 | ) | |
106 | # '--local-as-revs "$left" --local-as-revs "$right"' |
|
106 | # '--local-as-revs "$left" --local-as-revs "$right"' | |
107 | # > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt |
|
107 | # > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt | |
108 | # ) |
|
108 | # ) | |
109 |
|
109 | |||
110 |
|
110 | |||
def to_revsets(case):
    """Turn one slicing case into the revset expression selecting it.

    `case` is a tuple whose first item is the kind of slice:
      ('scratch', nb, seed)     -> 'not scratch(all(), nb, "seed")'
      ('randomantichain', seed) -> '::randomantichain(all(), "seed")'
      ('rev', rev)              -> '::rev'

    NOTE(review): `scratch` and `randomantichain` are presumably revset
    functions provided by the subsetmaker extension - confirm there.

    Raises:
        ValueError: if the kind is not one of the three above. An explicit
            exception is used rather than an assert so that the check is not
            stripped when Python runs with -O.
    """
    kind = case[0]
    if kind == 'scratch':
        return 'not scratch(all(), %d, "%d")' % (case[1], case[2])
    if kind == 'randomantichain':
        return '::randomantichain(all(), "%d")' % case[1]
    if kind == 'rev':
        return '::%d' % case[1]
    raise ValueError('unknown slicing case: %r' % (case,))
121 |
|
121 | |||
122 |
|
122 | |||
def process(case):
    """Run `hg debugdiscovery` for one case and return its parsed result.

    `case` is a (repo, left, right) tuple where `repo` is a (path, size)
    tuple and `left` / `right` are slicing cases understood by to_revsets().
    The left side is used as the local revisions and the right side as the
    remote revisions.

    Returns the first item of the JSON list printed by the command.
    """
    repo, local_side, remote_side = case
    cmd = [
        *CMD_BASE,
        '-R',
        repo[0],
        '--local-as-revs',
        to_revsets(local_side),
        '--remote-as-revs',
        to_revsets(remote_side),
    ]
    completed = subprocess.run(cmd, stdout=subprocess.PIPE)
    return json.loads(completed.stdout)[0]
135 |
|
135 | |||
136 |
|
136 | |||
def interesting_boundary(res):
    """Check whether a discovery result is interesting or not.

    For now we are mostly interested in cases where discovery needs multiple
    round-trips and where the boundary is somewhere in the middle of the
    undecided set, i.e. the initial undecided set contains both common and
    missing revisions.

    Ideally, we would make this configurable, but this is not a focus for now.

    `res` is the mapping parsed from the JSON output of `hg debugdiscovery`.

    Return None when the case is not interesting, otherwise the tuple
    (round-trips, undecided-common, undecided-missing,
     total-revs, common-revs, missing-revs).
    """
    roundtrips = res["total-roundtrips"]
    if roundtrips <= 1:
        return None
    total_revs = res["nb-revs"]
    common_revs = res["nb-revs-common"]
    missing_revs = res["nb-revs-missing"]
    undecided_common = res["nb-ini_und-common"]
    undecided_missing = res["nb-ini_und-missing"]
    # the boundary is only "in the middle" when both sides are non-empty
    if undecided_common == 0 or undecided_missing == 0:
        return None
    return (
        roundtrips,
        undecided_common,
        undecided_missing,
        total_revs,
        common_revs,
        missing_revs,
    )
157 |
|
167 | |||
158 |
|
168 | |||
def end(*args, **kwargs):
    """Request shutdown by setting the `done` event.

    All arguments are accepted and ignored, which lets this function be
    installed directly as a signal handler.
    """
    done.set()
161 |
|
171 | |||
162 |
|
172 | |||
def format_case(case):
    """Render a case tuple as its items joined by dashes."""
    return '-'.join(map(str, case))
165 |
|
175 | |||
166 |
|
176 | |||
def _drain_results():
    """Print every interesting case currently waiting in `results`."""
    while not results.empty():
        # results has a single reader so this is fine
        c, res = results.get_nowait()
        boundary = interesting_boundary(res)
        if boundary is not None:
            print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)
            sys.stdout.flush()


# Ctrl-C asks for a clean shutdown instead of killing the script.
signal.signal(signal.SIGINT, end)

# Without any repository there is nothing to pick cases from; bail out before
# starting worker threads that would otherwise be left waiting forever.
if not repos:
    sys.exit('usage: search-discovery-case REPO [REPO]...')

workers = [threading.Thread(target=worker) for _ in range(JOB)]
for thread in workers:
    thread.start()

nb_cases = 0
while not done.is_set():
    repo = random.choice(repos)
    left = pick_one(repo)
    right = pick_one(repo)
    # Queue the case with a timeout rather than blocking indefinitely: once
    # `done` is set the workers stop draining the queue, so a plain blocking
    # put() on a full queue would never return.
    queued = False
    while not (queued or done.is_set()):
        try:
            cases.put((repo, left, right), timeout=0.5)
            queued = True
        except queue.Full:
            pass
    _drain_results()

    if queued:
        nb_cases += 1
        if not nb_cases % 100:
            print('[%d cases generated]' % nb_cases, file=sys.stderr)

# Wake up workers that may be waiting on an empty queue.
for _ in range(JOB):
    try:
        cases.put_nowait(None)
    except queue.Full:
        # The queue is saturated, so no worker is waiting on it; each one
        # notices `done` once its current case is finished.
        pass

# Wait for the in-flight cases and report whatever they produced.
for thread in workers:
    thread.join()
_drain_results()

print('[%d cases generated]' % nb_cases, file=sys.stderr)
# This message takes no argument: applying `% nb_cases` to it raised
# TypeError ("not all arguments converted during string formatting").
print('[ouput generation is over]', file=sys.stderr)
General Comments 0
You need to be logged in to leave comments.
Login now