upstream/mercurial-mirror Commit - r49880:a2bd6b23

1

#!/usr/bin/env python3

1

#!/usr/bin/env python3

2

# Search for interesting discovery instance

2

# Search for interesting discovery instance

3

#

3

#

4

# search-discovery-case REPO [REPO]…

4

# search-discovery-case REPO [REPO]…

5

#

5

#

6

# This use a subsetmaker extension (next to this script) to generate a steam of

6

# This use a subsetmaker extension (next to this script) to generate a steam of

7

# random discovery instance. When interesting case are discovered, information

7

# random discovery instance. When interesting case are discovered, information

8

# about them are print on the stdout.

8

# about them are print on the stdout.

9

10

import json

10

import json

11

import os

11

import os

12

import queue

12

import queue

13

import random

13

import random

14

import signal

14

import signal

15

import subprocess

15

import subprocess

16

import sys

16

import sys

17

import threading

17

import threading

18

19

this_script = os.path.abspath(sys.argv[0])

19

this_script = os.path.abspath(sys.argv[0])

20

this_dir = os.path.dirname(this_script)

20

this_dir = os.path.dirname(this_script)

21

hg_dir = os.path.join(this_dir, '..', '..')

21

hg_dir = os.path.join(this_dir, '..', '..')

22

HG_REPO = os.path.normpath(hg_dir)

22

HG_REPO = os.path.normpath(hg_dir)

23

HG_BIN = os.path.join(HG_REPO, 'hg')

23

HG_BIN = os.path.join(HG_REPO, 'hg')

24

25

JOB = int(os.environ.get('NUMBER_OF_PROCESSORS', 8))

25

JOB = int(os.environ.get('NUMBER_OF_PROCESSORS', 8))

26

27

28

SLICING = ('scratch', 'randomantichain', 'rev')

28

SLICING = ('scratch', 'randomantichain', 'rev')

29

30

31

def nb_revs(repo_path):

31

def nb_revs(repo_path):

32

cmd = [

32

cmd = [

33

HG_BIN,

33

HG_BIN,

34

'--repository',

34

'--repository',

35

repo_path,

35

repo_path,

36

'log',

36

'log',

37

'--template',

37

'--template',

38

'{rev}',

38

'{rev}',

39

'--rev',

39

'--rev',

40

'tip',

40

'tip',

41

]

41

]

42

s = subprocess.Popen(cmd, stdout=subprocess.PIPE)

42

s = subprocess.Popen(cmd, stdout=subprocess.PIPE)

43

out, err = s.communicate()

43

out, err = s.communicate()

44

return int(out)

44

return int(out)

45

46

47

repos = []

47

repos = []

48

for repo in sys.argv[1:]:

48

for repo in sys.argv[1:]:

49

size = nb_revs(repo)

49

size = nb_revs(repo)

50

repos.append((repo, size))

50

repos.append((repo, size))

51

52

53

def pick_one(repo):

53

def pick_one(repo):

54

pick = random.choice(SLICING)

54

pick = random.choice(SLICING)

55

seed = random.randint(0, 100000)

55

seed = random.randint(0, 100000)

56

if pick == 'scratch':

56

if pick == 'scratch':

57

start = int(repo[1] * 0.3)

57

start = int(repo[1] * 0.3)

58

end = int(repo[1] * 0.7)

58

end = int(repo[1] * 0.7)

59

nb = random.randint(start, end)

59

nb = random.randint(start, end)

60

return ('scratch', nb, seed)

60

return ('scratch', nb, seed)

61

elif pick == 'randomantichain':

61

elif pick == 'randomantichain':

62

return ('randomantichain', seed)

62

return ('randomantichain', seed)

63

elif pick == 'rev':

63

elif pick == 'rev':

64

start = int(repo[1] * 0.3)

64

start = int(repo[1] * 0.3)

65

end = int(repo[1])

65

end = int(repo[1])

66

rev = random.randint(start, end)

66

rev = random.randint(start, end)

67

return ('rev', rev)

67

return ('rev', rev)

68

else:

68

else:

69

assert False

69

assert False

70

71

72

done = threading.Event()

72

done = threading.Event()

73

cases = queue.Queue(maxsize=10 * JOB)

73

cases = queue.Queue(maxsize=10 * JOB)

74

results = queue.Queue()

74

results = queue.Queue()

75

76

77

def worker():

77

def worker():

78

while not done.is_set():

78

while not done.is_set():

79

c = cases.get()

79

c = cases.get()

80

if c is None:

80

if c is None:

81

return

81

return

82

try:

82

try:

83

res = process(c)

83

res = process(c)

84

results.put((c, res))

84

results.put((c, res))

85

except Exception as exc:

85

except Exception as exc:

86

print('processing-failed: %s %s' % (c, exc), file=sys.stderr)

86

print('processing-failed: %s %s' % (c, exc), file=sys.stderr)

87

c = (c[0], c[2], c[1])

87

c = (c[0], c[2], c[1])

88

try:

88

try:

89

res = process(c)

89

res = process(c)

90

results.put((c, res))

90

results.put((c, res))

91

except Exception as exc:

91

except Exception as exc:

92

print('processing-failed: %s %s' % (c, exc), file=sys.stderr)

92

print('processing-failed: %s %s' % (c, exc), file=sys.stderr)

93

94

95

SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')

95

SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')

96

97

98

CMD_BASE = (

98

CMD_BASE = (

99

HG_BIN,

99

HG_BIN,

100

'debugdiscovery',

100

'debugdiscovery',

101

'--template',

101

'--template',

102

'json',

102

'json',

103

'--config',

103

'--config',

104

'extensions.subset=%s' % SUBSET_PATH,

104

'extensions.subset=%s' % SUBSET_PATH,

105

)

105

)

106

# '--local-as-revs "$left" --local-as-revs "$right"'

106

# '--local-as-revs "$left" --local-as-revs "$right"'

107

# > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt

107

# > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt

108

# )

108

# )

109

110

111

def to_revsets(case):

111

def to_revsets(case):

112

t = case[0]

112

t = case[0]

113

if t == 'scratch':

113

if t == 'scratch':

114

return 'not scratch(all(), %d, "%d")' % (case[1], case[2])

114

return 'not scratch(all(), %d, "%d")' % (case[1], case[2])

115

elif t == 'randomantichain':

115

elif t == 'randomantichain':

116

return '::randomantichain(all(), "%d")' % case[1]

116

return '::randomantichain(all(), "%d")' % case[1]

117

elif t == 'rev':

117

elif t == 'rev':

118

return '::%d' % case[1]

118

return '::%d' % case[1]

119

else:

119

else:

120

assert False

120

assert False

121

122

123

def process(case):

123

def process(case):

124

(repo, left, right) = case

124

(repo, left, right) = case

125

cmd = list(CMD_BASE)

125

cmd = list(CMD_BASE)

126

cmd.append('-R')

126

cmd.append('-R')

127

cmd.append(repo[0])

127

cmd.append(repo[0])

128

cmd.append('--local-as-revs')

128

cmd.append('--local-as-revs')

129

cmd.append(to_revsets(left))

129

cmd.append(to_revsets(left))

130

cmd.append('--remote-as-revs')

130

cmd.append('--remote-as-revs')

131

cmd.append(to_revsets(right))

131

cmd.append(to_revsets(right))

132

s = subprocess.Popen(cmd, stdout=subprocess.PIPE)

132

s = subprocess.Popen(cmd, stdout=subprocess.PIPE)

133

out, err = s.communicate()

133

out, err = s.communicate()

134

return json.loads(out)[0]

134

return json.loads(out)[0]

135

136

137

def interesting_boundary(res):

137

def interesting_boundary(res):

138

"""check if a case is interesting or not

138

"""check if a case is interesting or not

139

140

For now we are mostly interrested in case were we do multiple roundstrip

140

For now we are mostly interrested in case were we do multiple roundstrip

141

and where the boundary is somewhere in the middle of the undecided set.

141

and where the boundary is somewhere in the middle of the undecided set.

142

143

Ideally, we would make this configurable, but this is not a focus for now

143

Ideally, we would make this configurable, but this is not a focus for now

144

145

return None or (round-trip, undecided-common, undecided-missing)

145

return None or (round-trip, undecided-common, undecided-missing)

146

"""

146

"""

147

roundtrips = res["total-roundtrips"]

147

roundtrips = res["total-roundtrips"]

148

if roundtrips <= 1:

148

if roundtrips <= 1:

149

return None

149

return None

150

total_revs = res["nb-revs"]

151

common_revs = res["nb-revs-common"]

152

missing_revs = res["nb-revs-missing"]

150

undecided_common = res["nb-ini_und-common"]

153

undecided_common = res["nb-ini_und-common"]

151

undecided_missing = res["nb-ini_und-missing"]

154

undecided_missing = res["nb-ini_und-missing"]

152

if undecided_common == 0:

155

if undecided_common == 0:

153

return None

156

return None

154

if undecided_missing == 0:

157

if undecided_missing == 0:

155

return None

158

return None

156

return (roundtrips, undecided_common, undecided_missing)

159

return (

160

roundtrips,

161

undecided_common,

162

undecided_missing,

163

total_revs,

164

common_revs,

165

missing_revs,

166

)

157

167

158

168

159

def end(*args, **kwargs):

169

def end(*args, **kwargs):

160

done.set()

170

done.set()

161

171

162

172

163

def format_case(case):

173

def format_case(case):

164

return '-'.join(str(s) for s in case)

174

return '-'.join(str(s) for s in case)

165

175

166

176

167

signal.signal(signal.SIGINT, end)

177

signal.signal(signal.SIGINT, end)

168

178

169

for i in range(JOB):

179

for i in range(JOB):

170

threading.Thread(target=worker).start()

180

threading.Thread(target=worker).start()

171

181

172

nb_cases = 0

182

nb_cases = 0

173

while not done.is_set():

183

while not done.is_set():

174

repo = random.choice(repos)

184

repo = random.choice(repos)

175

left = pick_one(repo)

185

left = pick_one(repo)

176

right = pick_one(repo)

186

right = pick_one(repo)

177

cases.put((repo, left, right))

187

cases.put((repo, left, right))

178

while not results.empty():

188

while not results.empty():

179

# results has a single reader so this is fine

189

# results has a single reader so this is fine

180

c, res = results.get_nowait()

190

c, res = results.get_nowait()

181

boundary = interesting_boundary(res)

191

boundary = interesting_boundary(res)

182

if boundary is not None:

192

if boundary is not None:

183

print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)

193

print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)

184

sys.stdout.flush()

194

sys.stdout.flush()

185

195

186

nb_cases += 1

196

nb_cases += 1

187

if not nb_cases % 100:

197

if not nb_cases % 100:

188

print('[%d cases generated]' % nb_cases, file=sys.stderr)

198

print('[%d cases generated]' % nb_cases, file=sys.stderr)

189

199

190

for i in range(JOB):

200

for i in range(JOB):

191

try:

201

try:

192

cases.put_nowait(None)

202

cases.put_nowait(None)

193

except queue.Full:

203

except queue.Full:

194

pass

204

pass

195

205

196

print('[%d cases generated]' % nb_cases, file=sys.stderr)

206

print('[%d cases generated]' % nb_cases, file=sys.stderr)

197

print('[ouput generation is over]' % nb_cases, file=sys.stderr)

207

print('[ouput generation is over]' % nb_cases, file=sys.stderr)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             #!/usr/bin/env python3
             # Search for interesting discovery instances
             #
             #  search-discovery-case REPO [REPO]…
             #
             # This uses a subsetmaker extension (next to this script) to generate a
             # stream of random discovery instances. When interesting cases are
             # discovered, information about them is printed on stdout.
             import json
             import os
             import queue
             import random
             import signal
             import subprocess
             import sys
             import threading
             # Derive paths from this script's own location: HG_REPO is the directory
             # two levels above the script's directory (presumably the root of the
             # Mercurial checkout this script ships in -- confirm).
             this_script = os.path.abspath(sys.argv[0])
             this_dir = os.path.dirname(this_script)
             hg_dir = os.path.join(this_dir, '..', '..')
             HG_REPO = os.path.normpath(hg_dir)
             # The `hg` entry found directly under HG_REPO; used as the program name
             # of every command spawned by this script.
             HG_BIN = os.path.join(HG_REPO, 'hg')
             # Number of worker threads (also scales the `cases` queue size). Read
             # from the NUMBER_OF_PROCESSORS environment variable, 8 when unset.
             JOB = int(os.environ.get('NUMBER_OF_PROCESSORS', 8))
             # The kinds of random subset that pick_one() can draw.
             SLICING = ('scratch', 'randomantichain', 'rev')
             def nb_revs(repo_path):
                 """Return the `{rev}` value of `tip` in the repository at `repo_path`.

                 Runs `hg --repository REPO log --template '{rev}' --rev tip` and
                 parses whatever the command wrote on its standard output with `int()`.
                 """
                 target = [HG_BIN, '--repository', repo_path]
                 query = ['log', '--template', '{rev}', '--rev', 'tip']
                 proc = subprocess.Popen(target + query, stdout=subprocess.PIPE)
                 stdout_data, _ = proc.communicate()
                 return int(stdout_data)
             # Every command-line argument is a repository path; pair each of them
             # with the value reported by nb_revs() so pick_one() can size its picks.
             repos = []
             for repo in sys.argv[1:]:
                 size = nb_revs(repo)
                 repos.append((repo, size))
             if not repos:
                 # Without any repository, random.choice(repos) would raise in the
                 # main loop after the worker threads were started, leaving them
                 # blocked on the empty queue. Bail out early instead.
                 print(
                     'usage: %s REPO [REPO]...' % os.path.basename(sys.argv[0]),
                     file=sys.stderr,
                 )
                 sys.exit(2)
             def pick_one(repo):
                 """Draw one random subset description for `repo`.

                 Only `repo[1]` (the number stored next to the repository path) is
                 read. A kind is drawn from SLICING and a seed from [0, 100000], then:

                 * 'scratch': returns ('scratch', nb, seed) with nb drawn between
                   30% and 70% of `repo[1]`,
                 * 'randomantichain': returns ('randomantichain', seed),
                 * 'rev': returns ('rev', rev) with rev drawn between 30% of
                   `repo[1]` and `repo[1]` (the seed is drawn but unused).
                 """
                 kind = random.choice(SLICING)
                 seed = random.randint(0, 100000)
                 if kind == 'scratch':
                     low = int(repo[1] * 0.3)
                     high = int(repo[1] * 0.7)
                     return ('scratch', random.randint(low, high), seed)
                 if kind == 'randomantichain':
                     return ('randomantichain', seed)
                 if kind == 'rev':
                     low = int(repo[1] * 0.3)
                     high = int(repo[1])
                     return ('rev', random.randint(low, high))
                 # SLICING only holds the three kinds handled above
                 assert False
             # Set by the SIGINT handler (`end`); both the producer loop and the
             # workers poll it to know when to stop.
             done = threading.Event()
             # Pending cases for the workers; bounded to 10 entries per worker.
             cases = queue.Queue(maxsize=10 * JOB)
             # (case, result) pairs pushed by the workers; unbounded.
             results = queue.Queue()
             def worker():
                 """Thread body: run cases from `cases` until told to stop.

                 The loop ends when `done` is set or when a `None` sentinel is fetched.
                 Each case `(repo, left, right)` is processed twice: first as received,
                 then with `left` and `right` swapped. Every successful run is pushed
                 on `results` as `(case, result)`; a failing run is reported on stderr
                 and does not stop the thread.
                 """
                 while not done.is_set():
                     c = cases.get()
                     if c is None:
                         return
                     for swap_sides in (False, True):
                         if swap_sides:
                             # second pass: same repository, sides exchanged
                             c = (c[0], c[2], c[1])
                         try:
                             res = process(c)
                             results.put((c, res))
                         except Exception as exc:
                             print(
                                 'processing-failed: %s %s' % (c, exc),
                                 file=sys.stderr,
                             )
             # Path of the subsetmaker extension inside HG_REPO; it is loaded through
             # the `extensions.subset` config option below (presumably it provides the
             # `scratch()` / `randomantichain()` revsets used by to_revsets -- confirm).
             SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')
             # Common prefix of every `hg debugdiscovery` invocation: JSON output with
             # the subsetmaker extension enabled. process() appends the repository and
             # the two revsets.
             CMD_BASE = (
                 HG_BIN,
                 'debugdiscovery',
                 '--template',
                 'json',
                 '--config',
                 'extensions.subset=%s' % SUBSET_PATH,
             )
             #    '--local-as-revs "$left" --local-as-revs "$right"'
             #    > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt
             #        )
             def to_revsets(case):
                 """Translate a subset description into a revset string.

                 `case` is a tuple whose first item names the kind of subset:

                 * ('scratch', nb, seed)      -> 'not scratch(all(), nb, "seed")'
                 * ('randomantichain', seed)  -> '::randomantichain(all(), "seed")'
                 * ('rev', rev)               -> '::rev'
                 """
                 kind = case[0]
                 if kind == 'scratch':
                     nb, seed = case[1], case[2]
                     return 'not scratch(all(), %d, "%d")' % (nb, seed)
                 if kind == 'randomantichain':
                     return '::randomantichain(all(), "%d")' % case[1]
                 if kind == 'rev':
                     return '::%d' % case[1]
                 # no other kind is ever generated
                 assert False
             def process(case):
                 """Run `hg debugdiscovery` for one case and return its first JSON entry.

                 `case` is `(repo, left, right)`: `repo[0]` is passed to `-R`, `left`
                 becomes the `--local-as-revs` revset and `right` the
                 `--remote-as-revs` one. The command's standard output is decoded as
                 JSON and the first element of the decoded value is returned.
                 """
                 repo, left, right = case
                 cmd = list(CMD_BASE) + ['-R', repo[0]]
                 cmd += ['--local-as-revs', to_revsets(left)]
                 cmd += ['--remote-as-revs', to_revsets(right)]
                 proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
                 stdout_data, _ = proc.communicate()
                 return json.loads(stdout_data)[0]
             def interesting_boundary(res):
                 """Check whether a discovery result is interesting.

                 For now we are mostly interested in cases where discovery needs
                 multiple round-trips and where the boundary is somewhere in the middle
                 of the undecided set, i.e. the initial undecided set holds both common
                 and missing revisions.

                 Ideally, we would make this configurable, but this is not a focus for
                 now.

                 `res` is a mapping providing the keys read below.

                 Return None when the case is not interesting, otherwise the tuple
                 (round-trips, undecided-common, undecided-missing, total-revs,
                 common-revs, missing-revs).
                 """
                 roundtrips = res["total-roundtrips"]
                 if roundtrips <= 1:
                     return None
                 total_revs = res["nb-revs"]
                 common_revs = res["nb-revs-common"]
                 missing_revs = res["nb-revs-missing"]
                 undecided_common = res["nb-ini_und-common"]
                 undecided_missing = res["nb-ini_und-missing"]
                 # an undecided set that is entirely common or entirely missing has no
                 # boundary inside it
                 if undecided_common == 0:
                     return None
                 if undecided_missing == 0:
                     return None
                 return (
                     roundtrips,
                     undecided_common,
                     undecided_missing,
                     total_revs,
                     common_revs,
                     missing_revs,
                 )
             def end(*args, **kwargs):
                 """Signal handler: set the `done` event; all arguments are ignored."""
                 done.set()
             def format_case(case):
                 """Render a subset description as its items joined with '-'."""
                 return '-'.join(map(str, case))
             # Ctrl-C does not kill the script directly: the handler only sets `done`
             # and the loops notice it.
             signal.signal(signal.SIGINT, end)
             # Start JOB worker threads consuming the `cases` queue.
             for i in range(JOB):
                 threading.Thread(target=worker).start()
             # Number of cases queued so far.
             nb_cases = 0
             # Producer loop: queue one random case per iteration, then report every
             # result the workers have delivered in the meantime.
             while not done.is_set():
                 repo = random.choice(repos)
                 left = pick_one(repo)
                 right = pick_one(repo)
                 # `cases` is bounded, so this waits while the workers are behind
                 cases.put((repo, left, right))
                 while not results.empty():
                     # results has a single reader so this is fine
                     c, res = results.get_nowait()
                     boundary = interesting_boundary(res)
                     if boundary is not None:
                         # one line per interesting case: repository path, both subset
                         # descriptions, then the values from interesting_boundary()
                         print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)
                         sys.stdout.flush()
                 nb_cases += 1
                 # progress report on stderr every 100 cases
                 if not nb_cases % 100:
                     print('[%d cases generated]' % nb_cases, file=sys.stderr)
             # Wake the workers up with one `None` sentinel each. A full queue is not
             # an issue: workers then still have a case to fetch and will see `done`
             # set once they are finished with it.
             for i in range(JOB):
                 try:
                     cases.put_nowait(None)
                 except queue.Full:
                     pass
             print('[%d cases generated]' % nb_cases, file=sys.stderr)
             # This message has no placeholder: formatting it with `% nb_cases` raised
             # TypeError ("not all arguments converted during string formatting").
             print('[ouput generation is over]', file=sys.stderr)