test-sparse-revlog: commit the repo content in a single in-mem transaction...
marmoute - r53336:83f87912 default
@@ -1,259 +1,281 @@
 #!/usr/bin/env python3
 #
 # generate-branchy-bundle - generate a bundle for a "large" branchy repository
 #
 # Copyright 2018 Octobus, contact@octobus.net
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 #
 # This script generates a repository suitable for testing delta computation
 # strategies.
 #
 # The repository updates a single "large" file with many updates. One fixed
 # part of the file always gets updated while the rest of the lines get updated
 # over time. This update happens over many topological branches, some getting
 # merged back.
 #
+# Running with `chg` in your path and `CHGHG` set is recommended for speed.


 import hashlib
 import os
 import shutil
 import subprocess
 import sys
 import tempfile

+import mercurial.context
+import mercurial.hg
+import mercurial.ui
+
 BUNDLE_NAME = 'big-file-churn.hg'

 # constants for generating the repository
 NB_CHANGESET = 5000
 PERIOD_MERGING = 8
 PERIOD_BRANCHING = 7
 MOVE_BACK_MIN = 3
 MOVE_BACK_RANGE = 5

 # constants for generating the large file we keep updating
 #
 # At each revision, the beginning of the file changes,
 # and a set of other lines change too.
 FILENAME = 'SPARSE-REVLOG-TEST-FILE'
 NB_LINES = 10500
 ALWAYS_CHANGE_LINES = 500
 OTHER_CHANGES = 300


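Taken together, these constants imply that each revision rewrites only a small slice of the file, which is what makes the resulting deltas interesting. A rough back-of-the-envelope check (illustrative, not part of the patch):

    # Illustrative only: approximate per-revision churn implied by the
    # constants above (the exact count varies slightly per revision).
    changed = ALWAYS_CHANGE_LINES + (NB_LINES - ALWAYS_CHANGE_LINES) // OTHER_CHANGES
    print('~%d of %d lines rewritten per revision (~%.0f%%)'
          % (changed, NB_LINES, 100.0 * changed / NB_LINES))
    # -> ~533 of 10500 lines rewritten per revision (~5%)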
 def build_graph():
     heads = {0}
     graph = {0: (None, None)}
     for idx in range(1, NB_CHANGESET + 1):
         p, _ = parents = [idx - 1, None]
         if (idx % PERIOD_BRANCHING) == 0:
             back = MOVE_BACK_MIN + (idx % MOVE_BACK_RANGE)
             for _ in range(back):
                 p = graph.get(p, (p,))[0]
             parents[0] = p
         if (idx % PERIOD_MERGING) == 0:
             parents[1] = min(heads)
         for p in parents:
             heads.discard(p)
         heads.add(idx)
         graph[idx] = tuple(parents)
     return graph


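For intuition about the shape of graph build_graph() produces, here is the same logic run with shrunken constants so the output stays readable (illustrative sketch, not part of the patch):

    # Illustrative only: same logic as build_graph() above, but with tiny
    # constants so the whole parent graph can be printed and inspected.
    def tiny_graph(nb=12, branching=7, merging=8, back_min=3, back_range=5):
        heads = {0}
        graph = {0: (None, None)}
        for idx in range(1, nb + 1):
            p, _ = parents = [idx - 1, None]
            if idx % branching == 0:
                for _ in range(back_min + (idx % back_range)):
                    p = graph.get(p, (p,))[0]
                parents[0] = p
            if idx % merging == 0:
                parents[1] = min(heads)
            for p in parents:
                heads.discard(p)
            heads.add(idx)
            graph[idx] = tuple(parents)
        return graph

    for rev, parents in tiny_graph().items():
        # revision 7 branches off an older ancestor, revision 8 is a merge
        print(rev, parents)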
 GRAPH = build_graph()


 def nextcontent(previous_content):
     """utility to produce a new file content from the previous one"""
     return hashlib.md5(previous_content).hexdigest().encode('ascii')


 def filecontent(iteridx, oldcontent):
     """generate a new file content

     The content is generated according to the iteration index and the
     previous content"""

     # initial call
     if iteridx == 0:
         current = b''
     else:
         current = b"%d" % iteridx

     for idx in range(NB_LINES):
         do_change_line = True
         if oldcontent is not None and ALWAYS_CHANGE_LINES < idx:
             do_change_line = not ((idx - iteridx) % OTHER_CHANGES)

         if do_change_line:
             to_write = current + b'\n'
             current = nextcontent(current)
         else:
             to_write = oldcontent[idx]
         yield to_write


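To see the churn pattern filecontent() produces, one can compare two consecutive generated revisions; most lines past the always-changing prefix are carried over (illustrative, assumes the definitions above):

    # Illustrative only: consecutive revisions share most of their lines.
    rev1 = list(filecontent(1, None))
    rev2 = list(filecontent(2, rev1))
    unchanged = sum(a == b for a, b in zip(rev1, rev2))
    print('%d of %d lines carried over' % (unchanged, NB_LINES))
    # The first ALWAYS_CHANGE_LINES lines always differ; past that, only
    # about one line in OTHER_CHANGES is rewritten.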
 def merge_content(base, left, right):
     """merge two file contents to produce a new one

     Use the unambiguous update from either side when possible, and produce a
     new line whenever a merge is needed. Similar to what the manifest would do.
     """
     for old, left, right in zip(base, left, right):
         if old == left and old == right:
             yield old
         elif old == left and old != right:
             yield right
         elif old != left and old == right:
             yield left
         else:
             yield nextcontent(left + right)


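A small example of the line-wise three-way merge (illustrative, assumes merge_content() as defined above):

    # Illustrative only: the changed side wins line by line; a real conflict
    # would synthesize a fresh line via nextcontent(left + right).
    base = [b'a', b'b', b'c']
    left = [b'a', b'B', b'c']   # left rewrote the second line
    right = [b'a', b'b', b'C']  # right rewrote the third line
    print(list(merge_content(base, left, right)))
    # -> [b'a', b'B', b'C']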
 def ancestors(graph, rev):
     """return the set of ancestors of revision <rev>"""
     to_proceed = {rev}
     seen = set(to_proceed)
     while to_proceed:
         current = to_proceed.pop()
         for p in graph[current]:
             if p is None:
                 continue
             if p in seen:
                 continue
             to_proceed.add(p)
             seen.add(p)
     return seen


 def gca(graph, left, right):
     """find the greatest common ancestor of left and right

     Note that the algorithm is naive and O(N²) when run on all merges, but
     this should not be too much of an issue at the current scale.
     """
     return max(ancestors(graph, left) & ancestors(graph, right))


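Because revisions are numbered in topological order, max() over the common ancestor set is enough to pick the most recent one. A tiny check (illustrative, assumes ancestors() and gca() above):

    # Illustrative only: revisions 3 and 4 both descend from revision 1,
    # their latest common ancestor.
    g = {0: (None, None), 1: (0, None), 2: (1, None), 3: (2, None), 4: (1, None)}
    print(gca(g, 3, 4))  # -> 1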
 def make_one_content_fn(idx, base, left, right):
     """build a function that builds the content on demand

     The dependencies are kept as references to make sure they are not
     garbage-collected until we use them. Once the current content has been
     computed, we drop those references to allow them to be garbage-collected.
     """

     def content_fn(idx=idx, base=base, left=left, right=right):
         if left is None:
             new = filecontent(idx, None)
         elif base is None:
             new = filecontent(idx, left())
         else:
             merged = merge_content(base(), left(), right())
             new = filecontent(idx, list(merged))
         return list(new)

     del idx
     del base
     del left
     del right

     value = None
     cf = [content_fn]
     del content_fn

     def final_fn():
         nonlocal value
         if value is None:
             content_fn = cf.pop()
             value = list(content_fn())
             del content_fn
         return value

     return final_fn


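The closure gymnastics above amount to a memoized thunk that releases its inputs after first evaluation, so parent contents can be garbage-collected once used. The same pattern in isolation (illustrative sketch, not part of the patch):

    # Illustrative only: a memoized thunk that drops its inputs once computed.
    def make_lazy(compute, *inputs):
        box = [(compute, inputs)]  # sole reference, kept until first call
        value = None

        def thunk():
            nonlocal value
            if value is None:
                fn, args = box.pop()  # after this, inputs become collectable
                value = fn(*args)
            return value

        return thunk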
 def build_content_graph(graph):
     """produce file content for all revisions

     The content will be generated on demand and cached. Clean up the
     dictionary as you use it to reduce memory usage.
     """
     content = {}
     for idx, (p1, p2) in graph.items():
         base = left = right = None
         if p1 is not None:
             left = content[p1]
         if p2 is not None:
             right = content[p2]
             base_rev = gca(graph, p1, p2)
             base = content[base_rev]
         content[idx] = make_one_content_fn(idx, base, left, right)
     return content


 CONTENT = build_content_graph(GRAPH)


 def hg(command, *args):
     """call a mercurial command with appropriate config and arguments"""
     env = os.environ.copy()
     if 'CHGHG' in env:
         full_cmd = ['chg']
     else:
         full_cmd = ['hg']
     full_cmd.append('--quiet')
     full_cmd.append(command)
     if command == 'commit':
         # reproducible commit metadata
         full_cmd.extend(['--date', '0 0', '--user', 'test'])
     elif command == 'merge':
         # avoid conflicts by picking the local variant
         full_cmd.extend(['--tool', ':merge-local'])
     full_cmd.extend(args)
     env['HGRCPATH'] = ''
     return subprocess.check_call(full_cmd, env=env)


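As the new header comment notes, the wrapper transparently switches to chg for speed when CHGHG is set (illustrative; the path below is hypothetical):

    # Illustrative only: CHGHG is read from the environment at call time.
    os.environ['CHGHG'] = '/usr/bin/hg'  # hypothetical path to an hg executable
    hg('version')  # runs: chg --quiet version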
+def write_repo(path):
+    """write repository content in memory"""
+    repo = mercurial.hg.repository(
+        mercurial.ui.ui.load(),
+        path=path.encode('utf-8'),
+    )
+    nodemap = {None: repo.nodeconstants.nullid}
+    with repo.lock(), repo.transaction(b'bundle-generation'):
+        for idx, (p1, p2) in GRAPH.items():
+            if sys.stdout.isatty():
+                print("generating commit #%d/%d" % (idx, NB_CHANGESET))
+
+            file_fn = lambda repo, memctx, path: mercurial.context.memfilectx(
+                repo,
+                memctx,
+                path,
+                data=b''.join(CONTENT.pop(idx)()),
+            )
+
+            mc = mercurial.context.memctx(
+                repo,
+                (nodemap[p1], nodemap[p2]),
+                b'commit #%d' % idx if idx else b'initial commit',
+                [FILENAME.encode('ascii')],
+                file_fn,
+                user=b"test",
+                date=(0, 0),
+            )
+            nodemap[idx] = repo.commitctx(mc)
+
+
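This is the heart of the change: history is now created through in-memory commit contexts inside a single lock and transaction, instead of thousands of `hg commit` invocations and working-copy updates. A stripped-down standalone sketch of the same API usage (illustrative; assumes `/tmp/demo-repo` was already created with `hg init`):

    # Illustrative only: one memctx commit inside a single transaction.
    import mercurial.context
    import mercurial.hg
    import mercurial.ui

    repo = mercurial.hg.repository(
        mercurial.ui.ui.load(), path=b'/tmp/demo-repo'
    )
    with repo.lock(), repo.transaction(b'demo'):
        def file_fn(repo, memctx, path):
            return mercurial.context.memfilectx(
                repo, memctx, path, data=b'hello\n'
            )

        mc = mercurial.context.memctx(
            repo,
            (repo.nodeconstants.nullid, repo.nodeconstants.nullid),
            b'initial commit',
            [b'hello.txt'],
            file_fn,
            user=b'test',
            date=(0, 0),
        )
        node = repo.commitctx(mc)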
 def run(target):
     tmpdir = tempfile.mkdtemp(prefix='tmp-hg-test-big-file-bundle-')
     try:
         os.chdir(tmpdir)
-        hg('init')
-        for idx, (p1, p2) in GRAPH.items():
-            if sys.stdout.isatty():
-                print("generating commit #%d/%d" % (idx, NB_CHANGESET))
-            if p1 is not None and p1 != idx - 1:
-                hg('update', "%d" % p1)
-            if p2 is not None:
-                hg('merge', "%d" % p2)
-            with open(FILENAME, 'wb') as f:
-                # pop the value to let it be garbage-collected eventually.
-                for line in CONTENT.pop(idx)():
-                    f.write(line)
-            if idx == 0:
-                hg('add', FILENAME)
-                hg('commit', '--addremove', '--message', 'initial commit')
-            else:
-                hg('commit', '--message', 'commit #%d' % idx)
+        hg(
+            'init',
+            '--config',
+            'format.maxchainlen=%d' % NB_CHANGESET,
+        )
+        write_repo(tmpdir)
         hg('bundle', '--all', target, '--config', 'devel.bundle.delta=p1')
         with open(target, 'rb') as bundle:
             data = bundle.read()
             digest = hashlib.md5(data).hexdigest()
         with open(target + '.md5', 'wb') as md5file:
             md5file.write(digest.encode('ascii') + b'\n')
         if sys.stdout.isatty():
             print('bundle generated at "%s" md5: %s' % (target, digest))

     finally:
         shutil.rmtree(tmpdir)
     return 0


 if __name__ == '__main__':
     orig = os.path.realpath(os.path.dirname(sys.argv[0]))
     target = os.path.join(orig, os.pardir, 'cache', BUNDLE_NAME)
     sys.exit(run(target))
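The script stores the bundle next to an .md5 companion file, so consumers can cheaply check that a cached bundle is the expected one (illustrative, hypothetical paths):

    # Illustrative only: verify a cached bundle against its recorded digest.
    import hashlib

    with open('big-file-churn.hg', 'rb') as f:
        digest = hashlib.md5(f.read()).hexdigest()
    with open('big-file-churn.hg.md5') as f:
        assert f.read().strip() == digest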