test-sparse-revlog: commit the repo content in a single in-mem transaction...
marmoute
r53336:83f87912 default
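The gist of the change: instead of shelling out to the `hg` CLI once per changeset, the script now creates every commit in memory with `mercurial.context.memctx` under a single lock and transaction. A minimal sketch of that pattern, condensed from the diff below (the repository path and file data are placeholders):

import mercurial.context
import mercurial.hg
import mercurial.ui

def commit_in_memory(path):
    # open an existing repository; `path` is a bytes placeholder here
    repo = mercurial.hg.repository(mercurial.ui.ui.load(), path=path)
    with repo.lock(), repo.transaction(b'demo'):
        # callback producing the content of each file touched by the commit
        def file_fn(repo, memctx, fpath):
            return mercurial.context.memfilectx(
                repo, memctx, fpath, data=b'hello\n'
            )

        mc = mercurial.context.memctx(
            repo,
            (repo.nodeconstants.nullid, repo.nodeconstants.nullid),
            b'initial commit',
            [b'FILE'],
            file_fn,
            user=b'test',
            date=(0, 0),
        )
        return repo.commitctx(mc)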
@@ -1,259 +1,281 @@
 #!/usr/bin/env python3
 #
 # generate-branchy-bundle - generate a bundle for a "large" branchy repository
 #
 # Copyright 2018 Octobus, contact@octobus.net
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 #
 # This script generates a repository suitable for testing delta computation
 # strategies.
 #
 # The repository updates a single "large" file with many updates. One fixed
 # part of the file always gets updated while the rest of the lines get updated
 # over time. These updates happen over many topological branches, some getting
 # merged back.
-#
-# Running with `chg` in your path and `CHGHG` set is recommended for speed.
 
 
 import hashlib
 import os
 import shutil
 import subprocess
 import sys
 import tempfile
 
+import mercurial.context
+import mercurial.hg
+import mercurial.ui
+
 BUNDLE_NAME = 'big-file-churn.hg'
 
 # constants for generating the repository
 NB_CHANGESET = 5000
 PERIOD_MERGING = 8
 PERIOD_BRANCHING = 7
 MOVE_BACK_MIN = 3
 MOVE_BACK_RANGE = 5
 
 # constants for generating the large file we keep updating
 #
 # At each revision, the beginning of the file changes,
 # and a set of other lines changes too.
 FILENAME = 'SPARSE-REVLOG-TEST-FILE'
 NB_LINES = 10500
 ALWAYS_CHANGE_LINES = 500
 OTHER_CHANGES = 300
 
 
 def build_graph():
     heads = {0}
     graph = {0: (None, None)}
     for idx in range(1, NB_CHANGESET + 1):
         p, _ = parents = [idx - 1, None]
         if (idx % PERIOD_BRANCHING) == 0:
             back = MOVE_BACK_MIN + (idx % MOVE_BACK_RANGE)
             for _ in range(back):
                 p = graph.get(p, (p,))[0]
             parents[0] = p
         if (idx % PERIOD_MERGING) == 0:
             parents[1] = min(heads)
         for p in parents:
             heads.discard(p)
         heads.add(idx)
         graph[idx] = tuple(parents)
     return graph
 
 
 GRAPH = build_graph()
 
 
 def nextcontent(previous_content):
     """utility to produce a new file content from the previous one"""
     return hashlib.md5(previous_content).hexdigest().encode('ascii')
 
 
 def filecontent(iteridx, oldcontent):
     """generate a new file content
 
     The content is generated according to the iteration index and the
     previous content"""
 
     # initial call
     if iteridx == 0:
         current = b''
     else:
         current = b"%d" % iteridx
 
     for idx in range(NB_LINES):
         do_change_line = True
         if oldcontent is not None and ALWAYS_CHANGE_LINES < idx:
             do_change_line = not ((idx - iteridx) % OTHER_CHANGES)
 
         if do_change_line:
             to_write = current + b'\n'
             current = nextcontent(current)
         else:
             to_write = oldcontent[idx]
         yield to_write
 
 
 def merge_content(base, left, right):
     """merge two file contents to produce a new one
 
     Uses the unambiguous update from either side when possible, and produces
     a new line whenever a merge is needed. Similar to what the manifest would
     do.
     """
     for old, left, right in zip(base, left, right):
         if old == left and old == right:
             yield old
         elif old == left and old != right:
             yield right
         elif old != left and old == right:
             yield left
         else:
             yield nextcontent(left + right)
 
 
 def ancestors(graph, rev):
     """return the set of ancestors of revision <rev>"""
     to_proceed = {rev}
     seen = set(to_proceed)
     while to_proceed:
         current = to_proceed.pop()
         for p in graph[current]:
             if p is None:
                 continue
             if p in seen:
                 continue
             to_proceed.add(p)
             seen.add(p)
     return seen
 
 
 def gca(graph, left, right):
     """find the greatest common ancestor of left and right
 
     Note that the algorithm is naive and N² when run on all merges; however,
     this should not be too much of an issue at the current scale.
     """
     return max(ancestors(graph, left) & ancestors(graph, right))
 
 
 def make_one_content_fn(idx, base, left, right):
     """build a function that builds the content on demand
 
     The dependencies are kept as references to make sure they are not
     garbage-collected until we use them. Once we have computed the current
     content, we drop those references to allow them to be garbage-collected.
     """
 
     def content_fn(idx=idx, base=base, left=left, right=right):
         if left is None:
             new = filecontent(idx, None)
         elif base is None:
             new = filecontent(idx, left())
         else:
             merged = merge_content(base(), left(), right())
             new = filecontent(idx, list(merged))
         return list(new)
 
     del idx
     del base
     del left
     del right
 
     value = None
     cf = [content_fn]
     del content_fn
 
     def final_fn():
         nonlocal value
         if value is None:
             content_fn = cf.pop()
             value = list(content_fn())
             del content_fn
         return value
 
     return final_fn
 
 
 def build_content_graph(graph):
     """produce file content for all revisions
 
     The content will be generated on demand and cached. Clean up the
     dictionary as you use it to reduce memory usage.
     """
     content = {}
     for idx, (p1, p2) in graph.items():
         base = left = right = None
         if p1 is not None:
             left = content[p1]
         if p2 is not None:
             right = content[p2]
             base_rev = gca(graph, p1, p2)
             base = content[base_rev]
         content[idx] = make_one_content_fn(idx, base, left, right)
     return content
 
 
 CONTENT = build_content_graph(GRAPH)
 
 
 def hg(command, *args):
     """call a mercurial command with appropriate config and arguments"""
     env = os.environ.copy()
     if 'CHGHG' in env:
         full_cmd = ['chg']
     else:
         full_cmd = ['hg']
     full_cmd.append('--quiet')
     full_cmd.append(command)
     if command == 'commit':
         # reproducible commit metadata
         full_cmd.extend(['--date', '0 0', '--user', 'test'])
     elif command == 'merge':
         # avoid conflicts by picking the local variant
         full_cmd.extend(['--tool', ':merge-local'])
     full_cmd.extend(args)
     env['HGRCPATH'] = ''
     return subprocess.check_call(full_cmd, env=env)
 
 
+def write_repo(path):
+    """write repository content in memory"""
+    repo = mercurial.hg.repository(
+        mercurial.ui.ui.load(),
+        path=path.encode('utf-8'),
+    )
+    nodemap = {None: repo.nodeconstants.nullid}
+    with repo.lock(), repo.transaction(b'bundle-generation'):
+        for idx, (p1, p2) in GRAPH.items():
+            if sys.stdout.isatty():
+                print("generating commit #%d/%d" % (idx, NB_CHANGESET))
+
+            file_fn = lambda repo, memctx, path: mercurial.context.memfilectx(
+                repo,
+                memctx,
+                path,
+                data=b''.join(CONTENT.pop(idx)()),
+            )
+
+            mc = mercurial.context.memctx(
+                repo,
+                (nodemap[p1], nodemap[p2]),
+                b'commit #%d' % idx if idx else b'initial commit',
+                [FILENAME.encode('ascii')],
+                file_fn,
+                user=b"test",
+                date=(0, 0),
+            )
+            nodemap[idx] = repo.commitctx(mc)
+
+
 def run(target):
     tmpdir = tempfile.mkdtemp(prefix='tmp-hg-test-big-file-bundle-')
     try:
         os.chdir(tmpdir)
-        hg('init')
-        for idx, (p1, p2) in GRAPH.items():
-            if sys.stdout.isatty():
-                print("generating commit #%d/%d" % (idx, NB_CHANGESET))
-            if p1 is not None and p1 != idx - 1:
-                hg('update', "%d" % p1)
-            if p2 is not None:
-                hg('merge', "%d" % p2)
-            with open(FILENAME, 'wb') as f:
-                # pop the value to let it be garbage collected eventually.
-                for line in CONTENT.pop(idx)():
-                    f.write(line)
-            if idx == 0:
-                hg('add', FILENAME)
-                hg('commit', '--addremove', '--message', 'initial commit')
-            else:
-                hg('commit', '--message', 'commit #%d' % idx)
+        hg(
+            'init',
+            '--config',
+            'format.maxchainlen=%d' % NB_CHANGESET,
+        )
+        write_repo(tmpdir)
         hg('bundle', '--all', target, '--config', 'devel.bundle.delta=p1')
         with open(target, 'rb') as bundle:
             data = bundle.read()
             digest = hashlib.md5(data).hexdigest()
         with open(target + '.md5', 'wb') as md5file:
             md5file.write(digest.encode('ascii') + b'\n')
         if sys.stdout.isatty():
             print('bundle generated at "%s" md5: %s' % (target, digest))
 
     finally:
         shutil.rmtree(tmpdir)
     return 0
 
 
 if __name__ == '__main__':
     orig = os.path.realpath(os.path.dirname(sys.argv[0]))
     target = os.path.join(orig, os.pardir, 'cache', BUNDLE_NAME)
     sys.exit(run(target))
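As a hand-computed sanity check (not part of the script), the constants above make build_graph() branch every PERIOD_BRANCHING = 7 revisions and merge every PERIOD_MERGING = 8, so the first non-linear entries of GRAPH, mapping revision -> (p1, p2), are:

# rev 1..6: linear history               -> graph[n] = (n - 1, None)
# rev 7: branch, back = 3 + (7 % 5) = 5  -> graph[7] = (1, None)
# rev 8: merge, p2 = min(heads) = 6      -> graph[8] = (7, 6)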