@@ -1,259 +1,281 @@
 #!/usr/bin/env python3
 #
 # generate-branchy-bundle - generate a branch for a "large" branchy repository
 #
 # Copyright 2018 Octobus, contact@octobus.net
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 #
 # This script generates a repository suitable for testing delta computation
 # strategies.
 #
 # The repository updates a single "large" file with many updates. One fixed
 # part of the file always gets updated while the rest of the lines get updated
 # over time. This update happens over many topological branches, some getting
 # merged back.
+#
+# Running with `chg` in your path and `CHGHG` set is recommended for speed.
 
 
 import hashlib
 import os
 import shutil
 import subprocess
 import sys
 import tempfile
 
+import mercurial.context
+import mercurial.hg
+import mercurial.ui
+
 BUNDLE_NAME = 'big-file-churn.hg'
 
 # constants for generating the repository
 NB_CHANGESET = 5000
 PERIOD_MERGING = 8
 PERIOD_BRANCHING = 7
 MOVE_BACK_MIN = 3
 MOVE_BACK_RANGE = 5
 
 # constants for generating the large file we keep updating
 #
 # At each revision, the beginning of the file changes,
 # and a set of other lines changes too.
 FILENAME = 'SPARSE-REVLOG-TEST-FILE'
 NB_LINES = 10500
 ALWAYS_CHANGE_LINES = 500
 OTHER_CHANGES = 300
 
 
 def build_graph():
     heads = {0}
     graph = {0: (None, None)}
     for idx in range(1, NB_CHANGESET + 1):
         p, _ = parents = [idx - 1, None]
         if (idx % PERIOD_BRANCHING) == 0:
             back = MOVE_BACK_MIN + (idx % MOVE_BACK_RANGE)
             for _ in range(back):
                 p = graph.get(p, (p,))[0]
             parents[0] = p
         if (idx % PERIOD_MERGING) == 0:
             parents[1] = min(heads)
         for p in parents:
             heads.discard(p)
         heads.add(idx)
         graph[idx] = tuple(parents)
     return graph
 
 
 GRAPH = build_graph()
 
 
 def nextcontent(previous_content):
     """utility to produce a new file content from the previous one"""
     return hashlib.md5(previous_content).hexdigest().encode('ascii')
 
 
 def filecontent(iteridx, oldcontent):
     """generate a new file content
 
     The content is generated according to the iteration index and the previous
     content"""
 
     # initial call
     if iteridx == 0:
         current = b''
     else:
         current = b"%d" % iteridx
 
     for idx in range(NB_LINES):
         do_change_line = True
         if oldcontent is not None and ALWAYS_CHANGE_LINES < idx:
             do_change_line = not ((idx - iteridx) % OTHER_CHANGES)
 
         if do_change_line:
             to_write = current + b'\n'
             current = nextcontent(current)
         else:
             to_write = oldcontent[idx]
         yield to_write
 
 
 def merge_content(base, left, right):
     """merge two file contents to produce a new one
 
     Use the unambiguous update from either side when possible, and produce a
     new line whenever a merge is needed. Similar to what the manifest would do.
     """
     for old, left, right in zip(base, left, right):
         if old == left and old == right:
             yield old
         elif old == left and old != right:
             yield right
         elif old != left and old == right:
             yield left
         else:
             yield nextcontent(left + right)
 
 
 def ancestors(graph, rev):
     """return the set of ancestors of revision <rev>"""
     to_proceed = {rev}
     seen = set(to_proceed)
     while to_proceed:
         current = to_proceed.pop()
         for p in graph[current]:
             if p is None:
                 continue
             if p in seen:
                 continue
             to_proceed.add(p)
             seen.add(p)
     return seen
 
 
 def gca(graph, left, right):
     """find the greatest common ancestor of left and right
 
     Note that the algorithm is stupid and O(N²) when run on all merges;
     however, this should not be too much of an issue given the current scale.
     """
     return max(ancestors(graph, left) & ancestors(graph, right))
 
 
 def make_one_content_fn(idx, base, left, right):
     """build a function that builds the content on demand
 
     The dependencies are kept as references to make sure they are not
     garbage-collected until we use them. Once we computed the current content,
     we make sure to drop our references to allow them to be garbage collected.
     """
 
     def content_fn(idx=idx, base=base, left=left, right=right):
         if left is None:
             new = filecontent(idx, None)
         elif base is None:
             new = filecontent(idx, left())
         else:
             merged = merge_content(base(), left(), right())
             new = filecontent(idx, list(merged))
         return list(new)
 
     del idx
     del base
     del left
     del right
 
     value = None
     cf = [content_fn]
     del content_fn
 
     def final_fn():
         nonlocal value
         if value is None:
             content_fn = cf.pop()
             value = list(content_fn())
             del content_fn
         return value
 
     return final_fn
 
 
 def build_content_graph(graph):
     """produce file content for all revisions
 
     The content will be generated on demand and cached. Clean up the
     dictionary as you use it to reduce memory usage.
     """
     content = {}
     for idx, (p1, p2) in graph.items():
         base = left = right = None
         if p1 is not None:
             left = content[p1]
         if p2 is not None:
             right = content[p2]
             base_rev = gca(graph, p1, p2)
             base = content[base_rev]
         content[idx] = make_one_content_fn(idx, base, left, right)
     return content
 
 
 CONTENT = build_content_graph(GRAPH)
 
 
 def hg(command, *args):
     """call a mercurial command with appropriate config and arguments"""
     env = os.environ.copy()
     if 'CHGHG' in env:
         full_cmd = ['chg']
     else:
         full_cmd = ['hg']
     full_cmd.append('--quiet')
     full_cmd.append(command)
     if command == 'commit':
         # reproducible commit metadata
         full_cmd.extend(['--date', '0 0', '--user', 'test'])
     elif command == 'merge':
         # avoid conflicts by picking the local variant
         full_cmd.extend(['--tool', ':merge-local'])
     full_cmd.extend(args)
     env['HGRCPATH'] = ''
     return subprocess.check_call(full_cmd, env=env)
 
 
+def write_repo(path):
+    """write repository content in memory"""
+    repo = mercurial.hg.repository(
+        mercurial.ui.ui.load(),
+        path=path.encode('utf-8'),
+    )
+    nodemap = {None: repo.nodeconstants.nullid}
+    with repo.lock(), repo.transaction(b'bundle-generation'):
+        for idx, (p1, p2) in GRAPH.items():
+            if sys.stdout.isatty():
+                print("generating commit #%d/%d" % (idx, NB_CHANGESET))
+
+            file_fn = lambda repo, memctx, path: mercurial.context.memfilectx(
+                repo,
+                memctx,
+                path,
+                data=b''.join(CONTENT.pop(idx)()),
+            )
+
+            mc = mercurial.context.memctx(
+                repo,
+                (nodemap[p1], nodemap[p2]),
+                b'commit #%d' % idx if idx else b'initial commit',
+                [FILENAME.encode('ascii')],
+                file_fn,
+                user=b"test",
+                date=(0, 0),
+            )
+            nodemap[idx] = repo.commitctx(mc)
+
+
 def run(target):
     tmpdir = tempfile.mkdtemp(prefix='tmp-hg-test-big-file-bundle-')
     try:
         os.chdir(tmpdir)
-        hg('init')
-        for idx, (p1, p2) in GRAPH.items():
-            if sys.stdout.isatty():
-                print("generating commit #%d/%d" % (idx, NB_CHANGESET))
-            if p1 is not None and p1 != idx - 1:
-                hg('update', "%d" % p1)
-            if p2 is not None:
-                hg('merge', "%d" % p2)
-            with open(FILENAME, 'wb') as f:
-                # pop the value to let it be garbage collected eventually.
-                for line in CONTENT.pop(idx)():
-                    f.write(line)
-            if idx == 0:
-                hg('add', FILENAME)
-                hg('commit', '--addremove', '--message', 'initial commit')
-            else:
-                hg('commit', '--message', 'commit #%d' % idx)
+        hg(
+            'init',
+            '--config',
+            'format.maxchainlen=%d' % NB_CHANGESET,
+        )
+        write_repo(tmpdir)
 hg('bundle', '--all', target, '--config', 'devel.bundle.delta=p1')
         with open(target, 'rb') as bundle:
             data = bundle.read()
         digest = hashlib.md5(data).hexdigest()
         with open(target + '.md5', 'wb') as md5file:
             md5file.write(digest.encode('ascii') + b'\n')
         if sys.stdout.isatty():
             print('bundle generated at "%s" md5: %s' % (target, digest))
 
     finally:
         shutil.rmtree(tmpdir)
     return 0
 
 
 if __name__ == '__main__':
     orig = os.path.realpath(os.path.dirname(sys.argv[0]))
     target = os.path.join(orig, os.pardir, 'cache', BUNDLE_NAME)
     sys.exit(run(target))
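
To get a feel for the branchy topology that build_graph() produces, the same algorithm can be run with tiny constants. The sketch below re-declares the logic as a standalone function; the name tiny_graph and its default values are illustrative, not part of the script above:

    def tiny_graph(nb=20, branching=7, merging=8, back_min=3, back_range=5):
        # same algorithm as build_graph(), with the module constants as parameters
        heads = {0}
        graph = {0: (None, None)}
        for idx in range(1, nb + 1):
            p, _ = parents = [idx - 1, None]
            if idx % branching == 0:
                # walk a few generations back before branching off
                for _ in range(back_min + (idx % back_range)):
                    p = graph.get(p, (p,))[0]
                parents[0] = p
            if idx % merging == 0:
                # merge in the oldest head still open
                parents[1] = min(heads)
            for p in parents:
                heads.discard(p)
            heads.add(idx)
            graph[idx] = tuple(parents)
        return graph

    for rev, parents in tiny_graph().items():
        print(rev, parents)

With the defaults above, revision 7 branches off from revision 1 and revision 8 merges the oldest remaining head back in; the full-size constants repeat that churn pattern 5000 times.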
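Once the script has been run, the resulting bundle can be checked against the checksum that run() records next to it. A minimal sketch, assuming the default layout where both files end up in the cache directory; the BUNDLE path is an assumption to adjust to your checkout:

    import hashlib

    # path assumption: mirrors `target` in run(), i.e. the cache directory
    # next to the script's parent; adjust for your layout
    BUNDLE = 'cache/big-file-churn.hg'

    with open(BUNDLE, 'rb') as f:
        actual = hashlib.md5(f.read()).hexdigest()
    with open(BUNDLE + '.md5') as f:
        expected = f.read().strip()
    print('OK' if actual == expected else 'MISMATCH %s != %s' % (actual, expected))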