generate-churning-bundle.py
139 lines
| 4.4 KiB
| text/x-python
|
PythonLexer
Boris Feld
|
#!/usr/bin/env python
# | ||||
# generate-churning-bundle - generate a bundle for a "large" branchy repository
# | ||||
# Copyright 2018 Octobus, contact@octobus.net | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
# | ||||
# This script generates a repository suitable for testing delta computation | ||||
# strategies. | ||||
# | ||||
# The repository updates a single "large" file many times. One fixed part of
# the file always gets updated, while the rest of the lines get updated over
# time. These updates happen over many topological branches, some of which get
# merged back.
# | ||||
# Running with `chg` in your path and `CHGHG` set is recommended for speed. | ||||
from __future__ import absolute_import, print_function | ||||
import hashlib | ||||
import os | ||||
import shutil | ||||
import subprocess | ||||
import sys | ||||
import tempfile | ||||
# name of the bundle file written by this script
BUNDLE_NAME = 'big-file-churn.hg'

# constants for generating the repository

# number of changesets created on top of the initial commit
NB_CHANGESET = 5000
# period (in commits) used to decide when a merge is performed
PERIOD_MERGING = 8
# period (in commits) at which a new topological branch is started
PERIOD_BRANCHING = 7
# a new branch starts MOVE_BACK_MIN + (idx % MOVE_BACK_RANGE) ancestors back
MOVE_BACK_MIN = 3
MOVE_BACK_RANGE = 5

# constants for generating the large file we keep updating
#
# At each revision, the beginning of the file changes,
# and a set of other lines changes too.
FILENAME = 'SPARSE-REVLOG-TEST-FILE'
# total number of lines in the file
NB_LINES = 10500
# lines whose index is at most this value are rewritten at every revision
ALWAYS_CHANGE_LINES = 500
# past that point, one line out of OTHER_CHANGES is rewritten per revision
OTHER_CHANGES = 300
def nextcontent(previous_content):
    """utility to produce a new file content from the previous one

    The new content is the hexadecimal MD5 digest of <previous_content>,
    returned as a native string.

    <previous_content> may be bytes or text. hashlib only hashes bytes on
    Python 3, so text is encoded first; this keeps the function usable on
    both Python 2 and Python 3 and yields the same digest on both.
    """
    if not isinstance(previous_content, bytes):
        previous_content = previous_content.encode('utf-8')
    return hashlib.md5(previous_content).hexdigest()
def filecontent(iteridx, oldcontent):
    """generate a new file content

    The content is generated according the iteration index and previous
    content.

    <iteridx> is the iteration number, or None for the initial call.
    <oldcontent> is the indexable sequence of lines (bytes, newline included)
    currently in the file, or None when every line must be generated.

    This is a generator yielding NB_LINES lines as bytes, so that they can be
    mixed with lines read back in binary mode and written to a binary file on
    both Python 2 and Python 3.
    """
    # initial call
    if iteridx is None:
        current = b''
    else:
        current = str(iteridx).encode('ascii')

    for idx in range(NB_LINES):
        do_change_line = True
        if oldcontent is not None and ALWAYS_CHANGE_LINES < idx:
            # past the always-changing head of the file, only one line out
            # of OTHER_CHANGES is rewritten; the others are carried over
            do_change_line = not ((idx - iteridx) % OTHER_CHANGES)

        if do_change_line:
            to_write = current + b'\n'
            current = nextcontent(current)
            if not isinstance(current, bytes):
                # the digest is a native string, which is text on Python 3
                current = current.encode('ascii')
        else:
            to_write = oldcontent[idx]
        yield to_write
def updatefile(filename, idx):
    """rewrite <filename> with the content matching iteration <idx>

    When <idx> is None the file is generated from scratch. Otherwise its
    current lines are loaded first so that filecontent() can carry over the
    ones that do not change."""
    previous_lines = None
    if idx is not None:
        with open(filename, 'rb') as source:
            previous_lines = source.readlines()

    with open(filename, 'wb') as target:
        target.writelines(filecontent(idx, previous_lines))
def hg(command, *args):
    """run the Mercurial command <command> with extra arguments <args>

    The `chg` executable is used when `CHGHG` is set in the environment and
    `hg` otherwise. The command always runs with `--quiet` and an empty
    HGRCPATH. Returns the value of subprocess.check_call(), which raises
    subprocess.CalledProcessError when the command exits with a non-zero
    status."""
    environ = os.environ.copy()
    executable = 'chg' if 'CHGHG' in environ else 'hg'
    environ['HGRCPATH'] = ''

    if command == 'commit':
        # reproducible commit metadata
        extra = ['--date', '0 0', '--user', 'test']
    elif command == 'merge':
        # avoid conflicts by picking the local variant
        extra = ['--tool', ':merge-local']
    else:
        extra = []

    argv = [executable, '--quiet', command] + extra + list(args)
    return subprocess.check_call(argv, env=environ)
def run(target):
    """generate the churning repository and bundle it into <target>

    The repository is built in a temporary directory that is removed
    afterward. Besides the bundle itself, a `<target>.md5` file holding the
    hexadecimal MD5 digest of the bundle followed by a newline is written.

    Returns 0 on success. A failing Mercurial command propagates the
    subprocess.CalledProcessError raised by hg().
    """
    origcwd = os.getcwd()
    tmpdir = tempfile.mkdtemp(prefix='tmp-hg-test-big-file-bundle-')
    try:
        os.chdir(tmpdir)
        hg('init')
        updatefile(FILENAME, None)
        hg('commit', '--addremove', '--message', 'initial commit')
        for idx in range(1, NB_CHANGESET + 1):
            if sys.stdout.isatty():
                print("generating commit #%d/%d" % (idx, NB_CHANGESET))
            if (idx % PERIOD_BRANCHING) == 0:
                # start a new topological branch from an older ancestor
                move_back = MOVE_BACK_MIN + (idx % MOVE_BACK_RANGE)
                hg('update', ".~%d" % move_back)
                # NOTE(review): the merge step is assumed to be nested in the
                # branching step (only some branches get merged back) --
                # confirm against the reference copy of this script.
                if (idx % PERIOD_MERGING) == 0:
                    hg('merge', 'min(head())')
            updatefile(FILENAME, idx)
            hg('commit', '--message', 'commit #%d' % idx)
        hg('bundle', '--all', target)
        with open(target, 'rb') as bundle:
            data = bundle.read()
            digest = hashlib.md5(data).hexdigest()
        with open(target + '.md5', 'wb') as md5file:
            # the file is opened in binary mode: write bytes on Python 3 too
            md5file.write(digest.encode('ascii') + b'\n')
        if sys.stdout.isatty():
            print('bundle generated at "%s" md5: %s' % (target, digest))

    finally:
        # leave the temporary directory before deleting it: removing the
        # current working directory fails on some platforms and would leave
        # the process in a directory that no longer exists on the others
        os.chdir(origcwd)
        shutil.rmtree(tmpdir)
    return 0
if __name__ == '__main__':
    # the bundle goes in the "cache" directory sitting next to the directory
    # that contains this script
    script_dir = os.path.dirname(sys.argv[0])
    orig = os.path.realpath(script_dir)
    target = os.path.join(orig, os.pardir, 'cache', BUNDLE_NAME)
    exit_code = run(target)
    sys.exit(exit_code)