##// END OF EJS Templates
sparse-revlog: add a test checking revlog deltas for a churning file...
Boris Feld -
r39527:4ca7a67c default
parent child Browse files
Show More
@@ -0,0 +1,1 b''
1 fe0d0bb5979de50f4fed71bb9437764d
@@ -0,0 +1,139 b''
1 #!/usr/bin/env python
2 #
3 # generate-churning-bundle - generate a bundle for a "large" branchy repository
4 #
5 # Copyright 2018 Octobus, contact@octobus.net
6 #
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
9 #
10 # This script generates a repository suitable for testing delta computation
11 # strategies.
12 #
13 # The repository updates a single "large" file many times. One fixed part
14 # of the file always gets updated while the rest of the lines get updated over
15 # time. These updates happen over many topological branches, some getting
16 # merged back.
17 #
18 # Running with `chg` in your path and `CHGHG` set is recommended for speed.
19
20 from __future__ import absolute_import, print_function
21
22 import hashlib
23 import os
24 import shutil
25 import subprocess
26 import sys
27 import tempfile
28
# file name of the generated bundle (stored in the sibling "cache" directory)
BUNDLE_NAME = 'big-file-churn.hg'

# constants for generating the repository
#
# number of commits created on top of the initial one
NB_CHANGESET = 5000
# every PERIOD_MERGING-th iteration merges with `min(head())` before committing
PERIOD_MERGING = 8
# every PERIOD_BRANCHING-th iteration first updates back to an ancestor
PERIOD_BRANCHING = 7
# the ancestor distance is MOVE_BACK_MIN + (iteration % MOVE_BACK_RANGE)
MOVE_BACK_MIN = 3
MOVE_BACK_RANGE = 5

# constants for generating the large file we keep updating
#
# At each revision, the beginning of the file changes,
# and a set of other lines changes too.
FILENAME = 'SPARSE-REVLOG-TEST-FILE'
NB_LINES = 10500
ALWAYS_CHANGE_LINES = 500
OTHER_CHANGES = 300
47
def nextcontent(previous_content):
    """derive the next line content from the previous one

    The result is the hexadecimal MD5 digest of <previous_content>."""
    hasher = hashlib.md5()
    hasher.update(previous_content)
    return hasher.hexdigest()
51
def filecontent(iteridx, oldcontent):
    """yield, line by line, the file content for iteration <iteridx>

    <oldcontent> is the list of lines of the previous version of the file, or
    None when there is no previous version (every line is then generated).

    Lines whose index is at most ALWAYS_CHANGE_LINES are regenerated on every
    call. Later lines are regenerated only when (index - iteridx) is a
    multiple of OTHER_CHANGES; otherwise the line from <oldcontent> is reused.

    Generated lines form a chain: the first one is the iteration index (empty
    for the initial call) and each following one is `nextcontent` of the
    previously generated one."""
    # seed of the chain of generated lines
    seed = '' if iteridx is None else str(iteridx)

    for lineno in xrange(NB_LINES):
        reuse_old = (
            oldcontent is not None
            and lineno > ALWAYS_CHANGE_LINES
            and (lineno - iteridx) % OTHER_CHANGES != 0
        )
        if reuse_old:
            yield oldcontent[lineno]
            continue

        # advance the chain before handing out the freshly generated line
        fresh = seed + '\n'
        seed = nextcontent(seed)
        yield fresh
75
def updatefile(filename, idx):
    """rewrite <filename> with the content expected for iteration <idx>

    When <idx> is None the file is created from scratch; otherwise its current
    lines are read first and passed to `filecontent` as the previous
    content."""
    if idx is None:
        previous = None
    else:
        with open(filename, 'rb') as source:
            previous = source.readlines()

    # the old lines are fully loaded above, so truncating the file is safe
    with open(filename, 'wb') as sink:
        sink.writelines(filecontent(idx, previous))
85
def hg(command, *args):
    """run the Mercurial <command> with <args> and a controlled environment

    `chg` is used as the executable when CHGHG is present in the environment,
    `hg` otherwise. The command always runs with `--quiet`, and HGRCPATH is
    set to the empty string in the child environment.

    Returns the result of `subprocess.check_call`, which raises
    `subprocess.CalledProcessError` on a non-zero exit status."""
    environ = os.environ.copy()
    executable = 'chg' if 'CHGHG' in environ else 'hg'
    environ['HGRCPATH'] = ''

    # options automatically inserted right after specific commands
    implicit_options = {
        # reproducible commit metadata
        'commit': ['--date', '0 0', '--user', 'test'],
        # avoid conflicts by picking the local variant
        'merge': ['--tool', ':merge-local'],
    }

    argv = [executable, '--quiet', command]
    argv += implicit_options.get(command, [])
    argv += args
    return subprocess.check_call(argv, env=environ)
104
def run(target):
    """generate the churning repository and bundle it at <target>

    A scratch repository is created in a temporary directory. After an initial
    commit, NB_CHANGESET more commits are made, each one updating FILENAME.
    Every PERIOD_BRANCHING-th iteration first updates back to an ancestor and
    every PERIOD_MERGING-th iteration first merges with `min(head())`.

    The whole repository is then bundled to <target> and the MD5 hex digest of
    the bundle is written to `<target>.md5`. A relative <target> is resolved
    against the working directory in effect when `run` is called.

    The temporary directory is always removed and the original working
    directory is restored, even on failure. Returns 0 on success; errors from
    the underlying commands propagate as exceptions."""
    origcwd = os.getcwd()
    # Anchor a relative <target> to the directory we started from, otherwise
    # it would land inside the scratch directory and be deleted with it.
    # os.path.join leaves an absolute <target> unchanged.
    target = os.path.join(origcwd, target)
    tmpdir = tempfile.mkdtemp(prefix='tmp-hg-test-big-file-bundle-')
    try:
        os.chdir(tmpdir)
        hg('init')
        updatefile(FILENAME, None)
        hg('commit', '--addremove', '--message', 'initial commit')
        for idx in xrange(1, NB_CHANGESET + 1):
            if sys.stdout.isatty():
                print("generating commit #%d/%d" % (idx, NB_CHANGESET))
            if (idx % PERIOD_BRANCHING) == 0:
                # start a new topological branch from an older ancestor
                move_back = MOVE_BACK_MIN + (idx % MOVE_BACK_RANGE)
                hg('update', ".~%d" % move_back)
            if (idx % PERIOD_MERGING) == 0:
                hg('merge', 'min(head())')
            updatefile(FILENAME, idx)
            hg('commit', '--message', 'commit #%d' % idx)
        hg('bundle', '--all', target)
        with open(target, 'rb') as bundle:
            data = bundle.read()
        digest = hashlib.md5(data).hexdigest()
        with open(target + '.md5', 'wb') as md5file:
            md5file.write(digest + '\n')
        if sys.stdout.isatty():
            print('bundle generated at "%s" md5: %s' % (target, digest))
    finally:
        # Leave the scratch directory before deleting it: removing the
        # process's current directory fails on Windows and leaves a dangling
        # working directory elsewhere.
        os.chdir(origcwd)
        shutil.rmtree(tmpdir)
    return 0
134
if __name__ == '__main__':
    # the bundle goes in the "cache" directory next to this script's directory
    script_dir = os.path.dirname(sys.argv[0])
    orig = os.path.realpath(script_dir)
    cache_dir = os.path.join(orig, os.pardir, 'cache')
    target = os.path.join(cache_dir, BUNDLE_NAME)
    sys.exit(run(target))
139
@@ -0,0 +1,121 b''
1 ====================================
2 Test delta choice with sparse revlog
3 ====================================
4
5 Sparse-revlog usually shows the most gain on Manifest. However, it is simpler
6 to generate an appropriate file, so we test with a single file instead. The
7 goal is to observe intermediate snapshots being created.
8
9 We need a large enough file. Part of the content needs to be replaced
10 repeatedly while some of it changes rarely.
11
12 $ bundlepath="$TESTDIR/artifacts/cache/big-file-churn.hg"
13
14 $ expectedhash=`cat "$bundlepath".md5`
15 $ if [ ! -f "$bundlepath" ]; then
16 > echo 'skipped: missing artifact, run "'"$TESTDIR"'/artifacts/scripts/generate-churning-bundle.py"'
17 > exit 80
18 > fi
19 $ currenthash=`f -M "$bundlepath" | cut -d = -f 2`
20 $ if [ "$currenthash" != "$expectedhash" ]; then
21 > echo 'skipped: outdated artifact, md5 "'"$currenthash"'" expected "'"$expectedhash"'" run "'"$TESTDIR"'/artifacts/scripts/generate-churning-bundle.py"'
22 > exit 80
23 > fi
24
25 $ cat >> $HGRCPATH << EOF
26 > [format]
27 > sparse-revlog = yes
28 > [storage]
29 > revlog.optimize-delta-parent-choice = yes
30 > EOF
31 $ hg init sparse-repo
32 $ cd sparse-repo
33 $ hg unbundle $bundlepath
34 adding changesets
35 adding manifests
36 adding file changes
37 added 5001 changesets with 5001 changes to 1 files (+89 heads)
38 new changesets 9706f5af64f4:d9032adc8114 (5001 drafts)
39 (run 'hg heads' to see heads, 'hg merge' to merge)
40 $ hg up
41 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
42 updated to "d9032adc8114: commit #5000"
43 89 other heads for branch "default"
44
45 $ hg log --stat -r 0:3
46 changeset: 0:9706f5af64f4
47 user: test
48 date: Thu Jan 01 00:00:00 1970 +0000
49 summary: initial commit
50
51 SPARSE-REVLOG-TEST-FILE | 10500 ++++++++++++++++++++++++++++++++++++++++++++++
52 1 files changed, 10500 insertions(+), 0 deletions(-)
53
54 changeset: 1:724907deaa5e
55 user: test
56 date: Thu Jan 01 00:00:00 1970 +0000
57 summary: commit #1
58
59 SPARSE-REVLOG-TEST-FILE | 1068 +++++++++++++++++++++++-----------------------
60 1 files changed, 534 insertions(+), 534 deletions(-)
61
62 changeset: 2:62c41bce3e5d
63 user: test
64 date: Thu Jan 01 00:00:00 1970 +0000
65 summary: commit #2
66
67 SPARSE-REVLOG-TEST-FILE | 1068 +++++++++++++++++++++++-----------------------
68 1 files changed, 534 insertions(+), 534 deletions(-)
69
70 changeset: 3:348a9cbd6959
71 user: test
72 date: Thu Jan 01 00:00:00 1970 +0000
73 summary: commit #3
74
75 SPARSE-REVLOG-TEST-FILE | 1068 +++++++++++++++++++++++-----------------------
76 1 files changed, 534 insertions(+), 534 deletions(-)
77
78
79 $ f -s .hg/store/data/*.d
80 .hg/store/data/_s_p_a_r_s_e-_r_e_v_l_o_g-_t_e_s_t-_f_i_l_e.d: size=74365490
81 $ hg debugrevlog *
82 format : 1
83 flags : generaldelta
84
85 revisions : 5001
86 merges : 625 (12.50%)
87 normal : 4376 (87.50%)
88 revisions : 5001
89 empty : 0 ( 0.00%)
90 text : 0 (100.00%)
91 delta : 0 (100.00%)
92 snapshot : 101 ( 2.02%)
93 lvl-0 : 101 ( 2.02%)
94 deltas : 4900 (97.98%)
95 revision size : 74365490
96 snapshot : 20307865 (27.31%)
97 lvl-0 : 20307865 (27.31%)
98 deltas : 54057625 (72.69%)
99
100 chunks : 5001
101 0x78 (x) : 5001 (100.00%)
102 chunks size : 74365490
103 0x78 (x) : 74365490 (100.00%)
104
105 avg chain length : 23
106 max chain length : 45
107 max chain reach : 11039464
108 compression ratio : 23
109
110 uncompressed data size (min/max/avg) : 346468 / 346472 / 346471
111 full revision size (min/max/avg) : 200927 / 201202 / 201067
112 inter-snapshot size (min/max/avg) : 0 / 0 / 0
113 delta size (min/max/avg) : 10649 / 103898 / 11032
114
115 deltas against prev : 4231 (86.35%)
116 where prev = p1 : 4172 (98.61%)
117 where prev = p2 : 0 ( 0.00%)
118 other : 59 ( 1.39%)
119 deltas against p1 : 651 (13.29%)
120 deltas against p2 : 18 ( 0.37%)
121 deltas against other : 0 ( 0.00%)
@@ -19,6 +19,7 b' syntax: glob'
19 *.zip
19 *.zip
20 \#*\#
20 \#*\#
21 .\#*
21 .\#*
22 tests/artifacts/cache/big-file-churn.hg
22 tests/.coverage*
23 tests/.coverage*
23 tests/.testtimes*
24 tests/.testtimes*
24 tests/.hypothesis
25 tests/.hypothesis
General Comments 0
You need to be logged in to leave comments. Login now