##// END OF EJS Templates
Add script to rewrite revlog to workaround lack of parent deltas....
Greg Ward -
r9515:f7d85980 default
parent child Browse files
Show More
@@ -0,0 +1,218 b''
1 #!/usr/bin/env python
2
3 """\
4 Reorder a revlog (by default the manifest file in the current
5 repository) to save space. Specifically, this topologically sorts the
6 revisions in the revlog so that revisions on the same branch are adjacent
7 as much as possible. This is a workaround for the fact that Mercurial
8 computes deltas relative to the previous revision rather than relative to a
9 parent revision. This is *not* safe to run on a changelog.
10 """
11
12 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
13 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
14 # renamed by Greg Ward <greg at gerg.ca>.
15
16 # XXX would be nice to have a way to verify the repository after shrinking,
17 # e.g. by comparing "before" and "after" states of random changesets
18 # (maybe: export before, shrink, export after, diff).
19
20 import sys, os, tempfile
21 import optparse
22 from mercurial import ui as ui_, hg, revlog, transaction, node, util
23
def toposort(rl):
    """Return the revision numbers of revlog 'rl' in topological order.

    'rl' must support len(), iteration over its revision numbers, and
    parentrevs(rev).  The result lists every reachable revision after all
    of its parents.  Children that become ready are visited before
    anything that was already pending, so revisions on the same line of
    development end up next to each other.

    Progress is written to sys.stdout.
    """
    write = sys.stdout.write

    children = {}
    root = []
    # build children and roots
    write('reading %d revs ' % len(rl))
    try:
        for i in rl:
            children[i] = []
            parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
            # in case of duplicate parents
            if len(parents) == 2 and parents[0] == parents[1]:
                del parents[1]
            for p in parents:
                # parents must have been iterated before their children
                assert p in children
                children[p].append(i)

            if not parents:
                root.append(i)

            if i % 1000 == 0:
                write('.')
    finally:
        write('\n')

    # XXX this is a reimplementation of the 'branchsort' topo sort
    # algorithm in hgext.convert.convcmd... would be nice not to duplicate
    # the algorithm
    write('sorting ...')

    # 'pending' is a stack whose top is the END of the list: the next
    # revision to visit is pending[-1].  Pushing and popping at the end is
    # O(1), whereas popping from / prepending to the front of a list copies
    # the whole list each time.  The roots are reversed so that the first
    # root is visited first.
    pending = root[::-1]
    ret = []
    while pending:
        i = pending.pop()
        ret.append(i)
        kids = children.pop(i, None)
        if kids is None:
            # This only happens if some node's p1 == p2, which can
            # happen in the manifest in certain circumstances.
            continue
        ready = []
        for c in kids:
            # A parent still present in 'children' has not been visited yet.
            parents_unseen = [p for p in rl.parentrevs(c)
                              if p != node.nullrev and p in children]
            if not parents_unseen:
                ready.append(c)
        # Push in reverse so that ready[0] is popped (visited) first.
        ready.reverse()
        pending.extend(ready)
    write('\n')
    return ret
72
def writerevs(r1, r2, order, tr):
    """Copy the revisions of revlog 'r1' into revlog 'r2' in the given order.

    For each revision number in 'order', the node, parents, linkrev and
    full text are read from 'r1' and added to 'r2' under transaction 'tr'.
    A progress dot is written to sys.stdout for every 1000th revision
    (starting with the first), followed by a newline at the end, even if
    copying fails part-way.
    """
    out = sys.stdout.write
    out('writing %d revs ' % len(order))
    try:
        for written, rev in enumerate(order):
            nodeid = r1.node(rev)
            parent1, parent2 = r1.parents(nodeid)
            linkrev = r1.linkrev(rev)
            text = r1.revision(nodeid)
            r2.addrevision(text, tr, linkrev, parent1, parent2)

            if written % 1000 == 0:
                out('.')
    finally:
        out('\n')
90
def report(olddatafn, newdatafn):
    """Print the sizes of the old and new data files and the space saved.

    'olddatafn' and 'newdatafn' are paths to existing files; os.stat()
    raises OSError if either is missing.  Three lines are written to
    sys.stdout: the old size, the new size, and the shrinkage as a
    percentage and as a factor.  The factor is shown as 'n/a' when the new
    file is empty, and the percentage as 0.0 when the old file is empty,
    instead of dividing by zero.
    """
    oldsize = float(os.stat(olddatafn).st_size)
    newsize = float(os.stat(newdatafn).st_size)

    # argh: have to pass an int to %d, because a float >= 2^32
    # blows up under Python 2.5 or earlier
    sys.stdout.write('old file size: %12d bytes (%6.1f MiB)\n'
                     % (int(oldsize), oldsize/1024/1024))
    sys.stdout.write('new file size: %12d bytes (%6.1f MiB)\n'
                     % (int(newsize), newsize/1024/1024))

    if oldsize:
        shrink_percent = (oldsize - newsize) / oldsize * 100
    else:
        # nothing to shrink: avoid dividing by a zero old size
        shrink_percent = 0.0
    if newsize:
        shrink_factor = '%.1fx' % (oldsize / newsize)
    else:
        # the ratio is undefined for an empty new file
        shrink_factor = 'n/a'
    sys.stdout.write('shrinkage: %.1f%% (%s)\n'
                     % (shrink_percent, shrink_factor))
106
def main():
    """Command-line entry point: shrink one revlog of a local repository.

    Parses -R/--repository and --revlog, validates the chosen index (*.i)
    and data (*.d) files, then rewrites the revlog into temporary files in
    topologically sorted order, prints a size report, hard-links the
    original files to *.old and renames the temporary files into place.

    All of the rewriting AND the final file swap happen while the
    repository lock is held.  Option and validation errors are reported
    through parser.error(), which exits the process.
    """

    # Unbuffer stdout for nice progress output.
    # (buffering=0 with text mode 'w' is only accepted by Python 2.)
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
    write = sys.stdout.write

    parser = optparse.OptionParser(description=__doc__)
    parser.add_option('-R', '--repository',
                      default=os.path.curdir,
                      metavar='REPO',
                      help='repository root directory [default: current dir]')
    parser.add_option('--revlog',
                      metavar='FILE',
                      help='shrink FILE [default: REPO/.hg/store/00manifest.i]')
    (options, args) = parser.parse_args()
    if args:
        parser.error('too many arguments')

    # Open the specified repository.
    ui = ui_.ui()
    repo = hg.repository(ui, options.repository)
    if not repo.local():
        parser.error('not a local repository: %s' % options.repository)

    if options.revlog is None:
        indexfn = repo.sjoin('00manifest.i')
    else:
        if not options.revlog.endswith('.i'):
            parser.error('--revlog option must specify the revlog index file '
                         '(*.i), not %s' % options.revlog)

        indexfn = os.path.realpath(options.revlog)
        store = repo.sjoin('')
        if not indexfn.startswith(store):
            parser.error('--revlog option must specify a revlog in %s, not %s'
                         % (store, indexfn))

    datafn = indexfn[:-2] + '.d'
    if not os.path.exists(indexfn):
        parser.error('no such file: %s' % indexfn)
    if '00changelog' in indexfn:
        parser.error('shrinking the changelog will corrupt your repository')
    if not os.path.exists(datafn):
        # This is just a lazy shortcut because I can't be bothered to
        # handle all the special cases that entail from no .d file.
        parser.error('%s does not exist: revlog not big enough '
                     'to be worth shrinking' % datafn)

    oldindexfn = indexfn + '.old'
    olddatafn = datafn + '.old'
    if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
        parser.error('one or both of\n'
                     ' %s\n'
                     ' %s\n'
                     'exists from a previous run; please clean up before '
                     'running again'
                     % (oldindexfn, olddatafn))

    write('shrinking %s\n' % indexfn)
    # Create the temporary index next to the original so that the final
    # rename stays within one directory; the matching data file name is
    # derived from it.
    prefix = os.path.basename(indexfn)[:-1]
    (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
                                           prefix=prefix,
                                           suffix='.i')
    tmpdatafn = tmpindexfn[:-2] + '.d'
    os.close(tmpfd)

    r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
    r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)

    lock = repo.lock(wait=False)
    try:
        # Don't use repo.transaction(), because then things get hairy with
        # paths: some need to be relative to .hg, and some need to be
        # absolute.  Doing it this way keeps things simple: everything is an
        # absolute path.
        #
        # The transaction is created inside the try block so that the lock
        # is released even if creating it fails.
        tr = transaction.transaction(sys.stderr.write,
                                     open,
                                     repo.sjoin('journal'))

        try:
            order = toposort(r1)
            writerevs(r1, r2, order, tr)
            report(datafn, tmpdatafn)
            tr.close()
        except:
            # Abort transaction first, so we truncate the files before
            # deleting them.
            tr.abort()
            if os.path.exists(tmpindexfn):
                os.unlink(tmpindexfn)
            if os.path.exists(tmpdatafn):
                os.unlink(tmpdatafn)
            raise

        # Swap the new revlog into place while still holding the lock.
        # Keep the originals as *.old.  The index and data files are
        # renamed by two separate calls, so the swap is not atomic.
        os.link(indexfn, oldindexfn)
        os.link(datafn, olddatafn)
        os.rename(tmpindexfn, indexfn)
        os.rename(tmpdatafn, datafn)
    finally:
        lock.release()

    write('note: old revlog saved in:\n'
          ' %s\n'
          ' %s\n'
          '(You can delete those files when you are satisfied that your\n'
          'repository is still sane. '
          'Running \'hg verify\' is strongly recommended.)\n'
          % (oldindexfn, olddatafn))
214
# Run the shrinker only when executed as a script, so that merely importing
# this module does not parse sys.argv and rewrite a repository.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Exit with a short message instead of a traceback on Ctrl-C.
        sys.exit("interrupted")
General Comments 0
You need to be logged in to leave comments. Login now