##// END OF EJS Templates
shrink-revlog: add --dry-run option
Patrick Mezard -
r10241:4b2a086b default
parent child Browse files
Show More
@@ -1,215 +1,222 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 """\
3 """\
4 reorder a revlog (the manifest by default) to save space
4 reorder a revlog (the manifest by default) to save space
5
5
6 Specifically, this topologically sorts the revisions in the revlog so that
6 Specifically, this topologically sorts the revisions in the revlog so that
7 revisions on the same branch are adjacent as much as possible. This is a
7 revisions on the same branch are adjacent as much as possible. This is a
8 workaround for the fact that Mercurial computes deltas relative to the
8 workaround for the fact that Mercurial computes deltas relative to the
9 previous revision rather than relative to a parent revision.
9 previous revision rather than relative to a parent revision.
10
10
11 This is *not* safe to run on a changelog.
11 This is *not* safe to run on a changelog.
12 """
12 """
13
13
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 # renamed by Greg Ward <greg at gerg.ca>.
16 # renamed by Greg Ward <greg at gerg.ca>.
17
17
18 # XXX would be nice to have a way to verify the repository after shrinking,
18 # XXX would be nice to have a way to verify the repository after shrinking,
19 # e.g. by comparing "before" and "after" states of random changesets
19 # e.g. by comparing "before" and "after" states of random changesets
20 # (maybe: export before, shrink, export after, diff).
20 # (maybe: export before, shrink, export after, diff).
21
21
22 import sys, os, tempfile
22 import sys, os, tempfile
23 import optparse
23 import optparse
24 from mercurial import ui as ui_, hg, revlog, transaction, node, util
24 from mercurial import ui as ui_, hg, revlog, transaction, node, util
25 from mercurial import changegroup
25 from mercurial import changegroup
26
26
def toposort(ui, rl):
    """Return the revisions of ``rl`` in a branch-grouped topological order.

    Each revision is emitted only after all of its non-null parents, and
    the children that become ready when a revision is emitted are placed
    at the front of the work queue, so a line of descendants stays
    together in the result.

    ui -- object with a ``write(str)`` method; receives progress output
    rl -- revlog-like object: iterating it yields revision numbers,
          ``len(rl)`` is the revision count and ``rl.parentrevs(rev)``
          returns the parent revision numbers (``node.nullrev`` marks an
          absent parent)

    Returns a list of revision numbers.
    """

    children = {}
    roots = []
    # build children and roots
    ui.write('reading %d revs ' % len(rl))
    try:
        for rev in rl:
            children[rev] = []
            parents = [p for p in rl.parentrevs(rev) if p != node.nullrev]
            # in case of duplicate parents
            if len(parents) == 2 and parents[0] == parents[1]:
                del parents[1]
            for p in parents:
                # A parent must already have been seen in this iteration.
                assert p in children
                children[p].append(rev)

            if not parents:
                roots.append(rev)

            if rev % 1000 == 0:
                ui.write('.')
    finally:
        ui.write('\n')

    # XXX this is a reimplementation of the 'branchsort' topo sort
    # algorithm in hgext.convert.convcmd... would be nice not to duplicate
    # the algorithm
    ui.write('sorting ...')
    # 'pending' is used as a stack whose top (the end of the list) is the
    # next revision to emit.  Pushing newly ready children in reverse
    # yields the same order as prepending them to a queue, without
    # copying the whole list on every step.
    pending = roots[::-1]
    ordered = []
    while pending:
        rev = pending.pop()
        ordered.append(rev)
        if rev not in children:
            # This only happens if some node's p1 == p2, which can
            # happen in the manifest in certain circumstances.
            continue
        ready = []
        # Popping the entry marks 'rev' as emitted: from here on it no
        # longer counts as an unseen parent.
        for child in children.pop(rev):
            parents_unseen = [p for p in rl.parentrevs(child)
                              if p != node.nullrev and p in children]
            if not parents_unseen:
                ready.append(child)
        pending.extend(reversed(ready))
    ui.write('\n')
    return ordered
74
74
def writerevs(ui, r1, r2, order, tr):
    """Copy the revisions of ``r1`` into ``r2`` in the given order.

    ui    -- object with a ``write(str)`` method; receives progress output
    r1    -- source revlog; its ``node``, ``rev``, ``linkrev`` and ``group``
             methods are used
    r2    -- destination revlog; receives the data through ``addgroup``
    order -- sequence of revision numbers of ``r1``, in the order in which
             they are to be written
    tr    -- transaction object handed through to ``r2.addgroup``

    A trailing newline is always written to ``ui``, even on failure.
    """

    ui.write('writing %d revs ' % len(order))
    # One-element list used as a mutable counter that the nested callback
    # can update.
    count = [0]
    def progress(*args):
        # Emit one dot per 1000 invocations.
        if count[0] % 1000 == 0:
            ui.write('.')
        count[0] += 1

    nodes = [r1.node(r) for r in order]

    # this is a bit ugly, but it works
    # The linkrev is passed through the group as a zero-padded 20-digit
    # decimal string and decoded back to an int on the receiving side.
    # NOTE(review): the width of 20 presumably matches the length of a
    # node id -- confirm.
    lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
    unlookup = lambda x: int(x, 10)

    try:
        group = util.chunkbuffer(r1.group(nodes, lookup, progress))
        chunkiter = changegroup.chunkiter(group)
        r2.addgroup(chunkiter, unlookup, tr)
    finally:
        ui.write('\n')
96
96
def report(ui, olddatafn, newdatafn):
    """Write a size comparison of two data files to ``ui``.

    ui        -- object with a ``write(str)`` method
    olddatafn -- path of the original data file
    newdatafn -- path of the rewritten data file

    Both files are examined with os.stat(), so OSError is raised if either
    one is missing.  When either file is empty the percentage or ratio is
    undefined; 'n/a' is reported instead of raising ZeroDivisionError.
    """
    oldsize = float(os.stat(olddatafn).st_size)
    newsize = float(os.stat(newdatafn).st_size)

    # argh: have to pass an int to %d, because a float >= 2^32
    # blows up under Python 2.5 or earlier
    ui.write('old file size: %12d bytes (%6.1f MiB)\n'
             % (int(oldsize), oldsize/1024/1024))
    ui.write('new file size: %12d bytes (%6.1f MiB)\n'
             % (int(newsize), newsize/1024/1024))

    if oldsize and newsize:
        shrink_percent = (oldsize - newsize) / oldsize * 100
        shrink_factor = oldsize / newsize
        ui.write('shrinkage: %.1f%% (%.1fx)\n'
                 % (shrink_percent, shrink_factor))
    else:
        # A zero-length file would make one of the divisions above fail.
        ui.write('shrinkage: n/a\n')
111
111
def shrink(ui, repo, **opts):
    """
    Shrink revlog by re-ordering revisions. Will operate on manifest for
    the given repository if no other revlog is specified.

    With -n/--dry-run the reordered revlog is built and its size is
    reported, but the repository's revlog is left untouched."""

    # Options read from opts:
    #   'revlog'  -- path of the index (.i) file to shrink; when empty, the
    #                manifest index in the repository store is used
    #   'dry_run' -- when true, the temporary revlog is deleted instead of
    #                replacing the original
    # Raises util.Abort when the preconditions checked below fail.

    # Unbuffer stdout for nice progress output.
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    if not repo.local():
        raise util.Abort('not a local repository: %s' % repo.root)

    fn = opts.get('revlog')
    if not fn:
        indexfn = repo.sjoin('00manifest.i')
    else:
        if not fn.endswith('.i'):
            raise util.Abort('--revlog option must specify the revlog index '
                             'file (*.i), not %s' % opts.get('revlog'))

        indexfn = os.path.realpath(fn)
        store = repo.sjoin('')
        if not indexfn.startswith(store):
            raise util.Abort('--revlog option must specify a revlog in %s, '
                             'not %s' % (store, indexfn))

    datafn = indexfn[:-2] + '.d'
    if not os.path.exists(indexfn):
        raise util.Abort('no such file: %s' % indexfn)
    if '00changelog' in indexfn:
        raise util.Abort('shrinking the changelog will corrupt your repository')
    if not os.path.exists(datafn):
        # This is just a lazy shortcut because I can't be bothered to
        # handle all the special cases that entail from no .d file.
        raise util.Abort('%s does not exist: revlog not big enough '
                         'to be worth shrinking' % datafn)

    oldindexfn = indexfn + '.old'
    olddatafn = datafn + '.old'
    if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
        raise util.Abort('one or both of\n'
                         ' %s\n'
                         ' %s\n'
                         'exists from a previous run; please clean up before '
                         'running again' % (oldindexfn, olddatafn))

    dry_run = opts.get('dry_run')

    ui.write('shrinking %s\n' % indexfn)

    # Take the lock before touching the store: if it cannot be acquired,
    # no temporary file has been created yet, and the source revlog is
    # only opened once the lock is held.
    lock = repo.lock(wait=False)
    try:
        prefix = os.path.basename(indexfn)[:-1]
        (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
                                               prefix=prefix,
                                               suffix='.i')
        tmpdatafn = tmpindexfn[:-2] + '.d'
        os.close(tmpfd)

        def removetmp():
            # Delete whichever temporary revlog files currently exist.
            for tmpfn in (tmpindexfn, tmpdatafn):
                if os.path.exists(tmpfn):
                    os.unlink(tmpfn)

        tr = None
        try:
            r1 = revlog.revlog(util.opener(os.getcwd(), audit=False),
                               indexfn)
            r2 = revlog.revlog(util.opener(os.getcwd(), audit=False),
                               tmpindexfn)

            # Don't use repo.transaction(), because then things get hairy
            # with paths: some need to be relative to .hg, and some need
            # to be absolute. Doing it this way keeps things simple:
            # everything is an absolute path.
            tr = transaction.transaction(ui.warn,
                                         open,
                                         repo.sjoin('journal'))

            order = toposort(ui, r1)
            writerevs(ui, r1, r2, order, tr)
            report(ui, datafn, tmpdatafn)
            tr.close()
        except:
            # Abort transaction first, so we truncate the files before
            # deleting them.  (Bare except on purpose: clean up on any
            # error, including KeyboardInterrupt, then re-raise.)
            if tr is not None:
                tr.abort()
            removetmp()
            raise

        if not dry_run:
            # Racy since both files cannot be renamed atomically
            util.os_link(indexfn, oldindexfn)
            util.os_link(datafn, olddatafn)
            util.rename(tmpindexfn, indexfn)
            util.rename(tmpdatafn, datafn)
        else:
            removetmp()
    finally:
        lock.release()

    if not dry_run:
        ui.write('note: old revlog saved in:\n'
                 ' %s\n'
                 ' %s\n'
                 '(You can delete those files when you are satisfied that your\n'
                 'repository is still sane. '
                 'Running \'hg verify\' is strongly recommended.)\n'
                 % (oldindexfn, olddatafn))
207
212
# Command table for this extension: maps the command name to a tuple of
# (function, option list, synopsis).  Each option entry here is written as
# (short flag, long flag, default value, help text).
cmdtable = {
    'shrink': (shrink,
               [('', 'revlog', '', 'index (.i) file of the revlog to shrink'),
                ('n', 'dry-run', None, 'do not shrink, simulate only'),
                ],
               'hg shrink [-n] [--revlog PATH]')
}
213
220
if __name__ == "__main__":
    # Running this file directly only prints a pointer to the extension
    # mechanism.  A single-argument print(...) produces the same output
    # under Python 2 and also parses under Python 3.
    print("shrink-revlog.py is now an extension (see hg help extensions)")
General Comments 0
You need to be logged in to leave comments. Login now