##// END OF EJS Templates
bundle: fix shrink-revlog bundle usage
Matt Mackall -
r12348:7f97b484 default
parent child Browse files
Show More
@@ -1,288 +1,289 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 """\
3 """\
4 reorder a revlog (the manifest by default) to save space
4 reorder a revlog (the manifest by default) to save space
5
5
6 Specifically, this topologically sorts the revisions in the revlog so that
6 Specifically, this topologically sorts the revisions in the revlog so that
7 revisions on the same branch are adjacent as much as possible. This is a
7 revisions on the same branch are adjacent as much as possible. This is a
8 workaround for the fact that Mercurial computes deltas relative to the
8 workaround for the fact that Mercurial computes deltas relative to the
9 previous revision rather than relative to a parent revision.
9 previous revision rather than relative to a parent revision.
10
10
11 This is *not* safe to run on a changelog.
11 This is *not* safe to run on a changelog.
12 """
12 """
13
13
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 # renamed by Greg Ward <greg at gerg.ca>.
16 # renamed by Greg Ward <greg at gerg.ca>.
17
17
18 # XXX would be nice to have a way to verify the repository after shrinking,
18 # XXX would be nice to have a way to verify the repository after shrinking,
19 # e.g. by comparing "before" and "after" states of random changesets
19 # e.g. by comparing "before" and "after" states of random changesets
20 # (maybe: export before, shrink, export after, diff).
20 # (maybe: export before, shrink, export after, diff).
21
21
22 import os, tempfile, errno
22 import os, tempfile, errno
23 from mercurial import revlog, transaction, node, util
23 from mercurial import revlog, transaction, node, util
24 from mercurial import changegroup
24 from mercurial import changegroup
25 from mercurial.i18n import _
25 from mercurial.i18n import _
26
26
27
27
28 def postorder(start, edges):
28 def postorder(start, edges):
29 result = []
29 result = []
30 visit = list(start)
30 visit = list(start)
31 finished = set()
31 finished = set()
32
32
33 while visit:
33 while visit:
34 cur = visit[-1]
34 cur = visit[-1]
35 for p in edges[cur]:
35 for p in edges[cur]:
36 if p not in finished:
36 if p not in finished:
37 visit.append(p)
37 visit.append(p)
38 break
38 break
39 else:
39 else:
40 result.append(cur)
40 result.append(cur)
41 finished.add(cur)
41 finished.add(cur)
42 visit.pop()
42 visit.pop()
43
43
44 return result
44 return result
45
45
46 def toposort_reversepostorder(ui, rl):
46 def toposort_reversepostorder(ui, rl):
47 # postorder of the reverse directed graph
47 # postorder of the reverse directed graph
48
48
49 # map rev to list of parent revs (p2 first)
49 # map rev to list of parent revs (p2 first)
50 parents = {}
50 parents = {}
51 heads = set()
51 heads = set()
52 ui.status(_('reading revs\n'))
52 ui.status(_('reading revs\n'))
53 try:
53 try:
54 for rev in rl:
54 for rev in rl:
55 ui.progress(_('reading'), rev, total=len(rl))
55 ui.progress(_('reading'), rev, total=len(rl))
56 (p1, p2) = rl.parentrevs(rev)
56 (p1, p2) = rl.parentrevs(rev)
57 if p1 == p2 == node.nullrev:
57 if p1 == p2 == node.nullrev:
58 parents[rev] = () # root node
58 parents[rev] = () # root node
59 elif p1 == p2 or p2 == node.nullrev:
59 elif p1 == p2 or p2 == node.nullrev:
60 parents[rev] = (p1,) # normal node
60 parents[rev] = (p1,) # normal node
61 else:
61 else:
62 parents[rev] = (p2, p1) # merge node
62 parents[rev] = (p2, p1) # merge node
63 heads.add(rev)
63 heads.add(rev)
64 for p in parents[rev]:
64 for p in parents[rev]:
65 heads.discard(p)
65 heads.discard(p)
66 finally:
66 finally:
67 ui.progress(_('reading'), None)
67 ui.progress(_('reading'), None)
68
68
69 heads = list(heads)
69 heads = list(heads)
70 heads.sort(reverse=True)
70 heads.sort(reverse=True)
71
71
72 ui.status(_('sorting revs\n'))
72 ui.status(_('sorting revs\n'))
73 return postorder(heads, parents)
73 return postorder(heads, parents)
74
74
75 def toposort_postorderreverse(ui, rl):
75 def toposort_postorderreverse(ui, rl):
76 # reverse-postorder of the reverse directed graph
76 # reverse-postorder of the reverse directed graph
77
77
78 children = {}
78 children = {}
79 roots = set()
79 roots = set()
80 ui.status(_('reading revs\n'))
80 ui.status(_('reading revs\n'))
81 try:
81 try:
82 for rev in rl:
82 for rev in rl:
83 ui.progress(_('reading'), rev, total=len(rl))
83 ui.progress(_('reading'), rev, total=len(rl))
84 (p1, p2) = rl.parentrevs(rev)
84 (p1, p2) = rl.parentrevs(rev)
85 if p1 == p2 == node.nullrev:
85 if p1 == p2 == node.nullrev:
86 roots.add(rev)
86 roots.add(rev)
87 children[rev] = []
87 children[rev] = []
88 if p1 != node.nullrev:
88 if p1 != node.nullrev:
89 children[p1].append(rev)
89 children[p1].append(rev)
90 if p2 != node.nullrev:
90 if p2 != node.nullrev:
91 children[p2].append(rev)
91 children[p2].append(rev)
92 finally:
92 finally:
93 ui.progress(_('reading'), None)
93 ui.progress(_('reading'), None)
94
94
95 roots = list(roots)
95 roots = list(roots)
96 roots.sort()
96 roots.sort()
97
97
98 ui.status(_('sorting revs\n'))
98 ui.status(_('sorting revs\n'))
99 result = postorder(roots, children)
99 result = postorder(roots, children)
100 result.reverse()
100 result.reverse()
101 return result
101 return result
102
102
103 def writerevs(ui, r1, r2, order, tr):
103 def writerevs(ui, r1, r2, order, tr):
104
104
105 ui.status(_('writing revs\n'))
105 ui.status(_('writing revs\n'))
106
106
107 count = [0]
107 count = [0]
108 def progress(*args):
108 def progress(*args):
109 ui.progress(_('writing'), count[0], total=len(order))
109 ui.progress(_('writing'), count[0], total=len(order))
110 count[0] += 1
110 count[0] += 1
111
111
112 order = [r1.node(r) for r in order]
112 order = [r1.node(r) for r in order]
113
113
114 # this is a bit ugly, but it works
114 # this is a bit ugly, but it works
115 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
115 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
116 unlookup = lambda x: int(x, 10)
116 unlookup = lambda x: int(x, 10)
117
117
118 try:
118 try:
119 group = util.chunkbuffer(r1.group(order, lookup, progress))
119 group = util.chunkbuffer(r1.group(order, lookup, progress))
120 group = changegroup.unbundle10(group, "UN")
120 r2.addgroup(group, unlookup, tr)
121 r2.addgroup(group, unlookup, tr)
121 finally:
122 finally:
122 ui.progress(_('writing'), None)
123 ui.progress(_('writing'), None)
123
124
124 def report(ui, r1, r2):
125 def report(ui, r1, r2):
125 def getsize(r):
126 def getsize(r):
126 s = 0
127 s = 0
127 for fn in (r.indexfile, r.datafile):
128 for fn in (r.indexfile, r.datafile):
128 try:
129 try:
129 s += os.stat(fn).st_size
130 s += os.stat(fn).st_size
130 except OSError, inst:
131 except OSError, inst:
131 if inst.errno != errno.ENOENT:
132 if inst.errno != errno.ENOENT:
132 raise
133 raise
133 return s
134 return s
134
135
135 oldsize = float(getsize(r1))
136 oldsize = float(getsize(r1))
136 newsize = float(getsize(r2))
137 newsize = float(getsize(r2))
137
138
138 # argh: have to pass an int to %d, because a float >= 2^32
139 # argh: have to pass an int to %d, because a float >= 2^32
139 # blows up under Python 2.5 or earlier
140 # blows up under Python 2.5 or earlier
140 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
141 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
141 % (int(oldsize), oldsize / 1024 / 1024))
142 % (int(oldsize), oldsize / 1024 / 1024))
142 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
143 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
143 % (int(newsize), newsize / 1024 / 1024))
144 % (int(newsize), newsize / 1024 / 1024))
144
145
145 shrink_percent = (oldsize - newsize) / oldsize * 100
146 shrink_percent = (oldsize - newsize) / oldsize * 100
146 shrink_factor = oldsize / newsize
147 shrink_factor = oldsize / newsize
147 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
148 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
148 % (shrink_percent, shrink_factor))
149 % (shrink_percent, shrink_factor))
149
150
150 def shrink(ui, repo, **opts):
151 def shrink(ui, repo, **opts):
151 """shrink a revlog by reordering revisions
152 """shrink a revlog by reordering revisions
152
153
153 Rewrites all the entries in some revlog of the current repository
154 Rewrites all the entries in some revlog of the current repository
154 (by default, the manifest log) to save space.
155 (by default, the manifest log) to save space.
155
156
156 Different sort algorithms have different performance
157 Different sort algorithms have different performance
157 characteristics. Use ``--sort`` to select a sort algorithm so you
158 characteristics. Use ``--sort`` to select a sort algorithm so you
158 can determine which works best for your data.
159 can determine which works best for your data.
159 """
160 """
160
161
161 if not repo.local():
162 if not repo.local():
162 raise util.Abort(_('not a local repository: %s') % repo.root)
163 raise util.Abort(_('not a local repository: %s') % repo.root)
163
164
164 fn = opts.get('revlog')
165 fn = opts.get('revlog')
165 if not fn:
166 if not fn:
166 indexfn = repo.sjoin('00manifest.i')
167 indexfn = repo.sjoin('00manifest.i')
167 else:
168 else:
168 if not fn.endswith('.i'):
169 if not fn.endswith('.i'):
169 raise util.Abort(_('--revlog option must specify the revlog index '
170 raise util.Abort(_('--revlog option must specify the revlog index '
170 'file (*.i), not %s') % opts.get('revlog'))
171 'file (*.i), not %s') % opts.get('revlog'))
171
172
172 indexfn = os.path.realpath(fn)
173 indexfn = os.path.realpath(fn)
173 store = repo.sjoin('')
174 store = repo.sjoin('')
174 if not indexfn.startswith(store):
175 if not indexfn.startswith(store):
175 raise util.Abort(_('--revlog option must specify a revlog in %s, '
176 raise util.Abort(_('--revlog option must specify a revlog in %s, '
176 'not %s') % (store, indexfn))
177 'not %s') % (store, indexfn))
177
178
178 sortname = opts['sort']
179 sortname = opts['sort']
179 try:
180 try:
180 toposort = globals()['toposort_' + sortname]
181 toposort = globals()['toposort_' + sortname]
181 except KeyError:
182 except KeyError:
182 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
183 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
183
184
184 if not os.path.exists(indexfn):
185 if not os.path.exists(indexfn):
185 raise util.Abort(_('no such file: %s') % indexfn)
186 raise util.Abort(_('no such file: %s') % indexfn)
186 if '00changelog' in indexfn:
187 if '00changelog' in indexfn:
187 raise util.Abort(_('shrinking the changelog '
188 raise util.Abort(_('shrinking the changelog '
188 'will corrupt your repository'))
189 'will corrupt your repository'))
189
190
190 ui.write(_('shrinking %s\n') % indexfn)
191 ui.write(_('shrinking %s\n') % indexfn)
191 prefix = os.path.basename(indexfn)[:-1]
192 prefix = os.path.basename(indexfn)[:-1]
192 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
193 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
193
194
194 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
195 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
195 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
196 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
196
197
197 datafn, tmpdatafn = r1.datafile, r2.datafile
198 datafn, tmpdatafn = r1.datafile, r2.datafile
198
199
199 oldindexfn = indexfn + '.old'
200 oldindexfn = indexfn + '.old'
200 olddatafn = datafn + '.old'
201 olddatafn = datafn + '.old'
201 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
202 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
202 raise util.Abort(_('one or both of\n'
203 raise util.Abort(_('one or both of\n'
203 ' %s\n'
204 ' %s\n'
204 ' %s\n'
205 ' %s\n'
205 'exists from a previous run; please clean up '
206 'exists from a previous run; please clean up '
206 'before running again') % (oldindexfn, olddatafn))
207 'before running again') % (oldindexfn, olddatafn))
207
208
208 # Don't use repo.transaction(), because then things get hairy with
209 # Don't use repo.transaction(), because then things get hairy with
209 # paths: some need to be relative to .hg, and some need to be
210 # paths: some need to be relative to .hg, and some need to be
210 # absolute. Doing it this way keeps things simple: everything is an
211 # absolute. Doing it this way keeps things simple: everything is an
211 # absolute path.
212 # absolute path.
212 lock = repo.lock(wait=False)
213 lock = repo.lock(wait=False)
213 tr = transaction.transaction(ui.warn,
214 tr = transaction.transaction(ui.warn,
214 open,
215 open,
215 repo.sjoin('journal'))
216 repo.sjoin('journal'))
216
217
217 def ignoremissing(func):
218 def ignoremissing(func):
218 def f(*args, **kw):
219 def f(*args, **kw):
219 try:
220 try:
220 return func(*args, **kw)
221 return func(*args, **kw)
221 except OSError, inst:
222 except OSError, inst:
222 if inst.errno != errno.ENOENT:
223 if inst.errno != errno.ENOENT:
223 raise
224 raise
224 return f
225 return f
225
226
226 try:
227 try:
227 try:
228 try:
228 order = toposort(ui, r1)
229 order = toposort(ui, r1)
229
230
230 suboptimal = 0
231 suboptimal = 0
231 for i in xrange(1, len(order)):
232 for i in xrange(1, len(order)):
232 parents = [p for p in r1.parentrevs(order[i])
233 parents = [p for p in r1.parentrevs(order[i])
233 if p != node.nullrev]
234 if p != node.nullrev]
234 if parents and order[i - 1] not in parents:
235 if parents and order[i - 1] not in parents:
235 suboptimal += 1
236 suboptimal += 1
236 ui.note(_('%d suboptimal nodes\n') % suboptimal)
237 ui.note(_('%d suboptimal nodes\n') % suboptimal)
237
238
238 writerevs(ui, r1, r2, order, tr)
239 writerevs(ui, r1, r2, order, tr)
239 report(ui, r1, r2)
240 report(ui, r1, r2)
240 tr.close()
241 tr.close()
241 except:
242 except:
242 # Abort transaction first, so we truncate the files before
243 # Abort transaction first, so we truncate the files before
243 # deleting them.
244 # deleting them.
244 tr.abort()
245 tr.abort()
245 for fn in (tmpindexfn, tmpdatafn):
246 for fn in (tmpindexfn, tmpdatafn):
246 ignoremissing(os.unlink)(fn)
247 ignoremissing(os.unlink)(fn)
247 raise
248 raise
248 if not opts.get('dry_run'):
249 if not opts.get('dry_run'):
249 # racy, both files cannot be renamed atomically
250 # racy, both files cannot be renamed atomically
250 # copy files
251 # copy files
251 util.os_link(indexfn, oldindexfn)
252 util.os_link(indexfn, oldindexfn)
252 ignoremissing(util.os_link)(datafn, olddatafn)
253 ignoremissing(util.os_link)(datafn, olddatafn)
253
254
254 # rename
255 # rename
255 util.rename(tmpindexfn, indexfn)
256 util.rename(tmpindexfn, indexfn)
256 try:
257 try:
257 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
258 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
258 util.rename(tmpdatafn, datafn)
259 util.rename(tmpdatafn, datafn)
259 except OSError, inst:
260 except OSError, inst:
260 if inst.errno != errno.ENOENT:
261 if inst.errno != errno.ENOENT:
261 raise
262 raise
262 ignoremissing(os.unlink)(datafn)
263 ignoremissing(os.unlink)(datafn)
263 else:
264 else:
264 for fn in (tmpindexfn, tmpdatafn):
265 for fn in (tmpindexfn, tmpdatafn):
265 ignoremissing(os.unlink)(fn)
266 ignoremissing(os.unlink)(fn)
266 finally:
267 finally:
267 lock.release()
268 lock.release()
268
269
269 if not opts.get('dry_run'):
270 if not opts.get('dry_run'):
270 ui.write(_('note: old revlog saved in:\n'
271 ui.write(_('note: old revlog saved in:\n'
271 ' %s\n'
272 ' %s\n'
272 ' %s\n'
273 ' %s\n'
273 '(You can delete those files when you are satisfied that your\n'
274 '(You can delete those files when you are satisfied that your\n'
274 'repository is still sane. '
275 'repository is still sane. '
275 'Running \'hg verify\' is strongly recommended.)\n')
276 'Running \'hg verify\' is strongly recommended.)\n')
276 % (oldindexfn, olddatafn))
277 % (oldindexfn, olddatafn))
277
278
278 cmdtable = {
279 cmdtable = {
279 'shrink': (shrink,
280 'shrink': (shrink,
280 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
281 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
281 ('n', 'dry-run', None, _('do not shrink, simulate only')),
282 ('n', 'dry-run', None, _('do not shrink, simulate only')),
282 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
283 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
283 ],
284 ],
284 _('hg shrink [--revlog PATH]'))
285 _('hg shrink [--revlog PATH]'))
285 }
286 }
286
287
287 if __name__ == "__main__":
288 if __name__ == "__main__":
288 print "shrink-revlog.py is now an extension (see hg help extensions)"
289 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now