##// END OF EJS Templates
shrink-revlog: update util.opener to scmutil.opener after d13913355390
Augie Fackler -
r14029:83d3f87c default
parent child Browse files
Show More
@@ -1,287 +1,287 b''
1 """reorder a revlog (the manifest by default) to save space
1 """reorder a revlog (the manifest by default) to save space
2
2
3 Specifically, this topologically sorts the revisions in the revlog so that
3 Specifically, this topologically sorts the revisions in the revlog so that
4 revisions on the same branch are adjacent as much as possible. This is a
4 revisions on the same branch are adjacent as much as possible. This is a
5 workaround for the fact that Mercurial computes deltas relative to the
5 workaround for the fact that Mercurial computes deltas relative to the
6 previous revision rather than relative to a parent revision.
6 previous revision rather than relative to a parent revision.
7
7
8 This is *not* safe to run on a changelog.
8 This is *not* safe to run on a changelog.
9 """
9 """
10
10
11 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
11 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
12 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
12 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
13 # renamed by Greg Ward <greg at gerg.ca>.
13 # renamed by Greg Ward <greg at gerg.ca>.
14
14
15 # XXX would be nice to have a way to verify the repository after shrinking,
15 # XXX would be nice to have a way to verify the repository after shrinking,
16 # e.g. by comparing "before" and "after" states of random changesets
16 # e.g. by comparing "before" and "after" states of random changesets
17 # (maybe: export before, shrink, export after, diff).
17 # (maybe: export before, shrink, export after, diff).
18
18
19 import os, tempfile, errno
19 import os, tempfile, errno
20 from mercurial import revlog, transaction, node, util
20 from mercurial import revlog, transaction, node, util, scmutil
21 from mercurial import changegroup
21 from mercurial import changegroup
22 from mercurial.i18n import _
22 from mercurial.i18n import _
23
23
24
24
def postorder(start, edges):
    """Return the nodes reachable from ``start`` in depth-first postorder.

    start -- iterable of nodes to begin from; the last one is visited first
    edges -- mapping from each node to an iterable of its successor nodes

    A node is appended to the result only once all of its successors
    have been appended.  Each node appears exactly once, even when
    ``start`` lists a node twice or lists a node that is also reachable
    from another start node.  Raises KeyError if a visited node is
    missing from ``edges``.
    """
    result = []
    visit = list(start)     # explicit stack, so deep graphs do not recurse
    finished = set()

    while visit:
        cur = visit[-1]
        if cur in finished:
            # already emitted through another path; do not emit it twice
            visit.pop()
            continue
        for p in edges[cur]:
            if p not in finished:
                # descend into the first unfinished successor; cur stays
                # on the stack and is re-examined afterwards
                visit.append(p)
                break
        else:
            # no break: every successor is finished, so cur can be emitted
            result.append(cur)
            finished.add(cur)
            visit.pop()

    return result
42
42
def toposort_reversepostorder(ui, rl):
    """Sort the revs of ``rl`` by a postorder walk over parent links.

    Builds a map from each rev to its parents (p2 before p1), collects
    the revs that no other rev names as a parent, and hands those heads
    (sorted in descending order) to postorder().  Status and progress
    messages are emitted through ``ui``.
    """
    # postorder of the reverse directed graph

    # rev -> tuple of parent revs (p2 first)
    parentmap = {}
    headrevs = set()
    ui.status(_('reading revs\n'))
    try:
        for rev in rl:
            ui.progress(_('reading'), rev, total=len(rl))
            p1, p2 = rl.parentrevs(rev)
            if p1 == p2 or p2 == node.nullrev:
                if p1 == node.nullrev:
                    revparents = ()         # root node
                else:
                    revparents = (p1,)      # normal node
            else:
                revparents = (p2, p1)       # merge node
            parentmap[rev] = revparents
            # mark rev as a head and unmark whatever it names as parent
            headrevs.add(rev)
            headrevs.difference_update(revparents)
    finally:
        ui.progress(_('reading'), None)

    startrevs = sorted(headrevs, reverse=True)

    ui.status(_('sorting revs\n'))
    return postorder(startrevs, parentmap)
71
71
def toposort_postorderreverse(ui, rl):
    """Sort the revs of ``rl`` by a reversed postorder walk over child links.

    Builds a map from each rev to the revs that name it as a parent,
    runs postorder() from the root revs (sorted in ascending order) and
    returns that sequence reversed.  Status and progress messages are
    emitted through ``ui``.
    """
    # reverse-postorder of the reverse directed graph

    childmap = {}
    rootrevs = set()
    ui.status(_('reading revs\n'))
    try:
        for rev in rl:
            ui.progress(_('reading'), rev, total=len(rl))
            p1, p2 = rl.parentrevs(rev)
            if p1 == p2 == node.nullrev:
                rootrevs.add(rev)
            childmap[rev] = []
            # register rev under each non-null parent, p1 before p2
            for parent in (p1, p2):
                if parent != node.nullrev:
                    childmap[parent].append(rev)
    finally:
        ui.progress(_('reading'), None)

    startrevs = sorted(rootrevs)

    ui.status(_('sorting revs\n'))
    ordered = postorder(startrevs, childmap)
    ordered.reverse()
    return ordered
99
99
def writerevs(ui, r1, r2, order, tr):
    """Write the revisions of r1 into r2 in the given order.

    ui    -- ui object used for status and progress output
    r1    -- source revlog
    r2    -- destination revlog
    order -- sequence of revs of r1, in the order they are to be written
    tr    -- transaction handed to r2.addgroup()
    """

    ui.status(_('writing revs\n'))

    order = [r1.node(r) for r in order]

    # this is a bit ugly, but it works: lookup() formats the linkrev as
    # a 20-digit decimal string, unlookup() parses it back, and lookup()
    # also drives the progress bar (one tick per call)
    count = [0]
    def lookup(x):
        count[0] += 1
        ui.progress(_('writing'), count[0], total=len(order))
        return "%020d" % r1.linkrev(r1.rev(x))

    unlookup = lambda x: int(x, 10)

    try:
        # Progress is reported from lookup(), so no separate progress
        # callback is passed to group(); no name 'progress' exists in
        # this module.
        # NOTE(review): confirm that r1.group() takes (nodes, lookup) in
        # the Mercurial version this extension targets.
        group = util.chunkbuffer(r1.group(order, lookup))
        group = changegroup.unbundle10(group, "UN")
        r2.addgroup(group, unlookup, tr)
    finally:
        # always clear the progress bar, even if writing failed
        ui.progress(_('writing'), None)
122
122
123 def report(ui, r1, r2):
123 def report(ui, r1, r2):
124 def getsize(r):
124 def getsize(r):
125 s = 0
125 s = 0
126 for fn in (r.indexfile, r.datafile):
126 for fn in (r.indexfile, r.datafile):
127 try:
127 try:
128 s += os.stat(fn).st_size
128 s += os.stat(fn).st_size
129 except OSError, inst:
129 except OSError, inst:
130 if inst.errno != errno.ENOENT:
130 if inst.errno != errno.ENOENT:
131 raise
131 raise
132 return s
132 return s
133
133
134 oldsize = float(getsize(r1))
134 oldsize = float(getsize(r1))
135 newsize = float(getsize(r2))
135 newsize = float(getsize(r2))
136
136
137 # argh: have to pass an int to %d, because a float >= 2^32
137 # argh: have to pass an int to %d, because a float >= 2^32
138 # blows up under Python 2.5 or earlier
138 # blows up under Python 2.5 or earlier
139 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
139 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
140 % (int(oldsize), oldsize / 1024 / 1024))
140 % (int(oldsize), oldsize / 1024 / 1024))
141 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
141 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
142 % (int(newsize), newsize / 1024 / 1024))
142 % (int(newsize), newsize / 1024 / 1024))
143
143
144 shrink_percent = (oldsize - newsize) / oldsize * 100
144 shrink_percent = (oldsize - newsize) / oldsize * 100
145 shrink_factor = oldsize / newsize
145 shrink_factor = oldsize / newsize
146 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
146 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
147 % (shrink_percent, shrink_factor))
147 % (shrink_percent, shrink_factor))
148
148
def shrink(ui, repo, **opts):
    """shrink a revlog by reordering revisions

    Rewrites all the entries in some revlog of the current repository
    (by default, the manifest log) to save space.

    Different sort algorithms have different performance
    characteristics. Use ``--sort`` to select a sort algorithm so you
    can determine which works best for your data.
    """
    # NOTE: the docstring above is kept verbatim; presumably it is shown
    # as the command's help text -- TODO confirm before rewording it.

    if not repo.local():
        raise util.Abort(_('not a local repository: %s') % repo.root)

    # Pick the index file to shrink: the manifest unless --revlog is given.
    fn = opts.get('revlog')
    if not fn:
        indexfn = repo.sjoin('00manifest.i')
    else:
        if not fn.endswith('.i'):
            raise util.Abort(_('--revlog option must specify the revlog index '
                               'file (*.i), not %s') % opts.get('revlog'))

        # the user-supplied revlog must live under repo.sjoin('')
        indexfn = os.path.realpath(fn)
        store = repo.sjoin('')
        if not indexfn.startswith(store):
            raise util.Abort(_('--revlog option must specify a revlog in %s, '
                               'not %s') % (store, indexfn))

    # Sort algorithms are the module-level functions named
    # toposort_<name>; look the requested one up by name.
    sortname = opts['sort']
    try:
        toposort = globals()['toposort_' + sortname]
    except KeyError:
        raise util.Abort(_('no such toposort algorithm: %s') % sortname)

    if not os.path.exists(indexfn):
        raise util.Abort(_('no such file: %s') % indexfn)
    if '00changelog' in indexfn:
        raise util.Abort(_('shrinking the changelog '
                           'will corrupt your repository'))

    ui.write(_('shrinking %s\n') % indexfn)
    # NOTE(review): prefix is not used anywhere below in this function
    prefix = os.path.basename(indexfn)[:-1]
    # presumably creates a temporary file next to indexfn -- TODO confirm
    # against util.mktempcopy
    tmpindexfn = util.mktempcopy(indexfn, emptyok=True)

    # r1 is the revlog being read, r2 the one written under the temp name
    r1 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), indexfn)
    r2 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), tmpindexfn)

    datafn, tmpdatafn = r1.datafile, r2.datafile

    # Refuse to run if backups from an earlier run are still around, so
    # they are never overwritten.
    oldindexfn = indexfn + '.old'
    olddatafn = datafn + '.old'
    if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
        raise util.Abort(_('one or both of\n'
                           ' %s\n'
                           ' %s\n'
                           'exists from a previous run; please clean up '
                           'before running again') % (oldindexfn, olddatafn))

    # Don't use repo.transaction(), because then things get hairy with
    # paths: some need to be relative to .hg, and some need to be
    # absolute. Doing it this way keeps things simple: everything is an
    # absolute path.
    lock = repo.lock(wait=False)
    tr = transaction.transaction(ui.warn,
                                 open,
                                 repo.sjoin('journal'))

    def ignoremissing(func):
        # wrap func so that an OSError with errno ENOENT is swallowed
        # (the wrapper then returns None); other errors propagate
        def f(*args, **kw):
            try:
                return func(*args, **kw)
            except OSError, inst:
                if inst.errno != errno.ENOENT:
                    raise
        return f

    try:
        try:
            order = toposort(ui, r1)

            # count the revs whose predecessor in the new order is not
            # one of their parents (reported via ui.note only)
            suboptimal = 0
            for i in xrange(1, len(order)):
                parents = [p for p in r1.parentrevs(order[i])
                           if p != node.nullrev]
                if parents and order[i - 1] not in parents:
                    suboptimal += 1
            ui.note(_('%d suboptimal nodes\n') % suboptimal)

            writerevs(ui, r1, r2, order, tr)
            report(ui, r1, r2)
            tr.close()
        except:
            # Abort transaction first, so we truncate the files before
            # deleting them.
            tr.abort()
            for fn in (tmpindexfn, tmpdatafn):
                ignoremissing(os.unlink)(fn)
            raise
        # only reached when the rewrite above succeeded
        if not opts.get('dry_run'):
            # racy, both files cannot be renamed atomically
            # copy files
            util.os_link(indexfn, oldindexfn)
            ignoremissing(util.os_link)(datafn, olddatafn)

            # rename
            util.rename(tmpindexfn, indexfn)
            try:
                os.chmod(tmpdatafn, os.stat(datafn).st_mode)
                util.rename(tmpdatafn, datafn)
            except OSError, inst:
                if inst.errno != errno.ENOENT:
                    raise
                # ENOENT from the stat/chmod/rename above: remove datafn
                # if it exists (a missing datafn is ignored)
                ignoremissing(os.unlink)(datafn)
        else:
            # dry run: discard the temporary revlog files
            for fn in (tmpindexfn, tmpdatafn):
                ignoremissing(os.unlink)(fn)
    finally:
        lock.release()

    if not opts.get('dry_run'):
        ui.write(_('note: old revlog saved in:\n'
                   ' %s\n'
                   ' %s\n'
                   '(You can delete those files when you are satisfied that your\n'
                   'repository is still sane. '
                   'Running \'hg verify\' is strongly recommended.)\n')
                 % (oldindexfn, olddatafn))
276
276
# Command table: maps the command name to a tuple of
# (function, option list, synopsis).  Each option is a tuple of
# (short name, long name, default value, help text).
cmdtable = {
    'shrink': (shrink,
               [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
                ('n', 'dry-run', None, _('do not shrink, simulate only')),
                # help text is wrapped in _() like the other options so
                # that it is translatable too
                ('', 'sort', 'reversepostorder',
                 _('name of sort algorithm to use')),
                ],
               _('hg shrink [--revlog PATH]'))
}

if __name__ == "__main__":
    # running the file directly only prints a pointer to the extension help
    print("shrink-revlog.py is now an extension (see hg help extensions)")
General Comments 0
You need to be logged in to leave comments. Login now