##// END OF EJS Templates
shrink-revlog: use util.mktempcopy() to preserve mode of index file....
Greg Ward -
r11294:7b5d05e0 default
parent child Browse files
Show More
@@ -1,295 +1,289 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 """\
3 """\
4 reorder a revlog (the manifest by default) to save space
4 reorder a revlog (the manifest by default) to save space
5
5
6 Specifically, this topologically sorts the revisions in the revlog so that
6 Specifically, this topologically sorts the revisions in the revlog so that
7 revisions on the same branch are adjacent as much as possible. This is a
7 revisions on the same branch are adjacent as much as possible. This is a
8 workaround for the fact that Mercurial computes deltas relative to the
8 workaround for the fact that Mercurial computes deltas relative to the
9 previous revision rather than relative to a parent revision.
9 previous revision rather than relative to a parent revision.
10
10
11 This is *not* safe to run on a changelog.
11 This is *not* safe to run on a changelog.
12 """
12 """
13
13
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 # renamed by Greg Ward <greg at gerg.ca>.
16 # renamed by Greg Ward <greg at gerg.ca>.
17
17
18 # XXX would be nice to have a way to verify the repository after shrinking,
18 # XXX would be nice to have a way to verify the repository after shrinking,
19 # e.g. by comparing "before" and "after" states of random changesets
19 # e.g. by comparing "before" and "after" states of random changesets
20 # (maybe: export before, shrink, export after, diff).
20 # (maybe: export before, shrink, export after, diff).
21
21
22 import os, tempfile, errno
22 import os, tempfile, errno
23 from mercurial import revlog, transaction, node, util
23 from mercurial import revlog, transaction, node, util
24 from mercurial import changegroup
24 from mercurial import changegroup
25 from mercurial.i18n import _
25 from mercurial.i18n import _
26
26
27
27
28 def postorder(start, edges):
28 def postorder(start, edges):
29 result = []
29 result = []
30 visit = list(start)
30 visit = list(start)
31 finished = set()
31 finished = set()
32
32
33 while visit:
33 while visit:
34 cur = visit[-1]
34 cur = visit[-1]
35 for p in edges[cur]:
35 for p in edges[cur]:
36 if p not in finished:
36 if p not in finished:
37 visit.append(p)
37 visit.append(p)
38 break
38 break
39 else:
39 else:
40 result.append(cur)
40 result.append(cur)
41 finished.add(cur)
41 finished.add(cur)
42 visit.pop()
42 visit.pop()
43
43
44 return result
44 return result
45
45
46 def toposort_reversepostorder(ui, rl):
46 def toposort_reversepostorder(ui, rl):
47 # postorder of the reverse directed graph
47 # postorder of the reverse directed graph
48
48
49 # map rev to list of parent revs (p2 first)
49 # map rev to list of parent revs (p2 first)
50 parents = {}
50 parents = {}
51 heads = set()
51 heads = set()
52 ui.status(_('reading revs\n'))
52 ui.status(_('reading revs\n'))
53 try:
53 try:
54 for rev in rl:
54 for rev in rl:
55 ui.progress(_('reading'), rev, total=len(rl))
55 ui.progress(_('reading'), rev, total=len(rl))
56 (p1, p2) = rl.parentrevs(rev)
56 (p1, p2) = rl.parentrevs(rev)
57 if p1 == p2 == node.nullrev:
57 if p1 == p2 == node.nullrev:
58 parents[rev] = () # root node
58 parents[rev] = () # root node
59 elif p1 == p2 or p2 == node.nullrev:
59 elif p1 == p2 or p2 == node.nullrev:
60 parents[rev] = (p1,) # normal node
60 parents[rev] = (p1,) # normal node
61 else:
61 else:
62 parents[rev] = (p2, p1) # merge node
62 parents[rev] = (p2, p1) # merge node
63 heads.add(rev)
63 heads.add(rev)
64 for p in parents[rev]:
64 for p in parents[rev]:
65 heads.discard(p)
65 heads.discard(p)
66 finally:
66 finally:
67 ui.progress(_('reading'), None)
67 ui.progress(_('reading'), None)
68
68
69 heads = list(heads)
69 heads = list(heads)
70 heads.sort(reverse=True)
70 heads.sort(reverse=True)
71
71
72 ui.status(_('sorting revs\n'))
72 ui.status(_('sorting revs\n'))
73 return postorder(heads, parents)
73 return postorder(heads, parents)
74
74
75 def toposort_postorderreverse(ui, rl):
75 def toposort_postorderreverse(ui, rl):
76 # reverse-postorder of the reverse directed graph
76 # reverse-postorder of the reverse directed graph
77
77
78 children = {}
78 children = {}
79 roots = set()
79 roots = set()
80 ui.status(_('reading revs\n'))
80 ui.status(_('reading revs\n'))
81 try:
81 try:
82 for rev in rl:
82 for rev in rl:
83 ui.progress(_('reading'), rev, total=len(rl))
83 ui.progress(_('reading'), rev, total=len(rl))
84 (p1, p2) = rl.parentrevs(rev)
84 (p1, p2) = rl.parentrevs(rev)
85 if p1 == p2 == node.nullrev:
85 if p1 == p2 == node.nullrev:
86 roots.add(rev)
86 roots.add(rev)
87 children[rev] = []
87 children[rev] = []
88 if p1 != node.nullrev:
88 if p1 != node.nullrev:
89 children[p1].append(rev)
89 children[p1].append(rev)
90 if p2 != node.nullrev:
90 if p2 != node.nullrev:
91 children[p2].append(rev)
91 children[p2].append(rev)
92 finally:
92 finally:
93 ui.progress(_('reading'), None)
93 ui.progress(_('reading'), None)
94
94
95 root = list(roots)
95 root = list(roots)
96 roots.sort()
96 roots.sort()
97
97
98 ui.status(_('sorting revs\n'))
98 ui.status(_('sorting revs\n'))
99 result = postorder(roots, children)
99 result = postorder(roots, children)
100 result.reverse()
100 result.reverse()
101 return result
101 return result
102
102
103 def writerevs(ui, r1, r2, order, tr):
103 def writerevs(ui, r1, r2, order, tr):
104
104
105 ui.status(_('writing revs\n'))
105 ui.status(_('writing revs\n'))
106
106
107 count = [0]
107 count = [0]
108 def progress(*args):
108 def progress(*args):
109 ui.progress(_('writing'), count[0], total=len(order))
109 ui.progress(_('writing'), count[0], total=len(order))
110 count[0] += 1
110 count[0] += 1
111
111
112 order = [r1.node(r) for r in order]
112 order = [r1.node(r) for r in order]
113
113
114 # this is a bit ugly, but it works
114 # this is a bit ugly, but it works
115 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
115 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
116 unlookup = lambda x: int(x, 10)
116 unlookup = lambda x: int(x, 10)
117
117
118 try:
118 try:
119 group = util.chunkbuffer(r1.group(order, lookup, progress))
119 group = util.chunkbuffer(r1.group(order, lookup, progress))
120 chunkiter = changegroup.chunkiter(group)
120 chunkiter = changegroup.chunkiter(group)
121 r2.addgroup(chunkiter, unlookup, tr)
121 r2.addgroup(chunkiter, unlookup, tr)
122 finally:
122 finally:
123 ui.progress(_('writing'), None)
123 ui.progress(_('writing'), None)
124
124
125 def report(ui, r1, r2):
125 def report(ui, r1, r2):
126 def getsize(r):
126 def getsize(r):
127 s = 0
127 s = 0
128 for fn in (r.indexfile, r.datafile):
128 for fn in (r.indexfile, r.datafile):
129 try:
129 try:
130 s += os.stat(fn).st_size
130 s += os.stat(fn).st_size
131 except OSError, inst:
131 except OSError, inst:
132 if inst.errno != errno.ENOENT:
132 if inst.errno != errno.ENOENT:
133 raise
133 raise
134 return s
134 return s
135
135
136 oldsize = float(getsize(r1))
136 oldsize = float(getsize(r1))
137 newsize = float(getsize(r2))
137 newsize = float(getsize(r2))
138
138
139 # argh: have to pass an int to %d, because a float >= 2^32
139 # argh: have to pass an int to %d, because a float >= 2^32
140 # blows up under Python 2.5 or earlier
140 # blows up under Python 2.5 or earlier
141 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
141 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
142 % (int(oldsize), oldsize / 1024 / 1024))
142 % (int(oldsize), oldsize / 1024 / 1024))
143 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
143 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
144 % (int(newsize), newsize / 1024 / 1024))
144 % (int(newsize), newsize / 1024 / 1024))
145
145
146 shrink_percent = (oldsize - newsize) / oldsize * 100
146 shrink_percent = (oldsize - newsize) / oldsize * 100
147 shrink_factor = oldsize / newsize
147 shrink_factor = oldsize / newsize
148 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
148 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
149 % (shrink_percent, shrink_factor))
149 % (shrink_percent, shrink_factor))
150
150
151 def shrink(ui, repo, **opts):
151 def shrink(ui, repo, **opts):
152 """shrink a revlog by reordering revisions
152 """shrink a revlog by reordering revisions
153
153
154 Rewrites all the entries in some revlog of the current repository
154 Rewrites all the entries in some revlog of the current repository
155 (by default, the manifest log) to save space.
155 (by default, the manifest log) to save space.
156
156
157 Different sort algorithms have different performance
157 Different sort algorithms have different performance
158 characteristics. Use ``--sort`` to select a sort algorithm so you
158 characteristics. Use ``--sort`` to select a sort algorithm so you
159 can determine which works best for your data.
159 can determine which works best for your data.
160 """
160 """
161
161
162 if not repo.local():
162 if not repo.local():
163 raise util.Abort(_('not a local repository: %s') % repo.root)
163 raise util.Abort(_('not a local repository: %s') % repo.root)
164
164
165 fn = opts.get('revlog')
165 fn = opts.get('revlog')
166 if not fn:
166 if not fn:
167 indexfn = repo.sjoin('00manifest.i')
167 indexfn = repo.sjoin('00manifest.i')
168 else:
168 else:
169 if not fn.endswith('.i'):
169 if not fn.endswith('.i'):
170 raise util.Abort(_('--revlog option must specify the revlog index '
170 raise util.Abort(_('--revlog option must specify the revlog index '
171 'file (*.i), not %s') % opts.get('revlog'))
171 'file (*.i), not %s') % opts.get('revlog'))
172
172
173 indexfn = os.path.realpath(fn)
173 indexfn = os.path.realpath(fn)
174 store = repo.sjoin('')
174 store = repo.sjoin('')
175 if not indexfn.startswith(store):
175 if not indexfn.startswith(store):
176 raise util.Abort(_('--revlog option must specify a revlog in %s, '
176 raise util.Abort(_('--revlog option must specify a revlog in %s, '
177 'not %s') % (store, indexfn))
177 'not %s') % (store, indexfn))
178
178
179 sortname = opts['sort']
179 sortname = opts['sort']
180 try:
180 try:
181 toposort = globals()['toposort_' + sortname]
181 toposort = globals()['toposort_' + sortname]
182 except KeyError:
182 except KeyError:
183 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
183 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
184
184
185 if not os.path.exists(indexfn):
185 if not os.path.exists(indexfn):
186 raise util.Abort(_('no such file: %s') % indexfn)
186 raise util.Abort(_('no such file: %s') % indexfn)
187 if '00changelog' in indexfn:
187 if '00changelog' in indexfn:
188 raise util.Abort(_('shrinking the changelog '
188 raise util.Abort(_('shrinking the changelog '
189 'will corrupt your repository'))
189 'will corrupt your repository'))
190
190
191 ui.write(_('shrinking %s\n') % indexfn)
191 ui.write(_('shrinking %s\n') % indexfn)
192 prefix = os.path.basename(indexfn)[:-1]
192 prefix = os.path.basename(indexfn)[:-1]
193 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
193 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
194 prefix=prefix,
195 suffix='.i')
196 os.close(tmpfd)
197
194
198 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
195 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
199 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
196 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
200
197
201 datafn, tmpdatafn = r1.datafile, r2.datafile
198 datafn, tmpdatafn = r1.datafile, r2.datafile
202
199
203 oldindexfn = indexfn + '.old'
200 oldindexfn = indexfn + '.old'
204 olddatafn = datafn + '.old'
201 olddatafn = datafn + '.old'
205 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
202 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
206 raise util.Abort(_('one or both of\n'
203 raise util.Abort(_('one or both of\n'
207 ' %s\n'
204 ' %s\n'
208 ' %s\n'
205 ' %s\n'
209 'exists from a previous run; please clean up '
206 'exists from a previous run; please clean up '
210 'before running again') % (oldindexfn, olddatafn))
207 'before running again') % (oldindexfn, olddatafn))
211
208
212 # Don't use repo.transaction(), because then things get hairy with
209 # Don't use repo.transaction(), because then things get hairy with
213 # paths: some need to be relative to .hg, and some need to be
210 # paths: some need to be relative to .hg, and some need to be
214 # absolute. Doing it this way keeps things simple: everything is an
211 # absolute. Doing it this way keeps things simple: everything is an
215 # absolute path.
212 # absolute path.
216 lock = repo.lock(wait=False)
213 lock = repo.lock(wait=False)
217 tr = transaction.transaction(ui.warn,
214 tr = transaction.transaction(ui.warn,
218 open,
215 open,
219 repo.sjoin('journal'))
216 repo.sjoin('journal'))
220
217
221 def ignoremissing(func):
218 def ignoremissing(func):
222 def f(*args, **kw):
219 def f(*args, **kw):
223 try:
220 try:
224 return func(*args, **kw)
221 return func(*args, **kw)
225 except OSError, inst:
222 except OSError, inst:
226 if inst.errno != errno.ENOENT:
223 if inst.errno != errno.ENOENT:
227 raise
224 raise
228 return f
225 return f
229
226
230 try:
227 try:
231 try:
228 try:
232 order = toposort(ui, r1)
229 order = toposort(ui, r1)
233
230
234 suboptimal = 0
231 suboptimal = 0
235 for i in xrange(1, len(order)):
232 for i in xrange(1, len(order)):
236 parents = [p for p in r1.parentrevs(order[i])
233 parents = [p for p in r1.parentrevs(order[i])
237 if p != node.nullrev]
234 if p != node.nullrev]
238 if parents and order[i - 1] not in parents:
235 if parents and order[i - 1] not in parents:
239 suboptimal += 1
236 suboptimal += 1
240 ui.note(_('%d suboptimal nodes\n') % suboptimal)
237 ui.note(_('%d suboptimal nodes\n') % suboptimal)
241
238
242 writerevs(ui, r1, r2, order, tr)
239 writerevs(ui, r1, r2, order, tr)
243 report(ui, r1, r2)
240 report(ui, r1, r2)
244 tr.close()
241 tr.close()
245 except:
242 except:
246 # Abort transaction first, so we truncate the files before
243 # Abort transaction first, so we truncate the files before
247 # deleting them.
244 # deleting them.
248 tr.abort()
245 tr.abort()
249 for fn in (tmpindexfn, tmpdatafn):
246 for fn in (tmpindexfn, tmpdatafn):
250 ignoremissing(os.unlink)(fn)
247 ignoremissing(os.unlink)(fn)
251 raise
248 raise
252 if not opts.get('dry_run'):
249 if not opts.get('dry_run'):
253 # racy, both files cannot be renamed atomically
250 # racy, both files cannot be renamed atomically
254 # copy files
251 # copy files
255 util.os_link(indexfn, oldindexfn)
252 util.os_link(indexfn, oldindexfn)
256 ignoremissing(util.os_link)(datafn, olddatafn)
253 ignoremissing(util.os_link)(datafn, olddatafn)
257
254
258 # mkstemp() creates files only readable by the owner
259 os.chmod(tmpindexfn, os.stat(indexfn).st_mode)
260
261 # rename
255 # rename
262 util.rename(tmpindexfn, indexfn)
256 util.rename(tmpindexfn, indexfn)
263 try:
257 try:
264 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
258 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
265 util.rename(tmpdatafn, datafn)
259 util.rename(tmpdatafn, datafn)
266 except OSError, inst:
260 except OSError, inst:
267 if inst.errno != errno.ENOENT:
261 if inst.errno != errno.ENOENT:
268 raise
262 raise
269 ignoremissing(os.unlink)(datafn)
263 ignoremissing(os.unlink)(datafn)
270 else:
264 else:
271 for fn in (tmpindexfn, tmpdatafn):
265 for fn in (tmpindexfn, tmpdatafn):
272 ignoremissing(os.unlink)(fn)
266 ignoremissing(os.unlink)(fn)
273 finally:
267 finally:
274 lock.release()
268 lock.release()
275
269
276 if not opts.get('dry_run'):
270 if not opts.get('dry_run'):
277 ui.write(_('note: old revlog saved in:\n'
271 ui.write(_('note: old revlog saved in:\n'
278 ' %s\n'
272 ' %s\n'
279 ' %s\n'
273 ' %s\n'
280 '(You can delete those files when you are satisfied that your\n'
274 '(You can delete those files when you are satisfied that your\n'
281 'repository is still sane. '
275 'repository is still sane. '
282 'Running \'hg verify\' is strongly recommended.)\n')
276 'Running \'hg verify\' is strongly recommended.)\n')
283 % (oldindexfn, olddatafn))
277 % (oldindexfn, olddatafn))
284
278
285 cmdtable = {
279 cmdtable = {
286 'shrink': (shrink,
280 'shrink': (shrink,
287 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
281 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
288 ('n', 'dry-run', None, _('do not shrink, simulate only')),
282 ('n', 'dry-run', None, _('do not shrink, simulate only')),
289 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
283 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
290 ],
284 ],
291 _('hg shrink [--revlog PATH]'))
285 _('hg shrink [--revlog PATH]'))
292 }
286 }
293
287
294 if __name__ == "__main__":
288 if __name__ == "__main__":
295 print "shrink-revlog.py is now an extension (see hg help extensions)"
289 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now