##// END OF EJS Templates
shrink-revlog: make pyflakes happy
Greg Ward -
r16305:90ca62bb default
parent child Browse files
Show More
@@ -1,291 +1,290 b''
1 """reorder a revlog (the manifest by default) to save space
1 """reorder a revlog (the manifest by default) to save space
2
2
3 Specifically, this topologically sorts the revisions in the revlog so that
3 Specifically, this topologically sorts the revisions in the revlog so that
4 revisions on the same branch are adjacent as much as possible. This is a
4 revisions on the same branch are adjacent as much as possible. This is a
5 workaround for the fact that Mercurial computes deltas relative to the
5 workaround for the fact that Mercurial computes deltas relative to the
6 previous revision rather than relative to a parent revision.
6 previous revision rather than relative to a parent revision.
7
7
8 This is *not* safe to run on a changelog.
8 This is *not* safe to run on a changelog.
9 """
9 """
10
10
11 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
11 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
12 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
12 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
13 # renamed by Greg Ward <greg at gerg.ca>.
13 # renamed by Greg Ward <greg at gerg.ca>.
14
14
15 # XXX would be nice to have a way to verify the repository after shrinking,
15 # XXX would be nice to have a way to verify the repository after shrinking,
16 # e.g. by comparing "before" and "after" states of random changesets
16 # e.g. by comparing "before" and "after" states of random changesets
17 # (maybe: export before, shrink, export after, diff).
17 # (maybe: export before, shrink, export after, diff).
18
18
19 import os, tempfile, errno
19 import os, errno
20 from mercurial import revlog, transaction, node, util, scmutil
20 from mercurial import revlog, transaction, node, util, scmutil
21 from mercurial import changegroup
21 from mercurial import changegroup
22 from mercurial.i18n import _
22 from mercurial.i18n import _
23
23
24
24
25 def postorder(start, edges):
25 def postorder(start, edges):
26 result = []
26 result = []
27 visit = list(start)
27 visit = list(start)
28 finished = set()
28 finished = set()
29
29
30 while visit:
30 while visit:
31 cur = visit[-1]
31 cur = visit[-1]
32 for p in edges[cur]:
32 for p in edges[cur]:
33 # defend against node.nullrev because it's occasionally
33 # defend against node.nullrev because it's occasionally
34 # possible for a node to have parents (null, something)
34 # possible for a node to have parents (null, something)
35 # rather than (something, null)
35 # rather than (something, null)
36 if p not in finished and p != node.nullrev:
36 if p not in finished and p != node.nullrev:
37 visit.append(p)
37 visit.append(p)
38 break
38 break
39 else:
39 else:
40 result.append(cur)
40 result.append(cur)
41 finished.add(cur)
41 finished.add(cur)
42 visit.pop()
42 visit.pop()
43
43
44 return result
44 return result
45
45
46 def toposort_reversepostorder(ui, rl):
46 def toposort_reversepostorder(ui, rl):
47 # postorder of the reverse directed graph
47 # postorder of the reverse directed graph
48
48
49 # map rev to list of parent revs (p2 first)
49 # map rev to list of parent revs (p2 first)
50 parents = {}
50 parents = {}
51 heads = set()
51 heads = set()
52 ui.status(_('reading revs\n'))
52 ui.status(_('reading revs\n'))
53 try:
53 try:
54 for rev in rl:
54 for rev in rl:
55 ui.progress(_('reading'), rev, total=len(rl))
55 ui.progress(_('reading'), rev, total=len(rl))
56 (p1, p2) = rl.parentrevs(rev)
56 (p1, p2) = rl.parentrevs(rev)
57 if p1 == p2 == node.nullrev:
57 if p1 == p2 == node.nullrev:
58 parents[rev] = () # root node
58 parents[rev] = () # root node
59 elif p1 == p2 or p2 == node.nullrev:
59 elif p1 == p2 or p2 == node.nullrev:
60 parents[rev] = (p1,) # normal node
60 parents[rev] = (p1,) # normal node
61 else:
61 else:
62 parents[rev] = (p2, p1) # merge node
62 parents[rev] = (p2, p1) # merge node
63 heads.add(rev)
63 heads.add(rev)
64 for p in parents[rev]:
64 for p in parents[rev]:
65 heads.discard(p)
65 heads.discard(p)
66 finally:
66 finally:
67 ui.progress(_('reading'), None)
67 ui.progress(_('reading'), None)
68
68
69 heads = list(heads)
69 heads = list(heads)
70 heads.sort(reverse=True)
70 heads.sort(reverse=True)
71
71
72 ui.status(_('sorting revs\n'))
72 ui.status(_('sorting revs\n'))
73 return postorder(heads, parents)
73 return postorder(heads, parents)
74
74
75 def toposort_postorderreverse(ui, rl):
75 def toposort_postorderreverse(ui, rl):
76 # reverse-postorder of the reverse directed graph
76 # reverse-postorder of the reverse directed graph
77
77
78 children = {}
78 children = {}
79 roots = set()
79 roots = set()
80 ui.status(_('reading revs\n'))
80 ui.status(_('reading revs\n'))
81 try:
81 try:
82 for rev in rl:
82 for rev in rl:
83 ui.progress(_('reading'), rev, total=len(rl))
83 ui.progress(_('reading'), rev, total=len(rl))
84 (p1, p2) = rl.parentrevs(rev)
84 (p1, p2) = rl.parentrevs(rev)
85 if p1 == p2 == node.nullrev:
85 if p1 == p2 == node.nullrev:
86 roots.add(rev)
86 roots.add(rev)
87 children[rev] = []
87 children[rev] = []
88 if p1 != node.nullrev:
88 if p1 != node.nullrev:
89 children[p1].append(rev)
89 children[p1].append(rev)
90 if p2 != node.nullrev:
90 if p2 != node.nullrev:
91 children[p2].append(rev)
91 children[p2].append(rev)
92 finally:
92 finally:
93 ui.progress(_('reading'), None)
93 ui.progress(_('reading'), None)
94
94
95 roots = list(roots)
95 roots = list(roots)
96 roots.sort()
96 roots.sort()
97
97
98 ui.status(_('sorting revs\n'))
98 ui.status(_('sorting revs\n'))
99 result = postorder(roots, children)
99 result = postorder(roots, children)
100 result.reverse()
100 result.reverse()
101 return result
101 return result
102
102
103 def writerevs(ui, r1, r2, order, tr):
103 def writerevs(ui, r1, r2, order, tr):
104
104
105 ui.status(_('writing revs\n'))
105 ui.status(_('writing revs\n'))
106
106
107
107
108 order = [r1.node(r) for r in order]
108 order = [r1.node(r) for r in order]
109
109
110 # this is a bit ugly, but it works
110 # this is a bit ugly, but it works
111 count = [0]
111 count = [0]
112 def lookup(revl, x):
112 def lookup(revl, x):
113 count[0] += 1
113 count[0] += 1
114 ui.progress(_('writing'), count[0], total=len(order))
114 ui.progress(_('writing'), count[0], total=len(order))
115 return "%020d" % revl.linkrev(revl.rev(x))
115 return "%020d" % revl.linkrev(revl.rev(x))
116
116
117 unlookup = lambda x: int(x, 10)
117 unlookup = lambda x: int(x, 10)
118
118
119 try:
119 try:
120 bundler = changegroup.bundle10(lookup)
120 bundler = changegroup.bundle10(lookup)
121 group = util.chunkbuffer(r1.group(order, bundler))
121 group = util.chunkbuffer(r1.group(order, bundler))
122 group = changegroup.unbundle10(group, "UN")
122 group = changegroup.unbundle10(group, "UN")
123 r2.addgroup(group, unlookup, tr)
123 r2.addgroup(group, unlookup, tr)
124 finally:
124 finally:
125 ui.progress(_('writing'), None)
125 ui.progress(_('writing'), None)
126
126
127 def report(ui, r1, r2):
127 def report(ui, r1, r2):
128 def getsize(r):
128 def getsize(r):
129 s = 0
129 s = 0
130 for fn in (r.indexfile, r.datafile):
130 for fn in (r.indexfile, r.datafile):
131 try:
131 try:
132 s += os.stat(fn).st_size
132 s += os.stat(fn).st_size
133 except OSError, inst:
133 except OSError, inst:
134 if inst.errno != errno.ENOENT:
134 if inst.errno != errno.ENOENT:
135 raise
135 raise
136 return s
136 return s
137
137
138 oldsize = float(getsize(r1))
138 oldsize = float(getsize(r1))
139 newsize = float(getsize(r2))
139 newsize = float(getsize(r2))
140
140
141 # argh: have to pass an int to %d, because a float >= 2^32
141 # argh: have to pass an int to %d, because a float >= 2^32
142 # blows up under Python 2.5 or earlier
142 # blows up under Python 2.5 or earlier
143 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
143 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
144 % (int(oldsize), oldsize / 1024 / 1024))
144 % (int(oldsize), oldsize / 1024 / 1024))
145 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
145 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
146 % (int(newsize), newsize / 1024 / 1024))
146 % (int(newsize), newsize / 1024 / 1024))
147
147
148 shrink_percent = (oldsize - newsize) / oldsize * 100
148 shrink_percent = (oldsize - newsize) / oldsize * 100
149 shrink_factor = oldsize / newsize
149 shrink_factor = oldsize / newsize
150 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
150 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
151 % (shrink_percent, shrink_factor))
151 % (shrink_percent, shrink_factor))
152
152
153 def shrink(ui, repo, **opts):
153 def shrink(ui, repo, **opts):
154 """shrink a revlog by reordering revisions
154 """shrink a revlog by reordering revisions
155
155
156 Rewrites all the entries in some revlog of the current repository
156 Rewrites all the entries in some revlog of the current repository
157 (by default, the manifest log) to save space.
157 (by default, the manifest log) to save space.
158
158
159 Different sort algorithms have different performance
159 Different sort algorithms have different performance
160 characteristics. Use ``--sort`` to select a sort algorithm so you
160 characteristics. Use ``--sort`` to select a sort algorithm so you
161 can determine which works best for your data.
161 can determine which works best for your data.
162 """
162 """
163
163
164 if not repo.local():
164 if not repo.local():
165 raise util.Abort(_('not a local repository: %s') % repo.root)
165 raise util.Abort(_('not a local repository: %s') % repo.root)
166
166
167 fn = opts.get('revlog')
167 fn = opts.get('revlog')
168 if not fn:
168 if not fn:
169 indexfn = repo.sjoin('00manifest.i')
169 indexfn = repo.sjoin('00manifest.i')
170 else:
170 else:
171 if not fn.endswith('.i'):
171 if not fn.endswith('.i'):
172 raise util.Abort(_('--revlog option must specify the revlog index '
172 raise util.Abort(_('--revlog option must specify the revlog index '
173 'file (*.i), not %s') % opts.get('revlog'))
173 'file (*.i), not %s') % opts.get('revlog'))
174
174
175 indexfn = os.path.realpath(fn)
175 indexfn = os.path.realpath(fn)
176 store = repo.sjoin('')
176 store = repo.sjoin('')
177 if not indexfn.startswith(store):
177 if not indexfn.startswith(store):
178 raise util.Abort(_('--revlog option must specify a revlog in %s, '
178 raise util.Abort(_('--revlog option must specify a revlog in %s, '
179 'not %s') % (store, indexfn))
179 'not %s') % (store, indexfn))
180
180
181 sortname = opts['sort']
181 sortname = opts['sort']
182 try:
182 try:
183 toposort = globals()['toposort_' + sortname]
183 toposort = globals()['toposort_' + sortname]
184 except KeyError:
184 except KeyError:
185 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
185 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
186
186
187 if not os.path.exists(indexfn):
187 if not os.path.exists(indexfn):
188 raise util.Abort(_('no such file: %s') % indexfn)
188 raise util.Abort(_('no such file: %s') % indexfn)
189 if '00changelog' in indexfn:
189 if '00changelog' in indexfn:
190 raise util.Abort(_('shrinking the changelog '
190 raise util.Abort(_('shrinking the changelog '
191 'will corrupt your repository'))
191 'will corrupt your repository'))
192
192
193 ui.write(_('shrinking %s\n') % indexfn)
193 ui.write(_('shrinking %s\n') % indexfn)
194 prefix = os.path.basename(indexfn)[:-1]
195 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
194 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
196
195
197 r1 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), indexfn)
196 r1 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), indexfn)
198 r2 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), tmpindexfn)
197 r2 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), tmpindexfn)
199
198
200 datafn, tmpdatafn = r1.datafile, r2.datafile
199 datafn, tmpdatafn = r1.datafile, r2.datafile
201
200
202 oldindexfn = indexfn + '.old'
201 oldindexfn = indexfn + '.old'
203 olddatafn = datafn + '.old'
202 olddatafn = datafn + '.old'
204 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
203 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
205 raise util.Abort(_('one or both of\n'
204 raise util.Abort(_('one or both of\n'
206 ' %s\n'
205 ' %s\n'
207 ' %s\n'
206 ' %s\n'
208 'exists from a previous run; please clean up '
207 'exists from a previous run; please clean up '
209 'before running again') % (oldindexfn, olddatafn))
208 'before running again') % (oldindexfn, olddatafn))
210
209
211 # Don't use repo.transaction(), because then things get hairy with
210 # Don't use repo.transaction(), because then things get hairy with
212 # paths: some need to be relative to .hg, and some need to be
211 # paths: some need to be relative to .hg, and some need to be
213 # absolute. Doing it this way keeps things simple: everything is an
212 # absolute. Doing it this way keeps things simple: everything is an
214 # absolute path.
213 # absolute path.
215 lock = repo.lock(wait=False)
214 lock = repo.lock(wait=False)
216 tr = transaction.transaction(ui.warn,
215 tr = transaction.transaction(ui.warn,
217 open,
216 open,
218 repo.sjoin('journal'))
217 repo.sjoin('journal'))
219
218
220 def ignoremissing(func):
219 def ignoremissing(func):
221 def f(*args, **kw):
220 def f(*args, **kw):
222 try:
221 try:
223 return func(*args, **kw)
222 return func(*args, **kw)
224 except OSError, inst:
223 except OSError, inst:
225 if inst.errno != errno.ENOENT:
224 if inst.errno != errno.ENOENT:
226 raise
225 raise
227 return f
226 return f
228
227
229 try:
228 try:
230 try:
229 try:
231 order = toposort(ui, r1)
230 order = toposort(ui, r1)
232
231
233 suboptimal = 0
232 suboptimal = 0
234 for i in xrange(1, len(order)):
233 for i in xrange(1, len(order)):
235 parents = [p for p in r1.parentrevs(order[i])
234 parents = [p for p in r1.parentrevs(order[i])
236 if p != node.nullrev]
235 if p != node.nullrev]
237 if parents and order[i - 1] not in parents:
236 if parents and order[i - 1] not in parents:
238 suboptimal += 1
237 suboptimal += 1
239 ui.note(_('%d suboptimal nodes\n') % suboptimal)
238 ui.note(_('%d suboptimal nodes\n') % suboptimal)
240
239
241 writerevs(ui, r1, r2, order, tr)
240 writerevs(ui, r1, r2, order, tr)
242 report(ui, r1, r2)
241 report(ui, r1, r2)
243 tr.close()
242 tr.close()
244 except:
243 except:
245 # Abort transaction first, so we truncate the files before
244 # Abort transaction first, so we truncate the files before
246 # deleting them.
245 # deleting them.
247 tr.abort()
246 tr.abort()
248 for fn in (tmpindexfn, tmpdatafn):
247 for fn in (tmpindexfn, tmpdatafn):
249 ignoremissing(os.unlink)(fn)
248 ignoremissing(os.unlink)(fn)
250 raise
249 raise
251 if not opts.get('dry_run'):
250 if not opts.get('dry_run'):
252 # racy, both files cannot be renamed atomically
251 # racy, both files cannot be renamed atomically
253 # copy files
252 # copy files
254 util.oslink(indexfn, oldindexfn)
253 util.oslink(indexfn, oldindexfn)
255 ignoremissing(util.oslink)(datafn, olddatafn)
254 ignoremissing(util.oslink)(datafn, olddatafn)
256
255
257 # rename
256 # rename
258 util.rename(tmpindexfn, indexfn)
257 util.rename(tmpindexfn, indexfn)
259 try:
258 try:
260 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
259 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
261 util.rename(tmpdatafn, datafn)
260 util.rename(tmpdatafn, datafn)
262 except OSError, inst:
261 except OSError, inst:
263 if inst.errno != errno.ENOENT:
262 if inst.errno != errno.ENOENT:
264 raise
263 raise
265 ignoremissing(os.unlink)(datafn)
264 ignoremissing(os.unlink)(datafn)
266 else:
265 else:
267 for fn in (tmpindexfn, tmpdatafn):
266 for fn in (tmpindexfn, tmpdatafn):
268 ignoremissing(os.unlink)(fn)
267 ignoremissing(os.unlink)(fn)
269 finally:
268 finally:
270 lock.release()
269 lock.release()
271
270
272 if not opts.get('dry_run'):
271 if not opts.get('dry_run'):
273 ui.write(_('note: old revlog saved in:\n'
272 ui.write(_('note: old revlog saved in:\n'
274 ' %s\n'
273 ' %s\n'
275 ' %s\n'
274 ' %s\n'
276 '(You can delete those files when you are satisfied that your\n'
275 '(You can delete those files when you are satisfied that your\n'
277 'repository is still sane. '
276 'repository is still sane. '
278 'Running \'hg verify\' is strongly recommended.)\n')
277 'Running \'hg verify\' is strongly recommended.)\n')
279 % (oldindexfn, olddatafn))
278 % (oldindexfn, olddatafn))
280
279
281 cmdtable = {
280 cmdtable = {
282 'shrink': (shrink,
281 'shrink': (shrink,
283 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
282 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
284 ('n', 'dry-run', None, _('do not shrink, simulate only')),
283 ('n', 'dry-run', None, _('do not shrink, simulate only')),
285 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
284 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
286 ],
285 ],
287 _('hg shrink [--revlog PATH]'))
286 _('hg shrink [--revlog PATH]'))
288 }
287 }
289
288
290 if __name__ == "__main__":
289 if __name__ == "__main__":
291 print "shrink-revlog.py is now an extension (see hg help extensions)"
290 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now