##// END OF EJS Templates
shrink-repo: wrong variable name
Pradeepkumar Gayam -
r11298:3e46d76e default
parent child Browse files
Show More
@@ -1,289 +1,289 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 """\
3 """\
4 reorder a revlog (the manifest by default) to save space
4 reorder a revlog (the manifest by default) to save space
5
5
6 Specifically, this topologically sorts the revisions in the revlog so that
6 Specifically, this topologically sorts the revisions in the revlog so that
7 revisions on the same branch are adjacent as much as possible. This is a
7 revisions on the same branch are adjacent as much as possible. This is a
8 workaround for the fact that Mercurial computes deltas relative to the
8 workaround for the fact that Mercurial computes deltas relative to the
9 previous revision rather than relative to a parent revision.
9 previous revision rather than relative to a parent revision.
10
10
11 This is *not* safe to run on a changelog.
11 This is *not* safe to run on a changelog.
12 """
12 """
13
13
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 # renamed by Greg Ward <greg at gerg.ca>.
16 # renamed by Greg Ward <greg at gerg.ca>.
17
17
18 # XXX would be nice to have a way to verify the repository after shrinking,
18 # XXX would be nice to have a way to verify the repository after shrinking,
19 # e.g. by comparing "before" and "after" states of random changesets
19 # e.g. by comparing "before" and "after" states of random changesets
20 # (maybe: export before, shrink, export after, diff).
20 # (maybe: export before, shrink, export after, diff).
21
21
22 import os, tempfile, errno
22 import os, tempfile, errno
23 from mercurial import revlog, transaction, node, util
23 from mercurial import revlog, transaction, node, util
24 from mercurial import changegroup
24 from mercurial import changegroup
25 from mercurial.i18n import _
25 from mercurial.i18n import _
26
26
27
27
28 def postorder(start, edges):
28 def postorder(start, edges):
29 result = []
29 result = []
30 visit = list(start)
30 visit = list(start)
31 finished = set()
31 finished = set()
32
32
33 while visit:
33 while visit:
34 cur = visit[-1]
34 cur = visit[-1]
35 for p in edges[cur]:
35 for p in edges[cur]:
36 if p not in finished:
36 if p not in finished:
37 visit.append(p)
37 visit.append(p)
38 break
38 break
39 else:
39 else:
40 result.append(cur)
40 result.append(cur)
41 finished.add(cur)
41 finished.add(cur)
42 visit.pop()
42 visit.pop()
43
43
44 return result
44 return result
45
45
46 def toposort_reversepostorder(ui, rl):
46 def toposort_reversepostorder(ui, rl):
47 # postorder of the reverse directed graph
47 # postorder of the reverse directed graph
48
48
49 # map rev to list of parent revs (p2 first)
49 # map rev to list of parent revs (p2 first)
50 parents = {}
50 parents = {}
51 heads = set()
51 heads = set()
52 ui.status(_('reading revs\n'))
52 ui.status(_('reading revs\n'))
53 try:
53 try:
54 for rev in rl:
54 for rev in rl:
55 ui.progress(_('reading'), rev, total=len(rl))
55 ui.progress(_('reading'), rev, total=len(rl))
56 (p1, p2) = rl.parentrevs(rev)
56 (p1, p2) = rl.parentrevs(rev)
57 if p1 == p2 == node.nullrev:
57 if p1 == p2 == node.nullrev:
58 parents[rev] = () # root node
58 parents[rev] = () # root node
59 elif p1 == p2 or p2 == node.nullrev:
59 elif p1 == p2 or p2 == node.nullrev:
60 parents[rev] = (p1,) # normal node
60 parents[rev] = (p1,) # normal node
61 else:
61 else:
62 parents[rev] = (p2, p1) # merge node
62 parents[rev] = (p2, p1) # merge node
63 heads.add(rev)
63 heads.add(rev)
64 for p in parents[rev]:
64 for p in parents[rev]:
65 heads.discard(p)
65 heads.discard(p)
66 finally:
66 finally:
67 ui.progress(_('reading'), None)
67 ui.progress(_('reading'), None)
68
68
69 heads = list(heads)
69 heads = list(heads)
70 heads.sort(reverse=True)
70 heads.sort(reverse=True)
71
71
72 ui.status(_('sorting revs\n'))
72 ui.status(_('sorting revs\n'))
73 return postorder(heads, parents)
73 return postorder(heads, parents)
74
74
75 def toposort_postorderreverse(ui, rl):
75 def toposort_postorderreverse(ui, rl):
76 # reverse-postorder of the reverse directed graph
76 # reverse-postorder of the reverse directed graph
77
77
78 children = {}
78 children = {}
79 roots = set()
79 roots = set()
80 ui.status(_('reading revs\n'))
80 ui.status(_('reading revs\n'))
81 try:
81 try:
82 for rev in rl:
82 for rev in rl:
83 ui.progress(_('reading'), rev, total=len(rl))
83 ui.progress(_('reading'), rev, total=len(rl))
84 (p1, p2) = rl.parentrevs(rev)
84 (p1, p2) = rl.parentrevs(rev)
85 if p1 == p2 == node.nullrev:
85 if p1 == p2 == node.nullrev:
86 roots.add(rev)
86 roots.add(rev)
87 children[rev] = []
87 children[rev] = []
88 if p1 != node.nullrev:
88 if p1 != node.nullrev:
89 children[p1].append(rev)
89 children[p1].append(rev)
90 if p2 != node.nullrev:
90 if p2 != node.nullrev:
91 children[p2].append(rev)
91 children[p2].append(rev)
92 finally:
92 finally:
93 ui.progress(_('reading'), None)
93 ui.progress(_('reading'), None)
94
94
95 root = list(roots)
95 roots = list(roots)
96 roots.sort()
96 roots.sort()
97
97
98 ui.status(_('sorting revs\n'))
98 ui.status(_('sorting revs\n'))
99 result = postorder(roots, children)
99 result = postorder(roots, children)
100 result.reverse()
100 result.reverse()
101 return result
101 return result
102
102
103 def writerevs(ui, r1, r2, order, tr):
103 def writerevs(ui, r1, r2, order, tr):
104
104
105 ui.status(_('writing revs\n'))
105 ui.status(_('writing revs\n'))
106
106
107 count = [0]
107 count = [0]
108 def progress(*args):
108 def progress(*args):
109 ui.progress(_('writing'), count[0], total=len(order))
109 ui.progress(_('writing'), count[0], total=len(order))
110 count[0] += 1
110 count[0] += 1
111
111
112 order = [r1.node(r) for r in order]
112 order = [r1.node(r) for r in order]
113
113
114 # this is a bit ugly, but it works
114 # this is a bit ugly, but it works
115 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
115 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
116 unlookup = lambda x: int(x, 10)
116 unlookup = lambda x: int(x, 10)
117
117
118 try:
118 try:
119 group = util.chunkbuffer(r1.group(order, lookup, progress))
119 group = util.chunkbuffer(r1.group(order, lookup, progress))
120 chunkiter = changegroup.chunkiter(group)
120 chunkiter = changegroup.chunkiter(group)
121 r2.addgroup(chunkiter, unlookup, tr)
121 r2.addgroup(chunkiter, unlookup, tr)
122 finally:
122 finally:
123 ui.progress(_('writing'), None)
123 ui.progress(_('writing'), None)
124
124
125 def report(ui, r1, r2):
125 def report(ui, r1, r2):
126 def getsize(r):
126 def getsize(r):
127 s = 0
127 s = 0
128 for fn in (r.indexfile, r.datafile):
128 for fn in (r.indexfile, r.datafile):
129 try:
129 try:
130 s += os.stat(fn).st_size
130 s += os.stat(fn).st_size
131 except OSError, inst:
131 except OSError, inst:
132 if inst.errno != errno.ENOENT:
132 if inst.errno != errno.ENOENT:
133 raise
133 raise
134 return s
134 return s
135
135
136 oldsize = float(getsize(r1))
136 oldsize = float(getsize(r1))
137 newsize = float(getsize(r2))
137 newsize = float(getsize(r2))
138
138
139 # argh: have to pass an int to %d, because a float >= 2^32
139 # argh: have to pass an int to %d, because a float >= 2^32
140 # blows up under Python 2.5 or earlier
140 # blows up under Python 2.5 or earlier
141 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
141 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
142 % (int(oldsize), oldsize / 1024 / 1024))
142 % (int(oldsize), oldsize / 1024 / 1024))
143 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
143 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
144 % (int(newsize), newsize / 1024 / 1024))
144 % (int(newsize), newsize / 1024 / 1024))
145
145
146 shrink_percent = (oldsize - newsize) / oldsize * 100
146 shrink_percent = (oldsize - newsize) / oldsize * 100
147 shrink_factor = oldsize / newsize
147 shrink_factor = oldsize / newsize
148 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
148 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
149 % (shrink_percent, shrink_factor))
149 % (shrink_percent, shrink_factor))
150
150
151 def shrink(ui, repo, **opts):
151 def shrink(ui, repo, **opts):
152 """shrink a revlog by reordering revisions
152 """shrink a revlog by reordering revisions
153
153
154 Rewrites all the entries in some revlog of the current repository
154 Rewrites all the entries in some revlog of the current repository
155 (by default, the manifest log) to save space.
155 (by default, the manifest log) to save space.
156
156
157 Different sort algorithms have different performance
157 Different sort algorithms have different performance
158 characteristics. Use ``--sort`` to select a sort algorithm so you
158 characteristics. Use ``--sort`` to select a sort algorithm so you
159 can determine which works best for your data.
159 can determine which works best for your data.
160 """
160 """
161
161
162 if not repo.local():
162 if not repo.local():
163 raise util.Abort(_('not a local repository: %s') % repo.root)
163 raise util.Abort(_('not a local repository: %s') % repo.root)
164
164
165 fn = opts.get('revlog')
165 fn = opts.get('revlog')
166 if not fn:
166 if not fn:
167 indexfn = repo.sjoin('00manifest.i')
167 indexfn = repo.sjoin('00manifest.i')
168 else:
168 else:
169 if not fn.endswith('.i'):
169 if not fn.endswith('.i'):
170 raise util.Abort(_('--revlog option must specify the revlog index '
170 raise util.Abort(_('--revlog option must specify the revlog index '
171 'file (*.i), not %s') % opts.get('revlog'))
171 'file (*.i), not %s') % opts.get('revlog'))
172
172
173 indexfn = os.path.realpath(fn)
173 indexfn = os.path.realpath(fn)
174 store = repo.sjoin('')
174 store = repo.sjoin('')
175 if not indexfn.startswith(store):
175 if not indexfn.startswith(store):
176 raise util.Abort(_('--revlog option must specify a revlog in %s, '
176 raise util.Abort(_('--revlog option must specify a revlog in %s, '
177 'not %s') % (store, indexfn))
177 'not %s') % (store, indexfn))
178
178
179 sortname = opts['sort']
179 sortname = opts['sort']
180 try:
180 try:
181 toposort = globals()['toposort_' + sortname]
181 toposort = globals()['toposort_' + sortname]
182 except KeyError:
182 except KeyError:
183 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
183 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
184
184
185 if not os.path.exists(indexfn):
185 if not os.path.exists(indexfn):
186 raise util.Abort(_('no such file: %s') % indexfn)
186 raise util.Abort(_('no such file: %s') % indexfn)
187 if '00changelog' in indexfn:
187 if '00changelog' in indexfn:
188 raise util.Abort(_('shrinking the changelog '
188 raise util.Abort(_('shrinking the changelog '
189 'will corrupt your repository'))
189 'will corrupt your repository'))
190
190
191 ui.write(_('shrinking %s\n') % indexfn)
191 ui.write(_('shrinking %s\n') % indexfn)
192 prefix = os.path.basename(indexfn)[:-1]
192 prefix = os.path.basename(indexfn)[:-1]
193 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
193 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
194
194
195 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
195 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
196 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
196 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
197
197
198 datafn, tmpdatafn = r1.datafile, r2.datafile
198 datafn, tmpdatafn = r1.datafile, r2.datafile
199
199
200 oldindexfn = indexfn + '.old'
200 oldindexfn = indexfn + '.old'
201 olddatafn = datafn + '.old'
201 olddatafn = datafn + '.old'
202 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
202 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
203 raise util.Abort(_('one or both of\n'
203 raise util.Abort(_('one or both of\n'
204 ' %s\n'
204 ' %s\n'
205 ' %s\n'
205 ' %s\n'
206 'exists from a previous run; please clean up '
206 'exists from a previous run; please clean up '
207 'before running again') % (oldindexfn, olddatafn))
207 'before running again') % (oldindexfn, olddatafn))
208
208
209 # Don't use repo.transaction(), because then things get hairy with
209 # Don't use repo.transaction(), because then things get hairy with
210 # paths: some need to be relative to .hg, and some need to be
210 # paths: some need to be relative to .hg, and some need to be
211 # absolute. Doing it this way keeps things simple: everything is an
211 # absolute. Doing it this way keeps things simple: everything is an
212 # absolute path.
212 # absolute path.
213 lock = repo.lock(wait=False)
213 lock = repo.lock(wait=False)
214 tr = transaction.transaction(ui.warn,
214 tr = transaction.transaction(ui.warn,
215 open,
215 open,
216 repo.sjoin('journal'))
216 repo.sjoin('journal'))
217
217
218 def ignoremissing(func):
218 def ignoremissing(func):
219 def f(*args, **kw):
219 def f(*args, **kw):
220 try:
220 try:
221 return func(*args, **kw)
221 return func(*args, **kw)
222 except OSError, inst:
222 except OSError, inst:
223 if inst.errno != errno.ENOENT:
223 if inst.errno != errno.ENOENT:
224 raise
224 raise
225 return f
225 return f
226
226
227 try:
227 try:
228 try:
228 try:
229 order = toposort(ui, r1)
229 order = toposort(ui, r1)
230
230
231 suboptimal = 0
231 suboptimal = 0
232 for i in xrange(1, len(order)):
232 for i in xrange(1, len(order)):
233 parents = [p for p in r1.parentrevs(order[i])
233 parents = [p for p in r1.parentrevs(order[i])
234 if p != node.nullrev]
234 if p != node.nullrev]
235 if parents and order[i - 1] not in parents:
235 if parents and order[i - 1] not in parents:
236 suboptimal += 1
236 suboptimal += 1
237 ui.note(_('%d suboptimal nodes\n') % suboptimal)
237 ui.note(_('%d suboptimal nodes\n') % suboptimal)
238
238
239 writerevs(ui, r1, r2, order, tr)
239 writerevs(ui, r1, r2, order, tr)
240 report(ui, r1, r2)
240 report(ui, r1, r2)
241 tr.close()
241 tr.close()
242 except:
242 except:
243 # Abort transaction first, so we truncate the files before
243 # Abort transaction first, so we truncate the files before
244 # deleting them.
244 # deleting them.
245 tr.abort()
245 tr.abort()
246 for fn in (tmpindexfn, tmpdatafn):
246 for fn in (tmpindexfn, tmpdatafn):
247 ignoremissing(os.unlink)(fn)
247 ignoremissing(os.unlink)(fn)
248 raise
248 raise
249 if not opts.get('dry_run'):
249 if not opts.get('dry_run'):
250 # racy, both files cannot be renamed atomically
250 # racy, both files cannot be renamed atomically
251 # copy files
251 # copy files
252 util.os_link(indexfn, oldindexfn)
252 util.os_link(indexfn, oldindexfn)
253 ignoremissing(util.os_link)(datafn, olddatafn)
253 ignoremissing(util.os_link)(datafn, olddatafn)
254
254
255 # rename
255 # rename
256 util.rename(tmpindexfn, indexfn)
256 util.rename(tmpindexfn, indexfn)
257 try:
257 try:
258 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
258 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
259 util.rename(tmpdatafn, datafn)
259 util.rename(tmpdatafn, datafn)
260 except OSError, inst:
260 except OSError, inst:
261 if inst.errno != errno.ENOENT:
261 if inst.errno != errno.ENOENT:
262 raise
262 raise
263 ignoremissing(os.unlink)(datafn)
263 ignoremissing(os.unlink)(datafn)
264 else:
264 else:
265 for fn in (tmpindexfn, tmpdatafn):
265 for fn in (tmpindexfn, tmpdatafn):
266 ignoremissing(os.unlink)(fn)
266 ignoremissing(os.unlink)(fn)
267 finally:
267 finally:
268 lock.release()
268 lock.release()
269
269
270 if not opts.get('dry_run'):
270 if not opts.get('dry_run'):
271 ui.write(_('note: old revlog saved in:\n'
271 ui.write(_('note: old revlog saved in:\n'
272 ' %s\n'
272 ' %s\n'
273 ' %s\n'
273 ' %s\n'
274 '(You can delete those files when you are satisfied that your\n'
274 '(You can delete those files when you are satisfied that your\n'
275 'repository is still sane. '
275 'repository is still sane. '
276 'Running \'hg verify\' is strongly recommended.)\n')
276 'Running \'hg verify\' is strongly recommended.)\n')
277 % (oldindexfn, olddatafn))
277 % (oldindexfn, olddatafn))
278
278
279 cmdtable = {
279 cmdtable = {
280 'shrink': (shrink,
280 'shrink': (shrink,
281 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
281 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
282 ('n', 'dry-run', None, _('do not shrink, simulate only')),
282 ('n', 'dry-run', None, _('do not shrink, simulate only')),
283 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
283 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
284 ],
284 ],
285 _('hg shrink [--revlog PATH]'))
285 _('hg shrink [--revlog PATH]'))
286 }
286 }
287
287
288 if __name__ == "__main__":
288 if __name__ == "__main__":
289 print "shrink-revlog.py is now an extension (see hg help extensions)"
289 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now