##// END OF EJS Templates
shrink-revlog: add missing whitespace in expression
Martin Geisler -
r10655:f63fb5c0 default
parent child Browse files
Show More
@@ -1,290 +1,290 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 """\
3 """\
4 reorder a revlog (the manifest by default) to save space
4 reorder a revlog (the manifest by default) to save space
5
5
6 Specifically, this topologically sorts the revisions in the revlog so that
6 Specifically, this topologically sorts the revisions in the revlog so that
7 revisions on the same branch are adjacent as much as possible. This is a
7 revisions on the same branch are adjacent as much as possible. This is a
8 workaround for the fact that Mercurial computes deltas relative to the
8 workaround for the fact that Mercurial computes deltas relative to the
9 previous revision rather than relative to a parent revision.
9 previous revision rather than relative to a parent revision.
10
10
11 This is *not* safe to run on a changelog.
11 This is *not* safe to run on a changelog.
12 """
12 """
13
13
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 # renamed by Greg Ward <greg at gerg.ca>.
16 # renamed by Greg Ward <greg at gerg.ca>.
17
17
18 # XXX would be nice to have a way to verify the repository after shrinking,
18 # XXX would be nice to have a way to verify the repository after shrinking,
19 # e.g. by comparing "before" and "after" states of random changesets
19 # e.g. by comparing "before" and "after" states of random changesets
20 # (maybe: export before, shrink, export after, diff).
20 # (maybe: export before, shrink, export after, diff).
21
21
22 import os, tempfile, errno
22 import os, tempfile, errno
23 from mercurial import revlog, transaction, node, util
23 from mercurial import revlog, transaction, node, util
24 from mercurial import changegroup
24 from mercurial import changegroup
25 from mercurial.i18n import _
25 from mercurial.i18n import _
26
26
27
27
28 def postorder(start, edges):
28 def postorder(start, edges):
29 result = []
29 result = []
30 visit = list(start)
30 visit = list(start)
31 finished = set()
31 finished = set()
32
32
33 while visit:
33 while visit:
34 cur = visit[-1]
34 cur = visit[-1]
35 for p in edges[cur]:
35 for p in edges[cur]:
36 if p not in finished:
36 if p not in finished:
37 visit.append(p)
37 visit.append(p)
38 break
38 break
39 else:
39 else:
40 result.append(cur)
40 result.append(cur)
41 finished.add(cur)
41 finished.add(cur)
42 visit.pop()
42 visit.pop()
43
43
44 return result
44 return result
45
45
46 def toposort_reversepostorder(ui, rl):
46 def toposort_reversepostorder(ui, rl):
47 # postorder of the reverse directed graph
47 # postorder of the reverse directed graph
48
48
49 # map rev to list of parent revs (p2 first)
49 # map rev to list of parent revs (p2 first)
50 parents = {}
50 parents = {}
51 heads = set()
51 heads = set()
52 ui.status(_('reading revs\n'))
52 ui.status(_('reading revs\n'))
53 try:
53 try:
54 for rev in rl:
54 for rev in rl:
55 ui.progress(_('reading'), rev, total=len(rl))
55 ui.progress(_('reading'), rev, total=len(rl))
56 (p1, p2) = rl.parentrevs(rev)
56 (p1, p2) = rl.parentrevs(rev)
57 if p1 == p2 == node.nullrev:
57 if p1 == p2 == node.nullrev:
58 parents[rev] = () # root node
58 parents[rev] = () # root node
59 elif p1 == p2 or p2 == node.nullrev:
59 elif p1 == p2 or p2 == node.nullrev:
60 parents[rev] = (p1,) # normal node
60 parents[rev] = (p1,) # normal node
61 else:
61 else:
62 parents[rev] = (p2, p1) # merge node
62 parents[rev] = (p2, p1) # merge node
63 heads.add(rev)
63 heads.add(rev)
64 for p in parents[rev]:
64 for p in parents[rev]:
65 heads.discard(p)
65 heads.discard(p)
66 finally:
66 finally:
67 ui.progress(_('reading'), None, total=len(rl))
67 ui.progress(_('reading'), None, total=len(rl))
68
68
69 heads = list(heads)
69 heads = list(heads)
70 heads.sort(reverse=True)
70 heads.sort(reverse=True)
71
71
72 ui.status(_('sorting revs\n'))
72 ui.status(_('sorting revs\n'))
73 return postorder(heads, parents)
73 return postorder(heads, parents)
74
74
75 def toposort_postorderreverse(ui, rl):
75 def toposort_postorderreverse(ui, rl):
76 # reverse-postorder of the reverse directed graph
76 # reverse-postorder of the reverse directed graph
77
77
78 children = {}
78 children = {}
79 roots = set()
79 roots = set()
80 ui.status(_('reading revs\n'))
80 ui.status(_('reading revs\n'))
81 try:
81 try:
82 for rev in rl:
82 for rev in rl:
83 ui.progress(_('reading'), rev, total=len(rl))
83 ui.progress(_('reading'), rev, total=len(rl))
84 (p1, p2) = rl.parentrevs(rev)
84 (p1, p2) = rl.parentrevs(rev)
85 if p1 == p2 == node.nullrev:
85 if p1 == p2 == node.nullrev:
86 roots.add(rev)
86 roots.add(rev)
87 children[rev] = []
87 children[rev] = []
88 if p1 != node.nullrev:
88 if p1 != node.nullrev:
89 children[p1].append(rev)
89 children[p1].append(rev)
90 if p2 != node.nullrev:
90 if p2 != node.nullrev:
91 children[p2].append(rev)
91 children[p2].append(rev)
92 finally:
92 finally:
93 ui.progress(_('reading'), None, total=len(rl))
93 ui.progress(_('reading'), None, total=len(rl))
94
94
95 root = list(roots)
95 root = list(roots)
96 roots.sort()
96 roots.sort()
97
97
98 ui.status(_('sorting revs\n'))
98 ui.status(_('sorting revs\n'))
99 result = postorder(roots, children)
99 result = postorder(roots, children)
100 result.reverse()
100 result.reverse()
101 return result
101 return result
102
102
103 def writerevs(ui, r1, r2, order, tr):
103 def writerevs(ui, r1, r2, order, tr):
104
104
105 ui.status(_('writing revs\n'))
105 ui.status(_('writing revs\n'))
106
106
107 count = [0]
107 count = [0]
108 def progress(*args):
108 def progress(*args):
109 ui.progress(_('writing'), count[0], total=len(order))
109 ui.progress(_('writing'), count[0], total=len(order))
110 count[0] += 1
110 count[0] += 1
111
111
112 order = [r1.node(r) for r in order]
112 order = [r1.node(r) for r in order]
113
113
114 # this is a bit ugly, but it works
114 # this is a bit ugly, but it works
115 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
115 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
116 unlookup = lambda x: int(x, 10)
116 unlookup = lambda x: int(x, 10)
117
117
118 try:
118 try:
119 group = util.chunkbuffer(r1.group(order, lookup, progress))
119 group = util.chunkbuffer(r1.group(order, lookup, progress))
120 chunkiter = changegroup.chunkiter(group)
120 chunkiter = changegroup.chunkiter(group)
121 r2.addgroup(chunkiter, unlookup, tr)
121 r2.addgroup(chunkiter, unlookup, tr)
122 finally:
122 finally:
123 ui.progress(_('writing'), None, len(order))
123 ui.progress(_('writing'), None, len(order))
124
124
125 def report(ui, r1, r2):
125 def report(ui, r1, r2):
126 def getsize(r):
126 def getsize(r):
127 s = 0
127 s = 0
128 for fn in (r.indexfile, r.datafile):
128 for fn in (r.indexfile, r.datafile):
129 try:
129 try:
130 s += os.stat(fn).st_size
130 s += os.stat(fn).st_size
131 except OSError, inst:
131 except OSError, inst:
132 if inst.errno != errno.ENOENT:
132 if inst.errno != errno.ENOENT:
133 raise
133 raise
134 return s
134 return s
135
135
136 oldsize = float(getsize(r1))
136 oldsize = float(getsize(r1))
137 newsize = float(getsize(r2))
137 newsize = float(getsize(r2))
138
138
139 # argh: have to pass an int to %d, because a float >= 2^32
139 # argh: have to pass an int to %d, because a float >= 2^32
140 # blows up under Python 2.5 or earlier
140 # blows up under Python 2.5 or earlier
141 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
141 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
142 % (int(oldsize), oldsize / 1024 / 1024))
142 % (int(oldsize), oldsize / 1024 / 1024))
143 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
143 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
144 % (int(newsize), newsize / 1024 / 1024))
144 % (int(newsize), newsize / 1024 / 1024))
145
145
146 shrink_percent = (oldsize - newsize) / oldsize * 100
146 shrink_percent = (oldsize - newsize) / oldsize * 100
147 shrink_factor = oldsize / newsize
147 shrink_factor = oldsize / newsize
148 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
148 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
149 % (shrink_percent, shrink_factor))
149 % (shrink_percent, shrink_factor))
150
150
151 def shrink(ui, repo, **opts):
151 def shrink(ui, repo, **opts):
152 """shrink a revlog by reordering revisions
152 """shrink a revlog by reordering revisions
153
153
154 Rewrites all the entries in some revlog of the current repository
154 Rewrites all the entries in some revlog of the current repository
155 (by default, the manifest log) to save space.
155 (by default, the manifest log) to save space.
156
156
157 Different sort algorithms have different performance
157 Different sort algorithms have different performance
158 characteristics. Use ``--sort`` to select a sort algorithm so you
158 characteristics. Use ``--sort`` to select a sort algorithm so you
159 can determine which works best for your data.
159 can determine which works best for your data.
160 """
160 """
161
161
162 if not repo.local():
162 if not repo.local():
163 raise util.Abort(_('not a local repository: %s') % repo.root)
163 raise util.Abort(_('not a local repository: %s') % repo.root)
164
164
165 fn = opts.get('revlog')
165 fn = opts.get('revlog')
166 if not fn:
166 if not fn:
167 indexfn = repo.sjoin('00manifest.i')
167 indexfn = repo.sjoin('00manifest.i')
168 else:
168 else:
169 if not fn.endswith('.i'):
169 if not fn.endswith('.i'):
170 raise util.Abort(_('--revlog option must specify the revlog index '
170 raise util.Abort(_('--revlog option must specify the revlog index '
171 'file (*.i), not %s') % opts.get('revlog'))
171 'file (*.i), not %s') % opts.get('revlog'))
172
172
173 indexfn = os.path.realpath(fn)
173 indexfn = os.path.realpath(fn)
174 store = repo.sjoin('')
174 store = repo.sjoin('')
175 if not indexfn.startswith(store):
175 if not indexfn.startswith(store):
176 raise util.Abort(_('--revlog option must specify a revlog in %s, '
176 raise util.Abort(_('--revlog option must specify a revlog in %s, '
177 'not %s') % (store, indexfn))
177 'not %s') % (store, indexfn))
178
178
179 sortname = opts['sort']
179 sortname = opts['sort']
180 try:
180 try:
181 toposort = globals()['toposort_' + sortname]
181 toposort = globals()['toposort_' + sortname]
182 except KeyError:
182 except KeyError:
183 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
183 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
184
184
185 if not os.path.exists(indexfn):
185 if not os.path.exists(indexfn):
186 raise util.Abort(_('no such file: %s') % indexfn)
186 raise util.Abort(_('no such file: %s') % indexfn)
187 if '00changelog' in indexfn:
187 if '00changelog' in indexfn:
188 raise util.Abort(_('shrinking the changelog '
188 raise util.Abort(_('shrinking the changelog '
189 'will corrupt your repository'))
189 'will corrupt your repository'))
190
190
191 ui.write(_('shrinking %s\n') % indexfn)
191 ui.write(_('shrinking %s\n') % indexfn)
192 prefix = os.path.basename(indexfn)[:-1]
192 prefix = os.path.basename(indexfn)[:-1]
193 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
193 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
194 prefix=prefix,
194 prefix=prefix,
195 suffix='.i')
195 suffix='.i')
196 os.close(tmpfd)
196 os.close(tmpfd)
197
197
198 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
198 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
199 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
199 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
200
200
201 datafn, tmpdatafn = r1.datafile, r2.datafile
201 datafn, tmpdatafn = r1.datafile, r2.datafile
202
202
203 oldindexfn = indexfn + '.old'
203 oldindexfn = indexfn + '.old'
204 olddatafn = datafn + '.old'
204 olddatafn = datafn + '.old'
205 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
205 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
206 raise util.Abort(_('one or both of\n'
206 raise util.Abort(_('one or both of\n'
207 ' %s\n'
207 ' %s\n'
208 ' %s\n'
208 ' %s\n'
209 'exists from a previous run; please clean up '
209 'exists from a previous run; please clean up '
210 'before running again') % (oldindexfn, olddatafn))
210 'before running again') % (oldindexfn, olddatafn))
211
211
212 # Don't use repo.transaction(), because then things get hairy with
212 # Don't use repo.transaction(), because then things get hairy with
213 # paths: some need to be relative to .hg, and some need to be
213 # paths: some need to be relative to .hg, and some need to be
214 # absolute. Doing it this way keeps things simple: everything is an
214 # absolute. Doing it this way keeps things simple: everything is an
215 # absolute path.
215 # absolute path.
216 lock = repo.lock(wait=False)
216 lock = repo.lock(wait=False)
217 tr = transaction.transaction(ui.warn,
217 tr = transaction.transaction(ui.warn,
218 open,
218 open,
219 repo.sjoin('journal'))
219 repo.sjoin('journal'))
220
220
221 def ignoremissing(func):
221 def ignoremissing(func):
222 def f(*args, **kw):
222 def f(*args, **kw):
223 try:
223 try:
224 return func(*args, **kw)
224 return func(*args, **kw)
225 except OSError, inst:
225 except OSError, inst:
226 if inst.errno != errno.ENOENT:
226 if inst.errno != errno.ENOENT:
227 raise
227 raise
228 return f
228 return f
229
229
230 try:
230 try:
231 try:
231 try:
232 order = toposort(ui, r1)
232 order = toposort(ui, r1)
233
233
234 suboptimal = 0
234 suboptimal = 0
235 for i in xrange(1, len(order)):
235 for i in xrange(1, len(order)):
236 parents = [p for p in r1.parentrevs(order[i])
236 parents = [p for p in r1.parentrevs(order[i])
237 if p != node.nullrev]
237 if p != node.nullrev]
238 if parents and order[i-1] not in parents:
238 if parents and order[i - 1] not in parents:
239 suboptimal += 1
239 suboptimal += 1
240 ui.note(_('%d suboptimal nodes\n') % suboptimal)
240 ui.note(_('%d suboptimal nodes\n') % suboptimal)
241
241
242 writerevs(ui, r1, r2, order, tr)
242 writerevs(ui, r1, r2, order, tr)
243 report(ui, r1, r2)
243 report(ui, r1, r2)
244 tr.close()
244 tr.close()
245 except:
245 except:
246 # Abort transaction first, so we truncate the files before
246 # Abort transaction first, so we truncate the files before
247 # deleting them.
247 # deleting them.
248 tr.abort()
248 tr.abort()
249 for fn in (tmpindexfn, tmpdatafn):
249 for fn in (tmpindexfn, tmpdatafn):
250 ignoremissing(os.unlink)(fn)
250 ignoremissing(os.unlink)(fn)
251 raise
251 raise
252 if not opts.get('dry_run'):
252 if not opts.get('dry_run'):
253 # racy, both files cannot be renamed atomically
253 # racy, both files cannot be renamed atomically
254 # copy files
254 # copy files
255 util.os_link(indexfn, oldindexfn)
255 util.os_link(indexfn, oldindexfn)
256 ignoremissing(util.os_link)(datafn, olddatafn)
256 ignoremissing(util.os_link)(datafn, olddatafn)
257 # rename
257 # rename
258 util.rename(tmpindexfn, indexfn)
258 util.rename(tmpindexfn, indexfn)
259 try:
259 try:
260 util.rename(tmpdatafn, datafn)
260 util.rename(tmpdatafn, datafn)
261 except OSError, inst:
261 except OSError, inst:
262 if inst.errno != errno.ENOENT:
262 if inst.errno != errno.ENOENT:
263 raise
263 raise
264 ignoremissing(os.unlink)(datafn)
264 ignoremissing(os.unlink)(datafn)
265 else:
265 else:
266 for fn in (tmpindexfn, tmpdatafn):
266 for fn in (tmpindexfn, tmpdatafn):
267 ignoremissing(os.unlink)(fn)
267 ignoremissing(os.unlink)(fn)
268 finally:
268 finally:
269 lock.release()
269 lock.release()
270
270
271 if not opts.get('dry_run'):
271 if not opts.get('dry_run'):
272 ui.write(_('note: old revlog saved in:\n'
272 ui.write(_('note: old revlog saved in:\n'
273 ' %s\n'
273 ' %s\n'
274 ' %s\n'
274 ' %s\n'
275 '(You can delete those files when you are satisfied that your\n'
275 '(You can delete those files when you are satisfied that your\n'
276 'repository is still sane. '
276 'repository is still sane. '
277 'Running \'hg verify\' is strongly recommended.)\n')
277 'Running \'hg verify\' is strongly recommended.)\n')
278 % (oldindexfn, olddatafn))
278 % (oldindexfn, olddatafn))
279
279
280 cmdtable = {
280 cmdtable = {
281 'shrink': (shrink,
281 'shrink': (shrink,
282 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
282 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
283 ('n', 'dry-run', None, _('do not shrink, simulate only')),
283 ('n', 'dry-run', None, _('do not shrink, simulate only')),
284 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
284 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
285 ],
285 ],
286 _('hg shrink [--revlog PATH]'))
286 _('hg shrink [--revlog PATH]'))
287 }
287 }
288
288
289 if __name__ == "__main__":
289 if __name__ == "__main__":
290 print "shrink-revlog.py is now an extension (see hg help extensions)"
290 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now