##// END OF EJS Templates
shrink-revlog: remove \ from docstring
Augie Fackler -
r14028:7e453770 default
parent child Browse files
Show More
@@ -1,288 +1,287
1 """\
1 """reorder a revlog (the manifest by default) to save space
2 reorder a revlog (the manifest by default) to save space
3
2
4 Specifically, this topologically sorts the revisions in the revlog so that
3 Specifically, this topologically sorts the revisions in the revlog so that
5 revisions on the same branch are adjacent as much as possible. This is a
4 revisions on the same branch are adjacent as much as possible. This is a
6 workaround for the fact that Mercurial computes deltas relative to the
5 workaround for the fact that Mercurial computes deltas relative to the
7 previous revision rather than relative to a parent revision.
6 previous revision rather than relative to a parent revision.
8
7
9 This is *not* safe to run on a changelog.
8 This is *not* safe to run on a changelog.
10 """
9 """
11
10
12 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
11 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
13 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
12 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
14 # renamed by Greg Ward <greg at gerg.ca>.
13 # renamed by Greg Ward <greg at gerg.ca>.
15
14
16 # XXX would be nice to have a way to verify the repository after shrinking,
15 # XXX would be nice to have a way to verify the repository after shrinking,
17 # e.g. by comparing "before" and "after" states of random changesets
16 # e.g. by comparing "before" and "after" states of random changesets
18 # (maybe: export before, shrink, export after, diff).
17 # (maybe: export before, shrink, export after, diff).
19
18
20 import os, tempfile, errno
19 import os, tempfile, errno
21 from mercurial import revlog, transaction, node, util
20 from mercurial import revlog, transaction, node, util
22 from mercurial import changegroup
21 from mercurial import changegroup
23 from mercurial.i18n import _
22 from mercurial.i18n import _
24
23
25
24
26 def postorder(start, edges):
25 def postorder(start, edges):
27 result = []
26 result = []
28 visit = list(start)
27 visit = list(start)
29 finished = set()
28 finished = set()
30
29
31 while visit:
30 while visit:
32 cur = visit[-1]
31 cur = visit[-1]
33 for p in edges[cur]:
32 for p in edges[cur]:
34 if p not in finished:
33 if p not in finished:
35 visit.append(p)
34 visit.append(p)
36 break
35 break
37 else:
36 else:
38 result.append(cur)
37 result.append(cur)
39 finished.add(cur)
38 finished.add(cur)
40 visit.pop()
39 visit.pop()
41
40
42 return result
41 return result
43
42
44 def toposort_reversepostorder(ui, rl):
43 def toposort_reversepostorder(ui, rl):
45 # postorder of the reverse directed graph
44 # postorder of the reverse directed graph
46
45
47 # map rev to list of parent revs (p2 first)
46 # map rev to list of parent revs (p2 first)
48 parents = {}
47 parents = {}
49 heads = set()
48 heads = set()
50 ui.status(_('reading revs\n'))
49 ui.status(_('reading revs\n'))
51 try:
50 try:
52 for rev in rl:
51 for rev in rl:
53 ui.progress(_('reading'), rev, total=len(rl))
52 ui.progress(_('reading'), rev, total=len(rl))
54 (p1, p2) = rl.parentrevs(rev)
53 (p1, p2) = rl.parentrevs(rev)
55 if p1 == p2 == node.nullrev:
54 if p1 == p2 == node.nullrev:
56 parents[rev] = () # root node
55 parents[rev] = () # root node
57 elif p1 == p2 or p2 == node.nullrev:
56 elif p1 == p2 or p2 == node.nullrev:
58 parents[rev] = (p1,) # normal node
57 parents[rev] = (p1,) # normal node
59 else:
58 else:
60 parents[rev] = (p2, p1) # merge node
59 parents[rev] = (p2, p1) # merge node
61 heads.add(rev)
60 heads.add(rev)
62 for p in parents[rev]:
61 for p in parents[rev]:
63 heads.discard(p)
62 heads.discard(p)
64 finally:
63 finally:
65 ui.progress(_('reading'), None)
64 ui.progress(_('reading'), None)
66
65
67 heads = list(heads)
66 heads = list(heads)
68 heads.sort(reverse=True)
67 heads.sort(reverse=True)
69
68
70 ui.status(_('sorting revs\n'))
69 ui.status(_('sorting revs\n'))
71 return postorder(heads, parents)
70 return postorder(heads, parents)
72
71
73 def toposort_postorderreverse(ui, rl):
72 def toposort_postorderreverse(ui, rl):
74 # reverse-postorder of the reverse directed graph
73 # reverse-postorder of the reverse directed graph
75
74
76 children = {}
75 children = {}
77 roots = set()
76 roots = set()
78 ui.status(_('reading revs\n'))
77 ui.status(_('reading revs\n'))
79 try:
78 try:
80 for rev in rl:
79 for rev in rl:
81 ui.progress(_('reading'), rev, total=len(rl))
80 ui.progress(_('reading'), rev, total=len(rl))
82 (p1, p2) = rl.parentrevs(rev)
81 (p1, p2) = rl.parentrevs(rev)
83 if p1 == p2 == node.nullrev:
82 if p1 == p2 == node.nullrev:
84 roots.add(rev)
83 roots.add(rev)
85 children[rev] = []
84 children[rev] = []
86 if p1 != node.nullrev:
85 if p1 != node.nullrev:
87 children[p1].append(rev)
86 children[p1].append(rev)
88 if p2 != node.nullrev:
87 if p2 != node.nullrev:
89 children[p2].append(rev)
88 children[p2].append(rev)
90 finally:
89 finally:
91 ui.progress(_('reading'), None)
90 ui.progress(_('reading'), None)
92
91
93 roots = list(roots)
92 roots = list(roots)
94 roots.sort()
93 roots.sort()
95
94
96 ui.status(_('sorting revs\n'))
95 ui.status(_('sorting revs\n'))
97 result = postorder(roots, children)
96 result = postorder(roots, children)
98 result.reverse()
97 result.reverse()
99 return result
98 return result
100
99
101 def writerevs(ui, r1, r2, order, tr):
100 def writerevs(ui, r1, r2, order, tr):
102
101
103 ui.status(_('writing revs\n'))
102 ui.status(_('writing revs\n'))
104
103
105
104
106 order = [r1.node(r) for r in order]
105 order = [r1.node(r) for r in order]
107
106
108 # this is a bit ugly, but it works
107 # this is a bit ugly, but it works
109 count = [0]
108 count = [0]
110 def lookup(x):
109 def lookup(x):
111 count[0] += 1
110 count[0] += 1
112 ui.progress(_('writing'), count[0], total=len(order))
111 ui.progress(_('writing'), count[0], total=len(order))
113 return "%020d" % r1.linkrev(r1.rev(x))
112 return "%020d" % r1.linkrev(r1.rev(x))
114
113
115 unlookup = lambda x: int(x, 10)
114 unlookup = lambda x: int(x, 10)
116
115
117 try:
116 try:
118 group = util.chunkbuffer(r1.group(order, lookup, progress))
117 group = util.chunkbuffer(r1.group(order, lookup, progress))
119 group = changegroup.unbundle10(group, "UN")
118 group = changegroup.unbundle10(group, "UN")
120 r2.addgroup(group, unlookup, tr)
119 r2.addgroup(group, unlookup, tr)
121 finally:
120 finally:
122 ui.progress(_('writing'), None)
121 ui.progress(_('writing'), None)
123
122
124 def report(ui, r1, r2):
123 def report(ui, r1, r2):
125 def getsize(r):
124 def getsize(r):
126 s = 0
125 s = 0
127 for fn in (r.indexfile, r.datafile):
126 for fn in (r.indexfile, r.datafile):
128 try:
127 try:
129 s += os.stat(fn).st_size
128 s += os.stat(fn).st_size
130 except OSError, inst:
129 except OSError, inst:
131 if inst.errno != errno.ENOENT:
130 if inst.errno != errno.ENOENT:
132 raise
131 raise
133 return s
132 return s
134
133
135 oldsize = float(getsize(r1))
134 oldsize = float(getsize(r1))
136 newsize = float(getsize(r2))
135 newsize = float(getsize(r2))
137
136
138 # argh: have to pass an int to %d, because a float >= 2^32
137 # argh: have to pass an int to %d, because a float >= 2^32
139 # blows up under Python 2.5 or earlier
138 # blows up under Python 2.5 or earlier
140 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
139 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
141 % (int(oldsize), oldsize / 1024 / 1024))
140 % (int(oldsize), oldsize / 1024 / 1024))
142 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
141 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
143 % (int(newsize), newsize / 1024 / 1024))
142 % (int(newsize), newsize / 1024 / 1024))
144
143
145 shrink_percent = (oldsize - newsize) / oldsize * 100
144 shrink_percent = (oldsize - newsize) / oldsize * 100
146 shrink_factor = oldsize / newsize
145 shrink_factor = oldsize / newsize
147 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
146 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
148 % (shrink_percent, shrink_factor))
147 % (shrink_percent, shrink_factor))
149
148
150 def shrink(ui, repo, **opts):
149 def shrink(ui, repo, **opts):
151 """shrink a revlog by reordering revisions
150 """shrink a revlog by reordering revisions
152
151
153 Rewrites all the entries in some revlog of the current repository
152 Rewrites all the entries in some revlog of the current repository
154 (by default, the manifest log) to save space.
153 (by default, the manifest log) to save space.
155
154
156 Different sort algorithms have different performance
155 Different sort algorithms have different performance
157 characteristics. Use ``--sort`` to select a sort algorithm so you
156 characteristics. Use ``--sort`` to select a sort algorithm so you
158 can determine which works best for your data.
157 can determine which works best for your data.
159 """
158 """
160
159
161 if not repo.local():
160 if not repo.local():
162 raise util.Abort(_('not a local repository: %s') % repo.root)
161 raise util.Abort(_('not a local repository: %s') % repo.root)
163
162
164 fn = opts.get('revlog')
163 fn = opts.get('revlog')
165 if not fn:
164 if not fn:
166 indexfn = repo.sjoin('00manifest.i')
165 indexfn = repo.sjoin('00manifest.i')
167 else:
166 else:
168 if not fn.endswith('.i'):
167 if not fn.endswith('.i'):
169 raise util.Abort(_('--revlog option must specify the revlog index '
168 raise util.Abort(_('--revlog option must specify the revlog index '
170 'file (*.i), not %s') % opts.get('revlog'))
169 'file (*.i), not %s') % opts.get('revlog'))
171
170
172 indexfn = os.path.realpath(fn)
171 indexfn = os.path.realpath(fn)
173 store = repo.sjoin('')
172 store = repo.sjoin('')
174 if not indexfn.startswith(store):
173 if not indexfn.startswith(store):
175 raise util.Abort(_('--revlog option must specify a revlog in %s, '
174 raise util.Abort(_('--revlog option must specify a revlog in %s, '
176 'not %s') % (store, indexfn))
175 'not %s') % (store, indexfn))
177
176
178 sortname = opts['sort']
177 sortname = opts['sort']
179 try:
178 try:
180 toposort = globals()['toposort_' + sortname]
179 toposort = globals()['toposort_' + sortname]
181 except KeyError:
180 except KeyError:
182 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
181 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
183
182
184 if not os.path.exists(indexfn):
183 if not os.path.exists(indexfn):
185 raise util.Abort(_('no such file: %s') % indexfn)
184 raise util.Abort(_('no such file: %s') % indexfn)
186 if '00changelog' in indexfn:
185 if '00changelog' in indexfn:
187 raise util.Abort(_('shrinking the changelog '
186 raise util.Abort(_('shrinking the changelog '
188 'will corrupt your repository'))
187 'will corrupt your repository'))
189
188
190 ui.write(_('shrinking %s\n') % indexfn)
189 ui.write(_('shrinking %s\n') % indexfn)
191 prefix = os.path.basename(indexfn)[:-1]
190 prefix = os.path.basename(indexfn)[:-1]
192 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
191 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
193
192
194 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
193 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
195 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
194 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
196
195
197 datafn, tmpdatafn = r1.datafile, r2.datafile
196 datafn, tmpdatafn = r1.datafile, r2.datafile
198
197
199 oldindexfn = indexfn + '.old'
198 oldindexfn = indexfn + '.old'
200 olddatafn = datafn + '.old'
199 olddatafn = datafn + '.old'
201 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
200 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
202 raise util.Abort(_('one or both of\n'
201 raise util.Abort(_('one or both of\n'
203 ' %s\n'
202 ' %s\n'
204 ' %s\n'
203 ' %s\n'
205 'exists from a previous run; please clean up '
204 'exists from a previous run; please clean up '
206 'before running again') % (oldindexfn, olddatafn))
205 'before running again') % (oldindexfn, olddatafn))
207
206
208 # Don't use repo.transaction(), because then things get hairy with
207 # Don't use repo.transaction(), because then things get hairy with
209 # paths: some need to be relative to .hg, and some need to be
208 # paths: some need to be relative to .hg, and some need to be
210 # absolute. Doing it this way keeps things simple: everything is an
209 # absolute. Doing it this way keeps things simple: everything is an
211 # absolute path.
210 # absolute path.
212 lock = repo.lock(wait=False)
211 lock = repo.lock(wait=False)
213 tr = transaction.transaction(ui.warn,
212 tr = transaction.transaction(ui.warn,
214 open,
213 open,
215 repo.sjoin('journal'))
214 repo.sjoin('journal'))
216
215
217 def ignoremissing(func):
216 def ignoremissing(func):
218 def f(*args, **kw):
217 def f(*args, **kw):
219 try:
218 try:
220 return func(*args, **kw)
219 return func(*args, **kw)
221 except OSError, inst:
220 except OSError, inst:
222 if inst.errno != errno.ENOENT:
221 if inst.errno != errno.ENOENT:
223 raise
222 raise
224 return f
223 return f
225
224
226 try:
225 try:
227 try:
226 try:
228 order = toposort(ui, r1)
227 order = toposort(ui, r1)
229
228
230 suboptimal = 0
229 suboptimal = 0
231 for i in xrange(1, len(order)):
230 for i in xrange(1, len(order)):
232 parents = [p for p in r1.parentrevs(order[i])
231 parents = [p for p in r1.parentrevs(order[i])
233 if p != node.nullrev]
232 if p != node.nullrev]
234 if parents and order[i - 1] not in parents:
233 if parents and order[i - 1] not in parents:
235 suboptimal += 1
234 suboptimal += 1
236 ui.note(_('%d suboptimal nodes\n') % suboptimal)
235 ui.note(_('%d suboptimal nodes\n') % suboptimal)
237
236
238 writerevs(ui, r1, r2, order, tr)
237 writerevs(ui, r1, r2, order, tr)
239 report(ui, r1, r2)
238 report(ui, r1, r2)
240 tr.close()
239 tr.close()
241 except:
240 except:
242 # Abort transaction first, so we truncate the files before
241 # Abort transaction first, so we truncate the files before
243 # deleting them.
242 # deleting them.
244 tr.abort()
243 tr.abort()
245 for fn in (tmpindexfn, tmpdatafn):
244 for fn in (tmpindexfn, tmpdatafn):
246 ignoremissing(os.unlink)(fn)
245 ignoremissing(os.unlink)(fn)
247 raise
246 raise
248 if not opts.get('dry_run'):
247 if not opts.get('dry_run'):
249 # racy, both files cannot be renamed atomically
248 # racy, both files cannot be renamed atomically
250 # copy files
249 # copy files
251 util.os_link(indexfn, oldindexfn)
250 util.os_link(indexfn, oldindexfn)
252 ignoremissing(util.os_link)(datafn, olddatafn)
251 ignoremissing(util.os_link)(datafn, olddatafn)
253
252
254 # rename
253 # rename
255 util.rename(tmpindexfn, indexfn)
254 util.rename(tmpindexfn, indexfn)
256 try:
255 try:
257 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
256 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
258 util.rename(tmpdatafn, datafn)
257 util.rename(tmpdatafn, datafn)
259 except OSError, inst:
258 except OSError, inst:
260 if inst.errno != errno.ENOENT:
259 if inst.errno != errno.ENOENT:
261 raise
260 raise
262 ignoremissing(os.unlink)(datafn)
261 ignoremissing(os.unlink)(datafn)
263 else:
262 else:
264 for fn in (tmpindexfn, tmpdatafn):
263 for fn in (tmpindexfn, tmpdatafn):
265 ignoremissing(os.unlink)(fn)
264 ignoremissing(os.unlink)(fn)
266 finally:
265 finally:
267 lock.release()
266 lock.release()
268
267
269 if not opts.get('dry_run'):
268 if not opts.get('dry_run'):
270 ui.write(_('note: old revlog saved in:\n'
269 ui.write(_('note: old revlog saved in:\n'
271 ' %s\n'
270 ' %s\n'
272 ' %s\n'
271 ' %s\n'
273 '(You can delete those files when you are satisfied that your\n'
272 '(You can delete those files when you are satisfied that your\n'
274 'repository is still sane. '
273 'repository is still sane. '
275 'Running \'hg verify\' is strongly recommended.)\n')
274 'Running \'hg verify\' is strongly recommended.)\n')
276 % (oldindexfn, olddatafn))
275 % (oldindexfn, olddatafn))
277
276
278 cmdtable = {
277 cmdtable = {
279 'shrink': (shrink,
278 'shrink': (shrink,
280 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
279 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
281 ('n', 'dry-run', None, _('do not shrink, simulate only')),
280 ('n', 'dry-run', None, _('do not shrink, simulate only')),
282 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
281 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
283 ],
282 ],
284 _('hg shrink [--revlog PATH]'))
283 _('hg shrink [--revlog PATH]'))
285 }
284 }
286
285
287 if __name__ == "__main__":
286 if __name__ == "__main__":
288 print "shrink-revlog.py is now an extension (see hg help extensions)"
287 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now