##// END OF EJS Templates
shrink-revlog: use a bundler object (see d69c9510d648)
Augie Fackler -
r14030:e5dd974a default
parent child Browse files
Show More
@@ -1,287 +1,288 b''
1 """reorder a revlog (the manifest by default) to save space
1 """reorder a revlog (the manifest by default) to save space
2
2
3 Specifically, this topologically sorts the revisions in the revlog so that
3 Specifically, this topologically sorts the revisions in the revlog so that
4 revisions on the same branch are adjacent as much as possible. This is a
4 revisions on the same branch are adjacent as much as possible. This is a
5 workaround for the fact that Mercurial computes deltas relative to the
5 workaround for the fact that Mercurial computes deltas relative to the
6 previous revision rather than relative to a parent revision.
6 previous revision rather than relative to a parent revision.
7
7
8 This is *not* safe to run on a changelog.
8 This is *not* safe to run on a changelog.
9 """
9 """
10
10
11 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
11 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
12 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
12 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
13 # renamed by Greg Ward <greg at gerg.ca>.
13 # renamed by Greg Ward <greg at gerg.ca>.
14
14
15 # XXX would be nice to have a way to verify the repository after shrinking,
15 # XXX would be nice to have a way to verify the repository after shrinking,
16 # e.g. by comparing "before" and "after" states of random changesets
16 # e.g. by comparing "before" and "after" states of random changesets
17 # (maybe: export before, shrink, export after, diff).
17 # (maybe: export before, shrink, export after, diff).
18
18
19 import os, tempfile, errno
19 import os, tempfile, errno
20 from mercurial import revlog, transaction, node, util, scmutil
20 from mercurial import revlog, transaction, node, util, scmutil
21 from mercurial import changegroup
21 from mercurial import changegroup
22 from mercurial.i18n import _
22 from mercurial.i18n import _
23
23
24
24
25 def postorder(start, edges):
25 def postorder(start, edges):
26 result = []
26 result = []
27 visit = list(start)
27 visit = list(start)
28 finished = set()
28 finished = set()
29
29
30 while visit:
30 while visit:
31 cur = visit[-1]
31 cur = visit[-1]
32 for p in edges[cur]:
32 for p in edges[cur]:
33 if p not in finished:
33 if p not in finished:
34 visit.append(p)
34 visit.append(p)
35 break
35 break
36 else:
36 else:
37 result.append(cur)
37 result.append(cur)
38 finished.add(cur)
38 finished.add(cur)
39 visit.pop()
39 visit.pop()
40
40
41 return result
41 return result
42
42
43 def toposort_reversepostorder(ui, rl):
43 def toposort_reversepostorder(ui, rl):
44 # postorder of the reverse directed graph
44 # postorder of the reverse directed graph
45
45
46 # map rev to list of parent revs (p2 first)
46 # map rev to list of parent revs (p2 first)
47 parents = {}
47 parents = {}
48 heads = set()
48 heads = set()
49 ui.status(_('reading revs\n'))
49 ui.status(_('reading revs\n'))
50 try:
50 try:
51 for rev in rl:
51 for rev in rl:
52 ui.progress(_('reading'), rev, total=len(rl))
52 ui.progress(_('reading'), rev, total=len(rl))
53 (p1, p2) = rl.parentrevs(rev)
53 (p1, p2) = rl.parentrevs(rev)
54 if p1 == p2 == node.nullrev:
54 if p1 == p2 == node.nullrev:
55 parents[rev] = () # root node
55 parents[rev] = () # root node
56 elif p1 == p2 or p2 == node.nullrev:
56 elif p1 == p2 or p2 == node.nullrev:
57 parents[rev] = (p1,) # normal node
57 parents[rev] = (p1,) # normal node
58 else:
58 else:
59 parents[rev] = (p2, p1) # merge node
59 parents[rev] = (p2, p1) # merge node
60 heads.add(rev)
60 heads.add(rev)
61 for p in parents[rev]:
61 for p in parents[rev]:
62 heads.discard(p)
62 heads.discard(p)
63 finally:
63 finally:
64 ui.progress(_('reading'), None)
64 ui.progress(_('reading'), None)
65
65
66 heads = list(heads)
66 heads = list(heads)
67 heads.sort(reverse=True)
67 heads.sort(reverse=True)
68
68
69 ui.status(_('sorting revs\n'))
69 ui.status(_('sorting revs\n'))
70 return postorder(heads, parents)
70 return postorder(heads, parents)
71
71
72 def toposort_postorderreverse(ui, rl):
72 def toposort_postorderreverse(ui, rl):
73 # reverse-postorder of the reverse directed graph
73 # reverse-postorder of the reverse directed graph
74
74
75 children = {}
75 children = {}
76 roots = set()
76 roots = set()
77 ui.status(_('reading revs\n'))
77 ui.status(_('reading revs\n'))
78 try:
78 try:
79 for rev in rl:
79 for rev in rl:
80 ui.progress(_('reading'), rev, total=len(rl))
80 ui.progress(_('reading'), rev, total=len(rl))
81 (p1, p2) = rl.parentrevs(rev)
81 (p1, p2) = rl.parentrevs(rev)
82 if p1 == p2 == node.nullrev:
82 if p1 == p2 == node.nullrev:
83 roots.add(rev)
83 roots.add(rev)
84 children[rev] = []
84 children[rev] = []
85 if p1 != node.nullrev:
85 if p1 != node.nullrev:
86 children[p1].append(rev)
86 children[p1].append(rev)
87 if p2 != node.nullrev:
87 if p2 != node.nullrev:
88 children[p2].append(rev)
88 children[p2].append(rev)
89 finally:
89 finally:
90 ui.progress(_('reading'), None)
90 ui.progress(_('reading'), None)
91
91
92 roots = list(roots)
92 roots = list(roots)
93 roots.sort()
93 roots.sort()
94
94
95 ui.status(_('sorting revs\n'))
95 ui.status(_('sorting revs\n'))
96 result = postorder(roots, children)
96 result = postorder(roots, children)
97 result.reverse()
97 result.reverse()
98 return result
98 return result
99
99
100 def writerevs(ui, r1, r2, order, tr):
100 def writerevs(ui, r1, r2, order, tr):
101
101
102 ui.status(_('writing revs\n'))
102 ui.status(_('writing revs\n'))
103
103
104
104
105 order = [r1.node(r) for r in order]
105 order = [r1.node(r) for r in order]
106
106
107 # this is a bit ugly, but it works
107 # this is a bit ugly, but it works
108 count = [0]
108 count = [0]
109 def lookup(x):
109 def lookup(revl, x):
110 count[0] += 1
110 count[0] += 1
111 ui.progress(_('writing'), count[0], total=len(order))
111 ui.progress(_('writing'), count[0], total=len(order))
112 return "%020d" % r1.linkrev(r1.rev(x))
112 return "%020d" % revl.linkrev(revl.rev(x))
113
113
114 unlookup = lambda x: int(x, 10)
114 unlookup = lambda x: int(x, 10)
115
115
116 try:
116 try:
117 group = util.chunkbuffer(r1.group(order, lookup, progress))
117 bundler = changegroup.bundle10(lookup)
118 group = util.chunkbuffer(r1.group(order, bundler))
118 group = changegroup.unbundle10(group, "UN")
119 group = changegroup.unbundle10(group, "UN")
119 r2.addgroup(group, unlookup, tr)
120 r2.addgroup(group, unlookup, tr)
120 finally:
121 finally:
121 ui.progress(_('writing'), None)
122 ui.progress(_('writing'), None)
122
123
123 def report(ui, r1, r2):
124 def report(ui, r1, r2):
124 def getsize(r):
125 def getsize(r):
125 s = 0
126 s = 0
126 for fn in (r.indexfile, r.datafile):
127 for fn in (r.indexfile, r.datafile):
127 try:
128 try:
128 s += os.stat(fn).st_size
129 s += os.stat(fn).st_size
129 except OSError, inst:
130 except OSError, inst:
130 if inst.errno != errno.ENOENT:
131 if inst.errno != errno.ENOENT:
131 raise
132 raise
132 return s
133 return s
133
134
134 oldsize = float(getsize(r1))
135 oldsize = float(getsize(r1))
135 newsize = float(getsize(r2))
136 newsize = float(getsize(r2))
136
137
137 # argh: have to pass an int to %d, because a float >= 2^32
138 # argh: have to pass an int to %d, because a float >= 2^32
138 # blows up under Python 2.5 or earlier
139 # blows up under Python 2.5 or earlier
139 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
140 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
140 % (int(oldsize), oldsize / 1024 / 1024))
141 % (int(oldsize), oldsize / 1024 / 1024))
141 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
142 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
142 % (int(newsize), newsize / 1024 / 1024))
143 % (int(newsize), newsize / 1024 / 1024))
143
144
144 shrink_percent = (oldsize - newsize) / oldsize * 100
145 shrink_percent = (oldsize - newsize) / oldsize * 100
145 shrink_factor = oldsize / newsize
146 shrink_factor = oldsize / newsize
146 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
147 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
147 % (shrink_percent, shrink_factor))
148 % (shrink_percent, shrink_factor))
148
149
149 def shrink(ui, repo, **opts):
150 def shrink(ui, repo, **opts):
150 """shrink a revlog by reordering revisions
151 """shrink a revlog by reordering revisions
151
152
152 Rewrites all the entries in some revlog of the current repository
153 Rewrites all the entries in some revlog of the current repository
153 (by default, the manifest log) to save space.
154 (by default, the manifest log) to save space.
154
155
155 Different sort algorithms have different performance
156 Different sort algorithms have different performance
156 characteristics. Use ``--sort`` to select a sort algorithm so you
157 characteristics. Use ``--sort`` to select a sort algorithm so you
157 can determine which works best for your data.
158 can determine which works best for your data.
158 """
159 """
159
160
160 if not repo.local():
161 if not repo.local():
161 raise util.Abort(_('not a local repository: %s') % repo.root)
162 raise util.Abort(_('not a local repository: %s') % repo.root)
162
163
163 fn = opts.get('revlog')
164 fn = opts.get('revlog')
164 if not fn:
165 if not fn:
165 indexfn = repo.sjoin('00manifest.i')
166 indexfn = repo.sjoin('00manifest.i')
166 else:
167 else:
167 if not fn.endswith('.i'):
168 if not fn.endswith('.i'):
168 raise util.Abort(_('--revlog option must specify the revlog index '
169 raise util.Abort(_('--revlog option must specify the revlog index '
169 'file (*.i), not %s') % opts.get('revlog'))
170 'file (*.i), not %s') % opts.get('revlog'))
170
171
171 indexfn = os.path.realpath(fn)
172 indexfn = os.path.realpath(fn)
172 store = repo.sjoin('')
173 store = repo.sjoin('')
173 if not indexfn.startswith(store):
174 if not indexfn.startswith(store):
174 raise util.Abort(_('--revlog option must specify a revlog in %s, '
175 raise util.Abort(_('--revlog option must specify a revlog in %s, '
175 'not %s') % (store, indexfn))
176 'not %s') % (store, indexfn))
176
177
177 sortname = opts['sort']
178 sortname = opts['sort']
178 try:
179 try:
179 toposort = globals()['toposort_' + sortname]
180 toposort = globals()['toposort_' + sortname]
180 except KeyError:
181 except KeyError:
181 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
182 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
182
183
183 if not os.path.exists(indexfn):
184 if not os.path.exists(indexfn):
184 raise util.Abort(_('no such file: %s') % indexfn)
185 raise util.Abort(_('no such file: %s') % indexfn)
185 if '00changelog' in indexfn:
186 if '00changelog' in indexfn:
186 raise util.Abort(_('shrinking the changelog '
187 raise util.Abort(_('shrinking the changelog '
187 'will corrupt your repository'))
188 'will corrupt your repository'))
188
189
189 ui.write(_('shrinking %s\n') % indexfn)
190 ui.write(_('shrinking %s\n') % indexfn)
190 prefix = os.path.basename(indexfn)[:-1]
191 prefix = os.path.basename(indexfn)[:-1]
191 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
192 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
192
193
193 r1 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), indexfn)
194 r1 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), indexfn)
194 r2 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), tmpindexfn)
195 r2 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), tmpindexfn)
195
196
196 datafn, tmpdatafn = r1.datafile, r2.datafile
197 datafn, tmpdatafn = r1.datafile, r2.datafile
197
198
198 oldindexfn = indexfn + '.old'
199 oldindexfn = indexfn + '.old'
199 olddatafn = datafn + '.old'
200 olddatafn = datafn + '.old'
200 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
201 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
201 raise util.Abort(_('one or both of\n'
202 raise util.Abort(_('one or both of\n'
202 ' %s\n'
203 ' %s\n'
203 ' %s\n'
204 ' %s\n'
204 'exists from a previous run; please clean up '
205 'exists from a previous run; please clean up '
205 'before running again') % (oldindexfn, olddatafn))
206 'before running again') % (oldindexfn, olddatafn))
206
207
207 # Don't use repo.transaction(), because then things get hairy with
208 # Don't use repo.transaction(), because then things get hairy with
208 # paths: some need to be relative to .hg, and some need to be
209 # paths: some need to be relative to .hg, and some need to be
209 # absolute. Doing it this way keeps things simple: everything is an
210 # absolute. Doing it this way keeps things simple: everything is an
210 # absolute path.
211 # absolute path.
211 lock = repo.lock(wait=False)
212 lock = repo.lock(wait=False)
212 tr = transaction.transaction(ui.warn,
213 tr = transaction.transaction(ui.warn,
213 open,
214 open,
214 repo.sjoin('journal'))
215 repo.sjoin('journal'))
215
216
216 def ignoremissing(func):
217 def ignoremissing(func):
217 def f(*args, **kw):
218 def f(*args, **kw):
218 try:
219 try:
219 return func(*args, **kw)
220 return func(*args, **kw)
220 except OSError, inst:
221 except OSError, inst:
221 if inst.errno != errno.ENOENT:
222 if inst.errno != errno.ENOENT:
222 raise
223 raise
223 return f
224 return f
224
225
225 try:
226 try:
226 try:
227 try:
227 order = toposort(ui, r1)
228 order = toposort(ui, r1)
228
229
229 suboptimal = 0
230 suboptimal = 0
230 for i in xrange(1, len(order)):
231 for i in xrange(1, len(order)):
231 parents = [p for p in r1.parentrevs(order[i])
232 parents = [p for p in r1.parentrevs(order[i])
232 if p != node.nullrev]
233 if p != node.nullrev]
233 if parents and order[i - 1] not in parents:
234 if parents and order[i - 1] not in parents:
234 suboptimal += 1
235 suboptimal += 1
235 ui.note(_('%d suboptimal nodes\n') % suboptimal)
236 ui.note(_('%d suboptimal nodes\n') % suboptimal)
236
237
237 writerevs(ui, r1, r2, order, tr)
238 writerevs(ui, r1, r2, order, tr)
238 report(ui, r1, r2)
239 report(ui, r1, r2)
239 tr.close()
240 tr.close()
240 except:
241 except:
241 # Abort transaction first, so we truncate the files before
242 # Abort transaction first, so we truncate the files before
242 # deleting them.
243 # deleting them.
243 tr.abort()
244 tr.abort()
244 for fn in (tmpindexfn, tmpdatafn):
245 for fn in (tmpindexfn, tmpdatafn):
245 ignoremissing(os.unlink)(fn)
246 ignoremissing(os.unlink)(fn)
246 raise
247 raise
247 if not opts.get('dry_run'):
248 if not opts.get('dry_run'):
248 # racy, both files cannot be renamed atomically
249 # racy, both files cannot be renamed atomically
249 # copy files
250 # copy files
250 util.os_link(indexfn, oldindexfn)
251 util.os_link(indexfn, oldindexfn)
251 ignoremissing(util.os_link)(datafn, olddatafn)
252 ignoremissing(util.os_link)(datafn, olddatafn)
252
253
253 # rename
254 # rename
254 util.rename(tmpindexfn, indexfn)
255 util.rename(tmpindexfn, indexfn)
255 try:
256 try:
256 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
257 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
257 util.rename(tmpdatafn, datafn)
258 util.rename(tmpdatafn, datafn)
258 except OSError, inst:
259 except OSError, inst:
259 if inst.errno != errno.ENOENT:
260 if inst.errno != errno.ENOENT:
260 raise
261 raise
261 ignoremissing(os.unlink)(datafn)
262 ignoremissing(os.unlink)(datafn)
262 else:
263 else:
263 for fn in (tmpindexfn, tmpdatafn):
264 for fn in (tmpindexfn, tmpdatafn):
264 ignoremissing(os.unlink)(fn)
265 ignoremissing(os.unlink)(fn)
265 finally:
266 finally:
266 lock.release()
267 lock.release()
267
268
268 if not opts.get('dry_run'):
269 if not opts.get('dry_run'):
269 ui.write(_('note: old revlog saved in:\n'
270 ui.write(_('note: old revlog saved in:\n'
270 ' %s\n'
271 ' %s\n'
271 ' %s\n'
272 ' %s\n'
272 '(You can delete those files when you are satisfied that your\n'
273 '(You can delete those files when you are satisfied that your\n'
273 'repository is still sane. '
274 'repository is still sane. '
274 'Running \'hg verify\' is strongly recommended.)\n')
275 'Running \'hg verify\' is strongly recommended.)\n')
275 % (oldindexfn, olddatafn))
276 % (oldindexfn, olddatafn))
276
277
277 cmdtable = {
278 cmdtable = {
278 'shrink': (shrink,
279 'shrink': (shrink,
279 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
280 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
280 ('n', 'dry-run', None, _('do not shrink, simulate only')),
281 ('n', 'dry-run', None, _('do not shrink, simulate only')),
281 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
282 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
282 ],
283 ],
283 _('hg shrink [--revlog PATH]'))
284 _('hg shrink [--revlog PATH]'))
284 }
285 }
285
286
286 if __name__ == "__main__":
287 if __name__ == "__main__":
287 print "shrink-revlog.py is now an extension (see hg help extensions)"
288 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now