##// END OF EJS Templates
bundle: fix shrink-revlog bundle usage
Matt Mackall -
r12348:7f97b484 default
parent child Browse files
Show More
@@ -1,288 +1,289 b''
1 1 #!/usr/bin/env python
2 2
3 3 """\
4 4 reorder a revlog (the manifest by default) to save space
5 5
6 6 Specifically, this topologically sorts the revisions in the revlog so that
7 7 revisions on the same branch are adjacent as much as possible. This is a
8 8 workaround for the fact that Mercurial computes deltas relative to the
9 9 previous revision rather than relative to a parent revision.
10 10
11 11 This is *not* safe to run on a changelog.
12 12 """
13 13
14 14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 16 # renamed by Greg Ward <greg at gerg.ca>.
17 17
18 18 # XXX would be nice to have a way to verify the repository after shrinking,
19 19 # e.g. by comparing "before" and "after" states of random changesets
20 20 # (maybe: export before, shrink, export after, diff).
21 21
22 22 import os, tempfile, errno
23 23 from mercurial import revlog, transaction, node, util
24 24 from mercurial import changegroup
25 25 from mercurial.i18n import _
26 26
27 27
28 28 def postorder(start, edges):
29 29 result = []
30 30 visit = list(start)
31 31 finished = set()
32 32
33 33 while visit:
34 34 cur = visit[-1]
35 35 for p in edges[cur]:
36 36 if p not in finished:
37 37 visit.append(p)
38 38 break
39 39 else:
40 40 result.append(cur)
41 41 finished.add(cur)
42 42 visit.pop()
43 43
44 44 return result
45 45
46 46 def toposort_reversepostorder(ui, rl):
47 47 # postorder of the reverse directed graph
48 48
49 49 # map rev to list of parent revs (p2 first)
50 50 parents = {}
51 51 heads = set()
52 52 ui.status(_('reading revs\n'))
53 53 try:
54 54 for rev in rl:
55 55 ui.progress(_('reading'), rev, total=len(rl))
56 56 (p1, p2) = rl.parentrevs(rev)
57 57 if p1 == p2 == node.nullrev:
58 58 parents[rev] = () # root node
59 59 elif p1 == p2 or p2 == node.nullrev:
60 60 parents[rev] = (p1,) # normal node
61 61 else:
62 62 parents[rev] = (p2, p1) # merge node
63 63 heads.add(rev)
64 64 for p in parents[rev]:
65 65 heads.discard(p)
66 66 finally:
67 67 ui.progress(_('reading'), None)
68 68
69 69 heads = list(heads)
70 70 heads.sort(reverse=True)
71 71
72 72 ui.status(_('sorting revs\n'))
73 73 return postorder(heads, parents)
74 74
75 75 def toposort_postorderreverse(ui, rl):
76 76 # reverse-postorder of the reverse directed graph
77 77
78 78 children = {}
79 79 roots = set()
80 80 ui.status(_('reading revs\n'))
81 81 try:
82 82 for rev in rl:
83 83 ui.progress(_('reading'), rev, total=len(rl))
84 84 (p1, p2) = rl.parentrevs(rev)
85 85 if p1 == p2 == node.nullrev:
86 86 roots.add(rev)
87 87 children[rev] = []
88 88 if p1 != node.nullrev:
89 89 children[p1].append(rev)
90 90 if p2 != node.nullrev:
91 91 children[p2].append(rev)
92 92 finally:
93 93 ui.progress(_('reading'), None)
94 94
95 95 roots = list(roots)
96 96 roots.sort()
97 97
98 98 ui.status(_('sorting revs\n'))
99 99 result = postorder(roots, children)
100 100 result.reverse()
101 101 return result
102 102
103 103 def writerevs(ui, r1, r2, order, tr):
104 104
105 105 ui.status(_('writing revs\n'))
106 106
107 107 count = [0]
108 108 def progress(*args):
109 109 ui.progress(_('writing'), count[0], total=len(order))
110 110 count[0] += 1
111 111
112 112 order = [r1.node(r) for r in order]
113 113
114 114 # this is a bit ugly, but it works
115 115 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
116 116 unlookup = lambda x: int(x, 10)
117 117
118 118 try:
119 119 group = util.chunkbuffer(r1.group(order, lookup, progress))
120 group = changegroup.unbundle10(group, "UN")
120 121 r2.addgroup(group, unlookup, tr)
121 122 finally:
122 123 ui.progress(_('writing'), None)
123 124
124 125 def report(ui, r1, r2):
125 126 def getsize(r):
126 127 s = 0
127 128 for fn in (r.indexfile, r.datafile):
128 129 try:
129 130 s += os.stat(fn).st_size
130 131 except OSError, inst:
131 132 if inst.errno != errno.ENOENT:
132 133 raise
133 134 return s
134 135
135 136 oldsize = float(getsize(r1))
136 137 newsize = float(getsize(r2))
137 138
138 139 # argh: have to pass an int to %d, because a float >= 2^32
139 140 # blows up under Python 2.5 or earlier
140 141 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
141 142 % (int(oldsize), oldsize / 1024 / 1024))
142 143 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
143 144 % (int(newsize), newsize / 1024 / 1024))
144 145
145 146 shrink_percent = (oldsize - newsize) / oldsize * 100
146 147 shrink_factor = oldsize / newsize
147 148 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
148 149 % (shrink_percent, shrink_factor))
149 150
150 151 def shrink(ui, repo, **opts):
151 152 """shrink a revlog by reordering revisions
152 153
153 154 Rewrites all the entries in some revlog of the current repository
154 155 (by default, the manifest log) to save space.
155 156
156 157 Different sort algorithms have different performance
157 158 characteristics. Use ``--sort`` to select a sort algorithm so you
158 159 can determine which works best for your data.
159 160 """
160 161
161 162 if not repo.local():
162 163 raise util.Abort(_('not a local repository: %s') % repo.root)
163 164
164 165 fn = opts.get('revlog')
165 166 if not fn:
166 167 indexfn = repo.sjoin('00manifest.i')
167 168 else:
168 169 if not fn.endswith('.i'):
169 170 raise util.Abort(_('--revlog option must specify the revlog index '
170 171 'file (*.i), not %s') % opts.get('revlog'))
171 172
172 173 indexfn = os.path.realpath(fn)
173 174 store = repo.sjoin('')
174 175 if not indexfn.startswith(store):
175 176 raise util.Abort(_('--revlog option must specify a revlog in %s, '
176 177 'not %s') % (store, indexfn))
177 178
178 179 sortname = opts['sort']
179 180 try:
180 181 toposort = globals()['toposort_' + sortname]
181 182 except KeyError:
182 183 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
183 184
184 185 if not os.path.exists(indexfn):
185 186 raise util.Abort(_('no such file: %s') % indexfn)
186 187 if '00changelog' in indexfn:
187 188 raise util.Abort(_('shrinking the changelog '
188 189 'will corrupt your repository'))
189 190
190 191 ui.write(_('shrinking %s\n') % indexfn)
191 192 prefix = os.path.basename(indexfn)[:-1]
192 193 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
193 194
194 195 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
195 196 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
196 197
197 198 datafn, tmpdatafn = r1.datafile, r2.datafile
198 199
199 200 oldindexfn = indexfn + '.old'
200 201 olddatafn = datafn + '.old'
201 202 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
202 203 raise util.Abort(_('one or both of\n'
203 204 ' %s\n'
204 205 ' %s\n'
205 206 'exists from a previous run; please clean up '
206 207 'before running again') % (oldindexfn, olddatafn))
207 208
208 209 # Don't use repo.transaction(), because then things get hairy with
209 210 # paths: some need to be relative to .hg, and some need to be
210 211 # absolute. Doing it this way keeps things simple: everything is an
211 212 # absolute path.
212 213 lock = repo.lock(wait=False)
213 214 tr = transaction.transaction(ui.warn,
214 215 open,
215 216 repo.sjoin('journal'))
216 217
217 218 def ignoremissing(func):
218 219 def f(*args, **kw):
219 220 try:
220 221 return func(*args, **kw)
221 222 except OSError, inst:
222 223 if inst.errno != errno.ENOENT:
223 224 raise
224 225 return f
225 226
226 227 try:
227 228 try:
228 229 order = toposort(ui, r1)
229 230
230 231 suboptimal = 0
231 232 for i in xrange(1, len(order)):
232 233 parents = [p for p in r1.parentrevs(order[i])
233 234 if p != node.nullrev]
234 235 if parents and order[i - 1] not in parents:
235 236 suboptimal += 1
236 237 ui.note(_('%d suboptimal nodes\n') % suboptimal)
237 238
238 239 writerevs(ui, r1, r2, order, tr)
239 240 report(ui, r1, r2)
240 241 tr.close()
241 242 except:
242 243 # Abort transaction first, so we truncate the files before
243 244 # deleting them.
244 245 tr.abort()
245 246 for fn in (tmpindexfn, tmpdatafn):
246 247 ignoremissing(os.unlink)(fn)
247 248 raise
248 249 if not opts.get('dry_run'):
249 250 # racy, both files cannot be renamed atomically
250 251 # copy files
251 252 util.os_link(indexfn, oldindexfn)
252 253 ignoremissing(util.os_link)(datafn, olddatafn)
253 254
254 255 # rename
255 256 util.rename(tmpindexfn, indexfn)
256 257 try:
257 258 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
258 259 util.rename(tmpdatafn, datafn)
259 260 except OSError, inst:
260 261 if inst.errno != errno.ENOENT:
261 262 raise
262 263 ignoremissing(os.unlink)(datafn)
263 264 else:
264 265 for fn in (tmpindexfn, tmpdatafn):
265 266 ignoremissing(os.unlink)(fn)
266 267 finally:
267 268 lock.release()
268 269
269 270 if not opts.get('dry_run'):
270 271 ui.write(_('note: old revlog saved in:\n'
271 272 ' %s\n'
272 273 ' %s\n'
273 274 '(You can delete those files when you are satisfied that your\n'
274 275 'repository is still sane. '
275 276 'Running \'hg verify\' is strongly recommended.)\n')
276 277 % (oldindexfn, olddatafn))
277 278
278 279 cmdtable = {
279 280 'shrink': (shrink,
280 281 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
281 282 ('n', 'dry-run', None, _('do not shrink, simulate only')),
282 283 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
283 284 ],
284 285 _('hg shrink [--revlog PATH]'))
285 286 }
286 287
287 288 if __name__ == "__main__":
288 289 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now