##// END OF EJS Templates
shrink-revlog: make pyflakes happy
Greg Ward -
r16305:90ca62bb default
parent child Browse files
Show More
@@ -1,291 +1,290 b''
1 1 """reorder a revlog (the manifest by default) to save space
2 2
3 3 Specifically, this topologically sorts the revisions in the revlog so that
4 4 revisions on the same branch are adjacent as much as possible. This is a
5 5 workaround for the fact that Mercurial computes deltas relative to the
6 6 previous revision rather than relative to a parent revision.
7 7
8 8 This is *not* safe to run on a changelog.
9 9 """
10 10
11 11 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
12 12 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
13 13 # renamed by Greg Ward <greg at gerg.ca>.
14 14
15 15 # XXX would be nice to have a way to verify the repository after shrinking,
16 16 # e.g. by comparing "before" and "after" states of random changesets
17 17 # (maybe: export before, shrink, export after, diff).
18 18
19 import os, tempfile, errno
19 import os, errno
20 20 from mercurial import revlog, transaction, node, util, scmutil
21 21 from mercurial import changegroup
22 22 from mercurial.i18n import _
23 23
24 24
25 25 def postorder(start, edges):
26 26 result = []
27 27 visit = list(start)
28 28 finished = set()
29 29
30 30 while visit:
31 31 cur = visit[-1]
32 32 for p in edges[cur]:
33 33 # defend against node.nullrev because it's occasionally
34 34 # possible for a node to have parents (null, something)
35 35 # rather than (something, null)
36 36 if p not in finished and p != node.nullrev:
37 37 visit.append(p)
38 38 break
39 39 else:
40 40 result.append(cur)
41 41 finished.add(cur)
42 42 visit.pop()
43 43
44 44 return result
45 45
46 46 def toposort_reversepostorder(ui, rl):
47 47 # postorder of the reverse directed graph
48 48
49 49 # map rev to list of parent revs (p2 first)
50 50 parents = {}
51 51 heads = set()
52 52 ui.status(_('reading revs\n'))
53 53 try:
54 54 for rev in rl:
55 55 ui.progress(_('reading'), rev, total=len(rl))
56 56 (p1, p2) = rl.parentrevs(rev)
57 57 if p1 == p2 == node.nullrev:
58 58 parents[rev] = () # root node
59 59 elif p1 == p2 or p2 == node.nullrev:
60 60 parents[rev] = (p1,) # normal node
61 61 else:
62 62 parents[rev] = (p2, p1) # merge node
63 63 heads.add(rev)
64 64 for p in parents[rev]:
65 65 heads.discard(p)
66 66 finally:
67 67 ui.progress(_('reading'), None)
68 68
69 69 heads = list(heads)
70 70 heads.sort(reverse=True)
71 71
72 72 ui.status(_('sorting revs\n'))
73 73 return postorder(heads, parents)
74 74
75 75 def toposort_postorderreverse(ui, rl):
76 76 # reverse-postorder of the reverse directed graph
77 77
78 78 children = {}
79 79 roots = set()
80 80 ui.status(_('reading revs\n'))
81 81 try:
82 82 for rev in rl:
83 83 ui.progress(_('reading'), rev, total=len(rl))
84 84 (p1, p2) = rl.parentrevs(rev)
85 85 if p1 == p2 == node.nullrev:
86 86 roots.add(rev)
87 87 children[rev] = []
88 88 if p1 != node.nullrev:
89 89 children[p1].append(rev)
90 90 if p2 != node.nullrev:
91 91 children[p2].append(rev)
92 92 finally:
93 93 ui.progress(_('reading'), None)
94 94
95 95 roots = list(roots)
96 96 roots.sort()
97 97
98 98 ui.status(_('sorting revs\n'))
99 99 result = postorder(roots, children)
100 100 result.reverse()
101 101 return result
102 102
103 103 def writerevs(ui, r1, r2, order, tr):
104 104
105 105 ui.status(_('writing revs\n'))
106 106
107 107
108 108 order = [r1.node(r) for r in order]
109 109
110 110 # this is a bit ugly, but it works
111 111 count = [0]
112 112 def lookup(revl, x):
113 113 count[0] += 1
114 114 ui.progress(_('writing'), count[0], total=len(order))
115 115 return "%020d" % revl.linkrev(revl.rev(x))
116 116
117 117 unlookup = lambda x: int(x, 10)
118 118
119 119 try:
120 120 bundler = changegroup.bundle10(lookup)
121 121 group = util.chunkbuffer(r1.group(order, bundler))
122 122 group = changegroup.unbundle10(group, "UN")
123 123 r2.addgroup(group, unlookup, tr)
124 124 finally:
125 125 ui.progress(_('writing'), None)
126 126
127 127 def report(ui, r1, r2):
128 128 def getsize(r):
129 129 s = 0
130 130 for fn in (r.indexfile, r.datafile):
131 131 try:
132 132 s += os.stat(fn).st_size
133 133 except OSError, inst:
134 134 if inst.errno != errno.ENOENT:
135 135 raise
136 136 return s
137 137
138 138 oldsize = float(getsize(r1))
139 139 newsize = float(getsize(r2))
140 140
141 141 # argh: have to pass an int to %d, because a float >= 2^32
142 142 # blows up under Python 2.5 or earlier
143 143 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
144 144 % (int(oldsize), oldsize / 1024 / 1024))
145 145 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
146 146 % (int(newsize), newsize / 1024 / 1024))
147 147
148 148 shrink_percent = (oldsize - newsize) / oldsize * 100
149 149 shrink_factor = oldsize / newsize
150 150 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
151 151 % (shrink_percent, shrink_factor))
152 152
153 153 def shrink(ui, repo, **opts):
154 154 """shrink a revlog by reordering revisions
155 155
156 156 Rewrites all the entries in some revlog of the current repository
157 157 (by default, the manifest log) to save space.
158 158
159 159 Different sort algorithms have different performance
160 160 characteristics. Use ``--sort`` to select a sort algorithm so you
161 161 can determine which works best for your data.
162 162 """
163 163
164 164 if not repo.local():
165 165 raise util.Abort(_('not a local repository: %s') % repo.root)
166 166
167 167 fn = opts.get('revlog')
168 168 if not fn:
169 169 indexfn = repo.sjoin('00manifest.i')
170 170 else:
171 171 if not fn.endswith('.i'):
172 172 raise util.Abort(_('--revlog option must specify the revlog index '
173 173 'file (*.i), not %s') % opts.get('revlog'))
174 174
175 175 indexfn = os.path.realpath(fn)
176 176 store = repo.sjoin('')
177 177 if not indexfn.startswith(store):
178 178 raise util.Abort(_('--revlog option must specify a revlog in %s, '
179 179 'not %s') % (store, indexfn))
180 180
181 181 sortname = opts['sort']
182 182 try:
183 183 toposort = globals()['toposort_' + sortname]
184 184 except KeyError:
185 185 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
186 186
187 187 if not os.path.exists(indexfn):
188 188 raise util.Abort(_('no such file: %s') % indexfn)
189 189 if '00changelog' in indexfn:
190 190 raise util.Abort(_('shrinking the changelog '
191 191 'will corrupt your repository'))
192 192
193 193 ui.write(_('shrinking %s\n') % indexfn)
194 prefix = os.path.basename(indexfn)[:-1]
195 194 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
196 195
197 196 r1 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), indexfn)
198 197 r2 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), tmpindexfn)
199 198
200 199 datafn, tmpdatafn = r1.datafile, r2.datafile
201 200
202 201 oldindexfn = indexfn + '.old'
203 202 olddatafn = datafn + '.old'
204 203 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
205 204 raise util.Abort(_('one or both of\n'
206 205 ' %s\n'
207 206 ' %s\n'
208 207 'exists from a previous run; please clean up '
209 208 'before running again') % (oldindexfn, olddatafn))
210 209
211 210 # Don't use repo.transaction(), because then things get hairy with
212 211 # paths: some need to be relative to .hg, and some need to be
213 212 # absolute. Doing it this way keeps things simple: everything is an
214 213 # absolute path.
215 214 lock = repo.lock(wait=False)
216 215 tr = transaction.transaction(ui.warn,
217 216 open,
218 217 repo.sjoin('journal'))
219 218
220 219 def ignoremissing(func):
221 220 def f(*args, **kw):
222 221 try:
223 222 return func(*args, **kw)
224 223 except OSError, inst:
225 224 if inst.errno != errno.ENOENT:
226 225 raise
227 226 return f
228 227
229 228 try:
230 229 try:
231 230 order = toposort(ui, r1)
232 231
233 232 suboptimal = 0
234 233 for i in xrange(1, len(order)):
235 234 parents = [p for p in r1.parentrevs(order[i])
236 235 if p != node.nullrev]
237 236 if parents and order[i - 1] not in parents:
238 237 suboptimal += 1
239 238 ui.note(_('%d suboptimal nodes\n') % suboptimal)
240 239
241 240 writerevs(ui, r1, r2, order, tr)
242 241 report(ui, r1, r2)
243 242 tr.close()
244 243 except:
245 244 # Abort transaction first, so we truncate the files before
246 245 # deleting them.
247 246 tr.abort()
248 247 for fn in (tmpindexfn, tmpdatafn):
249 248 ignoremissing(os.unlink)(fn)
250 249 raise
251 250 if not opts.get('dry_run'):
252 251 # racy, both files cannot be renamed atomically
253 252 # copy files
254 253 util.oslink(indexfn, oldindexfn)
255 254 ignoremissing(util.oslink)(datafn, olddatafn)
256 255
257 256 # rename
258 257 util.rename(tmpindexfn, indexfn)
259 258 try:
260 259 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
261 260 util.rename(tmpdatafn, datafn)
262 261 except OSError, inst:
263 262 if inst.errno != errno.ENOENT:
264 263 raise
265 264 ignoremissing(os.unlink)(datafn)
266 265 else:
267 266 for fn in (tmpindexfn, tmpdatafn):
268 267 ignoremissing(os.unlink)(fn)
269 268 finally:
270 269 lock.release()
271 270
272 271 if not opts.get('dry_run'):
273 272 ui.write(_('note: old revlog saved in:\n'
274 273 ' %s\n'
275 274 ' %s\n'
276 275 '(You can delete those files when you are satisfied that your\n'
277 276 'repository is still sane. '
278 277 'Running \'hg verify\' is strongly recommended.)\n')
279 278 % (oldindexfn, olddatafn))
280 279
281 280 cmdtable = {
282 281 'shrink': (shrink,
283 282 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
284 283 ('n', 'dry-run', None, _('do not shrink, simulate only')),
285 284 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
286 285 ],
287 286 _('hg shrink [--revlog PATH]'))
288 287 }
289 288
290 289 if __name__ == "__main__":
291 290 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now