##// END OF EJS Templates
shrink-revlog: update util.opener to scmutil.opener after d13913355390
Augie Fackler -
r14029:83d3f87c default
parent child Browse files
Show More
@@ -1,287 +1,287 b''
1 1 """reorder a revlog (the manifest by default) to save space
2 2
3 3 Specifically, this topologically sorts the revisions in the revlog so that
4 4 revisions on the same branch are adjacent as much as possible. This is a
5 5 workaround for the fact that Mercurial computes deltas relative to the
6 6 previous revision rather than relative to a parent revision.
7 7
8 8 This is *not* safe to run on a changelog.
9 9 """
10 10
11 11 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
12 12 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
13 13 # renamed by Greg Ward <greg at gerg.ca>.
14 14
15 15 # XXX would be nice to have a way to verify the repository after shrinking,
16 16 # e.g. by comparing "before" and "after" states of random changesets
17 17 # (maybe: export before, shrink, export after, diff).
18 18
19 19 import os, tempfile, errno
20 from mercurial import revlog, transaction, node, util
20 from mercurial import revlog, transaction, node, util, scmutil
21 21 from mercurial import changegroup
22 22 from mercurial.i18n import _
23 23
24 24
25 25 def postorder(start, edges):
26 26 result = []
27 27 visit = list(start)
28 28 finished = set()
29 29
30 30 while visit:
31 31 cur = visit[-1]
32 32 for p in edges[cur]:
33 33 if p not in finished:
34 34 visit.append(p)
35 35 break
36 36 else:
37 37 result.append(cur)
38 38 finished.add(cur)
39 39 visit.pop()
40 40
41 41 return result
42 42
43 43 def toposort_reversepostorder(ui, rl):
44 44 # postorder of the reverse directed graph
45 45
46 46 # map rev to list of parent revs (p2 first)
47 47 parents = {}
48 48 heads = set()
49 49 ui.status(_('reading revs\n'))
50 50 try:
51 51 for rev in rl:
52 52 ui.progress(_('reading'), rev, total=len(rl))
53 53 (p1, p2) = rl.parentrevs(rev)
54 54 if p1 == p2 == node.nullrev:
55 55 parents[rev] = () # root node
56 56 elif p1 == p2 or p2 == node.nullrev:
57 57 parents[rev] = (p1,) # normal node
58 58 else:
59 59 parents[rev] = (p2, p1) # merge node
60 60 heads.add(rev)
61 61 for p in parents[rev]:
62 62 heads.discard(p)
63 63 finally:
64 64 ui.progress(_('reading'), None)
65 65
66 66 heads = list(heads)
67 67 heads.sort(reverse=True)
68 68
69 69 ui.status(_('sorting revs\n'))
70 70 return postorder(heads, parents)
71 71
72 72 def toposort_postorderreverse(ui, rl):
73 73 # reverse-postorder of the reverse directed graph
74 74
75 75 children = {}
76 76 roots = set()
77 77 ui.status(_('reading revs\n'))
78 78 try:
79 79 for rev in rl:
80 80 ui.progress(_('reading'), rev, total=len(rl))
81 81 (p1, p2) = rl.parentrevs(rev)
82 82 if p1 == p2 == node.nullrev:
83 83 roots.add(rev)
84 84 children[rev] = []
85 85 if p1 != node.nullrev:
86 86 children[p1].append(rev)
87 87 if p2 != node.nullrev:
88 88 children[p2].append(rev)
89 89 finally:
90 90 ui.progress(_('reading'), None)
91 91
92 92 roots = list(roots)
93 93 roots.sort()
94 94
95 95 ui.status(_('sorting revs\n'))
96 96 result = postorder(roots, children)
97 97 result.reverse()
98 98 return result
99 99
100 100 def writerevs(ui, r1, r2, order, tr):
101 101
102 102 ui.status(_('writing revs\n'))
103 103
104 104
105 105 order = [r1.node(r) for r in order]
106 106
107 107 # this is a bit ugly, but it works
108 108 count = [0]
109 109 def lookup(x):
110 110 count[0] += 1
111 111 ui.progress(_('writing'), count[0], total=len(order))
112 112 return "%020d" % r1.linkrev(r1.rev(x))
113 113
114 114 unlookup = lambda x: int(x, 10)
115 115
116 116 try:
117 117 group = util.chunkbuffer(r1.group(order, lookup, progress))
118 118 group = changegroup.unbundle10(group, "UN")
119 119 r2.addgroup(group, unlookup, tr)
120 120 finally:
121 121 ui.progress(_('writing'), None)
122 122
123 123 def report(ui, r1, r2):
124 124 def getsize(r):
125 125 s = 0
126 126 for fn in (r.indexfile, r.datafile):
127 127 try:
128 128 s += os.stat(fn).st_size
129 129 except OSError, inst:
130 130 if inst.errno != errno.ENOENT:
131 131 raise
132 132 return s
133 133
134 134 oldsize = float(getsize(r1))
135 135 newsize = float(getsize(r2))
136 136
137 137 # argh: have to pass an int to %d, because a float >= 2^32
138 138 # blows up under Python 2.5 or earlier
139 139 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
140 140 % (int(oldsize), oldsize / 1024 / 1024))
141 141 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
142 142 % (int(newsize), newsize / 1024 / 1024))
143 143
144 144 shrink_percent = (oldsize - newsize) / oldsize * 100
145 145 shrink_factor = oldsize / newsize
146 146 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
147 147 % (shrink_percent, shrink_factor))
148 148
149 149 def shrink(ui, repo, **opts):
150 150 """shrink a revlog by reordering revisions
151 151
152 152 Rewrites all the entries in some revlog of the current repository
153 153 (by default, the manifest log) to save space.
154 154
155 155 Different sort algorithms have different performance
156 156 characteristics. Use ``--sort`` to select a sort algorithm so you
157 157 can determine which works best for your data.
158 158 """
159 159
160 160 if not repo.local():
161 161 raise util.Abort(_('not a local repository: %s') % repo.root)
162 162
163 163 fn = opts.get('revlog')
164 164 if not fn:
165 165 indexfn = repo.sjoin('00manifest.i')
166 166 else:
167 167 if not fn.endswith('.i'):
168 168 raise util.Abort(_('--revlog option must specify the revlog index '
169 169 'file (*.i), not %s') % opts.get('revlog'))
170 170
171 171 indexfn = os.path.realpath(fn)
172 172 store = repo.sjoin('')
173 173 if not indexfn.startswith(store):
174 174 raise util.Abort(_('--revlog option must specify a revlog in %s, '
175 175 'not %s') % (store, indexfn))
176 176
177 177 sortname = opts['sort']
178 178 try:
179 179 toposort = globals()['toposort_' + sortname]
180 180 except KeyError:
181 181 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
182 182
183 183 if not os.path.exists(indexfn):
184 184 raise util.Abort(_('no such file: %s') % indexfn)
185 185 if '00changelog' in indexfn:
186 186 raise util.Abort(_('shrinking the changelog '
187 187 'will corrupt your repository'))
188 188
189 189 ui.write(_('shrinking %s\n') % indexfn)
190 190 prefix = os.path.basename(indexfn)[:-1]
191 191 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
192 192
193 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
194 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
193 r1 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), indexfn)
194 r2 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), tmpindexfn)
195 195
196 196 datafn, tmpdatafn = r1.datafile, r2.datafile
197 197
198 198 oldindexfn = indexfn + '.old'
199 199 olddatafn = datafn + '.old'
200 200 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
201 201 raise util.Abort(_('one or both of\n'
202 202 ' %s\n'
203 203 ' %s\n'
204 204 'exists from a previous run; please clean up '
205 205 'before running again') % (oldindexfn, olddatafn))
206 206
207 207 # Don't use repo.transaction(), because then things get hairy with
208 208 # paths: some need to be relative to .hg, and some need to be
209 209 # absolute. Doing it this way keeps things simple: everything is an
210 210 # absolute path.
211 211 lock = repo.lock(wait=False)
212 212 tr = transaction.transaction(ui.warn,
213 213 open,
214 214 repo.sjoin('journal'))
215 215
216 216 def ignoremissing(func):
217 217 def f(*args, **kw):
218 218 try:
219 219 return func(*args, **kw)
220 220 except OSError, inst:
221 221 if inst.errno != errno.ENOENT:
222 222 raise
223 223 return f
224 224
225 225 try:
226 226 try:
227 227 order = toposort(ui, r1)
228 228
229 229 suboptimal = 0
230 230 for i in xrange(1, len(order)):
231 231 parents = [p for p in r1.parentrevs(order[i])
232 232 if p != node.nullrev]
233 233 if parents and order[i - 1] not in parents:
234 234 suboptimal += 1
235 235 ui.note(_('%d suboptimal nodes\n') % suboptimal)
236 236
237 237 writerevs(ui, r1, r2, order, tr)
238 238 report(ui, r1, r2)
239 239 tr.close()
240 240 except:
241 241 # Abort transaction first, so we truncate the files before
242 242 # deleting them.
243 243 tr.abort()
244 244 for fn in (tmpindexfn, tmpdatafn):
245 245 ignoremissing(os.unlink)(fn)
246 246 raise
247 247 if not opts.get('dry_run'):
248 248 # racy, both files cannot be renamed atomically
249 249 # copy files
250 250 util.os_link(indexfn, oldindexfn)
251 251 ignoremissing(util.os_link)(datafn, olddatafn)
252 252
253 253 # rename
254 254 util.rename(tmpindexfn, indexfn)
255 255 try:
256 256 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
257 257 util.rename(tmpdatafn, datafn)
258 258 except OSError, inst:
259 259 if inst.errno != errno.ENOENT:
260 260 raise
261 261 ignoremissing(os.unlink)(datafn)
262 262 else:
263 263 for fn in (tmpindexfn, tmpdatafn):
264 264 ignoremissing(os.unlink)(fn)
265 265 finally:
266 266 lock.release()
267 267
268 268 if not opts.get('dry_run'):
269 269 ui.write(_('note: old revlog saved in:\n'
270 270 ' %s\n'
271 271 ' %s\n'
272 272 '(You can delete those files when you are satisfied that your\n'
273 273 'repository is still sane. '
274 274 'Running \'hg verify\' is strongly recommended.)\n')
275 275 % (oldindexfn, olddatafn))
276 276
277 277 cmdtable = {
278 278 'shrink': (shrink,
279 279 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
280 280 ('n', 'dry-run', None, _('do not shrink, simulate only')),
281 281 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
282 282 ],
283 283 _('hg shrink [--revlog PATH]'))
284 284 }
285 285
286 286 if __name__ == "__main__":
287 287 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now