##// END OF EJS Templates
shrink-revlog: add missing whitespace in expression
Martin Geisler -
r10655:f63fb5c0 default
parent child Browse files
Show More
@@ -1,290 +1,290 b''
1 1 #!/usr/bin/env python
2 2
3 3 """\
4 4 reorder a revlog (the manifest by default) to save space
5 5
6 6 Specifically, this topologically sorts the revisions in the revlog so that
7 7 revisions on the same branch are adjacent as much as possible. This is a
8 8 workaround for the fact that Mercurial computes deltas relative to the
9 9 previous revision rather than relative to a parent revision.
10 10
11 11 This is *not* safe to run on a changelog.
12 12 """
13 13
14 14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 16 # renamed by Greg Ward <greg at gerg.ca>.
17 17
18 18 # XXX would be nice to have a way to verify the repository after shrinking,
19 19 # e.g. by comparing "before" and "after" states of random changesets
20 20 # (maybe: export before, shrink, export after, diff).
21 21
22 22 import os, tempfile, errno
23 23 from mercurial import revlog, transaction, node, util
24 24 from mercurial import changegroup
25 25 from mercurial.i18n import _
26 26
27 27
28 28 def postorder(start, edges):
29 29 result = []
30 30 visit = list(start)
31 31 finished = set()
32 32
33 33 while visit:
34 34 cur = visit[-1]
35 35 for p in edges[cur]:
36 36 if p not in finished:
37 37 visit.append(p)
38 38 break
39 39 else:
40 40 result.append(cur)
41 41 finished.add(cur)
42 42 visit.pop()
43 43
44 44 return result
45 45
46 46 def toposort_reversepostorder(ui, rl):
47 47 # postorder of the reverse directed graph
48 48
49 49 # map rev to list of parent revs (p2 first)
50 50 parents = {}
51 51 heads = set()
52 52 ui.status(_('reading revs\n'))
53 53 try:
54 54 for rev in rl:
55 55 ui.progress(_('reading'), rev, total=len(rl))
56 56 (p1, p2) = rl.parentrevs(rev)
57 57 if p1 == p2 == node.nullrev:
58 58 parents[rev] = () # root node
59 59 elif p1 == p2 or p2 == node.nullrev:
60 60 parents[rev] = (p1,) # normal node
61 61 else:
62 62 parents[rev] = (p2, p1) # merge node
63 63 heads.add(rev)
64 64 for p in parents[rev]:
65 65 heads.discard(p)
66 66 finally:
67 67 ui.progress(_('reading'), None, total=len(rl))
68 68
69 69 heads = list(heads)
70 70 heads.sort(reverse=True)
71 71
72 72 ui.status(_('sorting revs\n'))
73 73 return postorder(heads, parents)
74 74
75 75 def toposort_postorderreverse(ui, rl):
76 76 # reverse-postorder of the reverse directed graph
77 77
78 78 children = {}
79 79 roots = set()
80 80 ui.status(_('reading revs\n'))
81 81 try:
82 82 for rev in rl:
83 83 ui.progress(_('reading'), rev, total=len(rl))
84 84 (p1, p2) = rl.parentrevs(rev)
85 85 if p1 == p2 == node.nullrev:
86 86 roots.add(rev)
87 87 children[rev] = []
88 88 if p1 != node.nullrev:
89 89 children[p1].append(rev)
90 90 if p2 != node.nullrev:
91 91 children[p2].append(rev)
92 92 finally:
93 93 ui.progress(_('reading'), None, total=len(rl))
94 94
95 95 root = list(roots)
96 96 roots.sort()
97 97
98 98 ui.status(_('sorting revs\n'))
99 99 result = postorder(roots, children)
100 100 result.reverse()
101 101 return result
102 102
103 103 def writerevs(ui, r1, r2, order, tr):
104 104
105 105 ui.status(_('writing revs\n'))
106 106
107 107 count = [0]
108 108 def progress(*args):
109 109 ui.progress(_('writing'), count[0], total=len(order))
110 110 count[0] += 1
111 111
112 112 order = [r1.node(r) for r in order]
113 113
114 114 # this is a bit ugly, but it works
115 115 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
116 116 unlookup = lambda x: int(x, 10)
117 117
118 118 try:
119 119 group = util.chunkbuffer(r1.group(order, lookup, progress))
120 120 chunkiter = changegroup.chunkiter(group)
121 121 r2.addgroup(chunkiter, unlookup, tr)
122 122 finally:
123 123 ui.progress(_('writing'), None, len(order))
124 124
125 125 def report(ui, r1, r2):
126 126 def getsize(r):
127 127 s = 0
128 128 for fn in (r.indexfile, r.datafile):
129 129 try:
130 130 s += os.stat(fn).st_size
131 131 except OSError, inst:
132 132 if inst.errno != errno.ENOENT:
133 133 raise
134 134 return s
135 135
136 136 oldsize = float(getsize(r1))
137 137 newsize = float(getsize(r2))
138 138
139 139 # argh: have to pass an int to %d, because a float >= 2^32
140 140 # blows up under Python 2.5 or earlier
141 141 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
142 142 % (int(oldsize), oldsize / 1024 / 1024))
143 143 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
144 144 % (int(newsize), newsize / 1024 / 1024))
145 145
146 146 shrink_percent = (oldsize - newsize) / oldsize * 100
147 147 shrink_factor = oldsize / newsize
148 148 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
149 149 % (shrink_percent, shrink_factor))
150 150
151 151 def shrink(ui, repo, **opts):
152 152 """shrink a revlog by reordering revisions
153 153
154 154 Rewrites all the entries in some revlog of the current repository
155 155 (by default, the manifest log) to save space.
156 156
157 157 Different sort algorithms have different performance
158 158 characteristics. Use ``--sort`` to select a sort algorithm so you
159 159 can determine which works best for your data.
160 160 """
161 161
162 162 if not repo.local():
163 163 raise util.Abort(_('not a local repository: %s') % repo.root)
164 164
165 165 fn = opts.get('revlog')
166 166 if not fn:
167 167 indexfn = repo.sjoin('00manifest.i')
168 168 else:
169 169 if not fn.endswith('.i'):
170 170 raise util.Abort(_('--revlog option must specify the revlog index '
171 171 'file (*.i), not %s') % opts.get('revlog'))
172 172
173 173 indexfn = os.path.realpath(fn)
174 174 store = repo.sjoin('')
175 175 if not indexfn.startswith(store):
176 176 raise util.Abort(_('--revlog option must specify a revlog in %s, '
177 177 'not %s') % (store, indexfn))
178 178
179 179 sortname = opts['sort']
180 180 try:
181 181 toposort = globals()['toposort_' + sortname]
182 182 except KeyError:
183 183 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
184 184
185 185 if not os.path.exists(indexfn):
186 186 raise util.Abort(_('no such file: %s') % indexfn)
187 187 if '00changelog' in indexfn:
188 188 raise util.Abort(_('shrinking the changelog '
189 189 'will corrupt your repository'))
190 190
191 191 ui.write(_('shrinking %s\n') % indexfn)
192 192 prefix = os.path.basename(indexfn)[:-1]
193 193 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
194 194 prefix=prefix,
195 195 suffix='.i')
196 196 os.close(tmpfd)
197 197
198 198 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
199 199 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
200 200
201 201 datafn, tmpdatafn = r1.datafile, r2.datafile
202 202
203 203 oldindexfn = indexfn + '.old'
204 204 olddatafn = datafn + '.old'
205 205 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
206 206 raise util.Abort(_('one or both of\n'
207 207 ' %s\n'
208 208 ' %s\n'
209 209 'exists from a previous run; please clean up '
210 210 'before running again') % (oldindexfn, olddatafn))
211 211
212 212 # Don't use repo.transaction(), because then things get hairy with
213 213 # paths: some need to be relative to .hg, and some need to be
214 214 # absolute. Doing it this way keeps things simple: everything is an
215 215 # absolute path.
216 216 lock = repo.lock(wait=False)
217 217 tr = transaction.transaction(ui.warn,
218 218 open,
219 219 repo.sjoin('journal'))
220 220
221 221 def ignoremissing(func):
222 222 def f(*args, **kw):
223 223 try:
224 224 return func(*args, **kw)
225 225 except OSError, inst:
226 226 if inst.errno != errno.ENOENT:
227 227 raise
228 228 return f
229 229
230 230 try:
231 231 try:
232 232 order = toposort(ui, r1)
233 233
234 234 suboptimal = 0
235 235 for i in xrange(1, len(order)):
236 236 parents = [p for p in r1.parentrevs(order[i])
237 237 if p != node.nullrev]
238 if parents and order[i-1] not in parents:
238 if parents and order[i - 1] not in parents:
239 239 suboptimal += 1
240 240 ui.note(_('%d suboptimal nodes\n') % suboptimal)
241 241
242 242 writerevs(ui, r1, r2, order, tr)
243 243 report(ui, r1, r2)
244 244 tr.close()
245 245 except:
246 246 # Abort transaction first, so we truncate the files before
247 247 # deleting them.
248 248 tr.abort()
249 249 for fn in (tmpindexfn, tmpdatafn):
250 250 ignoremissing(os.unlink)(fn)
251 251 raise
252 252 if not opts.get('dry_run'):
253 253 # racy, both files cannot be renamed atomically
254 254 # copy files
255 255 util.os_link(indexfn, oldindexfn)
256 256 ignoremissing(util.os_link)(datafn, olddatafn)
257 257 # rename
258 258 util.rename(tmpindexfn, indexfn)
259 259 try:
260 260 util.rename(tmpdatafn, datafn)
261 261 except OSError, inst:
262 262 if inst.errno != errno.ENOENT:
263 263 raise
264 264 ignoremissing(os.unlink)(datafn)
265 265 else:
266 266 for fn in (tmpindexfn, tmpdatafn):
267 267 ignoremissing(os.unlink)(fn)
268 268 finally:
269 269 lock.release()
270 270
271 271 if not opts.get('dry_run'):
272 272 ui.write(_('note: old revlog saved in:\n'
273 273 ' %s\n'
274 274 ' %s\n'
275 275 '(You can delete those files when you are satisfied that your\n'
276 276 'repository is still sane. '
277 277 'Running \'hg verify\' is strongly recommended.)\n')
278 278 % (oldindexfn, olddatafn))
279 279
280 280 cmdtable = {
281 281 'shrink': (shrink,
282 282 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
283 283 ('n', 'dry-run', None, _('do not shrink, simulate only')),
284 284 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
285 285 ],
286 286 _('hg shrink [--revlog PATH]'))
287 287 }
288 288
289 289 if __name__ == "__main__":
290 290 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now