##// END OF EJS Templates
shrink-repo: wrong variable name
Pradeepkumar Gayam -
r11298:3e46d76e default
parent child Browse files
Show More
@@ -1,289 +1,289 b''
1 1 #!/usr/bin/env python
2 2
3 3 """\
4 4 reorder a revlog (the manifest by default) to save space
5 5
6 6 Specifically, this topologically sorts the revisions in the revlog so that
7 7 revisions on the same branch are adjacent as much as possible. This is a
8 8 workaround for the fact that Mercurial computes deltas relative to the
9 9 previous revision rather than relative to a parent revision.
10 10
11 11 This is *not* safe to run on a changelog.
12 12 """
13 13
14 14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 16 # renamed by Greg Ward <greg at gerg.ca>.
17 17
18 18 # XXX would be nice to have a way to verify the repository after shrinking,
19 19 # e.g. by comparing "before" and "after" states of random changesets
20 20 # (maybe: export before, shrink, export after, diff).
21 21
22 22 import os, tempfile, errno
23 23 from mercurial import revlog, transaction, node, util
24 24 from mercurial import changegroup
25 25 from mercurial.i18n import _
26 26
27 27
def postorder(start, edges):
    """Return the nodes reachable from ``start`` in depth-first postorder.

    ``start`` is an iterable of nodes to begin from; ``edges`` maps each
    node to an iterable of its successors, and every successor must
    itself be a key of ``edges``.  The walk uses an explicit stack rather
    than recursion, and the last element of ``start`` is explored first.

    A node is emitted only after all of its successors, and it appears
    in the result exactly once -- even if ``start`` contains duplicates
    or one start node is reachable from another.
    """
    result = []
    visit = list(start)
    finished = set()

    while visit:
        cur = visit[-1]
        if cur in finished:
            # cur was on the stack more than once and has already been
            # emitted through another entry; drop the stale one instead
            # of emitting it a second time
            visit.pop()
            continue
        for p in edges[cur]:
            if p not in finished:
                # descend into the first unfinished successor; cur stays
                # on the stack and is re-examined afterwards
                visit.append(p)
                break
        else:
            # every successor is finished, so cur can be emitted
            result.append(cur)
            finished.add(cur)
            visit.pop()

    return result
45 45
def toposort_reversepostorder(ui, rl):
    """Order the revisions of ``rl`` by a postorder walk from its heads.

    Reads the parents of every revision in ``rl``, collects the
    revisions that are nobody's parent (the heads), and returns
    ``postorder(heads, parents)``: a list of revision numbers in which
    every revision comes after its parents.  Progress and status
    messages are sent to ``ui``.
    """
    # postorder of the reverse directed graph

    nullrev = node.nullrev
    total = len(rl)

    # map rev to tuple of parent revs (p2 first)
    parents = {}
    heads = set()
    ui.status(_('reading revs\n'))
    try:
        for rev in rl:
            ui.progress(_('reading'), rev, total=total)
            (p1, p2) = rl.parentrevs(rev)
            if p1 == p2 == nullrev:
                parents[rev] = ()           # root node
            elif p1 == p2 or p2 == nullrev:
                parents[rev] = (p1,)        # normal node
            elif p1 == nullrev:
                # parents are (null, something): only p2 is real.
                # nullrev must never end up in the edge map because it
                # has no entry of its own and postorder() would fail
                # looking it up.
                parents[rev] = (p2,)
            else:
                parents[rev] = (p2, p1)     # merge node
            # a rev is a head until some later rev names it as a parent
            heads.add(rev)
            for p in parents[rev]:
                heads.discard(p)
    finally:
        # always clear the progress bar, even on error
        ui.progress(_('reading'), None)

    heads = list(heads)
    heads.sort(reverse=True)

    ui.status(_('sorting revs\n'))
    return postorder(heads, parents)
74 74
def toposort_postorderreverse(ui, rl):
    """Order the revisions of ``rl`` by a reversed postorder walk from roots.

    Builds a map from each revision to the revisions that name it as a
    parent, runs ``postorder`` over that map starting from the
    revisions with no parents (the roots), and returns the reversed
    result.  Progress and status messages are sent to ``ui``.
    """
    # reverse-postorder of the reverse directed graph

    nullrev = node.nullrev
    total = len(rl)

    # map rev to list of child revs
    children = {}
    roots = set()
    ui.status(_('reading revs\n'))
    try:
        for rev in rl:
            ui.progress(_('reading'), rev, total=total)
            (p1, p2) = rl.parentrevs(rev)
            if p1 == p2 == nullrev:
                roots.add(rev)
            children[rev] = []
            if p1 != nullrev:
                children[p1].append(rev)
            if p2 != nullrev:
                children[p2].append(rev)
    finally:
        # always clear the progress bar, even on error
        ui.progress(_('reading'), None)

    # sorted list of roots (ascending) is what postorder() starts from
    roots = sorted(roots)

    ui.status(_('sorting revs\n'))
    result = postorder(roots, children)
    result.reverse()
    return result
102 102
def writerevs(ui, r1, r2, order, tr):
    """Copy the revisions of ``r1`` into ``r2`` in the given ``order``.

    ``order`` is a sequence of revision numbers of ``r1``.  They are
    converted to node ids, bundled with ``r1.group()`` and fed to
    ``r2.addgroup()`` under transaction ``tr``.  Progress is reported
    through ``ui`` and always cleared on exit.
    """
    ui.status(_('writing revs\n'))

    nodes = [r1.node(rev) for rev in order]
    total = len(nodes)

    # boxed in a list so the nested callback can update it
    written = [0]

    def progress(*args):
        ui.progress(_('writing'), written[0], total=total)
        written[0] += 1

    # this is a bit ugly, but it works: the linkrev travels through the
    # group as a zero-padded 20-digit decimal string and is turned back
    # into an integer on the receiving side
    def lookup(n):
        return "%020d" % r1.linkrev(r1.rev(n))

    def unlookup(text):
        return int(text, 10)

    try:
        chunks = r1.group(nodes, lookup, progress)
        buf = util.chunkbuffer(chunks)
        r2.addgroup(changegroup.chunkiter(buf), unlookup, tr)
    finally:
        ui.progress(_('writing'), None)
124 124
125 125 def report(ui, r1, r2):
126 126 def getsize(r):
127 127 s = 0
128 128 for fn in (r.indexfile, r.datafile):
129 129 try:
130 130 s += os.stat(fn).st_size
131 131 except OSError, inst:
132 132 if inst.errno != errno.ENOENT:
133 133 raise
134 134 return s
135 135
136 136 oldsize = float(getsize(r1))
137 137 newsize = float(getsize(r2))
138 138
139 139 # argh: have to pass an int to %d, because a float >= 2^32
140 140 # blows up under Python 2.5 or earlier
141 141 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
142 142 % (int(oldsize), oldsize / 1024 / 1024))
143 143 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
144 144 % (int(newsize), newsize / 1024 / 1024))
145 145
146 146 shrink_percent = (oldsize - newsize) / oldsize * 100
147 147 shrink_factor = oldsize / newsize
148 148 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
149 149 % (shrink_percent, shrink_factor))
150 150
151 151 def shrink(ui, repo, **opts):
152 152 """shrink a revlog by reordering revisions
153 153
154 154 Rewrites all the entries in some revlog of the current repository
155 155 (by default, the manifest log) to save space.
156 156
157 157 Different sort algorithms have different performance
158 158 characteristics. Use ``--sort`` to select a sort algorithm so you
159 159 can determine which works best for your data.
160 160 """
161 161
162 162 if not repo.local():
163 163 raise util.Abort(_('not a local repository: %s') % repo.root)
164 164
165 165 fn = opts.get('revlog')
166 166 if not fn:
167 167 indexfn = repo.sjoin('00manifest.i')
168 168 else:
169 169 if not fn.endswith('.i'):
170 170 raise util.Abort(_('--revlog option must specify the revlog index '
171 171 'file (*.i), not %s') % opts.get('revlog'))
172 172
173 173 indexfn = os.path.realpath(fn)
174 174 store = repo.sjoin('')
175 175 if not indexfn.startswith(store):
176 176 raise util.Abort(_('--revlog option must specify a revlog in %s, '
177 177 'not %s') % (store, indexfn))
178 178
179 179 sortname = opts['sort']
180 180 try:
181 181 toposort = globals()['toposort_' + sortname]
182 182 except KeyError:
183 183 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
184 184
185 185 if not os.path.exists(indexfn):
186 186 raise util.Abort(_('no such file: %s') % indexfn)
187 187 if '00changelog' in indexfn:
188 188 raise util.Abort(_('shrinking the changelog '
189 189 'will corrupt your repository'))
190 190
191 191 ui.write(_('shrinking %s\n') % indexfn)
192 192 prefix = os.path.basename(indexfn)[:-1]
193 193 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
194 194
195 195 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
196 196 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
197 197
198 198 datafn, tmpdatafn = r1.datafile, r2.datafile
199 199
200 200 oldindexfn = indexfn + '.old'
201 201 olddatafn = datafn + '.old'
202 202 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
203 203 raise util.Abort(_('one or both of\n'
204 204 ' %s\n'
205 205 ' %s\n'
206 206 'exists from a previous run; please clean up '
207 207 'before running again') % (oldindexfn, olddatafn))
208 208
209 209 # Don't use repo.transaction(), because then things get hairy with
210 210 # paths: some need to be relative to .hg, and some need to be
211 211 # absolute. Doing it this way keeps things simple: everything is an
212 212 # absolute path.
213 213 lock = repo.lock(wait=False)
214 214 tr = transaction.transaction(ui.warn,
215 215 open,
216 216 repo.sjoin('journal'))
217 217
218 218 def ignoremissing(func):
219 219 def f(*args, **kw):
220 220 try:
221 221 return func(*args, **kw)
222 222 except OSError, inst:
223 223 if inst.errno != errno.ENOENT:
224 224 raise
225 225 return f
226 226
227 227 try:
228 228 try:
229 229 order = toposort(ui, r1)
230 230
231 231 suboptimal = 0
232 232 for i in xrange(1, len(order)):
233 233 parents = [p for p in r1.parentrevs(order[i])
234 234 if p != node.nullrev]
235 235 if parents and order[i - 1] not in parents:
236 236 suboptimal += 1
237 237 ui.note(_('%d suboptimal nodes\n') % suboptimal)
238 238
239 239 writerevs(ui, r1, r2, order, tr)
240 240 report(ui, r1, r2)
241 241 tr.close()
242 242 except:
243 243 # Abort transaction first, so we truncate the files before
244 244 # deleting them.
245 245 tr.abort()
246 246 for fn in (tmpindexfn, tmpdatafn):
247 247 ignoremissing(os.unlink)(fn)
248 248 raise
249 249 if not opts.get('dry_run'):
250 250 # racy, both files cannot be renamed atomically
251 251 # copy files
252 252 util.os_link(indexfn, oldindexfn)
253 253 ignoremissing(util.os_link)(datafn, olddatafn)
254 254
255 255 # rename
256 256 util.rename(tmpindexfn, indexfn)
257 257 try:
258 258 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
259 259 util.rename(tmpdatafn, datafn)
260 260 except OSError, inst:
261 261 if inst.errno != errno.ENOENT:
262 262 raise
263 263 ignoremissing(os.unlink)(datafn)
264 264 else:
265 265 for fn in (tmpindexfn, tmpdatafn):
266 266 ignoremissing(os.unlink)(fn)
267 267 finally:
268 268 lock.release()
269 269
270 270 if not opts.get('dry_run'):
271 271 ui.write(_('note: old revlog saved in:\n'
272 272 ' %s\n'
273 273 ' %s\n'
274 274 '(You can delete those files when you are satisfied that your\n'
275 275 'repository is still sane. '
276 276 'Running \'hg verify\' is strongly recommended.)\n')
277 277 % (oldindexfn, olddatafn))
278 278
# Command table: maps the command name to
# (function, list of option tuples, synopsis).  Each option tuple is
# (short name, long name, default value, help text).
cmdtable = {
    'shrink': (shrink,
               [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
                ('n', 'dry-run', None, _('do not shrink, simulate only')),
                # help text goes through _() like the other options so
                # that it can be translated
                ('', 'sort', 'reversepostorder',
                 _('name of sort algorithm to use')),
                ],
               _('hg shrink [--revlog PATH]'))
}
287 287
if __name__ == "__main__":
    # This file is no longer a standalone script; tell the user so.
    # A single parenthesised string prints the same text with the
    # print statement.
    print("shrink-revlog.py is now an extension (see hg help extensions)")
General Comments 0
You need to be logged in to leave comments. Login now