##// END OF EJS Templates
shrink-revlog: remove \ from docstring
Augie Fackler -
r14028:7e453770 default
parent child Browse files
Show More
@@ -1,288 +1,287 b''
1 """\
2 reorder a revlog (the manifest by default) to save space
1 """reorder a revlog (the manifest by default) to save space
3 2
4 3 Specifically, this topologically sorts the revisions in the revlog so that
5 4 revisions on the same branch are adjacent as much as possible. This is a
6 5 workaround for the fact that Mercurial computes deltas relative to the
7 6 previous revision rather than relative to a parent revision.
8 7
9 8 This is *not* safe to run on a changelog.
10 9 """
11 10
12 11 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
13 12 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
14 13 # renamed by Greg Ward <greg at gerg.ca>.
15 14
16 15 # XXX would be nice to have a way to verify the repository after shrinking,
17 16 # e.g. by comparing "before" and "after" states of random changesets
18 17 # (maybe: export before, shrink, export after, diff).
19 18
20 19 import os, tempfile, errno
21 20 from mercurial import revlog, transaction, node, util
22 21 from mercurial import changegroup
23 22 from mercurial.i18n import _
24 23
25 24
26 25 def postorder(start, edges):
27 26 result = []
28 27 visit = list(start)
29 28 finished = set()
30 29
31 30 while visit:
32 31 cur = visit[-1]
33 32 for p in edges[cur]:
34 33 if p not in finished:
35 34 visit.append(p)
36 35 break
37 36 else:
38 37 result.append(cur)
39 38 finished.add(cur)
40 39 visit.pop()
41 40
42 41 return result
43 42
44 43 def toposort_reversepostorder(ui, rl):
45 44 # postorder of the reverse directed graph
46 45
47 46 # map rev to list of parent revs (p2 first)
48 47 parents = {}
49 48 heads = set()
50 49 ui.status(_('reading revs\n'))
51 50 try:
52 51 for rev in rl:
53 52 ui.progress(_('reading'), rev, total=len(rl))
54 53 (p1, p2) = rl.parentrevs(rev)
55 54 if p1 == p2 == node.nullrev:
56 55 parents[rev] = () # root node
57 56 elif p1 == p2 or p2 == node.nullrev:
58 57 parents[rev] = (p1,) # normal node
59 58 else:
60 59 parents[rev] = (p2, p1) # merge node
61 60 heads.add(rev)
62 61 for p in parents[rev]:
63 62 heads.discard(p)
64 63 finally:
65 64 ui.progress(_('reading'), None)
66 65
67 66 heads = list(heads)
68 67 heads.sort(reverse=True)
69 68
70 69 ui.status(_('sorting revs\n'))
71 70 return postorder(heads, parents)
72 71
73 72 def toposort_postorderreverse(ui, rl):
74 73 # reverse-postorder of the reverse directed graph
75 74
76 75 children = {}
77 76 roots = set()
78 77 ui.status(_('reading revs\n'))
79 78 try:
80 79 for rev in rl:
81 80 ui.progress(_('reading'), rev, total=len(rl))
82 81 (p1, p2) = rl.parentrevs(rev)
83 82 if p1 == p2 == node.nullrev:
84 83 roots.add(rev)
85 84 children[rev] = []
86 85 if p1 != node.nullrev:
87 86 children[p1].append(rev)
88 87 if p2 != node.nullrev:
89 88 children[p2].append(rev)
90 89 finally:
91 90 ui.progress(_('reading'), None)
92 91
93 92 roots = list(roots)
94 93 roots.sort()
95 94
96 95 ui.status(_('sorting revs\n'))
97 96 result = postorder(roots, children)
98 97 result.reverse()
99 98 return result
100 99
101 100 def writerevs(ui, r1, r2, order, tr):
102 101
103 102 ui.status(_('writing revs\n'))
104 103
105 104
106 105 order = [r1.node(r) for r in order]
107 106
108 107 # this is a bit ugly, but it works
109 108 count = [0]
110 109 def lookup(x):
111 110 count[0] += 1
112 111 ui.progress(_('writing'), count[0], total=len(order))
113 112 return "%020d" % r1.linkrev(r1.rev(x))
114 113
115 114 unlookup = lambda x: int(x, 10)
116 115
117 116 try:
118 117 group = util.chunkbuffer(r1.group(order, lookup, progress))
119 118 group = changegroup.unbundle10(group, "UN")
120 119 r2.addgroup(group, unlookup, tr)
121 120 finally:
122 121 ui.progress(_('writing'), None)
123 122
124 123 def report(ui, r1, r2):
125 124 def getsize(r):
126 125 s = 0
127 126 for fn in (r.indexfile, r.datafile):
128 127 try:
129 128 s += os.stat(fn).st_size
130 129 except OSError, inst:
131 130 if inst.errno != errno.ENOENT:
132 131 raise
133 132 return s
134 133
135 134 oldsize = float(getsize(r1))
136 135 newsize = float(getsize(r2))
137 136
138 137 # argh: have to pass an int to %d, because a float >= 2^32
139 138 # blows up under Python 2.5 or earlier
140 139 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
141 140 % (int(oldsize), oldsize / 1024 / 1024))
142 141 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
143 142 % (int(newsize), newsize / 1024 / 1024))
144 143
145 144 shrink_percent = (oldsize - newsize) / oldsize * 100
146 145 shrink_factor = oldsize / newsize
147 146 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
148 147 % (shrink_percent, shrink_factor))
149 148
150 149 def shrink(ui, repo, **opts):
151 150 """shrink a revlog by reordering revisions
152 151
153 152 Rewrites all the entries in some revlog of the current repository
154 153 (by default, the manifest log) to save space.
155 154
156 155 Different sort algorithms have different performance
157 156 characteristics. Use ``--sort`` to select a sort algorithm so you
158 157 can determine which works best for your data.
159 158 """
160 159
161 160 if not repo.local():
162 161 raise util.Abort(_('not a local repository: %s') % repo.root)
163 162
164 163 fn = opts.get('revlog')
165 164 if not fn:
166 165 indexfn = repo.sjoin('00manifest.i')
167 166 else:
168 167 if not fn.endswith('.i'):
169 168 raise util.Abort(_('--revlog option must specify the revlog index '
170 169 'file (*.i), not %s') % opts.get('revlog'))
171 170
172 171 indexfn = os.path.realpath(fn)
173 172 store = repo.sjoin('')
174 173 if not indexfn.startswith(store):
175 174 raise util.Abort(_('--revlog option must specify a revlog in %s, '
176 175 'not %s') % (store, indexfn))
177 176
178 177 sortname = opts['sort']
179 178 try:
180 179 toposort = globals()['toposort_' + sortname]
181 180 except KeyError:
182 181 raise util.Abort(_('no such toposort algorithm: %s') % sortname)
183 182
184 183 if not os.path.exists(indexfn):
185 184 raise util.Abort(_('no such file: %s') % indexfn)
186 185 if '00changelog' in indexfn:
187 186 raise util.Abort(_('shrinking the changelog '
188 187 'will corrupt your repository'))
189 188
190 189 ui.write(_('shrinking %s\n') % indexfn)
191 190 prefix = os.path.basename(indexfn)[:-1]
192 191 tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
193 192
194 193 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
195 194 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
196 195
197 196 datafn, tmpdatafn = r1.datafile, r2.datafile
198 197
199 198 oldindexfn = indexfn + '.old'
200 199 olddatafn = datafn + '.old'
201 200 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
202 201 raise util.Abort(_('one or both of\n'
203 202 ' %s\n'
204 203 ' %s\n'
205 204 'exists from a previous run; please clean up '
206 205 'before running again') % (oldindexfn, olddatafn))
207 206
208 207 # Don't use repo.transaction(), because then things get hairy with
209 208 # paths: some need to be relative to .hg, and some need to be
210 209 # absolute. Doing it this way keeps things simple: everything is an
211 210 # absolute path.
212 211 lock = repo.lock(wait=False)
213 212 tr = transaction.transaction(ui.warn,
214 213 open,
215 214 repo.sjoin('journal'))
216 215
217 216 def ignoremissing(func):
218 217 def f(*args, **kw):
219 218 try:
220 219 return func(*args, **kw)
221 220 except OSError, inst:
222 221 if inst.errno != errno.ENOENT:
223 222 raise
224 223 return f
225 224
226 225 try:
227 226 try:
228 227 order = toposort(ui, r1)
229 228
230 229 suboptimal = 0
231 230 for i in xrange(1, len(order)):
232 231 parents = [p for p in r1.parentrevs(order[i])
233 232 if p != node.nullrev]
234 233 if parents and order[i - 1] not in parents:
235 234 suboptimal += 1
236 235 ui.note(_('%d suboptimal nodes\n') % suboptimal)
237 236
238 237 writerevs(ui, r1, r2, order, tr)
239 238 report(ui, r1, r2)
240 239 tr.close()
241 240 except:
242 241 # Abort transaction first, so we truncate the files before
243 242 # deleting them.
244 243 tr.abort()
245 244 for fn in (tmpindexfn, tmpdatafn):
246 245 ignoremissing(os.unlink)(fn)
247 246 raise
248 247 if not opts.get('dry_run'):
249 248 # racy, both files cannot be renamed atomically
250 249 # copy files
251 250 util.os_link(indexfn, oldindexfn)
252 251 ignoremissing(util.os_link)(datafn, olddatafn)
253 252
254 253 # rename
255 254 util.rename(tmpindexfn, indexfn)
256 255 try:
257 256 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
258 257 util.rename(tmpdatafn, datafn)
259 258 except OSError, inst:
260 259 if inst.errno != errno.ENOENT:
261 260 raise
262 261 ignoremissing(os.unlink)(datafn)
263 262 else:
264 263 for fn in (tmpindexfn, tmpdatafn):
265 264 ignoremissing(os.unlink)(fn)
266 265 finally:
267 266 lock.release()
268 267
269 268 if not opts.get('dry_run'):
270 269 ui.write(_('note: old revlog saved in:\n'
271 270 ' %s\n'
272 271 ' %s\n'
273 272 '(You can delete those files when you are satisfied that your\n'
274 273 'repository is still sane. '
275 274 'Running \'hg verify\' is strongly recommended.)\n')
276 275 % (oldindexfn, olddatafn))
277 276
278 277 cmdtable = {
279 278 'shrink': (shrink,
280 279 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
281 280 ('n', 'dry-run', None, _('do not shrink, simulate only')),
282 281 ('', 'sort', 'reversepostorder', 'name of sort algorithm to use'),
283 282 ],
284 283 _('hg shrink [--revlog PATH]'))
285 284 }
286 285
287 286 if __name__ == "__main__":
288 287 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now