##// END OF EJS Templates
shrink-revlog: preserve mode of the shrunken index and data file....
Greg Ward -
r11267:d3ebb1a0 stable
parent child Browse files
Show More
@@ -1,237 +1,242 b''
1 1 #!/usr/bin/env python
2 2
3 3 """\
4 4 reorder a revlog (the manifest by default) to save space
5 5
6 6 Specifically, this topologically sorts the revisions in the revlog so that
7 7 revisions on the same branch are adjacent as much as possible. This is a
8 8 workaround for the fact that Mercurial computes deltas relative to the
9 9 previous revision rather than relative to a parent revision.
10 10
11 11 This is *not* safe to run on a changelog.
12 12 """
13 13
14 14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 16 # renamed by Greg Ward <greg at gerg.ca>.
17 17
18 18 # XXX would be nice to have a way to verify the repository after shrinking,
19 19 # e.g. by comparing "before" and "after" states of random changesets
20 20 # (maybe: export before, shrink, export after, diff).
21 21
22 22 import os, tempfile, errno
23 23 from mercurial import revlog, transaction, node, util
24 24 from mercurial import changegroup
25 25 from mercurial.i18n import _
26 26
def toposort(ui, rl):
    """Return the revisions of ``rl`` in branch-grouped topological order.

    A revision is only emitted once all of its (non-null) parents have
    been emitted, and the children of a revision are visited before any
    other pending revision, so that revisions on the same branch end up
    next to each other.

    ui -- used to print status messages and to report progress
    rl -- the revlog to sort; it is iterated for revision numbers and
          queried through parentrevs()

    Returns a list of revision numbers.
    """

    children = {}
    roots = []
    # build children and roots
    ui.status(_('reading revs\n'))
    try:
        for i in rl:
            ui.progress(_('reading'), i, total=len(rl))
            children[i] = []
            parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
            # in case of duplicate parents
            if len(parents) == 2 and parents[0] == parents[1]:
                del parents[1]
            for p in parents:
                # internal invariant: a parent is always read before
                # its children
                assert p in children
                children[p].append(i)

            if not parents:
                roots.append(i)
    finally:
        ui.progress(_('reading'), None, total=len(rl))

    # XXX this is a reimplementation of the 'branchsort' topo sort
    # algorithm in hgext.convert.convcmd... would be nice not to duplicate
    # the algorithm
    ui.status(_('sorting revs\n'))

    # 'pending' is a stack whose top is the END of the list.  Popping from
    # and pushing onto the end is cheap, whereas list.pop(0) and
    # "new + pending" copy the whole list on every step, which made the
    # sort quadratic for long revlogs.  The visiting order is unchanged.
    pending = roots[::-1]
    ret = []
    while pending:
        i = pending.pop()
        ret.append(i)
        if i not in children:
            # This only happens if some node's p1 == p2, which can
            # happen in the manifest in certain circumstances.
            continue
        ready = []
        for c in children.pop(i):
            # a parent that is still a key of 'children' has not been
            # emitted yet
            parents_unseen = [p for p in rl.parentrevs(c)
                              if p != node.nullrev and p in children]
            if not parents_unseen:
                ready.append(c)
        # push in reverse so that the first ready child is visited next
        pending.extend(reversed(ready))
    return ret
71 71
def writerevs(ui, r1, r2, order, tr):
    """Copy the revisions of r1 into r2 in the given order.

    ui    -- used for status and progress output
    r1    -- source revlog
    r2    -- destination revlog; receives the revisions via addgroup()
    order -- revision numbers of r1, in the order they are to be written
    tr    -- transaction handed to r2.addgroup()
    """

    ui.status(_('writing revs\n'))

    # a one-element list, so that the nested function can update the
    # counter without rebinding a name in the enclosing scope
    count = [0]
    def progress(*args):
        ui.progress(_('writing'), count[0], total=len(order))
        count[0] += 1

    # r1.group() is fed nodes rather than revision numbers
    order = [r1.node(r) for r in order]

    # this is a bit ugly, but it works: the linkrev travels through the
    # group as a zero-padded 20-digit decimal string and is parsed back
    # into an int on the receiving side
    lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
    unlookup = lambda x: int(x, 10)

    try:
        group = util.chunkbuffer(r1.group(order, lookup, progress))
        chunkiter = changegroup.chunkiter(group)
        r2.addgroup(chunkiter, unlookup, tr)
    finally:
        # pass the total by keyword, like every other ui.progress() call
        # in this file; positionally it would be bound to whichever
        # parameter follows 'pos' instead
        ui.progress(_('writing'), None, total=len(order))
93 93
94 94 def report(ui, r1, r2):
95 95 def getsize(r):
96 96 s = 0
97 97 for fn in (r.indexfile, r.datafile):
98 98 try:
99 99 s += os.stat(fn).st_size
100 100 except OSError, inst:
101 101 if inst.errno != errno.ENOENT:
102 102 raise
103 103 return s
104 104
105 105 oldsize = float(getsize(r1))
106 106 newsize = float(getsize(r2))
107 107
108 108 # argh: have to pass an int to %d, because a float >= 2^32
109 109 # blows up under Python 2.5 or earlier
110 110 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
111 111 % (int(oldsize), oldsize / 1024 / 1024))
112 112 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
113 113 % (int(newsize), newsize / 1024 / 1024))
114 114
115 115 shrink_percent = (oldsize - newsize) / oldsize * 100
116 116 shrink_factor = oldsize / newsize
117 117 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
118 118 % (shrink_percent, shrink_factor))
119 119
120 120 def shrink(ui, repo, **opts):
121 121 """
122 122 Shrink revlog by re-ordering revisions. Will operate on manifest for
123 123 the given repository if no other revlog is specified."""
124 124
125 125 if not repo.local():
126 126 raise util.Abort(_('not a local repository: %s') % repo.root)
127 127
128 128 fn = opts.get('revlog')
129 129 if not fn:
130 130 indexfn = repo.sjoin('00manifest.i')
131 131 else:
132 132 if not fn.endswith('.i'):
133 133 raise util.Abort(_('--revlog option must specify the revlog index '
134 134 'file (*.i), not %s') % opts.get('revlog'))
135 135
136 136 indexfn = os.path.realpath(fn)
137 137 store = repo.sjoin('')
138 138 if not indexfn.startswith(store):
139 139 raise util.Abort(_('--revlog option must specify a revlog in %s, '
140 140 'not %s') % (store, indexfn))
141 141
142 142 if not os.path.exists(indexfn):
143 143 raise util.Abort(_('no such file: %s') % indexfn)
144 144 if '00changelog' in indexfn:
145 145 raise util.Abort(_('shrinking the changelog '
146 146 'will corrupt your repository'))
147 147
148 148 ui.write(_('shrinking %s\n') % indexfn)
149 149 prefix = os.path.basename(indexfn)[:-1]
150 150 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
151 151 prefix=prefix,
152 152 suffix='.i')
153 153 os.close(tmpfd)
154 154
155 155 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
156 156 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
157 157
158 158 datafn, tmpdatafn = r1.datafile, r2.datafile
159 159
160 160 oldindexfn = indexfn + '.old'
161 161 olddatafn = datafn + '.old'
162 162 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
163 163 raise util.Abort(_('one or both of\n'
164 164 ' %s\n'
165 165 ' %s\n'
166 166 'exists from a previous run; please clean up '
167 167 'before running again') % (oldindexfn, olddatafn))
168 168
169 169 # Don't use repo.transaction(), because then things get hairy with
170 170 # paths: some need to be relative to .hg, and some need to be
171 171 # absolute. Doing it this way keeps things simple: everything is an
172 172 # absolute path.
173 173 lock = repo.lock(wait=False)
174 174 tr = transaction.transaction(ui.warn,
175 175 open,
176 176 repo.sjoin('journal'))
177 177
178 178 def ignoremissing(func):
179 179 def f(*args, **kw):
180 180 try:
181 181 return func(*args, **kw)
182 182 except OSError, inst:
183 183 if inst.errno != errno.ENOENT:
184 184 raise
185 185 return f
186 186
187 187 try:
188 188 try:
189 189 order = toposort(ui, r1)
190 190 writerevs(ui, r1, r2, order, tr)
191 191 report(ui, r1, r2)
192 192 tr.close()
193 193 except:
194 194 # Abort transaction first, so we truncate the files before
195 195 # deleting them.
196 196 tr.abort()
197 197 for fn in (tmpindexfn, tmpdatafn):
198 198 ignoremissing(os.unlink)(fn)
199 199 raise
200 200 if not opts.get('dry_run'):
201 201 # racy, both files cannot be renamed atomically
202 202 # copy files
203 203 util.os_link(indexfn, oldindexfn)
204 204 ignoremissing(util.os_link)(datafn, olddatafn)
205
206 # mkstemp() creates files only readable by the owner
207 os.chmod(tmpindexfn, os.stat(indexfn).st_mode)
208
205 209 # rename
206 210 util.rename(tmpindexfn, indexfn)
207 211 try:
212 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
208 213 util.rename(tmpdatafn, datafn)
209 214 except OSError, inst:
210 215 if inst.errno != errno.ENOENT:
211 216 raise
212 217 ignoremissing(os.unlink)(datafn)
213 218 else:
214 219 for fn in (tmpindexfn, tmpdatafn):
215 220 ignoremissing(os.unlink)(fn)
216 221 finally:
217 222 lock.release()
218 223
219 224 if not opts.get('dry_run'):
220 225 ui.write(_('note: old revlog saved in:\n'
221 226 ' %s\n'
222 227 ' %s\n'
223 228 '(You can delete those files when you are satisfied that your\n'
224 229 'repository is still sane. '
225 230 'Running \'hg verify\' is strongly recommended.)\n')
226 231 % (oldindexfn, olddatafn))
227 232
228 233 cmdtable = {
229 234 'shrink': (shrink,
230 235 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
231 236 ('n', 'dry-run', None, _('do not shrink, simulate only')),
232 237 ],
233 238 _('hg shrink [--revlog PATH]'))
234 239 }
235 240
236 241 if __name__ == "__main__":
237 242 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now