##// END OF EJS Templates
shrink-revlog: preserve mode of the shrunken index and data file....
Greg Ward -
r11267:d3ebb1a0 stable
parent child Browse files
Show More
@@ -1,237 +1,242 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 """\
3 """\
4 reorder a revlog (the manifest by default) to save space
4 reorder a revlog (the manifest by default) to save space
5
5
6 Specifically, this topologically sorts the revisions in the revlog so that
6 Specifically, this topologically sorts the revisions in the revlog so that
7 revisions on the same branch are adjacent as much as possible. This is a
7 revisions on the same branch are adjacent as much as possible. This is a
8 workaround for the fact that Mercurial computes deltas relative to the
8 workaround for the fact that Mercurial computes deltas relative to the
9 previous revision rather than relative to a parent revision.
9 previous revision rather than relative to a parent revision.
10
10
11 This is *not* safe to run on a changelog.
11 This is *not* safe to run on a changelog.
12 """
12 """
13
13
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 # renamed by Greg Ward <greg at gerg.ca>.
16 # renamed by Greg Ward <greg at gerg.ca>.
17
17
18 # XXX would be nice to have a way to verify the repository after shrinking,
18 # XXX would be nice to have a way to verify the repository after shrinking,
19 # e.g. by comparing "before" and "after" states of random changesets
19 # e.g. by comparing "before" and "after" states of random changesets
20 # (maybe: export before, shrink, export after, diff).
20 # (maybe: export before, shrink, export after, diff).
21
21
22 import os, tempfile, errno
22 import os, tempfile, errno
23 from mercurial import revlog, transaction, node, util
23 from mercurial import revlog, transaction, node, util
24 from mercurial import changegroup
24 from mercurial import changegroup
25 from mercurial.i18n import _
25 from mercurial.i18n import _
26
26
def toposort(ui, rl):
    """Return the revision numbers of revlog ``rl`` in topological order.

    Every revision is emitted after all of its (non-null) parents, and
    the children of a revision are scheduled ahead of anything else that
    is still pending, so a line of development stays contiguous in the
    result for as long as possible.

    ui -- used for status messages and progress reporting
    rl -- the revlog to sort; it is iterated for revision numbers and
          queried through parentrevs()
    """
    children = {}
    root = []
    total = len(rl)

    # build children and roots
    ui.status(_('reading revs\n'))
    try:
        for i in rl:
            ui.progress(_('reading'), i, total=total)
            children[i] = []
            parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
            # in case of duplicate parents
            if len(parents) == 2 and parents[0] == parents[1]:
                del parents[1]
            for p in parents:
                # parents must already have been seen while iterating rl
                assert p in children
                children[p].append(i)

            if not parents:
                root.append(i)
    finally:
        ui.progress(_('reading'), None, total=total)

    # XXX this is a reimplementation of the 'branchsort' topo sort
    # algorithm in hgext.convert.convcmd... would be nice not to duplicate
    # the algorithm
    ui.status(_('sorting revs\n'))

    # 'pending' is a stack whose top (the end of the list) is the next
    # revision to emit.  Pushing candidates in reverse keeps exactly the
    # order produced by prepending to a queue, without copying the whole
    # work list on every iteration.
    pending = root[::-1]
    ret = []
    while pending:
        i = pending.pop()
        ret.append(i)
        if i not in children:
            # This only happens if some node's p1 == p2, which can
            # happen in the manifest in certain circumstances.
            continue
        ready = []
        for c in children.pop(i):
            # A revision still present in 'children' has not been emitted
            # yet; c only becomes ready once none of its parents is.
            parents_unseen = [p for p in rl.parentrevs(c)
                              if p != node.nullrev and p in children]
            if not parents_unseen:
                ready.append(c)
        pending.extend(reversed(ready))
    return ret
71
71
def writerevs(ui, r1, r2, order, tr):
    """Copy the revisions of revlog ``r1`` into revlog ``r2`` in ``order``.

    ui    -- used for status messages and progress reporting
    r1    -- source revlog
    r2    -- destination revlog (receives the revisions via addgroup())
    order -- revision numbers of r1, in the order they should be written
    tr    -- transaction passed through to r2.addgroup()
    """
    ui.status(_('writing revs\n'))

    # A one-element list so the nested function can update the counter
    # (a plain local could not be rebound from inside progress()).
    count = [0]
    def progress(*args):
        ui.progress(_('writing'), count[0], total=len(order))
        count[0] += 1

    # r1.group() is driven by nodes, not revision numbers
    order = [r1.node(r) for r in order]

    # this is a bit ugly, but it works: the linkrev of each revision is
    # smuggled through the group as a zero-padded 20-character decimal
    # string (lookup) and turned back into an int on the way in (unlookup)
    lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
    unlookup = lambda x: int(x, 10)

    try:
        group = util.chunkbuffer(r1.group(order, lookup, progress))
        chunkiter = changegroup.chunkiter(group)
        r2.addgroup(chunkiter, unlookup, tr)
    finally:
        # close the progress topic; pass the total by keyword like every
        # other ui.progress() call in this file
        ui.progress(_('writing'), None, total=len(order))
93
93
94 def report(ui, r1, r2):
94 def report(ui, r1, r2):
95 def getsize(r):
95 def getsize(r):
96 s = 0
96 s = 0
97 for fn in (r.indexfile, r.datafile):
97 for fn in (r.indexfile, r.datafile):
98 try:
98 try:
99 s += os.stat(fn).st_size
99 s += os.stat(fn).st_size
100 except OSError, inst:
100 except OSError, inst:
101 if inst.errno != errno.ENOENT:
101 if inst.errno != errno.ENOENT:
102 raise
102 raise
103 return s
103 return s
104
104
105 oldsize = float(getsize(r1))
105 oldsize = float(getsize(r1))
106 newsize = float(getsize(r2))
106 newsize = float(getsize(r2))
107
107
108 # argh: have to pass an int to %d, because a float >= 2^32
108 # argh: have to pass an int to %d, because a float >= 2^32
109 # blows up under Python 2.5 or earlier
109 # blows up under Python 2.5 or earlier
110 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
110 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
111 % (int(oldsize), oldsize / 1024 / 1024))
111 % (int(oldsize), oldsize / 1024 / 1024))
112 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
112 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
113 % (int(newsize), newsize / 1024 / 1024))
113 % (int(newsize), newsize / 1024 / 1024))
114
114
115 shrink_percent = (oldsize - newsize) / oldsize * 100
115 shrink_percent = (oldsize - newsize) / oldsize * 100
116 shrink_factor = oldsize / newsize
116 shrink_factor = oldsize / newsize
117 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
117 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
118 % (shrink_percent, shrink_factor))
118 % (shrink_percent, shrink_factor))
119
119
120 def shrink(ui, repo, **opts):
120 def shrink(ui, repo, **opts):
121 """
121 """
122 Shrink revlog by re-ordering revisions. Will operate on manifest for
122 Shrink revlog by re-ordering revisions. Will operate on manifest for
123 the given repository if no other revlog is specified."""
123 the given repository if no other revlog is specified."""
124
124
125 if not repo.local():
125 if not repo.local():
126 raise util.Abort(_('not a local repository: %s') % repo.root)
126 raise util.Abort(_('not a local repository: %s') % repo.root)
127
127
128 fn = opts.get('revlog')
128 fn = opts.get('revlog')
129 if not fn:
129 if not fn:
130 indexfn = repo.sjoin('00manifest.i')
130 indexfn = repo.sjoin('00manifest.i')
131 else:
131 else:
132 if not fn.endswith('.i'):
132 if not fn.endswith('.i'):
133 raise util.Abort(_('--revlog option must specify the revlog index '
133 raise util.Abort(_('--revlog option must specify the revlog index '
134 'file (*.i), not %s') % opts.get('revlog'))
134 'file (*.i), not %s') % opts.get('revlog'))
135
135
136 indexfn = os.path.realpath(fn)
136 indexfn = os.path.realpath(fn)
137 store = repo.sjoin('')
137 store = repo.sjoin('')
138 if not indexfn.startswith(store):
138 if not indexfn.startswith(store):
139 raise util.Abort(_('--revlog option must specify a revlog in %s, '
139 raise util.Abort(_('--revlog option must specify a revlog in %s, '
140 'not %s') % (store, indexfn))
140 'not %s') % (store, indexfn))
141
141
142 if not os.path.exists(indexfn):
142 if not os.path.exists(indexfn):
143 raise util.Abort(_('no such file: %s') % indexfn)
143 raise util.Abort(_('no such file: %s') % indexfn)
144 if '00changelog' in indexfn:
144 if '00changelog' in indexfn:
145 raise util.Abort(_('shrinking the changelog '
145 raise util.Abort(_('shrinking the changelog '
146 'will corrupt your repository'))
146 'will corrupt your repository'))
147
147
148 ui.write(_('shrinking %s\n') % indexfn)
148 ui.write(_('shrinking %s\n') % indexfn)
149 prefix = os.path.basename(indexfn)[:-1]
149 prefix = os.path.basename(indexfn)[:-1]
150 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
150 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
151 prefix=prefix,
151 prefix=prefix,
152 suffix='.i')
152 suffix='.i')
153 os.close(tmpfd)
153 os.close(tmpfd)
154
154
155 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
155 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
156 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
156 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
157
157
158 datafn, tmpdatafn = r1.datafile, r2.datafile
158 datafn, tmpdatafn = r1.datafile, r2.datafile
159
159
160 oldindexfn = indexfn + '.old'
160 oldindexfn = indexfn + '.old'
161 olddatafn = datafn + '.old'
161 olddatafn = datafn + '.old'
162 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
162 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
163 raise util.Abort(_('one or both of\n'
163 raise util.Abort(_('one or both of\n'
164 ' %s\n'
164 ' %s\n'
165 ' %s\n'
165 ' %s\n'
166 'exists from a previous run; please clean up '
166 'exists from a previous run; please clean up '
167 'before running again') % (oldindexfn, olddatafn))
167 'before running again') % (oldindexfn, olddatafn))
168
168
169 # Don't use repo.transaction(), because then things get hairy with
169 # Don't use repo.transaction(), because then things get hairy with
170 # paths: some need to be relative to .hg, and some need to be
170 # paths: some need to be relative to .hg, and some need to be
171 # absolute. Doing it this way keeps things simple: everything is an
171 # absolute. Doing it this way keeps things simple: everything is an
172 # absolute path.
172 # absolute path.
173 lock = repo.lock(wait=False)
173 lock = repo.lock(wait=False)
174 tr = transaction.transaction(ui.warn,
174 tr = transaction.transaction(ui.warn,
175 open,
175 open,
176 repo.sjoin('journal'))
176 repo.sjoin('journal'))
177
177
178 def ignoremissing(func):
178 def ignoremissing(func):
179 def f(*args, **kw):
179 def f(*args, **kw):
180 try:
180 try:
181 return func(*args, **kw)
181 return func(*args, **kw)
182 except OSError, inst:
182 except OSError, inst:
183 if inst.errno != errno.ENOENT:
183 if inst.errno != errno.ENOENT:
184 raise
184 raise
185 return f
185 return f
186
186
187 try:
187 try:
188 try:
188 try:
189 order = toposort(ui, r1)
189 order = toposort(ui, r1)
190 writerevs(ui, r1, r2, order, tr)
190 writerevs(ui, r1, r2, order, tr)
191 report(ui, r1, r2)
191 report(ui, r1, r2)
192 tr.close()
192 tr.close()
193 except:
193 except:
194 # Abort transaction first, so we truncate the files before
194 # Abort transaction first, so we truncate the files before
195 # deleting them.
195 # deleting them.
196 tr.abort()
196 tr.abort()
197 for fn in (tmpindexfn, tmpdatafn):
197 for fn in (tmpindexfn, tmpdatafn):
198 ignoremissing(os.unlink)(fn)
198 ignoremissing(os.unlink)(fn)
199 raise
199 raise
200 if not opts.get('dry_run'):
200 if not opts.get('dry_run'):
201 # racy, both files cannot be renamed atomically
201 # racy, both files cannot be renamed atomically
202 # copy files
202 # copy files
203 util.os_link(indexfn, oldindexfn)
203 util.os_link(indexfn, oldindexfn)
204 ignoremissing(util.os_link)(datafn, olddatafn)
204 ignoremissing(util.os_link)(datafn, olddatafn)
205
206 # mkstemp() creates files only readable by the owner
207 os.chmod(tmpindexfn, os.stat(indexfn).st_mode)
208
205 # rename
209 # rename
206 util.rename(tmpindexfn, indexfn)
210 util.rename(tmpindexfn, indexfn)
207 try:
211 try:
212 os.chmod(tmpdatafn, os.stat(datafn).st_mode)
208 util.rename(tmpdatafn, datafn)
213 util.rename(tmpdatafn, datafn)
209 except OSError, inst:
214 except OSError, inst:
210 if inst.errno != errno.ENOENT:
215 if inst.errno != errno.ENOENT:
211 raise
216 raise
212 ignoremissing(os.unlink)(datafn)
217 ignoremissing(os.unlink)(datafn)
213 else:
218 else:
214 for fn in (tmpindexfn, tmpdatafn):
219 for fn in (tmpindexfn, tmpdatafn):
215 ignoremissing(os.unlink)(fn)
220 ignoremissing(os.unlink)(fn)
216 finally:
221 finally:
217 lock.release()
222 lock.release()
218
223
219 if not opts.get('dry_run'):
224 if not opts.get('dry_run'):
220 ui.write(_('note: old revlog saved in:\n'
225 ui.write(_('note: old revlog saved in:\n'
221 ' %s\n'
226 ' %s\n'
222 ' %s\n'
227 ' %s\n'
223 '(You can delete those files when you are satisfied that your\n'
228 '(You can delete those files when you are satisfied that your\n'
224 'repository is still sane. '
229 'repository is still sane. '
225 'Running \'hg verify\' is strongly recommended.)\n')
230 'Running \'hg verify\' is strongly recommended.)\n')
226 % (oldindexfn, olddatafn))
231 % (oldindexfn, olddatafn))
227
232
# Command table: maps the command name to a tuple of
# (function, list of options, synopsis string).
# Each option appears to be (short flag, long name, default, help text);
# the value of 'dry-run' is read by shrink() as opts['dry_run'].
cmdtable = {
    'shrink': (shrink,
               [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
                ('n', 'dry-run', None, _('do not shrink, simulate only')),
                ],
               _('hg shrink [--revlog PATH]'))
}
235
240
# Running this file directly as a script only prints a pointer to the
# extension mechanism; the functionality is reached through cmdtable.
if __name__ == "__main__":
    print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now