##// END OF EJS Templates
shrink: use progress API
Benoit Boissinot -
r10440:b39b32c3 default
parent child Browse files
Show More
@@ -1,222 +1,219
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 """\
3 """\
4 reorder a revlog (the manifest by default) to save space
4 reorder a revlog (the manifest by default) to save space
5
5
6 Specifically, this topologically sorts the revisions in the revlog so that
6 Specifically, this topologically sorts the revisions in the revlog so that
7 revisions on the same branch are adjacent as much as possible. This is a
7 revisions on the same branch are adjacent as much as possible. This is a
8 workaround for the fact that Mercurial computes deltas relative to the
8 workaround for the fact that Mercurial computes deltas relative to the
9 previous revision rather than relative to a parent revision.
9 previous revision rather than relative to a parent revision.
10
10
11 This is *not* safe to run on a changelog.
11 This is *not* safe to run on a changelog.
12 """
12 """
13
13
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 # renamed by Greg Ward <greg at gerg.ca>.
16 # renamed by Greg Ward <greg at gerg.ca>.
17
17
18 # XXX would be nice to have a way to verify the repository after shrinking,
18 # XXX would be nice to have a way to verify the repository after shrinking,
19 # e.g. by comparing "before" and "after" states of random changesets
19 # e.g. by comparing "before" and "after" states of random changesets
20 # (maybe: export before, shrink, export after, diff).
20 # (maybe: export before, shrink, export after, diff).
21
21
22 import sys, os, tempfile
22 import sys, os, tempfile
23 import optparse
23 import optparse
24 from mercurial import ui as ui_, hg, revlog, transaction, node, util
24 from mercurial import ui as ui_, hg, revlog, transaction, node, util
25 from mercurial import changegroup
25 from mercurial import changegroup
26
26
def toposort(ui, rl):
    """Return the revisions of ``rl`` in a branch-grouped topological order.

    ``rl`` is iterated to obtain revision numbers and queried with
    ``rl.parentrevs(rev)``; ``ui`` receives status and progress output.

    The result is a list of revision numbers in which every revision
    appears after all of its (non-null) parents, and in which the children
    of a revision are emitted immediately after it whenever all their
    parents have already been emitted.
    """

    children = {}
    root = []
    # build children and roots
    ui.status('reading revs\n')
    total = len(rl)
    try:
        for i in rl:
            ui.progress('reading', i, total=total)
            children[i] = []
            parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
            # in case of duplicate parents
            if len(parents) == 2 and parents[0] == parents[1]:
                del parents[1]
            for p in parents:
                # parents must have been seen before their children
                assert p in children
                children[p].append(i)

            if not parents:
                root.append(i)
    finally:
        # pos=None tells the progress API that this topic is finished
        ui.progress('reading', None, total=total)

    # XXX this is a reimplementation of the 'branchsort' topo sort
    # algorithm in hgext.convert.convcmd... would be nice not to duplicate
    # the algorithm
    ui.status('sorting revs\n')
    # 'pending' is a stack whose *last* element is the next revision to
    # emit.  Keeping it reversed gives O(1) pops and pushes while producing
    # exactly the order of the "pop(0) / prepend" formulation.
    pending = root[::-1]
    ret = []
    while pending:
        rev = pending.pop()
        ret.append(rev)
        if rev not in children:
            # This only happens if some node's p1 == p2, which can
            # happen in the manifest in certain circumstances.
            continue
        ready = []
        for c in children.pop(rev):
            # a child becomes ready once none of its parents is still
            # waiting in 'children' (i.e. all of them have been emitted)
            parents_unseen = [p for p in rl.parentrevs(c)
                              if p != node.nullrev and p in children]
            if not parents_unseen:
                ready.append(c)
        # push in reverse so that ready[0] ends up on top of the stack
        ready.reverse()
        pending.extend(ready)
    return ret
74
71
def writerevs(ui, r1, r2, order, tr):
    """Write the revisions of ``r1`` into ``r2`` in the sequence ``order``.

    ``order`` is a sequence of revision numbers of ``r1``.  The revisions
    are streamed out of ``r1`` with ``r1.group()`` and fed into ``r2`` with
    ``r2.addgroup()`` under transaction ``tr``.  Progress is reported
    through ``ui.progress`` under the topic 'writing'.
    """

    ui.status('writing revs\n')

    nodes = [r1.node(r) for r in order]
    total = len(nodes)

    # Python 2 has no 'nonlocal': use a one-element list as a mutable counter
    count = [0]
    def progress(*args):
        ui.progress('writing', count[0], total=total)
        count[0] += 1

    # this is a bit ugly, but it works: the linkrev is smuggled through the
    # group as a zero-padded decimal string and parsed back on the way in
    lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
    unlookup = lambda x: int(x, 10)

    try:
        group = util.chunkbuffer(r1.group(nodes, lookup, progress))
        chunkiter = changegroup.chunkiter(group)
        r2.addgroup(chunkiter, unlookup, tr)
    finally:
        # Close the progress topic.  'total' must be passed by keyword: the
        # third positional parameter of ui.progress is 'item', not 'total'.
        ui.progress('writing', None, total=total)
96
93
def report(ui, olddatafn, newdatafn):
    """Write a size comparison of two data files to ``ui``.

    ``olddatafn`` and ``newdatafn`` are paths; both are stat'ed, so
    ``os.stat`` errors (e.g. a missing file) propagate to the caller.
    Three lines are written: the old size, the new size, and the shrinkage
    as a percentage and a factor.  If either file is empty the ratios that
    would divide by zero are left out instead of raising
    ZeroDivisionError.
    """
    oldsize = float(os.stat(olddatafn).st_size)
    newsize = float(os.stat(newdatafn).st_size)

    # argh: have to pass an int to %d, because a float >= 2^32
    # blows up under Python 2.5 or earlier
    ui.write('old file size: %12d bytes (%6.1f MiB)\n'
             % (int(oldsize), oldsize / 1024 / 1024))
    ui.write('new file size: %12d bytes (%6.1f MiB)\n'
             % (int(newsize), newsize / 1024 / 1024))

    if oldsize and newsize:
        shrink_percent = (oldsize - newsize) / oldsize * 100
        shrink_factor = oldsize / newsize
        ui.write('shrinkage: %.1f%% (%.1fx)\n'
                 % (shrink_percent, shrink_factor))
    elif oldsize:
        # new file is empty: the factor would be a division by zero
        ui.write('shrinkage: 100.0%\n')
    else:
        # old file is empty: no meaningful percentage or factor exists
        ui.write('shrinkage: n/a (old file is empty)\n')
111
108
def shrink(ui, repo, **opts):
    """
    Shrink revlog by re-ordering revisions. Will operate on manifest for
    the given repository if no other revlog is specified."""
    # NOTE: the docstring above is presumably shown as the command's help
    # text (the function is registered in cmdtable), so it is kept short
    # and user-facing; implementation notes live in comments instead.
    #
    # Outline:
    #   1. work out and validate the index (.i) / data (.d) file names
    #   2. write a re-ordered copy of the revlog into temp files next to it
    #   3. unless --dry-run, hard-link the originals to *.old and rename
    #      the temp files over them

    # Unbuffer stdout for nice progress output.
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    if not repo.local():
        raise util.Abort('not a local repository: %s' % repo.root)

    fn = opts.get('revlog')
    if not fn:
        indexfn = repo.sjoin('00manifest.i')
    else:
        if not fn.endswith('.i'):
            raise util.Abort('--revlog option must specify the revlog index '
                             'file (*.i), not %s' % opts.get('revlog'))

        indexfn = os.path.realpath(fn)
        store = repo.sjoin('')
        if not indexfn.startswith(store):
            raise util.Abort('--revlog option must specify a revlog in %s, '
                             'not %s' % (store, indexfn))

    datafn = indexfn[:-2] + '.d'
    if not os.path.exists(indexfn):
        raise util.Abort('no such file: %s' % indexfn)
    if '00changelog' in indexfn:
        raise util.Abort('shrinking the changelog will corrupt your repository')
    if not os.path.exists(datafn):
        # This is just a lazy shortcut because I can't be bothered to
        # handle all the special cases that entail from no .d file.
        raise util.Abort('%s does not exist: revlog not big enough '
                         'to be worth shrinking' % datafn)

    oldindexfn = indexfn + '.old'
    olddatafn = datafn + '.old'
    if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
        raise util.Abort('one or both of\n'
                         ' %s\n'
                         ' %s\n'
                         'exists from a previous run; please clean up before '
                         'running again' % (oldindexfn, olddatafn))

    ui.write('shrinking %s\n' % indexfn)
    prefix = os.path.basename(indexfn)[:-1]
    (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
                                           prefix=prefix,
                                           suffix='.i')
    tmpdatafn = tmpindexfn[:-2] + '.d'
    os.close(tmpfd)

    # From here on a temp file exists on disk, so every further step runs
    # inside the try block: a failure while opening the revlogs, taking
    # the lock or creating the transaction must still remove the temp file
    # and release whatever was acquired.
    lock = None
    tr = None
    try:
        try:
            r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
            r2 = revlog.revlog(util.opener(os.getcwd(), audit=False),
                               tmpindexfn)

            # Don't use repo.transaction(), because then things get hairy
            # with paths: some need to be relative to .hg, and some need
            # to be absolute. Doing it this way keeps things simple:
            # everything is an absolute path.
            lock = repo.lock(wait=False)
            tr = transaction.transaction(ui.warn,
                                         open,
                                         repo.sjoin('journal'))

            order = toposort(ui, r1)
            writerevs(ui, r1, r2, order, tr)
            report(ui, datafn, tmpdatafn)
            tr.close()
        except:
            # Deliberately bare: clean up on *any* failure (including
            # KeyboardInterrupt), then re-raise unchanged.
            # Abort transaction first, so we truncate the files before
            # deleting them.
            if tr is not None:
                tr.abort()
            if os.path.exists(tmpindexfn):
                os.unlink(tmpindexfn)
            if os.path.exists(tmpdatafn):
                os.unlink(tmpdatafn)
            raise
        if not opts.get('dry_run'):
            # Racy since both files cannot be renamed atomically
            util.os_link(indexfn, oldindexfn)
            util.os_link(datafn, olddatafn)
            util.rename(tmpindexfn, indexfn)
            util.rename(tmpdatafn, datafn)
        else:
            os.unlink(tmpindexfn)
            os.unlink(tmpdatafn)
    finally:
        if lock is not None:
            lock.release()

    if not opts.get('dry_run'):
        ui.write('note: old revlog saved in:\n'
                 ' %s\n'
                 ' %s\n'
                 '(You can delete those files when you are satisfied that your\n'
                 'repository is still sane. '
                 'Running \'hg verify\' is strongly recommended.)\n'
                 % (oldindexfn, olddatafn))
212
209
# Command table: maps the command name to a
# (function, option list, synopsis) triple.  Each option entry is
# (short flag, long flag, default value, help text).
cmdtable = {
    'shrink': (
        shrink,
        [
            ('', 'revlog', '', 'index (.i) file of the revlog to shrink'),
            ('n', 'dry-run', None, 'do not shrink, simulate only'),
        ],
        'hg shrink [--revlog PATH]',
    ),
}
220
217
if __name__ == "__main__":
    # Running this file as a script is no longer supported; tell the user
    # how to use it instead.  The parenthesised single-argument form
    # prints the same text under Python 2's print statement and Python 3's
    # print() function, so the module at least compiles on both.
    print("shrink-revlog.py is now an extension (see hg help extensions)")
General Comments 0
You need to be logged in to leave comments. Login now