##// END OF EJS Templates
shrink-revlog: remove unneeded imports and useless code
Benoit Boissinot -
r10509:3e7e789d stable
parent child Browse files
Show More
@@ -1,222 +1,218
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 """\
3 """\
4 reorder a revlog (the manifest by default) to save space
4 reorder a revlog (the manifest by default) to save space
5
5
6 Specifically, this topologically sorts the revisions in the revlog so that
6 Specifically, this topologically sorts the revisions in the revlog so that
7 revisions on the same branch are adjacent as much as possible. This is a
7 revisions on the same branch are adjacent as much as possible. This is a
8 workaround for the fact that Mercurial computes deltas relative to the
8 workaround for the fact that Mercurial computes deltas relative to the
9 previous revision rather than relative to a parent revision.
9 previous revision rather than relative to a parent revision.
10
10
11 This is *not* safe to run on a changelog.
11 This is *not* safe to run on a changelog.
12 """
12 """
13
13
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 # renamed by Greg Ward <greg at gerg.ca>.
16 # renamed by Greg Ward <greg at gerg.ca>.
17
17
18 # XXX would be nice to have a way to verify the repository after shrinking,
18 # XXX would be nice to have a way to verify the repository after shrinking,
19 # e.g. by comparing "before" and "after" states of random changesets
19 # e.g. by comparing "before" and "after" states of random changesets
20 # (maybe: export before, shrink, export after, diff).
20 # (maybe: export before, shrink, export after, diff).
21
21
22 import sys, os, tempfile
22 import os, tempfile
23 import optparse
23 from mercurial import revlog, transaction, node, util
24 from mercurial import ui as ui_, hg, revlog, transaction, node, util
25 from mercurial import changegroup
24 from mercurial import changegroup
26 from mercurial.i18n import _
25 from mercurial.i18n import _
27
26
28 def toposort(ui, rl):
27 def toposort(ui, rl):
29
28
30 children = {}
29 children = {}
31 root = []
30 root = []
32 # build children and roots
31 # build children and roots
33 ui.status(_('reading revs\n'))
32 ui.status(_('reading revs\n'))
34 try:
33 try:
35 for i in rl:
34 for i in rl:
36 ui.progress(_('reading'), i, total=len(rl))
35 ui.progress(_('reading'), i, total=len(rl))
37 children[i] = []
36 children[i] = []
38 parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
37 parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
39 # in case of duplicate parents
38 # in case of duplicate parents
40 if len(parents) == 2 and parents[0] == parents[1]:
39 if len(parents) == 2 and parents[0] == parents[1]:
41 del parents[1]
40 del parents[1]
42 for p in parents:
41 for p in parents:
43 assert p in children
42 assert p in children
44 children[p].append(i)
43 children[p].append(i)
45
44
46 if len(parents) == 0:
45 if len(parents) == 0:
47 root.append(i)
46 root.append(i)
48 finally:
47 finally:
49 ui.progress(_('reading'), None, total=len(rl))
48 ui.progress(_('reading'), None, total=len(rl))
50
49
51 # XXX this is a reimplementation of the 'branchsort' topo sort
50 # XXX this is a reimplementation of the 'branchsort' topo sort
52 # algorithm in hgext.convert.convcmd... would be nice not to duplicate
51 # algorithm in hgext.convert.convcmd... would be nice not to duplicate
53 # the algorithm
52 # the algorithm
54 ui.status(_('sorting revs\n'))
53 ui.status(_('sorting revs\n'))
55 visit = root
54 visit = root
56 ret = []
55 ret = []
57 while visit:
56 while visit:
58 i = visit.pop(0)
57 i = visit.pop(0)
59 ret.append(i)
58 ret.append(i)
60 if i not in children:
59 if i not in children:
61 # This only happens if some node's p1 == p2, which can
60 # This only happens if some node's p1 == p2, which can
62 # happen in the manifest in certain circumstances.
61 # happen in the manifest in certain circumstances.
63 continue
62 continue
64 next = []
63 next = []
65 for c in children.pop(i):
64 for c in children.pop(i):
66 parents_unseen = [p for p in rl.parentrevs(c)
65 parents_unseen = [p for p in rl.parentrevs(c)
67 if p != node.nullrev and p in children]
66 if p != node.nullrev and p in children]
68 if len(parents_unseen) == 0:
67 if len(parents_unseen) == 0:
69 next.append(c)
68 next.append(c)
70 visit = next + visit
69 visit = next + visit
71 return ret
70 return ret
72
71
73 def writerevs(ui, r1, r2, order, tr):
72 def writerevs(ui, r1, r2, order, tr):
74
73
75 ui.status(_('writing revs\n'))
74 ui.status(_('writing revs\n'))
76
75
77 count = [0]
76 count = [0]
78 def progress(*args):
77 def progress(*args):
79 ui.progress(_('writing'), count[0], total=len(order))
78 ui.progress(_('writing'), count[0], total=len(order))
80 count[0] += 1
79 count[0] += 1
81
80
82 order = [r1.node(r) for r in order]
81 order = [r1.node(r) for r in order]
83
82
84 # this is a bit ugly, but it works
83 # this is a bit ugly, but it works
85 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
84 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
86 unlookup = lambda x: int(x, 10)
85 unlookup = lambda x: int(x, 10)
87
86
88 try:
87 try:
89 group = util.chunkbuffer(r1.group(order, lookup, progress))
88 group = util.chunkbuffer(r1.group(order, lookup, progress))
90 chunkiter = changegroup.chunkiter(group)
89 chunkiter = changegroup.chunkiter(group)
91 r2.addgroup(chunkiter, unlookup, tr)
90 r2.addgroup(chunkiter, unlookup, tr)
92 finally:
91 finally:
93 ui.progress(_('writing'), None, len(order))
92 ui.progress(_('writing'), None, len(order))
94
93
95 def report(ui, olddatafn, newdatafn):
94 def report(ui, olddatafn, newdatafn):
96 oldsize = float(os.stat(olddatafn).st_size)
95 oldsize = float(os.stat(olddatafn).st_size)
97 newsize = float(os.stat(newdatafn).st_size)
96 newsize = float(os.stat(newdatafn).st_size)
98
97
99 # argh: have to pass an int to %d, because a float >= 2^32
98 # argh: have to pass an int to %d, because a float >= 2^32
100 # blows up under Python 2.5 or earlier
99 # blows up under Python 2.5 or earlier
101 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
100 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
102 % (int(oldsize), oldsize / 1024 / 1024))
101 % (int(oldsize), oldsize / 1024 / 1024))
103 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
102 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
104 % (int(newsize), newsize / 1024 / 1024))
103 % (int(newsize), newsize / 1024 / 1024))
105
104
106 shrink_percent = (oldsize - newsize) / oldsize * 100
105 shrink_percent = (oldsize - newsize) / oldsize * 100
107 shrink_factor = oldsize / newsize
106 shrink_factor = oldsize / newsize
108 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
107 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
109 % (shrink_percent, shrink_factor))
108 % (shrink_percent, shrink_factor))
110
109
111 def shrink(ui, repo, **opts):
110 def shrink(ui, repo, **opts):
112 """
111 """
113 Shrink revlog by re-ordering revisions. Will operate on manifest for
112 Shrink revlog by re-ordering revisions. Will operate on manifest for
114 the given repository if no other revlog is specified."""
113 the given repository if no other revlog is specified."""
115
114
116 # Unbuffer stdout for nice progress output.
117 sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
118
119 if not repo.local():
115 if not repo.local():
120 raise util.Abort(_('not a local repository: %s') % repo.root)
116 raise util.Abort(_('not a local repository: %s') % repo.root)
121
117
122 fn = opts.get('revlog')
118 fn = opts.get('revlog')
123 if not fn:
119 if not fn:
124 indexfn = repo.sjoin('00manifest.i')
120 indexfn = repo.sjoin('00manifest.i')
125 else:
121 else:
126 if not fn.endswith('.i'):
122 if not fn.endswith('.i'):
127 raise util.Abort(_('--revlog option must specify the revlog index '
123 raise util.Abort(_('--revlog option must specify the revlog index '
128 'file (*.i), not %s') % opts.get('revlog'))
124 'file (*.i), not %s') % opts.get('revlog'))
129
125
130 indexfn = os.path.realpath(fn)
126 indexfn = os.path.realpath(fn)
131 store = repo.sjoin('')
127 store = repo.sjoin('')
132 if not indexfn.startswith(store):
128 if not indexfn.startswith(store):
133 raise util.Abort(_('--revlog option must specify a revlog in %s, '
129 raise util.Abort(_('--revlog option must specify a revlog in %s, '
134 'not %s') % (store, indexfn))
130 'not %s') % (store, indexfn))
135
131
136 datafn = indexfn[:-2] + '.d'
132 datafn = indexfn[:-2] + '.d'
137 if not os.path.exists(indexfn):
133 if not os.path.exists(indexfn):
138 raise util.Abort(_('no such file: %s') % indexfn)
134 raise util.Abort(_('no such file: %s') % indexfn)
139 if '00changelog' in indexfn:
135 if '00changelog' in indexfn:
140 raise util.Abort(_('shrinking the changelog '
136 raise util.Abort(_('shrinking the changelog '
141 'will corrupt your repository'))
137 'will corrupt your repository'))
142 if not os.path.exists(datafn):
138 if not os.path.exists(datafn):
143 # This is just a lazy shortcut because I can't be bothered to
139 # This is just a lazy shortcut because I can't be bothered to
144 # handle all the special cases that entail from no .d file.
140 # handle all the special cases that entail from no .d file.
145 raise util.Abort(_('%s does not exist: revlog not big enough '
141 raise util.Abort(_('%s does not exist: revlog not big enough '
146 'to be worth shrinking') % datafn)
142 'to be worth shrinking') % datafn)
147
143
148 oldindexfn = indexfn + '.old'
144 oldindexfn = indexfn + '.old'
149 olddatafn = datafn + '.old'
145 olddatafn = datafn + '.old'
150 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
146 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
151 raise util.Abort(_('one or both of\n'
147 raise util.Abort(_('one or both of\n'
152 ' %s\n'
148 ' %s\n'
153 ' %s\n'
149 ' %s\n'
154 'exists from a previous run; please clean up '
150 'exists from a previous run; please clean up '
155 'before running again') % (oldindexfn, olddatafn))
151 'before running again') % (oldindexfn, olddatafn))
156
152
157 ui.write(_('shrinking %s\n') % indexfn)
153 ui.write(_('shrinking %s\n') % indexfn)
158 prefix = os.path.basename(indexfn)[:-1]
154 prefix = os.path.basename(indexfn)[:-1]
159 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
155 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
160 prefix=prefix,
156 prefix=prefix,
161 suffix='.i')
157 suffix='.i')
162 tmpdatafn = tmpindexfn[:-2] + '.d'
158 tmpdatafn = tmpindexfn[:-2] + '.d'
163 os.close(tmpfd)
159 os.close(tmpfd)
164
160
165 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
161 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
166 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
162 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
167
163
168 # Don't use repo.transaction(), because then things get hairy with
164 # Don't use repo.transaction(), because then things get hairy with
169 # paths: some need to be relative to .hg, and some need to be
165 # paths: some need to be relative to .hg, and some need to be
170 # absolute. Doing it this way keeps things simple: everything is an
166 # absolute. Doing it this way keeps things simple: everything is an
171 # absolute path.
167 # absolute path.
172 lock = repo.lock(wait=False)
168 lock = repo.lock(wait=False)
173 tr = transaction.transaction(ui.warn,
169 tr = transaction.transaction(ui.warn,
174 open,
170 open,
175 repo.sjoin('journal'))
171 repo.sjoin('journal'))
176
172
177 try:
173 try:
178 try:
174 try:
179 order = toposort(ui, r1)
175 order = toposort(ui, r1)
180 writerevs(ui, r1, r2, order, tr)
176 writerevs(ui, r1, r2, order, tr)
181 report(ui, datafn, tmpdatafn)
177 report(ui, datafn, tmpdatafn)
182 tr.close()
178 tr.close()
183 except:
179 except:
184 # Abort transaction first, so we truncate the files before
180 # Abort transaction first, so we truncate the files before
185 # deleting them.
181 # deleting them.
186 tr.abort()
182 tr.abort()
187 if os.path.exists(tmpindexfn):
183 if os.path.exists(tmpindexfn):
188 os.unlink(tmpindexfn)
184 os.unlink(tmpindexfn)
189 if os.path.exists(tmpdatafn):
185 if os.path.exists(tmpdatafn):
190 os.unlink(tmpdatafn)
186 os.unlink(tmpdatafn)
191 raise
187 raise
192 if not opts.get('dry_run'):
188 if not opts.get('dry_run'):
193 # Racy since both files cannot be renamed atomically
189 # Racy since both files cannot be renamed atomically
194 util.os_link(indexfn, oldindexfn)
190 util.os_link(indexfn, oldindexfn)
195 util.os_link(datafn, olddatafn)
191 util.os_link(datafn, olddatafn)
196 util.rename(tmpindexfn, indexfn)
192 util.rename(tmpindexfn, indexfn)
197 util.rename(tmpdatafn, datafn)
193 util.rename(tmpdatafn, datafn)
198 else:
194 else:
199 os.unlink(tmpindexfn)
195 os.unlink(tmpindexfn)
200 os.unlink(tmpdatafn)
196 os.unlink(tmpdatafn)
201 finally:
197 finally:
202 lock.release()
198 lock.release()
203
199
204 if not opts.get('dry_run'):
200 if not opts.get('dry_run'):
205 ui.write(_('note: old revlog saved in:\n'
201 ui.write(_('note: old revlog saved in:\n'
206 ' %s\n'
202 ' %s\n'
207 ' %s\n'
203 ' %s\n'
208 '(You can delete those files when you are satisfied that your\n'
204 '(You can delete those files when you are satisfied that your\n'
209 'repository is still sane. '
205 'repository is still sane. '
210 'Running \'hg verify\' is strongly recommended.)\n')
206 'Running \'hg verify\' is strongly recommended.)\n')
211 % (oldindexfn, olddatafn))
207 % (oldindexfn, olddatafn))
212
208
213 cmdtable = {
209 cmdtable = {
214 'shrink': (shrink,
210 'shrink': (shrink,
215 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
211 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
216 ('n', 'dry-run', None, _('do not shrink, simulate only')),
212 ('n', 'dry-run', None, _('do not shrink, simulate only')),
217 ],
213 ],
218 _('hg shrink [--revlog PATH]'))
214 _('hg shrink [--revlog PATH]'))
219 }
215 }
220
216
221 if __name__ == "__main__":
217 if __name__ == "__main__":
222 print "shrink-revlog.py is now an extension (see hg help extensions)"
218 print "shrink-revlog.py is now an extension (see hg help extensions)"
General Comments 0
You need to be logged in to leave comments. Login now