##// END OF EJS Templates
contrib: turn shrink-revlog.py into an extension
Dirkjan Ochtman -
r10215:9d79b8f5 default
parent child Browse files
Show More
@@ -1,221 +1,209 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 """\
3 """\
4 Reorder a revlog (by default the manifest file in the current
4 Reorder a revlog (by default the manifest file in the current
5 repository) to save space. Specifically, this topologically sorts the
5 repository) to save space. Specifically, this topologically sorts the
6 revisions in the revlog so that revisions on the same branch are adjacent
6 revisions in the revlog so that revisions on the same branch are adjacent
7 as much as possible. This is a workaround for the fact that Mercurial
7 as much as possible. This is a workaround for the fact that Mercurial
8 computes deltas relative to the previous revision rather than relative to a
8 computes deltas relative to the previous revision rather than relative to a
9 parent revision. This is *not* safe to run on a changelog.
9 parent revision. This is *not* safe to run on a changelog.
10 """
10 """
11
11
12 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
12 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
13 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
13 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
14 # renamed by Greg Ward <greg at gerg.ca>.
14 # renamed by Greg Ward <greg at gerg.ca>.
15
15
16 # XXX would be nice to have a way to verify the repository after shrinking,
16 # XXX would be nice to have a way to verify the repository after shrinking,
17 # e.g. by comparing "before" and "after" states of random changesets
17 # e.g. by comparing "before" and "after" states of random changesets
18 # (maybe: export before, shrink, export after, diff).
18 # (maybe: export before, shrink, export after, diff).
19
19
20 import sys, os, tempfile
20 import sys, os, tempfile
21 import optparse
21 import optparse
22 from mercurial import ui as ui_, hg, revlog, transaction, node, util
22 from mercurial import ui as ui_, hg, revlog, transaction, node, util
23 from mercurial import changegroup
23 from mercurial import changegroup
24
24
def toposort(ui, rl):
    """Return the revision numbers of ``rl`` in a topological order.

    Every revision appears after all of its non-null parents, and the
    children of a revision are scheduled right after it whenever all their
    other parents have already been emitted, so a line of development
    tends to stay contiguous in the result.

    ui -- object with a ``write(str)`` method, used for progress output
    rl -- revlog-like object: ``len(rl)``, iteration over revision numbers
          and ``rl.parentrevs(rev)`` are the only operations used

    Returns a list of revision numbers.
    """
    # Function-scope import so this block does not depend on the module's
    # import list.
    from collections import deque

    children = {}
    roots = []
    # build children and roots
    ui.write('reading %d revs ' % len(rl))
    try:
        for i in rl:
            children[i] = []
            parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
            # in case of duplicate parents
            if len(parents) == 2 and parents[0] == parents[1]:
                del parents[1]
            for p in parents:
                # relies on parents being iterated before their children
                assert p in children
                children[p].append(i)

            if not parents:
                roots.append(i)

            if i % 1000 == 0:
                ui.write('.')
    finally:
        ui.write('\n')

    # XXX this is a reimplementation of the 'branchsort' topo sort
    # algorithm in hgext.convert.convcmd... would be nice not to duplicate
    # the algorithm
    ui.write('sorting ...')
    # A deque gives O(1) pops from the front and cheap pushes onto the
    # front; the list version paid O(len(visit)) for both on every step.
    visit = deque(roots)
    ret = []
    while visit:
        i = visit.popleft()
        ret.append(i)
        if i not in children:
            # This only happens if some node's p1 == p2, which can
            # happen in the manifest in certain circumstances.
            continue
        ready = []
        for c in children.pop(i):
            # A parent still present in ``children`` has not been emitted.
            parents_unseen = [p for p in rl.parentrevs(c)
                              if p != node.nullrev and p in children]
            if not parents_unseen:
                ready.append(c)
        # Same ordering as ``visit = ready + visit``: the ready children go
        # to the front of the queue, in their original relative order.
        visit.extendleft(reversed(ready))
    ui.write('\n')
    return ret
72
72
def writerevs(ui, r1, r2, order, tr):
    """Copy the revisions of ``r1`` into ``r2`` in the given order.

    ui    -- object with a ``write(str)`` method, used for progress output
    r1    -- source revlog
    r2    -- destination revlog, receives the revisions via ``addgroup``
    order -- revision numbers of ``r1`` in the order they should be written
    tr    -- transaction handed through to ``r2.addgroup``
    """
    ui.write('writing %d revs ' % len(order))

    # Mutable cell for the callback counter (the nested function cannot
    # rebind a local of the enclosing function).
    calls = {'seen': 0}

    def progress(*args):
        # one dot on the first callback and on every 1000th after it
        seen = calls['seen']
        if seen % 1000 == 0:
            ui.write('.')
        calls['seen'] = seen + 1

    nodes = [r1.node(rev) for rev in order]

    # this is a bit ugly, but it works: the linkrev is carried through the
    # group as a 20-digit zero-padded decimal string and parsed back into
    # an int on the receiving side
    def lookup(n):
        return "%020d" % r1.linkrev(r1.rev(n))

    def unlookup(text):
        return int(text, 10)

    try:
        chunks = r1.group(nodes, lookup, progress)
        stream = changegroup.chunkiter(util.chunkbuffer(chunks))
        r2.addgroup(stream, unlookup, tr)
    finally:
        # always terminate the progress line, even on failure
        ui.write('\n')
94
94
def report(ui, olddatafn, newdatafn):
    """Write the old and new data file sizes and the resulting shrinkage.

    ui        -- object with a ``write(str)`` method
    olddatafn -- path of the original data file
    newdatafn -- path of the rewritten data file

    Raises OSError (from ``os.stat``) if either file does not exist.  An
    empty file on either side no longer raises ZeroDivisionError; the
    shrinkage line then reports "n/a" instead of a ratio.
    """
    oldsize = float(os.stat(olddatafn).st_size)
    newsize = float(os.stat(newdatafn).st_size)

    # argh: have to pass an int to %d, because a float >= 2^32
    # blows up under Python 2.5 or earlier
    ui.write('old file size: %12d bytes (%6.1f MiB)\n'
             % (int(oldsize), oldsize/1024/1024))
    ui.write('new file size: %12d bytes (%6.1f MiB)\n'
             % (int(newsize), newsize/1024/1024))

    if not oldsize or not newsize:
        # Both ratios below divide by one of the sizes; a zero-length file
        # must not turn a finished rewrite into a crash.
        ui.write('shrinkage: n/a (empty data file)\n')
        return

    shrink_percent = (oldsize - newsize) / oldsize * 100
    shrink_factor = oldsize / newsize
    ui.write('shrinkage: %.1f%% (%.1fx)\n' % (shrink_percent, shrink_factor))
109
109
def shrink(ui, repo, **opts):
    """
    Shrink revlog by re-ordering revisions. Will operate on manifest for
    the given repository if no other revlog is specified."""
    # (The docstring above is kept verbatim; developer notes live in
    # comments.)
    #
    # ui   -- Mercurial ui object, used for all output
    # repo -- repository object; must be local
    # opts -- command options; only 'revlog' (path of a *.i index file
    #         inside the repository store) is read
    #
    # Raises util.Abort for every precondition that is not met.  On
    # success the original files are kept next to the new ones with an
    # extra '.old' suffix.

    # Unbuffer stdout for nice progress output.
    # NOTE(review): this rebinds sys.stdout for the whole process.
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    if not repo.local():
        raise util.Abort('not a local repository: %s' % repo.root)

    fn = opts.get('revlog')
    if not fn:
        indexfn = repo.sjoin('00manifest.i')
    else:
        if not fn.endswith('.i'):
            raise util.Abort('--revlog option must specify the revlog index '
                             'file (*.i), not %s' % opts.get('revlog'))

        indexfn = os.path.realpath(fn)
        store = repo.sjoin('')
        if not indexfn.startswith(store):
            raise util.Abort('--revlog option must specify a revlog in %s, '
                             'not %s' % (store, indexfn))

    datafn = indexfn[:-2] + '.d'
    if not os.path.exists(indexfn):
        raise util.Abort('no such file: %s' % indexfn)
    if '00changelog' in indexfn:
        raise util.Abort('shrinking the changelog will corrupt your repository')
    if not os.path.exists(datafn):
        # This is just a lazy shortcut because I can't be bothered to
        # handle all the special cases that entail from no .d file.
        raise util.Abort('%s does not exist: revlog not big enough '
                         'to be worth shrinking' % datafn)

    oldindexfn = indexfn + '.old'
    olddatafn = datafn + '.old'
    if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
        raise util.Abort('one or both of\n'
                         ' %s\n'
                         ' %s\n'
                         'exists from a previous run; please clean up before '
                         'running again' % (oldindexfn, olddatafn))

    ui.write('shrinking %s\n' % indexfn)
    # Temporary index file in the same directory as the real one, so the
    # final os.rename() stays within one directory.
    prefix = os.path.basename(indexfn)[:-1]
    (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
                                           prefix=prefix,
                                           suffix='.i')
    tmpdatafn = tmpindexfn[:-2] + '.d'
    os.close(tmpfd)

    r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
    r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)

    lock = repo.lock(wait=False)
    try:
        # Don't use repo.transaction(), because then things get hairy with
        # paths: some need to be relative to .hg, and some need to be
        # absolute.  Doing it this way keeps things simple: everything is an
        # absolute path.
        #
        # The transaction is created inside the try block so that the
        # lock is released even if setting it up fails.
        tr = transaction.transaction(sys.stderr.write,
                                     open,
                                     repo.sjoin('journal'))
        try:
            order = toposort(ui, r1)
            writerevs(ui, r1, r2, order, tr)
            report(ui, datafn, tmpdatafn)
            tr.close()
        except:
            # Deliberately bare: clean up on *any* exit (including
            # KeyboardInterrupt) and re-raise.
            # Abort transaction first, so we truncate the files before
            # deleting them.
            tr.abort()
            if os.path.exists(tmpindexfn):
                os.unlink(tmpindexfn)
            if os.path.exists(tmpdatafn):
                os.unlink(tmpdatafn)
            raise

        # Swap the new revlog into place while the repository lock is
        # still held, so the files are not replaced under another
        # process that has taken the lock in the meantime.
        os.link(indexfn, oldindexfn)
        os.link(datafn, olddatafn)
        os.rename(tmpindexfn, indexfn)
        os.rename(tmpdatafn, datafn)
    finally:
        lock.release()

    ui.write('note: old revlog saved in:\n'
             ' %s\n'
             ' %s\n'
             '(You can delete those files when you are satisfied that your\n'
             'repository is still sane. '
             'Running \'hg verify\' is strongly recommended.)\n'
             % (oldindexfn, olddatafn))
215
204
# Command table for this extension: maps the command name to
# (function, option list, synopsis).  The single option entry reads as
# (short name, long name, default value, help text).
cmdtable = {}
cmdtable['shrink'] = (
    shrink,
    [('', 'revlog', '', 'shrink file')],
    'hg shrink [--revlog PATH]',
)
General Comments 0
You need to be logged in to leave comments. Login now