##// END OF EJS Templates
contrib: use ui to write in shrink-revlog.py
Dirkjan Ochtman -
r10213:9e6848f3 default
parent child Browse files
Show More
@@ -1,224 +1,220 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 """\
3 """\
4 Reorder a revlog (by default the manifest file in the current
4 Reorder a revlog (by default the manifest file in the current
5 repository) to save space. Specifically, this topologically sorts the
5 repository) to save space. Specifically, this topologically sorts the
6 revisions in the revlog so that revisions on the same branch are adjacent
6 revisions in the revlog so that revisions on the same branch are adjacent
7 as much as possible. This is a workaround for the fact that Mercurial
7 as much as possible. This is a workaround for the fact that Mercurial
8 computes deltas relative to the previous revision rather than relative to a
8 computes deltas relative to the previous revision rather than relative to a
9 parent revision. This is *not* safe to run on a changelog.
9 parent revision. This is *not* safe to run on a changelog.
10 """
10 """
11
11
12 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
12 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
13 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
13 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
14 # renamed by Greg Ward <greg at gerg.ca>.
14 # renamed by Greg Ward <greg at gerg.ca>.
15
15
16 # XXX would be nice to have a way to verify the repository after shrinking,
16 # XXX would be nice to have a way to verify the repository after shrinking,
17 # e.g. by comparing "before" and "after" states of random changesets
17 # e.g. by comparing "before" and "after" states of random changesets
18 # (maybe: export before, shrink, export after, diff).
18 # (maybe: export before, shrink, export after, diff).
19
19
20 import sys, os, tempfile
20 import sys, os, tempfile
21 import optparse
21 import optparse
22 from mercurial import ui as ui_, hg, revlog, transaction, node, util
22 from mercurial import ui as ui_, hg, revlog, transaction, node, util
23 from mercurial import changegroup
23 from mercurial import changegroup
24
24
25 def toposort(rl):
25 def toposort(ui, rl):
26 write = sys.stdout.write
27
26
28 children = {}
27 children = {}
29 root = []
28 root = []
30 # build children and roots
29 # build children and roots
31 write('reading %d revs ' % len(rl))
30 ui.write('reading %d revs ' % len(rl))
32 try:
31 try:
33 for i in rl:
32 for i in rl:
34 children[i] = []
33 children[i] = []
35 parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
34 parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
36 # in case of duplicate parents
35 # in case of duplicate parents
37 if len(parents) == 2 and parents[0] == parents[1]:
36 if len(parents) == 2 and parents[0] == parents[1]:
38 del parents[1]
37 del parents[1]
39 for p in parents:
38 for p in parents:
40 assert p in children
39 assert p in children
41 children[p].append(i)
40 children[p].append(i)
42
41
43 if len(parents) == 0:
42 if len(parents) == 0:
44 root.append(i)
43 root.append(i)
45
44
46 if i % 1000 == 0:
45 if i % 1000 == 0:
47 write('.')
46 ui.write('.')
48 finally:
47 finally:
49 write('\n')
48 ui.write('\n')
50
49
51 # XXX this is a reimplementation of the 'branchsort' topo sort
50 # XXX this is a reimplementation of the 'branchsort' topo sort
52 # algorithm in hgext.convert.convcmd... would be nice not to duplicate
51 # algorithm in hgext.convert.convcmd... would be nice not to duplicate
53 # the algorithm
52 # the algorithm
54 write('sorting ...')
53 ui.write('sorting ...')
55 visit = root
54 visit = root
56 ret = []
55 ret = []
57 while visit:
56 while visit:
58 i = visit.pop(0)
57 i = visit.pop(0)
59 ret.append(i)
58 ret.append(i)
60 if i not in children:
59 if i not in children:
61 # This only happens if some node's p1 == p2, which can
60 # This only happens if some node's p1 == p2, which can
62 # happen in the manifest in certain circumstances.
61 # happen in the manifest in certain circumstances.
63 continue
62 continue
64 next = []
63 next = []
65 for c in children.pop(i):
64 for c in children.pop(i):
66 parents_unseen = [p for p in rl.parentrevs(c)
65 parents_unseen = [p for p in rl.parentrevs(c)
67 if p != node.nullrev and p in children]
66 if p != node.nullrev and p in children]
68 if len(parents_unseen) == 0:
67 if len(parents_unseen) == 0:
69 next.append(c)
68 next.append(c)
70 visit = next + visit
69 visit = next + visit
71 write('\n')
70 ui.write('\n')
72 return ret
71 return ret
73
72
74 def writerevs(r1, r2, order, tr):
73 def writerevs(ui, r1, r2, order, tr):
75 write = sys.stdout.write
76 write('writing %d revs ' % len(order))
77
74
75 ui.write('writing %d revs ' % len(order))
78 count = [0]
76 count = [0]
79 def progress(*args):
77 def progress(*args):
80 if count[0] % 1000 == 0:
78 if count[0] % 1000 == 0:
81 write('.')
79 ui.write('.')
82 count[0] += 1
80 count[0] += 1
83
81
84 order = [r1.node(r) for r in order]
82 order = [r1.node(r) for r in order]
85
83
86 # this is a bit ugly, but it works
84 # this is a bit ugly, but it works
87 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
85 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
88 unlookup = lambda x: int(x, 10)
86 unlookup = lambda x: int(x, 10)
89
87
90 try:
88 try:
91 group = util.chunkbuffer(r1.group(order, lookup, progress))
89 group = util.chunkbuffer(r1.group(order, lookup, progress))
92 chunkiter = changegroup.chunkiter(group)
90 chunkiter = changegroup.chunkiter(group)
93 r2.addgroup(chunkiter, unlookup, tr)
91 r2.addgroup(chunkiter, unlookup, tr)
94 finally:
92 finally:
95 write('\n')
93 ui.write('\n')
96
94
97 def report(olddatafn, newdatafn):
95 def report(ui, olddatafn, newdatafn):
98 oldsize = float(os.stat(olddatafn).st_size)
96 oldsize = float(os.stat(olddatafn).st_size)
99 newsize = float(os.stat(newdatafn).st_size)
97 newsize = float(os.stat(newdatafn).st_size)
100
98
101 # argh: have to pass an int to %d, because a float >= 2^32
99 # argh: have to pass an int to %d, because a float >= 2^32
102 # blows up under Python 2.5 or earlier
100 # blows up under Python 2.5 or earlier
103 sys.stdout.write('old file size: %12d bytes (%6.1f MiB)\n'
101 ui.write('old file size: %12d bytes (%6.1f MiB)\n'
104 % (int(oldsize), oldsize/1024/1024))
102 % (int(oldsize), oldsize/1024/1024))
105 sys.stdout.write('new file size: %12d bytes (%6.1f MiB)\n'
103 ui.write('new file size: %12d bytes (%6.1f MiB)\n'
106 % (int(newsize), newsize/1024/1024))
104 % (int(newsize), newsize/1024/1024))
107
105
108 shrink_percent = (oldsize - newsize) / oldsize * 100
106 shrink_percent = (oldsize - newsize) / oldsize * 100
109 shrink_factor = oldsize / newsize
107 shrink_factor = oldsize / newsize
110 sys.stdout.write('shrinkage: %.1f%% (%.1fx)\n'
108 ui.write('shrinkage: %.1f%% (%.1fx)\n' % (shrink_percent, shrink_factor))
111 % (shrink_percent, shrink_factor))
112
109
113 def main():
110 def main():
114
111
115 # Unbuffer stdout for nice progress output.
112 # Unbuffer stdout for nice progress output.
116 sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
113 sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
117 write = sys.stdout.write
118
114
119 parser = optparse.OptionParser(description=__doc__)
115 parser = optparse.OptionParser(description=__doc__)
120 parser.add_option('-R', '--repository',
116 parser.add_option('-R', '--repository',
121 default=os.path.curdir,
117 default=os.path.curdir,
122 metavar='REPO',
118 metavar='REPO',
123 help='repository root directory [default: current dir]')
119 help='repository root directory [default: current dir]')
124 parser.add_option('--revlog',
120 parser.add_option('--revlog',
125 metavar='FILE',
121 metavar='FILE',
126 help='shrink FILE [default: REPO/hg/store/00manifest.i]')
122 help='shrink FILE [default: REPO/hg/store/00manifest.i]')
127 (options, args) = parser.parse_args()
123 (options, args) = parser.parse_args()
128 if args:
124 if args:
129 parser.error('too many arguments')
125 parser.error('too many arguments')
130
126
131 # Open the specified repository.
127 # Open the specified repository.
132 ui = ui_.ui()
128 ui = ui_.ui()
133 repo = hg.repository(ui, options.repository)
129 repo = hg.repository(ui, options.repository)
134 if not repo.local():
130 if not repo.local():
135 parser.error('not a local repository: %s' % options.repository)
131 parser.error('not a local repository: %s' % options.repository)
136
132
137 if options.revlog is None:
133 if options.revlog is None:
138 indexfn = repo.sjoin('00manifest.i')
134 indexfn = repo.sjoin('00manifest.i')
139 else:
135 else:
140 if not options.revlog.endswith('.i'):
136 if not options.revlog.endswith('.i'):
141 parser.error('--revlog option must specify the revlog index file '
137 parser.error('--revlog option must specify the revlog index file '
142 '(*.i), not %s' % options.revlog)
138 '(*.i), not %s' % options.revlog)
143
139
144 indexfn = os.path.realpath(options.revlog)
140 indexfn = os.path.realpath(options.revlog)
145 store = repo.sjoin('')
141 store = repo.sjoin('')
146 if not indexfn.startswith(store):
142 if not indexfn.startswith(store):
147 parser.error('--revlog option must specify a revlog in %s, not %s'
143 parser.error('--revlog option must specify a revlog in %s, not %s'
148 % (store, indexfn))
144 % (store, indexfn))
149
145
150 datafn = indexfn[:-2] + '.d'
146 datafn = indexfn[:-2] + '.d'
151 if not os.path.exists(indexfn):
147 if not os.path.exists(indexfn):
152 parser.error('no such file: %s' % indexfn)
148 parser.error('no such file: %s' % indexfn)
153 if '00changelog' in indexfn:
149 if '00changelog' in indexfn:
154 parser.error('shrinking the changelog will corrupt your repository')
150 parser.error('shrinking the changelog will corrupt your repository')
155 if not os.path.exists(datafn):
151 if not os.path.exists(datafn):
156 # This is just a lazy shortcut because I can't be bothered to
152 # This is just a lazy shortcut because I can't be bothered to
157 # handle all the special cases that arise from having no .d file.
153 # handle all the special cases that arise from having no .d file.
158 parser.error('%s does not exist: revlog not big enough '
154 parser.error('%s does not exist: revlog not big enough '
159 'to be worth shrinking' % datafn)
155 'to be worth shrinking' % datafn)
160
156
161 oldindexfn = indexfn + '.old'
157 oldindexfn = indexfn + '.old'
162 olddatafn = datafn + '.old'
158 olddatafn = datafn + '.old'
163 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
159 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
164 parser.error('one or both of\n'
160 parser.error('one or both of\n'
165 ' %s\n'
161 ' %s\n'
166 ' %s\n'
162 ' %s\n'
167 'exists from a previous run; please clean up before '
163 'exists from a previous run; please clean up before '
168 'running again'
164 'running again'
169 % (oldindexfn, olddatafn))
165 % (oldindexfn, olddatafn))
170
166
171 write('shrinking %s\n' % indexfn)
167 ui.write('shrinking %s\n' % indexfn)
172 prefix = os.path.basename(indexfn)[:-1]
168 prefix = os.path.basename(indexfn)[:-1]
173 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
169 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
174 prefix=prefix,
170 prefix=prefix,
175 suffix='.i')
171 suffix='.i')
176 tmpdatafn = tmpindexfn[:-2] + '.d'
172 tmpdatafn = tmpindexfn[:-2] + '.d'
177 os.close(tmpfd)
173 os.close(tmpfd)
178
174
179 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
175 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
180 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
176 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
181
177
182 # Don't use repo.transaction(), because then things get hairy with
178 # Don't use repo.transaction(), because then things get hairy with
183 # paths: some need to be relative to .hg, and some need to be
179 # paths: some need to be relative to .hg, and some need to be
184 # absolute. Doing it this way keeps things simple: everything is an
180 # absolute. Doing it this way keeps things simple: everything is an
185 # absolute path.
181 # absolute path.
186 lock = repo.lock(wait=False)
182 lock = repo.lock(wait=False)
187 tr = transaction.transaction(sys.stderr.write,
183 tr = transaction.transaction(sys.stderr.write,
188 open,
184 open,
189 repo.sjoin('journal'))
185 repo.sjoin('journal'))
190
186
191 try:
187 try:
192 try:
188 try:
193 order = toposort(r1)
189 order = toposort(ui, r1)
194 writerevs(r1, r2, order, tr)
190 writerevs(ui, r1, r2, order, tr)
195 report(datafn, tmpdatafn)
191 report(ui, datafn, tmpdatafn)
196 tr.close()
192 tr.close()
197 except:
193 except:
198 # Abort transaction first, so we truncate the files before
194 # Abort transaction first, so we truncate the files before
199 # deleting them.
195 # deleting them.
200 tr.abort()
196 tr.abort()
201 if os.path.exists(tmpindexfn):
197 if os.path.exists(tmpindexfn):
202 os.unlink(tmpindexfn)
198 os.unlink(tmpindexfn)
203 if os.path.exists(tmpdatafn):
199 if os.path.exists(tmpdatafn):
204 os.unlink(tmpdatafn)
200 os.unlink(tmpdatafn)
205 raise
201 raise
206 finally:
202 finally:
207 lock.release()
203 lock.release()
208
204
209 os.link(indexfn, oldindexfn)
205 os.link(indexfn, oldindexfn)
210 os.link(datafn, olddatafn)
206 os.link(datafn, olddatafn)
211 os.rename(tmpindexfn, indexfn)
207 os.rename(tmpindexfn, indexfn)
212 os.rename(tmpdatafn, datafn)
208 os.rename(tmpdatafn, datafn)
213 write('note: old revlog saved in:\n'
209 ui.write('note: old revlog saved in:\n'
214 ' %s\n'
210 ' %s\n'
215 ' %s\n'
211 ' %s\n'
216 '(You can delete those files when you are satisfied that your\n'
212 '(You can delete those files when you are satisfied that your\n'
217 'repository is still sane. '
213 'repository is still sane. '
218 'Running \'hg verify\' is strongly recommended.)\n'
214 'Running \'hg verify\' is strongly recommended.)\n'
219 % (oldindexfn, olddatafn))
215 % (oldindexfn, olddatafn))
220
216
221 try:
217 try:
222 main()
218 main()
223 except KeyboardInterrupt:
219 except KeyboardInterrupt:
224 sys.exit("interrupted")
220 sys.exit("interrupted")
General Comments 0
You need to be logged in to leave comments. Login now