##// END OF EJS Templates
contrib: use ui to write in shrink-revlog.py
Dirkjan Ochtman -
r10213:9e6848f3 default
parent child Browse files
Show More
@@ -1,224 +1,220 b''
1 1 #!/usr/bin/env python
2 2
3 3 """\
4 4 Reorder a revlog (by default the manifest file in the current
5 5 repository) to save space. Specifically, this topologically sorts the
6 6 revisions in the revlog so that revisions on the same branch are adjacent
7 7 as much as possible. This is a workaround for the fact that Mercurial
8 8 computes deltas relative to the previous revision rather than relative to a
9 9 parent revision. This is *not* safe to run on a changelog.
10 10 """
11 11
12 12 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
13 13 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
14 14 # renamed by Greg Ward <greg at gerg.ca>.
15 15
16 16 # XXX would be nice to have a way to verify the repository after shrinking,
17 17 # e.g. by comparing "before" and "after" states of random changesets
18 18 # (maybe: export before, shrink, export after, diff).
19 19
20 20 import sys, os, tempfile
21 21 import optparse
22 22 from mercurial import ui as ui_, hg, revlog, transaction, node, util
23 23 from mercurial import changegroup
24 24
25 def toposort(rl):
26 write = sys.stdout.write
25 def toposort(ui, rl):
27 26
28 27 children = {}
29 28 root = []
30 29 # build children and roots
31 write('reading %d revs ' % len(rl))
30 ui.write('reading %d revs ' % len(rl))
32 31 try:
33 32 for i in rl:
34 33 children[i] = []
35 34 parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
36 35 # in case of duplicate parents
37 36 if len(parents) == 2 and parents[0] == parents[1]:
38 37 del parents[1]
39 38 for p in parents:
40 39 assert p in children
41 40 children[p].append(i)
42 41
43 42 if len(parents) == 0:
44 43 root.append(i)
45 44
46 45 if i % 1000 == 0:
47 write('.')
46 ui.write('.')
48 47 finally:
49 write('\n')
48 ui.write('\n')
50 49
51 50 # XXX this is a reimplementation of the 'branchsort' topo sort
52 51 # algorithm in hgext.convert.convcmd... would be nice not to duplicate
53 52 # the algorithm
54 write('sorting ...')
53 ui.write('sorting ...')
55 54 visit = root
56 55 ret = []
57 56 while visit:
58 57 i = visit.pop(0)
59 58 ret.append(i)
60 59 if i not in children:
61 60 # This only happens if some node's p1 == p2, which can
62 61 # happen in the manifest in certain circumstances.
63 62 continue
64 63 next = []
65 64 for c in children.pop(i):
66 65 parents_unseen = [p for p in rl.parentrevs(c)
67 66 if p != node.nullrev and p in children]
68 67 if len(parents_unseen) == 0:
69 68 next.append(c)
70 69 visit = next + visit
71 write('\n')
70 ui.write('\n')
72 71 return ret
73 72
74 def writerevs(r1, r2, order, tr):
75 write = sys.stdout.write
76 write('writing %d revs ' % len(order))
73 def writerevs(ui, r1, r2, order, tr):
77 74
75 ui.write('writing %d revs ' % len(order))
78 76 count = [0]
79 77 def progress(*args):
80 78 if count[0] % 1000 == 0:
81 write('.')
79 ui.write('.')
82 80 count[0] += 1
83 81
84 82 order = [r1.node(r) for r in order]
85 83
86 84 # this is a bit ugly, but it works
87 85 lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
88 86 unlookup = lambda x: int(x, 10)
89 87
90 88 try:
91 89 group = util.chunkbuffer(r1.group(order, lookup, progress))
92 90 chunkiter = changegroup.chunkiter(group)
93 91 r2.addgroup(chunkiter, unlookup, tr)
94 92 finally:
95 write('\n')
93 ui.write('\n')
96 94
97 def report(olddatafn, newdatafn):
95 def report(ui, olddatafn, newdatafn):
98 96 oldsize = float(os.stat(olddatafn).st_size)
99 97 newsize = float(os.stat(newdatafn).st_size)
100 98
101 99 # argh: have to pass an int to %d, because a float >= 2^32
102 100 # blows up under Python 2.5 or earlier
103 sys.stdout.write('old file size: %12d bytes (%6.1f MiB)\n'
104 % (int(oldsize), oldsize/1024/1024))
105 sys.stdout.write('new file size: %12d bytes (%6.1f MiB)\n'
106 % (int(newsize), newsize/1024/1024))
101 ui.write('old file size: %12d bytes (%6.1f MiB)\n'
102 % (int(oldsize), oldsize/1024/1024))
103 ui.write('new file size: %12d bytes (%6.1f MiB)\n'
104 % (int(newsize), newsize/1024/1024))
107 105
108 106 shrink_percent = (oldsize - newsize) / oldsize * 100
109 107 shrink_factor = oldsize / newsize
110 sys.stdout.write('shrinkage: %.1f%% (%.1fx)\n'
111 % (shrink_percent, shrink_factor))
108 ui.write('shrinkage: %.1f%% (%.1fx)\n' % (shrink_percent, shrink_factor))
112 109
113 110 def main():
114 111
115 112 # Unbuffer stdout for nice progress output.
116 113 sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
117 write = sys.stdout.write
118 114
119 115 parser = optparse.OptionParser(description=__doc__)
120 116 parser.add_option('-R', '--repository',
121 117 default=os.path.curdir,
122 118 metavar='REPO',
123 119 help='repository root directory [default: current dir]')
124 120 parser.add_option('--revlog',
125 121 metavar='FILE',
126 122 help='shrink FILE [default: REPO/.hg/store/00manifest.i]')
127 123 (options, args) = parser.parse_args()
128 124 if args:
129 125 parser.error('too many arguments')
130 126
131 127 # Open the specified repository.
132 128 ui = ui_.ui()
133 129 repo = hg.repository(ui, options.repository)
134 130 if not repo.local():
135 131 parser.error('not a local repository: %s' % options.repository)
136 132
137 133 if options.revlog is None:
138 134 indexfn = repo.sjoin('00manifest.i')
139 135 else:
140 136 if not options.revlog.endswith('.i'):
141 137 parser.error('--revlog option must specify the revlog index file '
142 138 '(*.i), not %s' % options.revlog)
143 139
144 140 indexfn = os.path.realpath(options.revlog)
145 141 store = repo.sjoin('')
146 142 if not indexfn.startswith(store):
147 143 parser.error('--revlog option must specify a revlog in %s, not %s'
148 144 % (store, indexfn))
149 145
150 146 datafn = indexfn[:-2] + '.d'
151 147 if not os.path.exists(indexfn):
152 148 parser.error('no such file: %s' % indexfn)
153 149 if '00changelog' in indexfn:
154 150 parser.error('shrinking the changelog will corrupt your repository')
155 151 if not os.path.exists(datafn):
156 152 # This is just a lazy shortcut because I can't be bothered to
157 153 # handle all the special cases that arise when there is no .d file.
158 154 parser.error('%s does not exist: revlog not big enough '
159 155 'to be worth shrinking' % datafn)
160 156
161 157 oldindexfn = indexfn + '.old'
162 158 olddatafn = datafn + '.old'
163 159 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
164 160 parser.error('one or both of\n'
165 161 ' %s\n'
166 162 ' %s\n'
167 163 'exists from a previous run; please clean up before '
168 164 'running again'
169 165 % (oldindexfn, olddatafn))
170 166
171 write('shrinking %s\n' % indexfn)
167 ui.write('shrinking %s\n' % indexfn)
172 168 prefix = os.path.basename(indexfn)[:-1]
173 169 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
174 170 prefix=prefix,
175 171 suffix='.i')
176 172 tmpdatafn = tmpindexfn[:-2] + '.d'
177 173 os.close(tmpfd)
178 174
179 175 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
180 176 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
181 177
182 178 # Don't use repo.transaction(), because then things get hairy with
183 179 # paths: some need to be relative to .hg, and some need to be
184 180 # absolute. Doing it this way keeps things simple: everything is an
185 181 # absolute path.
186 182 lock = repo.lock(wait=False)
187 183 tr = transaction.transaction(sys.stderr.write,
188 184 open,
189 185 repo.sjoin('journal'))
190 186
191 187 try:
192 188 try:
193 order = toposort(r1)
194 writerevs(r1, r2, order, tr)
195 report(datafn, tmpdatafn)
189 order = toposort(ui, r1)
190 writerevs(ui, r1, r2, order, tr)
191 report(ui, datafn, tmpdatafn)
196 192 tr.close()
197 193 except:
198 194 # Abort transaction first, so we truncate the files before
199 195 # deleting them.
200 196 tr.abort()
201 197 if os.path.exists(tmpindexfn):
202 198 os.unlink(tmpindexfn)
203 199 if os.path.exists(tmpdatafn):
204 200 os.unlink(tmpdatafn)
205 201 raise
206 202 finally:
207 203 lock.release()
208 204
209 205 os.link(indexfn, oldindexfn)
210 206 os.link(datafn, olddatafn)
211 207 os.rename(tmpindexfn, indexfn)
212 208 os.rename(tmpdatafn, datafn)
213 write('note: old revlog saved in:\n'
209 ui.write('note: old revlog saved in:\n'
214 210 ' %s\n'
215 211 ' %s\n'
216 212 '(You can delete those files when you are satisfied that your\n'
217 213 'repository is still sane. '
218 214 'Running \'hg verify\' is strongly recommended.)\n'
219 215 % (oldindexfn, olddatafn))
220 216
221 217 try:
222 218 main()
223 219 except KeyboardInterrupt:
224 220 sys.exit("interrupted")
General Comments 0
You need to be logged in to leave comments. Login now