##// END OF EJS Templates
shrink-revlog: add --dry-run option
Patrick Mezard -
r10241:4b2a086b default
parent child Browse files
Show More
@@ -1,215 +1,222 b''
1 1 #!/usr/bin/env python
2 2
3 3 """\
4 4 reorder a revlog (the manifest by default) to save space
5 5
6 6 Specifically, this topologically sorts the revisions in the revlog so that
7 7 revisions on the same branch are adjacent as much as possible. This is a
8 8 workaround for the fact that Mercurial computes deltas relative to the
9 9 previous revision rather than relative to a parent revision.
10 10
11 11 This is *not* safe to run on a changelog.
12 12 """
13 13
14 14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 16 # renamed by Greg Ward <greg at gerg.ca>.
17 17
18 18 # XXX would be nice to have a way to verify the repository after shrinking,
19 19 # e.g. by comparing "before" and "after" states of random changesets
20 20 # (maybe: export before, shrink, export after, diff).
21 21
22 22 import sys, os, tempfile
23 23 import optparse
24 24 from mercurial import ui as ui_, hg, revlog, transaction, node, util
25 25 from mercurial import changegroup
26 26
def toposort(ui, rl):
    """Return the revision numbers of ``rl`` in a branch-friendly topo order.

    ``ui`` only needs a ``write(str)`` method; it receives progress output.
    ``rl`` must be iterable over its revision numbers, support ``len()``
    and provide ``parentrevs(rev)``.

    The result is a list in which every revision appears after all of its
    (non-null) parents, and in which the children of a revision are
    emitted right after it whenever they have no other pending parent.
    """
    nullrev = node.nullrev

    # Pass 1: map every revision to the list of its children and collect
    # the revisions that have no real parent at all.
    kids = {}
    roots = []
    ui.write('reading %d revs ' % len(rl))
    try:
        for rev in rl:
            kids[rev] = []
            real = [p for p in rl.parentrevs(rev) if p != nullrev]
            # p1 == p2: register the revision with that parent only once
            if len(real) == 2 and real[0] == real[1]:
                real.pop()
            for p in real:
                # parents are expected to have been iterated before
                # their children
                assert p in kids
                kids[p].append(rev)

            if not real:
                roots.append(rev)

            # one progress dot per 1000 revisions
            if rev % 1000 == 0:
                ui.write('.')
    finally:
        ui.write('\n')

    def blocked(rev):
        # True while some parent of rev is still a key of kids, i.e. has
        # not been emitted yet.
        for p in rl.parentrevs(rev):
            if p != nullrev and p in kids:
                return True
        return False

    # Pass 2.
    # XXX this is a reimplementation of the 'branchsort' topo sort
    # algorithm in hgext.convert.convcmd... would be nice not to duplicate
    # the algorithm
    ui.write('sorting ...')
    pending = roots
    ordered = []
    while pending:
        rev = pending.pop(0)
        ordered.append(rev)
        if rev not in kids:
            # This only happens if some node's p1 == p2, which can
            # happen in the manifest in certain circumstances.
            continue
        # Removing rev from kids is what marks it as emitted; only then
        # can its children become unblocked.
        ready = [c for c in kids.pop(rev) if not blocked(c)]
        # Unblocked children go to the front of the queue so that a
        # branch is followed before switching to another one.
        pending = ready + pending
    ui.write('\n')
    return ordered
74 74
def writerevs(ui, r1, r2, order, tr):
    """Copy the revisions of ``r1`` into ``r2`` in the given ``order``.

    ``order`` is a sequence of revision numbers of ``r1``.  The revisions
    are streamed out of ``r1`` with ``r1.group()`` and fed to
    ``r2.addgroup()`` under transaction ``tr``.  Progress dots go to
    ``ui``; the terminating newline is written even on failure.
    """
    ui.write('writing %d revs ' % len(order))

    # A one-element list stands in for a rebindable counter that the
    # nested callback can update.
    emitted = [0]

    def progress(*args):
        # one dot for every 1000 calls made by r1.group()
        if emitted[0] % 1000 == 0:
            ui.write('.')
        emitted[0] += 1

    # this is a bit ugly, but it works: the linkrev travels through the
    # group as a zero-padded 20-character decimal string and is parsed
    # back into an int on the receiving side.
    def lookup(n):
        return "%020d" % r1.linkrev(r1.rev(n))

    def unlookup(text):
        return int(text, 10)

    nodes = [r1.node(r) for r in order]

    try:
        stream = util.chunkbuffer(r1.group(nodes, lookup, progress))
        r2.addgroup(changegroup.chunkiter(stream), unlookup, tr)
    finally:
        ui.write('\n')
96 96
def report(ui, olddatafn, newdatafn):
    """Write a size comparison of two files to ``ui``.

    ``olddatafn`` and ``newdatafn`` are paths that are passed to
    ``os.stat()``; ``ui`` only needs a ``write(str)`` method.  Three lines
    are written: the old size, the new size and the shrinkage.

    The shrinkage line degrades gracefully instead of raising
    ZeroDivisionError when one of the files is empty:

    - old file empty: the percentage is undefined, so 'n/a' is reported;
    - only the new file empty: the percentage is reported without the
      (infinite) ratio.

    Raises OSError if either file cannot be stat'ed.
    """
    oldsize = float(os.stat(olddatafn).st_size)
    newsize = float(os.stat(newdatafn).st_size)

    # argh: have to pass an int to %d, because a float >= 2^32
    # blows up under Python 2.5 or earlier
    ui.write('old file size: %12d bytes (%6.1f MiB)\n'
             % (int(oldsize), oldsize / 1024 / 1024))
    ui.write('new file size: %12d bytes (%6.1f MiB)\n'
             % (int(newsize), newsize / 1024 / 1024))

    if not oldsize:
        # nothing to relate the new size to
        ui.write('shrinkage: n/a (old file is empty)\n')
        return

    shrink_percent = (oldsize - newsize) / oldsize * 100
    if newsize:
        shrink_factor = oldsize / newsize
        ui.write('shrinkage: %.1f%% (%.1fx)\n'
                 % (shrink_percent, shrink_factor))
    else:
        # the ratio old/new would divide by zero
        ui.write('shrinkage: %.1f%%\n' % shrink_percent)
111 111
def shrink(ui, repo, **opts):
    """
    Shrink revlog by re-ordering revisions. Will operate on manifest for
    the given repository if no other revlog is specified."""
    # Options read from opts:
    #   'revlog'  - path of the index (.i) file to shrink; empty means
    #               the manifest of repo
    #   'dry_run' - when true, do everything except replacing the revlog
    #
    # Raises util.Abort for every precondition that is not met.

    # Unbuffer stdout for nice progress output.
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    if not repo.local():
        raise util.Abort('not a local repository: %s' % repo.root)

    fn = opts.get('revlog')
    if not fn:
        indexfn = repo.sjoin('00manifest.i')
    else:
        if not fn.endswith('.i'):
            raise util.Abort('--revlog option must specify the revlog index '
                             'file (*.i), not %s' % opts.get('revlog'))

        indexfn = os.path.realpath(fn)
        store = repo.sjoin('')
        if not indexfn.startswith(store):
            raise util.Abort('--revlog option must specify a revlog in %s, '
                             'not %s' % (store, indexfn))

    datafn = indexfn[:-2] + '.d'
    if not os.path.exists(indexfn):
        raise util.Abort('no such file: %s' % indexfn)
    if '00changelog' in indexfn:
        raise util.Abort('shrinking the changelog will corrupt your repository')
    if not os.path.exists(datafn):
        # This is just a lazy shortcut because I can't be bothered to
        # handle all the special cases that entail from no .d file.
        raise util.Abort('%s does not exist: revlog not big enough '
                         'to be worth shrinking' % datafn)

    oldindexfn = indexfn + '.old'
    olddatafn = datafn + '.old'
    if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
        raise util.Abort('one or both of\n'
                         '  %s\n'
                         '  %s\n'
                         'exists from a previous run; please clean up before '
                         'running again' % (oldindexfn, olddatafn))

    dryrun = opts.get('dry_run')

    ui.write('shrinking %s\n' % indexfn)

    # Take the lock before creating anything on disk: if it is held by
    # someone else we fail here without leaving a stray temporary file in
    # the store.  Everything below runs under try/finally so the lock is
    # released on every exit path.
    lock = repo.lock(wait=False)
    try:
        prefix = os.path.basename(indexfn)[:-1]
        (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
                                               prefix=prefix,
                                               suffix='.i')
        tmpdatafn = tmpindexfn[:-2] + '.d'
        os.close(tmpfd)

        tr = None
        try:
            r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
            r2 = revlog.revlog(util.opener(os.getcwd(), audit=False),
                               tmpindexfn)

            # Don't use repo.transaction(), because then things get hairy
            # with paths: some need to be relative to .hg, and some need
            # to be absolute.  Doing it this way keeps things simple:
            # everything is an absolute path.
            tr = transaction.transaction(ui.warn,
                                         open,
                                         repo.sjoin('journal'))

            order = toposort(ui, r1)
            writerevs(ui, r1, r2, order, tr)
            report(ui, datafn, tmpdatafn)
            tr.close()
        except:
            # Deliberately bare: the cleanup must also run on
            # KeyboardInterrupt, and the exception is re-raised below.
            #
            # Abort transaction first, so we truncate the files before
            # deleting them.  tr is None when the failure happened before
            # the transaction could be created.
            if tr is not None:
                tr.abort()
            for tmpfn in (tmpindexfn, tmpdatafn):
                if os.path.exists(tmpfn):
                    os.unlink(tmpfn)
            raise

        if not dryrun:
            # Racy since both files cannot be renamed atomically
            util.os_link(indexfn, oldindexfn)
            util.os_link(datafn, olddatafn)
            util.rename(tmpindexfn, indexfn)
            util.rename(tmpdatafn, datafn)
        else:
            # simulation only: throw the reordered revlog away
            os.unlink(tmpindexfn)
            os.unlink(tmpdatafn)
    finally:
        lock.release()

    if not dryrun:
        ui.write('note: old revlog saved in:\n'
                 '  %s\n'
                 '  %s\n'
                 '(You can delete those files when you are satisfied that your\n'
                 'repository is still sane.  '
                 'Running \'hg verify\' is strongly recommended.)\n'
                 % (oldindexfn, olddatafn))
207 212
# Command table of this extension: command name -> (function, options,
# synopsis).  Each option is (short name, long name, default, help text).
# NOTE(review): the synopsis does not mention -n/--dry-run.
cmdtable = {
    'shrink': (
        shrink,
        [
            ('', 'revlog', '', 'index (.i) file of the revlog to shrink'),
            ('n', 'dry-run', None, 'do not shrink, simulate only'),
        ],
        'hg shrink [--revlog PATH]',
    ),
}

# Running the file directly only prints a pointer to the extension
# mechanism.
if __name__ == "__main__":
    print("shrink-revlog.py is now an extension (see hg help extensions)")
General Comments 0
You need to be logged in to leave comments. Login now