##// END OF EJS Templates
shrink-revlog: remove unneeded imports and useless code
Benoit Boissinot -
r10509:3e7e789d stable
parent child Browse files
Show More
@@ -1,222 +1,218
1 1 #!/usr/bin/env python
2 2
3 3 """\
4 4 reorder a revlog (the manifest by default) to save space
5 5
6 6 Specifically, this topologically sorts the revisions in the revlog so that
7 7 revisions on the same branch are adjacent as much as possible. This is a
8 8 workaround for the fact that Mercurial computes deltas relative to the
9 9 previous revision rather than relative to a parent revision.
10 10
11 11 This is *not* safe to run on a changelog.
12 12 """
13 13
14 14 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
15 15 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
16 16 # renamed by Greg Ward <greg at gerg.ca>.
17 17
18 18 # XXX would be nice to have a way to verify the repository after shrinking,
19 19 # e.g. by comparing "before" and "after" states of random changesets
20 20 # (maybe: export before, shrink, export after, diff).
21 21
22 import sys, os, tempfile
23 import optparse
24 from mercurial import ui as ui_, hg, revlog, transaction, node, util
22 import os, tempfile
23 from mercurial import revlog, transaction, node, util
25 24 from mercurial import changegroup
26 25 from mercurial.i18n import _
27 26
def toposort(ui, rl):
    """Return the revisions of rl in a branch-grouped topological order.

    Every revision is emitted after all of its (non-null) parents.  The
    children of a revision are emitted directly after it whenever their
    other parent, if any, has already been emitted, so that revisions on
    the same branch end up adjacent as much as possible.

    ui -- used for status() and progress() output
    rl -- the revlog to sort; it is iterated for revision numbers and
          queried through parentrevs()

    Returns a list of revision numbers.
    """
    # Local import: keeps this function independent of the module's
    # import block.
    from collections import deque

    children = {}
    roots = []
    total = len(rl)

    # build children and roots
    ui.status(_('reading revs\n'))
    try:
        for rev in rl:
            ui.progress(_('reading'), rev, total=total)
            children[rev] = []
            parents = [p for p in rl.parentrevs(rev) if p != node.nullrev]
            # in case of duplicate parents
            if len(parents) == 2 and parents[0] == parents[1]:
                del parents[1]
            for p in parents:
                # parents are expected to have been seen before their
                # children while iterating the revlog
                assert p in children
                children[p].append(rev)

            if not parents:
                roots.append(rev)
    finally:
        ui.progress(_('reading'), None, total=total)

    # XXX this is a reimplementation of the 'branchsort' topo sort
    # algorithm in hgext.convert.convcmd... would be nice not to duplicate
    # the algorithm
    ui.status(_('sorting revs\n'))
    # A deque gives O(1) removal and insertion at the front; a plain list
    # would shift every pending entry on each step.
    visit = deque(roots)
    ret = []
    while visit:
        rev = visit.popleft()
        ret.append(rev)
        if rev not in children:
            # This only happens if some node's p1 == p2, which can
            # happen in the manifest in certain circumstances.
            continue
        ready = []
        # Popping rev from children marks it as emitted: a parent that is
        # still a key of children has not been emitted yet.
        for c in children.pop(rev):
            parents_unseen = [p for p in rl.parentrevs(c)
                              if p != node.nullrev and p in children]
            if not parents_unseen:
                ready.append(c)
        # Queue the unblocked children at the front, keeping their order,
        # so the walk continues along the current branch first.
        # (extendleft() inserts one by one, hence the reversed().)
        visit.extendleft(reversed(ready))
    return ret
72 71
def writerevs(ui, r1, r2, order, tr):
    """Write the revisions of r1 into r2, in the given order.

    ui    -- used for status() and progress() output
    r1    -- source revlog
    r2    -- destination revlog; receives the revisions via addgroup()
    order -- revision numbers of r1, in the order they are to be written
    tr    -- transaction handed to r2.addgroup()
    """
    ui.status(_('writing revs\n'))

    total = len(order)
    # One-element list so the nested function can update the counter.
    count = [0]
    def progress(*args):
        ui.progress(_('writing'), count[0], total=total)
        count[0] += 1

    # r1.group() is driven by nodes rather than revision numbers.
    order = [r1.node(r) for r in order]

    # this is a bit ugly, but it works: the link revision is carried
    # through the group as a zero-padded 20-character decimal string
    # (lookup) and turned back into an integer on the receiving side
    # (unlookup).
    lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
    unlookup = lambda x: int(x, 10)

    try:
        group = util.chunkbuffer(r1.group(order, lookup, progress))
        chunkiter = changegroup.chunkiter(group)
        r2.addgroup(chunkiter, unlookup, tr)
    finally:
        # Pass the total by keyword, like every other ui.progress() call
        # in this file; the third positional parameter is not the total.
        ui.progress(_('writing'), None, total=total)
94 93
def report(ui, olddatafn, newdatafn):
    """Write the sizes of the old and new data files, and the gain, to ui.

    ui        -- used for write()
    olddatafn -- path of the data file before shrinking
    newdatafn -- path of the data file after shrinking

    Both files must exist; os.stat() raises OSError otherwise.
    """
    oldsize = float(os.stat(olddatafn).st_size)
    newsize = float(os.stat(newdatafn).st_size)

    # argh: have to pass an int to %d, because a float >= 2^32
    # blows up under Python 2.5 or earlier
    ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
             % (int(oldsize), oldsize / 1024 / 1024))
    ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
             % (int(newsize), newsize / 1024 / 1024))

    # Guard both ratios: an empty data file on either side would
    # otherwise abort the report with a ZeroDivisionError.
    if oldsize:
        shrink_percent = (oldsize - newsize) / oldsize * 100
    else:
        shrink_percent = 0.0
    if newsize:
        shrink_factor = oldsize / newsize
    elif oldsize:
        shrink_factor = float('inf')
    else:
        # both files are empty: nothing was gained or lost
        shrink_factor = 1.0
    ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
             % (shrink_percent, shrink_factor))
110 109
def shrink(ui, repo, **opts):
    """
    Shrink revlog by re-ordering revisions. Will operate on manifest for
    the given repository if no other revlog is specified."""
    # NOTE(review): the docstring above is left verbatim; since this
    # function is registered in cmdtable it is presumably shown as the
    # command's help text -- confirm before rewording it.
    #
    # ui   -- used for write() and, through the helpers, status/progress
    # repo -- repository whose store holds the revlog; must be local
    # opts -- 'revlog': path of the index (.i) file to shrink (defaults
    #         to the manifest); 'dry_run': build the new revlog and
    #         report, but do not replace the original
    #
    # Raises util.Abort when the preconditions checked below do not hold.

    if not repo.local():
        raise util.Abort(_('not a local repository: %s') % repo.root)

    fn = opts.get('revlog')
    if not fn:
        indexfn = repo.sjoin('00manifest.i')
    else:
        if not fn.endswith('.i'):
            raise util.Abort(_('--revlog option must specify the revlog index '
                               'file (*.i), not %s') % opts.get('revlog'))

        indexfn = os.path.realpath(fn)
        store = repo.sjoin('')
        if not indexfn.startswith(store):
            raise util.Abort(_('--revlog option must specify a revlog in %s, '
                               'not %s') % (store, indexfn))

    # "foo.i" -> "foo.d"
    datafn = indexfn[:-2] + '.d'
    if not os.path.exists(indexfn):
        raise util.Abort(_('no such file: %s') % indexfn)
    if '00changelog' in indexfn:
        raise util.Abort(_('shrinking the changelog '
                           'will corrupt your repository'))
    if not os.path.exists(datafn):
        # This is just a lazy shortcut because I can't be bothered to
        # handle all the special cases that entail from no .d file.
        raise util.Abort(_('%s does not exist: revlog not big enough '
                           'to be worth shrinking') % datafn)

    oldindexfn = indexfn + '.old'
    olddatafn = datafn + '.old'
    if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
        raise util.Abort(_('one or both of\n'
                           '  %s\n'
                           '  %s\n'
                           'exists from a previous run; please clean up '
                           'before running again') % (oldindexfn, olddatafn))

    ui.write(_('shrinking %s\n') % indexfn)

    # Take the lock before creating anything on disk, and do all further
    # setup inside try blocks: a failure to lock then leaves no stray
    # temporary file behind, and a failure during setup releases the lock.
    lock = repo.lock(wait=False)
    try:
        # Temporary index lives next to the real one and is named
        # "<basename-without-the-i><random>.i"; its data file shares the
        # same stem.
        prefix = os.path.basename(indexfn)[:-1]
        (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
                                               prefix=prefix,
                                               suffix='.i')
        os.close(tmpfd)
        tmpdatafn = tmpindexfn[:-2] + '.d'

        tr = None
        try:
            r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
            r2 = revlog.revlog(util.opener(os.getcwd(), audit=False),
                               tmpindexfn)

            # Don't use repo.transaction(), because then things get hairy
            # with paths: some need to be relative to .hg, and some need
            # to be absolute. Doing it this way keeps things simple:
            # everything is an absolute path.
            tr = transaction.transaction(ui.warn,
                                         open,
                                         repo.sjoin('journal'))

            order = toposort(ui, r1)
            writerevs(ui, r1, r2, order, tr)
            report(ui, datafn, tmpdatafn)
            tr.close()
        except:
            # Deliberately bare: clean up on any failure (including an
            # interrupt), then re-raise.
            # Abort transaction first, so we truncate the files before
            # deleting them.
            if tr is not None:
                tr.abort()
            if os.path.exists(tmpindexfn):
                os.unlink(tmpindexfn)
            if os.path.exists(tmpdatafn):
                os.unlink(tmpdatafn)
            raise
        if not opts.get('dry_run'):
            # Racy since both files cannot be renamed atomically
            util.os_link(indexfn, oldindexfn)
            util.os_link(datafn, olddatafn)
            util.rename(tmpindexfn, indexfn)
            util.rename(tmpdatafn, datafn)
        else:
            os.unlink(tmpindexfn)
            os.unlink(tmpdatafn)
    finally:
        lock.release()

    if not opts.get('dry_run'):
        ui.write(_('note: old revlog saved in:\n'
                   '  %s\n'
                   '  %s\n'
                   '(You can delete those files when you are satisfied that your\n'
                   'repository is still sane.  '
                   'Running \'hg verify\' is strongly recommended.)\n')
                 % (oldindexfn, olddatafn))
212 208
# Command table for this extension: maps the command name to a tuple of
# (function, option list, synopsis).
# NOTE(review): each option entry looks like (short name, long name,
# default, help text), following Mercurial's cmdtable convention -- confirm.
# shrink() reads the 'dry-run' option under the key 'dry_run'.
cmdtable = {
    'shrink': (shrink,
               [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
                ('n', 'dry-run', None, _('do not shrink, simulate only')),
                ],
               _('hg shrink [--revlog PATH]'))
}
220 216
if __name__ == "__main__":
    # Running this file as a script is no longer supported; only point the
    # user at the extension mechanism.  The parenthesised single-argument
    # form prints the same text on Python 2 and also parses on Python 3.
    print("shrink-revlog.py is now an extension (see hg help extensions)")
General Comments 0
You need to be logged in to leave comments. Login now