##// END OF EJS Templates
contrib: turn shrink-revlog.py into an extension
Dirkjan Ochtman -
r10215:9d79b8f5 default
parent child Browse files
Show More
@@ -1,221 +1,209 b''
1 1 #!/usr/bin/env python
2 2
3 3 """\
4 4 Reorder a revlog (by default the manifest file in the current
5 5 repository) to save space. Specifically, this topologically sorts the
6 6 revisions in the revlog so that revisions on the same branch are adjacent
7 7 as much as possible. This is a workaround for the fact that Mercurial
8 8 computes deltas relative to the previous revision rather than relative to a
9 9 parent revision. This is *not* safe to run on a changelog.
10 10 """
11 11
12 12 # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
13 13 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
14 14 # renamed by Greg Ward <greg at gerg.ca>.
15 15
16 16 # XXX would be nice to have a way to verify the repository after shrinking,
17 17 # e.g. by comparing "before" and "after" states of random changesets
18 18 # (maybe: export before, shrink, export after, diff).
19 19
20 20 import sys, os, tempfile
21 21 import optparse
22 22 from mercurial import ui as ui_, hg, revlog, transaction, node, util
23 23 from mercurial import changegroup
24 24
def toposort(ui, rl):
    """Return the revision numbers of rl in a branch-friendly topological order.

    ui -- object whose write() method receives progress output
    rl -- revlog-like object: supports len(), iteration over revision
          numbers, and parentrevs(rev)

    Every revision is emitted after all of its non-null parents, and the
    children of a revision are emitted right after it whenever they are
    ready, which keeps revisions of the same branch adjacent.
    """
    # Local import so the block does not depend on a new file-level import.
    from collections import deque

    children = {}
    root = []
    # build children and roots
    ui.write('reading %d revs ' % len(rl))
    try:
        for i in rl:
            children[i] = []
            parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
            # in case of duplicate parents
            if len(parents) == 2 and parents[0] == parents[1]:
                del parents[1]
            for p in parents:
                # Every parent must already have been iterated over, i.e.
                # the revlog has to list parents before their children.
                assert p in children
                children[p].append(i)

            if not parents:
                root.append(i)

            if i % 1000 == 0:
                ui.write('.')
    finally:
        ui.write('\n')

    # XXX this is a reimplementation of the 'branchsort' topo sort
    # algorithm in hgext.convert.convcmd... would be nice not to duplicate
    # the algorithm
    ui.write('sorting ...')
    # A deque gives O(1) removal and insertion at the front; the list
    # version paid O(n) for every pop(0) and every 'next + visit'.
    visit = deque(root)
    ret = []
    while visit:
        i = visit.popleft()
        ret.append(i)
        if i not in children:
            # This only happens if some node's p1 == p2, which can
            # happen in the manifest in certain circumstances.
            continue
        ready = []
        for c in children.pop(i):
            # A revision still present in 'children' has not been emitted
            # yet; c is ready only once none of its parents are pending.
            parents_unseen = [p for p in rl.parentrevs(c)
                              if p != node.nullrev and p in children]
            if not parents_unseen:
                ready.append(c)
        # extendleft() pushes items one at a time, so feed it the reversed
        # list to keep the ready children in their original order at the
        # front of the queue.
        ready.reverse()
        visit.extendleft(ready)
    ui.write('\n')
    return ret
72 72
def writerevs(ui, r1, r2, order, tr):
    """Copy the revisions of r1 into r2 in the given order.

    ui    -- object whose write() method receives progress output
    r1    -- source revlog
    r2    -- destination revlog
    order -- revision numbers of r1, in the order they should be written
    tr    -- transaction passed through to r2.addgroup()
    """
    ui.write('writing %d revs ' % len(order))

    # One-element list so the nested callback can update the counter.
    calls = [0]

    def progress(*args):
        # Emit one dot per 1000 callback invocations.
        if calls[0] % 1000 == 0:
            ui.write('.')
        calls[0] += 1

    nodes = [r1.node(rev) for rev in order]

    # this is a bit ugly, but it works: the link revision is carried
    # through the changegroup as a zero-padded 20-digit decimal string and
    # parsed back into an integer on the receiving side.
    def lookup(x):
        return "%020d" % r1.linkrev(r1.rev(x))

    def unlookup(x):
        return int(x, 10)

    try:
        chunks = r1.group(nodes, lookup, progress)
        buffered = util.chunkbuffer(chunks)
        r2.addgroup(changegroup.chunkiter(buffered), unlookup, tr)
    finally:
        # Always terminate the progress line, even on failure.
        ui.write('\n')
94 94
def report(ui, olddatafn, newdatafn):
    """Write the sizes of the old and new data files and the space saved.

    ui        -- object whose write() method receives the report
    olddatafn -- path of the original data file
    newdatafn -- path of the rewritten data file

    Raises OSError (from os.stat) if either file does not exist.  An empty
    file on either side no longer raises ZeroDivisionError: the percentage
    falls back to 0.0 when the old file is empty, and the factor is shown
    as 'n/a' when the new file is empty.
    """
    oldsize = float(os.stat(olddatafn).st_size)
    newsize = float(os.stat(newdatafn).st_size)

    # argh: have to pass an int to %d, because a float >= 2^32
    # blows up under Python 2.5 or earlier
    ui.write('old file size: %12d bytes (%6.1f MiB)\n'
             % (int(oldsize), oldsize / 1024 / 1024))
    ui.write('new file size: %12d bytes (%6.1f MiB)\n'
             % (int(newsize), newsize / 1024 / 1024))

    # Guard both divisions: either file may legitimately be empty.
    if oldsize:
        shrink_percent = (oldsize - newsize) / oldsize * 100
    else:
        shrink_percent = 0.0
    if newsize:
        shrink_factor = '%.1fx' % (oldsize / newsize)
    else:
        shrink_factor = 'n/a'
    ui.write('shrinkage: %.1f%% (%s)\n' % (shrink_percent, shrink_factor))
109 109
def shrink(ui, repo, **opts):
    """
    Shrink revlog by re-ordering revisions. Will operate on manifest for
    the given repository if no other revlog is specified."""
    # NOTE(review): the docstring above is presumably shown to users as the
    # command's help text, so it is left as-is -- confirm before rewording.
    #
    # ui   -- object whose write() method receives progress and status output
    # repo -- repository to operate on; must be local
    # opts -- command options; only 'revlog' (path of a revlog index file,
    #         *.i, inside the repository store) is read
    #
    # Raises util.Abort when the repository is not local, when the revlog
    # path is unsuitable (not *.i, outside the store, missing, a changelog,
    # or without a separate .d file), or when *.old files from a previous
    # run are still present.

    # Unbuffer stdout for nice progress output.
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    if not repo.local():
        raise util.Abort('not a local repository: %s' % repo.root)

    fn = opts.get('revlog')
    if not fn:
        indexfn = repo.sjoin('00manifest.i')
    else:
        if not fn.endswith('.i'):
            raise util.Abort('--revlog option must specify the revlog index '
                             'file (*.i), not %s' % fn)

        indexfn = os.path.realpath(fn)
        store = repo.sjoin('')
        if not indexfn.startswith(store):
            raise util.Abort('--revlog option must specify a revlog in %s, '
                             'not %s' % (store, indexfn))

    # 'foo.i' -> 'foo.d'
    datafn = indexfn[:-2] + '.d'
    if not os.path.exists(indexfn):
        raise util.Abort('no such file: %s' % indexfn)
    if '00changelog' in indexfn:
        raise util.Abort('shrinking the changelog will corrupt your repository')
    if not os.path.exists(datafn):
        # This is just a lazy shortcut because I can't be bothered to
        # handle all the special cases that entail from no .d file.
        raise util.Abort('%s does not exist: revlog not big enough '
                         'to be worth shrinking' % datafn)

    oldindexfn = indexfn + '.old'
    olddatafn = datafn + '.old'
    if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
        raise util.Abort('one or both of\n'
                         '  %s\n'
                         '  %s\n'
                         'exists from a previous run; please clean up before '
                         'running again' % (oldindexfn, olddatafn))

    ui.write('shrinking %s\n' % indexfn)
    # Create the temporary index next to the real one so the final rename
    # stays within one directory; the prefix keeps the trailing '.' of
    # 'name.' so the temp file reads 'name.<random>.i'.
    prefix = os.path.basename(indexfn)[:-1]
    (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
                                           prefix=prefix,
                                           suffix='.i')
    tmpdatafn = tmpindexfn[:-2] + '.d'
    os.close(tmpfd)

    r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
    r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)

    lock = repo.lock(wait=False)
    try:
        # Don't use repo.transaction(), because then things get hairy with
        # paths: some need to be relative to .hg, and some need to be
        # absolute.  Doing it this way keeps things simple: everything is an
        # absolute path.
        #
        # The transaction is created inside the try block so that the lock
        # is released even if setting it up fails.
        tr = transaction.transaction(sys.stderr.write,
                                     open,
                                     repo.sjoin('journal'))
        try:
            order = toposort(ui, r1)
            writerevs(ui, r1, r2, order, tr)
            report(ui, datafn, tmpdatafn)
            tr.close()
        except:
            # Bare except is deliberate: clean up on any failure (including
            # KeyboardInterrupt), then re-raise.
            # Abort transaction first, so we truncate the files before
            # deleting them.
            tr.abort()
            if os.path.exists(tmpindexfn):
                os.unlink(tmpindexfn)
            if os.path.exists(tmpdatafn):
                os.unlink(tmpdatafn)
            raise

        # Swap the new revlog into place while still holding the lock, so
        # nothing else can write to the repository between the rewrite and
        # the rename.  The originals stay reachable via the *.old hard
        # links.
        os.link(indexfn, oldindexfn)
        os.link(datafn, olddatafn)
        os.rename(tmpindexfn, indexfn)
        os.rename(tmpdatafn, datafn)
    finally:
        lock.release()

    ui.write('note: old revlog saved in:\n'
             '  %s\n'
             '  %s\n'
             '(You can delete those files when you are satisfied that your\n'
             'repository is still sane.  '
             'Running \'hg verify\' is strongly recommended.)\n'
             % (oldindexfn, olddatafn))
215 204
216 try:
217 main()
218 except util.Abort, inst:
219 print inst.args[0]
220 except KeyboardInterrupt:
221 sys.exit("interrupted")
# Command table exported by this extension: command name -> (function,
# option list, synopsis).  Each option entry is presumably
# (short flag, long flag, default, help text) -- confirm against the
# Mercurial extension documentation.
cmdtable = {}
cmdtable['shrink'] = (
    shrink,
    [('', 'revlog', '', 'shrink file')],
    'hg shrink [--revlog PATH]',
)
General Comments 0
You need to be logged in to leave comments. Login now