shrink-revlog.py
237 lines
| 8.1 KiB
| text/x-python
|
PythonLexer
/ contrib / shrink-revlog.py
Greg Ward
|
r9515 | #!/usr/bin/env python | ||
"""\ | ||||
Greg Ward
|
r10236 | reorder a revlog (the manifest by default) to save space | ||
Specifically, this topologically sorts the revisions in the revlog so that | ||||
revisions on the same branch are adjacent as much as possible. This is a | ||||
workaround for the fact that Mercurial computes deltas relative to the | ||||
Dirkjan Ochtman
|
r10216 | previous revision rather than relative to a parent revision. | ||
This is *not* safe to run on a changelog. | ||||
Greg Ward
|
r9515 | """ | ||
# Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org> | ||||
Dirkjan Ochtman
|
r10216 | # as a patch to rewrite-log. Cleaned up, refactored, documented, and | ||
Greg Ward
|
r9515 | # renamed by Greg Ward <greg at gerg.ca>. | ||
# XXX would be nice to have a way to verify the repository after shrinking, | ||||
# e.g. by comparing "before" and "after" states of random changesets | ||||
# (maybe: export before, shrink, export after, diff). | ||||
Benoit Boissinot
|
r10542 | import os, tempfile, errno | ||
Benoit Boissinot
|
r10509 | from mercurial import revlog, transaction, node, util | ||
Benoit Boissinot
|
r10009 | from mercurial import changegroup | ||
Benoit Boissinot
|
r10508 | from mercurial.i18n import _ | ||
Greg Ward
|
r9515 | |||
Dirkjan Ochtman
|
r10213 | def toposort(ui, rl): | ||
Greg Ward
|
r9515 | |||
children = {} | ||||
root = [] | ||||
# build children and roots | ||||
Benoit Boissinot
|
r10508 | ui.status(_('reading revs\n')) | ||
Greg Ward
|
r9515 | try: | ||
for i in rl: | ||||
Martin Geisler
|
r10496 | ui.progress(_('reading'), i, total=len(rl)) | ||
Greg Ward
|
r9515 | children[i] = [] | ||
parents = [p for p in rl.parentrevs(i) if p != node.nullrev] | ||||
# in case of duplicate parents | ||||
if len(parents) == 2 and parents[0] == parents[1]: | ||||
del parents[1] | ||||
for p in parents: | ||||
assert p in children | ||||
children[p].append(i) | ||||
if len(parents) == 0: | ||||
root.append(i) | ||||
finally: | ||||
Martin Geisler
|
r10496 | ui.progress(_('reading'), None, total=len(rl)) | ||
Greg Ward
|
r9515 | |||
# XXX this is a reimplementation of the 'branchsort' topo sort | ||||
# algorithm in hgext.convert.convcmd... would be nice not to duplicate | ||||
# the algorithm | ||||
Benoit Boissinot
|
r10508 | ui.status(_('sorting revs\n')) | ||
Greg Ward
|
r9515 | visit = root | ||
ret = [] | ||||
while visit: | ||||
i = visit.pop(0) | ||||
ret.append(i) | ||||
if i not in children: | ||||
# This only happens if some node's p1 == p2, which can | ||||
# happen in the manifest in certain circumstances. | ||||
continue | ||||
next = [] | ||||
for c in children.pop(i): | ||||
parents_unseen = [p for p in rl.parentrevs(c) | ||||
if p != node.nullrev and p in children] | ||||
if len(parents_unseen) == 0: | ||||
next.append(c) | ||||
visit = next + visit | ||||
return ret | ||||
Dirkjan Ochtman
|
r10213 | def writerevs(ui, r1, r2, order, tr): | ||
Benoit Boissinot
|
r10009 | |||
Benoit Boissinot
|
r10508 | ui.status(_('writing revs\n')) | ||
Benoit Boissinot
|
r10440 | |||
Benoit Boissinot
|
r10009 | count = [0] | ||
def progress(*args): | ||||
Martin Geisler
|
r10496 | ui.progress(_('writing'), count[0], total=len(order)) | ||
Benoit Boissinot
|
r10009 | count[0] += 1 | ||
order = [r1.node(r) for r in order] | ||||
# this is a bit ugly, but it works | ||||
lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x)) | ||||
unlookup = lambda x: int(x, 10) | ||||
Greg Ward
|
r9515 | try: | ||
Benoit Boissinot
|
r10009 | group = util.chunkbuffer(r1.group(order, lookup, progress)) | ||
chunkiter = changegroup.chunkiter(group) | ||||
r2.addgroup(chunkiter, unlookup, tr) | ||||
Greg Ward
|
r9515 | finally: | ||
Martin Geisler
|
r10496 | ui.progress(_('writing'), None, len(order)) | ||
Greg Ward
|
r9515 | |||
Benoit Boissinot
|
r10542 | def report(ui, r1, r2): | ||
def getsize(r): | ||||
s = 0 | ||||
for fn in (r.indexfile, r.datafile): | ||||
try: | ||||
s += os.stat(fn).st_size | ||||
except OSError, inst: | ||||
if inst.errno != errno.ENOENT: | ||||
raise | ||||
return s | ||||
oldsize = float(getsize(r1)) | ||||
newsize = float(getsize(r2)) | ||||
Greg Ward
|
r9515 | |||
Dirkjan Ochtman
|
r9712 | # argh: have to pass an int to %d, because a float >= 2^32 | ||
Greg Ward
|
r9515 | # blows up under Python 2.5 or earlier | ||
Benoit Boissinot
|
r10508 | ui.write(_('old file size: %12d bytes (%6.1f MiB)\n') | ||
Matt Mackall
|
r10282 | % (int(oldsize), oldsize / 1024 / 1024)) | ||
Benoit Boissinot
|
r10508 | ui.write(_('new file size: %12d bytes (%6.1f MiB)\n') | ||
Matt Mackall
|
r10282 | % (int(newsize), newsize / 1024 / 1024)) | ||
Greg Ward
|
r9515 | |||
shrink_percent = (oldsize - newsize) / oldsize * 100 | ||||
shrink_factor = oldsize / newsize | ||||
Benoit Boissinot
|
r10508 | ui.write(_('shrinkage: %.1f%% (%.1fx)\n') | ||
% (shrink_percent, shrink_factor)) | ||||
Greg Ward
|
r9515 | |||
Dirkjan Ochtman
|
r10215 | def shrink(ui, repo, **opts): | ||
""" | ||||
Shrink revlog by re-ordering revisions. Will operate on manifest for | ||||
the given repository if no other revlog is specified.""" | ||||
Greg Ward
|
r9515 | |||
Dirkjan Ochtman
|
r10215 | if not repo.local(): | ||
Benoit Boissinot
|
r10508 | raise util.Abort(_('not a local repository: %s') % repo.root) | ||
Greg Ward
|
r9515 | |||
Dirkjan Ochtman
|
r10215 | fn = opts.get('revlog') | ||
if not fn: | ||||
Greg Ward
|
r9515 | indexfn = repo.sjoin('00manifest.i') | ||
else: | ||||
Dirkjan Ochtman
|
r10215 | if not fn.endswith('.i'): | ||
Benoit Boissinot
|
r10508 | raise util.Abort(_('--revlog option must specify the revlog index ' | ||
'file (*.i), not %s') % opts.get('revlog')) | ||||
Greg Ward
|
r9515 | |||
Dirkjan Ochtman
|
r10215 | indexfn = os.path.realpath(fn) | ||
Greg Ward
|
r9515 | store = repo.sjoin('') | ||
if not indexfn.startswith(store): | ||||
Benoit Boissinot
|
r10508 | raise util.Abort(_('--revlog option must specify a revlog in %s, ' | ||
'not %s') % (store, indexfn)) | ||||
Greg Ward
|
r9515 | |||
if not os.path.exists(indexfn): | ||||
Benoit Boissinot
|
r10508 | raise util.Abort(_('no such file: %s') % indexfn) | ||
Greg Ward
|
r9515 | if '00changelog' in indexfn: | ||
Benoit Boissinot
|
r10508 | raise util.Abort(_('shrinking the changelog ' | ||
'will corrupt your repository')) | ||||
Benoit Boissinot
|
r10542 | |||
ui.write(_('shrinking %s\n') % indexfn) | ||||
prefix = os.path.basename(indexfn)[:-1] | ||||
(tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn), | ||||
prefix=prefix, | ||||
suffix='.i') | ||||
os.close(tmpfd) | ||||
r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn) | ||||
r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn) | ||||
datafn, tmpdatafn = r1.datafile, r2.datafile | ||||
Greg Ward
|
r9515 | |||
oldindexfn = indexfn + '.old' | ||||
olddatafn = datafn + '.old' | ||||
if os.path.exists(oldindexfn) or os.path.exists(olddatafn): | ||||
Benoit Boissinot
|
r10508 | raise util.Abort(_('one or both of\n' | ||
' %s\n' | ||||
' %s\n' | ||||
'exists from a previous run; please clean up ' | ||||
'before running again') % (oldindexfn, olddatafn)) | ||||
Greg Ward
|
r9515 | |||
# Don't use repo.transaction(), because then things get hairy with | ||||
# paths: some need to be relative to .hg, and some need to be | ||||
Dirkjan Ochtman
|
r10216 | # absolute. Doing it this way keeps things simple: everything is an | ||
Greg Ward
|
r9515 | # absolute path. | ||
lock = repo.lock(wait=False) | ||||
Patrick Mezard
|
r10234 | tr = transaction.transaction(ui.warn, | ||
Greg Ward
|
r9515 | open, | ||
repo.sjoin('journal')) | ||||
Benoit Boissinot
|
r10542 | def ignoremissing(func): | ||
def f(*args, **kw): | ||||
try: | ||||
return func(*args, **kw) | ||||
except OSError, inst: | ||||
if inst.errno != errno.ENOENT: | ||||
raise | ||||
return f | ||||
Greg Ward
|
r9515 | try: | ||
try: | ||||
Dirkjan Ochtman
|
r10213 | order = toposort(ui, r1) | ||
writerevs(ui, r1, r2, order, tr) | ||||
Benoit Boissinot
|
r10542 | report(ui, r1, r2) | ||
Greg Ward
|
r9515 | tr.close() | ||
except: | ||||
# Abort transaction first, so we truncate the files before | ||||
# deleting them. | ||||
tr.abort() | ||||
Benoit Boissinot
|
r10542 | for fn in (tmpindexfn, tmpdatafn): | ||
ignoremissing(os.unlink)(fn) | ||||
Greg Ward
|
r9515 | raise | ||
Patrick Mezard
|
r10241 | if not opts.get('dry_run'): | ||
Benoit Boissinot
|
r10542 | # racy, both files cannot be renamed atomically | ||
# copy files | ||||
Patrick Mezard
|
r10241 | util.os_link(indexfn, oldindexfn) | ||
Benoit Boissinot
|
r10542 | ignoremissing(util.os_link)(datafn, olddatafn) | ||
# rename | ||||
Patrick Mezard
|
r10241 | util.rename(tmpindexfn, indexfn) | ||
Benoit Boissinot
|
r10542 | try: | ||
util.rename(tmpdatafn, datafn) | ||||
except OSError, inst: | ||||
if inst.errno != errno.ENOENT: | ||||
raise | ||||
ignoremissing(os.unlink)(datafn) | ||||
Patrick Mezard
|
r10241 | else: | ||
Benoit Boissinot
|
r10542 | for fn in (tmpindexfn, tmpdatafn): | ||
ignoremissing(os.unlink)(fn) | ||||
Greg Ward
|
r9515 | finally: | ||
lock.release() | ||||
Patrick Mezard
|
r10241 | if not opts.get('dry_run'): | ||
Benoit Boissinot
|
r10508 | ui.write(_('note: old revlog saved in:\n' | ||
' %s\n' | ||||
' %s\n' | ||||
'(You can delete those files when you are satisfied that your\n' | ||||
'repository is still sane. ' | ||||
'Running \'hg verify\' is strongly recommended.)\n') | ||||
Patrick Mezard
|
r10241 | % (oldindexfn, olddatafn)) | ||
Greg Ward
|
r9515 | |||
Dirkjan Ochtman
|
r10215 | cmdtable = { | ||
'shrink': (shrink, | ||||
Benoit Boissinot
|
r10508 | [('', 'revlog', '', _('index (.i) file of the revlog to shrink')), | ||
('n', 'dry-run', None, _('do not shrink, simulate only')), | ||||
Patrick Mezard
|
r10241 | ], | ||
Benoit Boissinot
|
r10508 | _('hg shrink [--revlog PATH]')) | ||
Dirkjan Ochtman
|
r10215 | } | ||
Greg Ward
|
r10236 | |||
if __name__ == "__main__": | ||||
Matt Mackall
|
r10282 | print "shrink-revlog.py is now an extension (see hg help extensions)" | ||