##// END OF EJS Templates
contrib: fix error handling in shrink-revlog.py to be more hg-like
contrib: fix error handling in shrink-revlog.py to be more hg-like

File last commit:

r10214:8111f998 default
r10214:8111f998 default
Show More
shrink-revlog.py
221 lines | 7.7 KiB | text/x-python | PythonLexer
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 #!/usr/bin/env python
"""\
Reorder a revlog (by default the manifest file in the current
repository) to save space. Specifically, this topologically sorts the
revisions in the revlog so that revisions on the same branch are adjacent
as much as possible. This is a workaround for the fact that Mercurial
computes deltas relative to the previous revision rather than relative to a
parent revision. This is *not* safe to run on a changelog.
"""
# Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
# as a patch to rewrite-log. Cleaned up, refactored, documented, and
# renamed by Greg Ward <greg at gerg.ca>.
# XXX would be nice to have a way to verify the repository after shrinking,
# e.g. by comparing "before" and "after" states of random changesets
# (maybe: export before, shrink, export after, diff).
import sys, os, tempfile
import optparse
from mercurial import ui as ui_, hg, revlog, transaction, node, util
Benoit Boissinot
shrink-revlog: improve performance: use changegroup instead of revisions...
r10009 from mercurial import changegroup
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Dirkjan Ochtman
contrib: use ui to write in shrink-revlog.py
def toposort(ui, rl):
    """Return the revisions of ``rl`` in a branch-grouped topological order.

    ``rl`` is iterated to obtain revision numbers and must provide
    ``len()`` and ``parentrevs(rev)``.  Progress is reported through
    ``ui.write``.  The returned list contains every revision exactly
    once, with each revision placed after all of its parents and, as far
    as possible, directly after the parent it descends from.
    """
    children = {}
    root = []
    # build children and roots
    ui.write('reading %d revs ' % len(rl))
    try:
        for i in rl:
            children[i] = []
            parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
            # in case of duplicate parents
            if len(parents) == 2 and parents[0] == parents[1]:
                del parents[1]
            for p in parents:
                # internal invariant: iteration must yield a parent
                # before any of its children
                assert p in children
                children[p].append(i)

            if len(parents) == 0:
                root.append(i)

            if i % 1000 == 0:
                ui.write('.')
    finally:
        ui.write('\n')

    # XXX this is a reimplementation of the 'branchsort' topo sort
    # algorithm in hgext.convert.convcmd... would be nice not to duplicate
    # the algorithm
    ui.write('sorting ...')

    # Pending revisions are kept on a stack whose *top* is the next
    # revision to emit.  Pushing newly-ready children in reverse order
    # yields exactly the order that "pop from the front / prepend the
    # children" would, but without the O(n) cost of list.pop(0) and list
    # concatenation on every step.
    pending = list(reversed(root))
    ret = []
    while pending:
        i = pending.pop()
        ret.append(i)
        if i not in children:
            # This only happens if some node's p1 == p2, which can
            # happen in the manifest in certain circumstances.
            continue
        ready = []
        for c in children.pop(i):
            # a child may be emitted only once none of its parents is
            # still waiting in 'children'
            parents_unseen = [p for p in rl.parentrevs(c)
                              if p != node.nullrev and p in children]
            if len(parents_unseen) == 0:
                ready.append(c)
        pending.extend(reversed(ready))
    ui.write('\n')
    return ret
Dirkjan Ochtman
contrib: use ui to write in shrink-revlog.py
def writerevs(ui, r1, r2, order, tr):
    """Copy the revisions of ``r1`` into ``r2`` in the given ``order``.

    ``order`` is a sequence of revision numbers of ``r1``.  The revisions
    are streamed out of ``r1`` with ``r1.group()`` and fed to
    ``r2.addgroup()`` under transaction ``tr``.  A dot is written to
    ``ui`` on every 1000th progress callback, and a newline is written
    when the copy ends, whether it succeeded or not.
    """
    ui.write('writing %d revs ' % len(order))

    # one-element list so the nested function can update the counter
    calls = [0]

    def progress(*args):
        if calls[0] % 1000 == 0:
            ui.write('.')
        calls[0] += 1

    # this is a bit ugly, but it works: the link revision is carried
    # through the group as a zero-padded 20-character decimal string and
    # parsed back into an integer on the receiving side
    def lookup(x):
        return "%020d" % r1.linkrev(r1.rev(x))

    def unlookup(x):
        return int(x, 10)

    nodes = [r1.node(r) for r in order]
    try:
        stream = util.chunkbuffer(r1.group(nodes, lookup, progress))
        r2.addgroup(changegroup.chunkiter(stream), unlookup, tr)
    finally:
        ui.write('\n')
Dirkjan Ochtman
kill trailing whitespace
r9712
Dirkjan Ochtman
contrib: use ui to write in shrink-revlog.py
def report(ui, olddatafn, newdatafn):
    """Write a size comparison of two revlog data files to ``ui``.

    ``olddatafn`` and ``newdatafn`` are paths that must exist (``os.stat``
    raises ``OSError`` otherwise).  Three lines are written: the old
    size, the new size, and the shrinkage as a percentage and a factor.
    If either file is empty the ratios are undefined, so a placeholder
    line is written instead of dividing by zero.
    """
    oldsize = os.stat(olddatafn).st_size
    newsize = os.stat(newdatafn).st_size

    # argh: have to pass an int to %d, because a float >= 2^32
    # blows up under Python 2.5 or earlier
    mib = 1024.0 * 1024.0
    ui.write('old file size: %12d bytes (%6.1f MiB)\n'
             % (oldsize, oldsize / mib))
    ui.write('new file size: %12d bytes (%6.1f MiB)\n'
             % (newsize, newsize / mib))

    if oldsize == 0 or newsize == 0:
        # This function is called before the transaction is closed; a
        # ZeroDivisionError here would needlessly abort the whole run.
        ui.write('shrinkage: n/a (empty data file)\n')
        return

    shrink_percent = float(oldsize - newsize) / oldsize * 100
    shrink_factor = float(oldsize) / newsize
    ui.write('shrinkage: %.1f%% (%.1fx)\n' % (shrink_percent, shrink_factor))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
def main():
    """Command-line entry point: shrink one revlog of a local repository.

    Parses ``-R/--repository`` and ``--revlog``, validates the target
    revlog, writes a reordered copy next to it, and then swaps the copy
    into place while keeping the originals as ``*.i.old`` / ``*.d.old``.

    Raises ``util.Abort`` for every user-level error (bad arguments,
    non-local repository, missing files, leftovers of a previous run).
    Any exception raised while rewriting aborts the transaction, removes
    the temporary files and is re-raised.
    """
    # Unbuffer stdout for nice progress output.
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
    parser = optparse.OptionParser(description=__doc__)
    parser.add_option('-R', '--repository',
                      default=os.path.curdir,
                      metavar='REPO',
                      help='repository root directory [default: current dir]')
    parser.add_option('--revlog',
                      metavar='FILE',
                      help='shrink FILE [default: REPO/.hg/store/00manifest.i]')
    (options, args) = parser.parse_args()
    if args:
        raise util.Abort('too many arguments')

    # Open the specified repository.
    ui = ui_.ui()
    repo = hg.repository(ui, options.repository)
    if not repo.local():
        raise util.Abort('not a local repository: %s' % options.repository)

    if options.revlog is None:
        indexfn = repo.sjoin('00manifest.i')
    else:
        if not options.revlog.endswith('.i'):
            raise util.Abort('--revlog option must specify the revlog index '
                             'file (*.i), not %s' % options.revlog)

        indexfn = os.path.realpath(options.revlog)
        store = repo.sjoin('')
        if not indexfn.startswith(store):
            raise util.Abort('--revlog option must specify a revlog in %s, '
                             'not %s' % (store, indexfn))

    datafn = indexfn[:-2] + '.d'
    if not os.path.exists(indexfn):
        raise util.Abort('no such file: %s' % indexfn)
    if '00changelog' in indexfn:
        raise util.Abort('shrinking the changelog will corrupt your repository')
    if not os.path.exists(datafn):
        # This is just a lazy shortcut because I can't be bothered to
        # handle all the special cases that entail from no .d file.
        raise util.Abort('%s does not exist: revlog not big enough '
                         'to be worth shrinking' % datafn)

    oldindexfn = indexfn + '.old'
    olddatafn = datafn + '.old'
    if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
        raise util.Abort('one or both of\n'
                         '  %s\n'
                         '  %s\n'
                         'exists from a previous run; please clean up before '
                         'running again' % (oldindexfn, olddatafn))

    ui.write('shrinking %s\n' % indexfn)

    # Hold the repository lock from before the source revlog is read
    # until after the rewritten files have been swapped into place, so
    # the copy cannot be built from, or swapped over, a revlog that
    # another process is changing in the meantime.
    lock = repo.lock(wait=False)
    try:
        prefix = os.path.basename(indexfn)[:-1]
        (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
                                               prefix=prefix,
                                               suffix='.i')
        tmpdatafn = tmpindexfn[:-2] + '.d'
        os.close(tmpfd)

        try:
            r1 = revlog.revlog(util.opener(os.getcwd(), audit=False),
                               indexfn)
            r2 = revlog.revlog(util.opener(os.getcwd(), audit=False),
                               tmpindexfn)

            # Don't use repo.transaction(), because then things get hairy
            # with paths: some need to be relative to .hg, and some need
            # to be absolute.  Doing it this way keeps things simple:
            # everything is an absolute path.
            tr = transaction.transaction(sys.stderr.write,
                                         open,
                                         repo.sjoin('journal'))
            try:
                order = toposort(ui, r1)
                writerevs(ui, r1, r2, order, tr)
                report(ui, datafn, tmpdatafn)
                tr.close()
            except:
                # Abort transaction first, so we truncate the files
                # before deleting them.
                tr.abort()
                raise
        except:
            # Covers failures before the transaction exists as well, so
            # the temporary files never outlive a failed run.
            for fn in (tmpindexfn, tmpdatafn):
                if os.path.exists(fn):
                    os.unlink(fn)
            raise

        # Keep the originals reachable under *.old, then move the new
        # files over the live names -- all still under the lock.
        os.link(indexfn, oldindexfn)
        os.link(datafn, olddatafn)
        os.rename(tmpindexfn, indexfn)
        os.rename(tmpdatafn, datafn)
    finally:
        lock.release()

    ui.write('note: old revlog saved in:\n'
             '  %s\n'
             '  %s\n'
             '(You can delete those files when you are satisfied that your\n'
             'repository is still sane.  '
             'Running \'hg verify\' is strongly recommended.)\n'
             % (oldindexfn, olddatafn))
# Script entry: run main() and turn the expected failure modes into a
# message plus a non-zero exit status instead of a traceback.
try:
    main()
except util.Abort:
    # sys.exc_info() is used instead of "except X, inst" / "except X as
    # inst" so that this parses on every Python version.
    inst = sys.exc_info()[1]
    # Errors belong on stderr, and the process must not report success.
    sys.stderr.write('%s\n' % inst.args[0])
    sys.exit(1)
except KeyboardInterrupt:
    sys.exit("interrupted")