##// END OF EJS Templates
merge with default
merge with default

File last commit:

r10542:989b2a5e default
r10543:dd352616 merge stable
Show More
shrink-revlog.py
237 lines | 8.1 KiB | text/x-python | PythonLexer
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 #!/usr/bin/env python
"""\
Greg Ward
shrink-revlog: help/doc tweaks...
r10236 reorder a revlog (the manifest by default) to save space
Specifically, this topologically sorts the revisions in the revlog so that
revisions on the same branch are adjacent as much as possible. This is a
workaround for the fact that Mercurial computes deltas relative to the
Dirkjan Ochtman
contrib: small documentation fixes in shrink-revlog.py
r10216 previous revision rather than relative to a parent revision.
This is *not* safe to run on a changelog.
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 """
# Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
Dirkjan Ochtman
contrib: small documentation fixes in shrink-revlog.py
r10216 # as a patch to rewrite-log. Cleaned up, refactored, documented, and
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 # renamed by Greg Ward <greg at gerg.ca>.
# XXX would be nice to have a way to verify the repository after shrinking,
# e.g. by comparing "before" and "after" states of random changesets
# (maybe: export before, shrink, export after, diff).
Benoit Boissinot
shrink: handle all combinations of inline/non-inline revlogs
r10542 import os, tempfile, errno
Benoit Boissinot
shrink-revlog: remove unneeded imports and useless code
r10509 from mercurial import revlog, transaction, node, util
Benoit Boissinot
shrink-revlog: improve performance: use changegroup instead of revisions...
r10009 from mercurial import changegroup
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 from mercurial.i18n import _
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Dirkjan Ochtman
contrib: use ui to write in shrink-revlog.py
r10213 def toposort(ui, rl):
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
children = {}
root = []
# build children and roots
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 ui.status(_('reading revs\n'))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 try:
for i in rl:
Martin Geisler
progress: mark strings for translation
r10496 ui.progress(_('reading'), i, total=len(rl))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 children[i] = []
parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
# in case of duplicate parents
if len(parents) == 2 and parents[0] == parents[1]:
del parents[1]
for p in parents:
assert p in children
children[p].append(i)
if len(parents) == 0:
root.append(i)
finally:
Martin Geisler
progress: mark strings for translation
r10496 ui.progress(_('reading'), None, total=len(rl))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
# XXX this is a reimplementation of the 'branchsort' topo sort
# algorithm in hgext.convert.convcmd... would be nice not to duplicate
# the algorithm
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 ui.status(_('sorting revs\n'))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 visit = root
ret = []
while visit:
i = visit.pop(0)
ret.append(i)
if i not in children:
# This only happens if some node's p1 == p2, which can
# happen in the manifest in certain circumstances.
continue
next = []
for c in children.pop(i):
parents_unseen = [p for p in rl.parentrevs(c)
if p != node.nullrev and p in children]
if len(parents_unseen) == 0:
next.append(c)
visit = next + visit
return ret
Dirkjan Ochtman
contrib: use ui to write in shrink-revlog.py
r10213 def writerevs(ui, r1, r2, order, tr):
Benoit Boissinot
shrink-revlog: improve performance: use changegroup instead of revisions...
r10009
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 ui.status(_('writing revs\n'))
Benoit Boissinot
shrink: use progress API
r10440
Benoit Boissinot
shrink-revlog: improve performance: use changegroup instead of revisions...
r10009 count = [0]
def progress(*args):
Martin Geisler
progress: mark strings for translation
r10496 ui.progress(_('writing'), count[0], total=len(order))
Benoit Boissinot
shrink-revlog: improve performance: use changegroup instead of revisions...
r10009 count[0] += 1
order = [r1.node(r) for r in order]
# this is a bit ugly, but it works
lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
unlookup = lambda x: int(x, 10)
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 try:
Benoit Boissinot
shrink-revlog: improve performance: use changegroup instead of revisions...
r10009 group = util.chunkbuffer(r1.group(order, lookup, progress))
chunkiter = changegroup.chunkiter(group)
r2.addgroup(chunkiter, unlookup, tr)
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 finally:
Martin Geisler
progress: mark strings for translation
r10496 ui.progress(_('writing'), None, len(order))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Benoit Boissinot
shrink: handle all combinations of inline/non-inline revlogs
r10542 def report(ui, r1, r2):
def getsize(r):
s = 0
for fn in (r.indexfile, r.datafile):
try:
s += os.stat(fn).st_size
except OSError, inst:
if inst.errno != errno.ENOENT:
raise
return s
oldsize = float(getsize(r1))
newsize = float(getsize(r2))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Dirkjan Ochtman
kill trailing whitespace
r9712 # argh: have to pass an int to %d, because a float >= 2^32
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 # blows up under Python 2.5 or earlier
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
Matt Mackall
many, many trivial check-code fixups
r10282 % (int(oldsize), oldsize / 1024 / 1024))
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
Matt Mackall
many, many trivial check-code fixups
r10282 % (int(newsize), newsize / 1024 / 1024))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
shrink_percent = (oldsize - newsize) / oldsize * 100
shrink_factor = oldsize / newsize
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
% (shrink_percent, shrink_factor))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Dirkjan Ochtman
contrib: turn shrink-revlog.py into an extension
r10215 def shrink(ui, repo, **opts):
"""
Shrink revlog by re-ordering revisions. Will operate on manifest for
the given repository if no other revlog is specified."""
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Dirkjan Ochtman
contrib: turn shrink-revlog.py into an extension
r10215 if not repo.local():
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 raise util.Abort(_('not a local repository: %s') % repo.root)
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Dirkjan Ochtman
contrib: turn shrink-revlog.py into an extension
r10215 fn = opts.get('revlog')
if not fn:
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 indexfn = repo.sjoin('00manifest.i')
else:
Dirkjan Ochtman
contrib: turn shrink-revlog.py into an extension
r10215 if not fn.endswith('.i'):
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 raise util.Abort(_('--revlog option must specify the revlog index '
'file (*.i), not %s') % opts.get('revlog'))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Dirkjan Ochtman
contrib: turn shrink-revlog.py into an extension
r10215 indexfn = os.path.realpath(fn)
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 store = repo.sjoin('')
if not indexfn.startswith(store):
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 raise util.Abort(_('--revlog option must specify a revlog in %s, '
'not %s') % (store, indexfn))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
if not os.path.exists(indexfn):
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 raise util.Abort(_('no such file: %s') % indexfn)
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 if '00changelog' in indexfn:
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 raise util.Abort(_('shrinking the changelog '
'will corrupt your repository'))
Benoit Boissinot
shrink: handle all combinations of inline/non-inline revlogs
r10542
ui.write(_('shrinking %s\n') % indexfn)
prefix = os.path.basename(indexfn)[:-1]
(tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
prefix=prefix,
suffix='.i')
os.close(tmpfd)
r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
datafn, tmpdatafn = r1.datafile, r2.datafile
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
oldindexfn = indexfn + '.old'
olddatafn = datafn + '.old'
if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 raise util.Abort(_('one or both of\n'
' %s\n'
' %s\n'
'exists from a previous run; please clean up '
'before running again') % (oldindexfn, olddatafn))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
# Don't use repo.transaction(), because then things get hairy with
# paths: some need to be relative to .hg, and some need to be
Dirkjan Ochtman
contrib: small documentation fixes in shrink-revlog.py
r10216 # absolute. Doing it this way keeps things simple: everything is an
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 # absolute path.
lock = repo.lock(wait=False)
Patrick Mezard
shrink-revlog: make it work on windows (issue1976)
r10234 tr = transaction.transaction(ui.warn,
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 open,
repo.sjoin('journal'))
Benoit Boissinot
shrink: handle all combinations of inline/non-inline revlogs
r10542 def ignoremissing(func):
def f(*args, **kw):
try:
return func(*args, **kw)
except OSError, inst:
if inst.errno != errno.ENOENT:
raise
return f
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 try:
try:
Dirkjan Ochtman
contrib: use ui to write in shrink-revlog.py
r10213 order = toposort(ui, r1)
writerevs(ui, r1, r2, order, tr)
Benoit Boissinot
shrink: handle all combinations of inline/non-inline revlogs
r10542 report(ui, r1, r2)
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 tr.close()
except:
# Abort transaction first, so we truncate the files before
# deleting them.
tr.abort()
Benoit Boissinot
shrink: handle all combinations of inline/non-inline revlogs
r10542 for fn in (tmpindexfn, tmpdatafn):
ignoremissing(os.unlink)(fn)
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 raise
Patrick Mezard
shrink-revlog: add --dry-run option
r10241 if not opts.get('dry_run'):
Benoit Boissinot
shrink: handle all combinations of inline/non-inline revlogs
r10542 # racy, both files cannot be renamed atomically
# copy files
Patrick Mezard
shrink-revlog: add --dry-run option
r10241 util.os_link(indexfn, oldindexfn)
Benoit Boissinot
shrink: handle all combinations of inline/non-inline revlogs
r10542 ignoremissing(util.os_link)(datafn, olddatafn)
# rename
Patrick Mezard
shrink-revlog: add --dry-run option
r10241 util.rename(tmpindexfn, indexfn)
Benoit Boissinot
shrink: handle all combinations of inline/non-inline revlogs
r10542 try:
util.rename(tmpdatafn, datafn)
except OSError, inst:
if inst.errno != errno.ENOENT:
raise
ignoremissing(os.unlink)(datafn)
Patrick Mezard
shrink-revlog: add --dry-run option
r10241 else:
Benoit Boissinot
shrink: handle all combinations of inline/non-inline revlogs
r10542 for fn in (tmpindexfn, tmpdatafn):
ignoremissing(os.unlink)(fn)
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515 finally:
lock.release()
Patrick Mezard
shrink-revlog: add --dry-run option
r10241 if not opts.get('dry_run'):
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 ui.write(_('note: old revlog saved in:\n'
' %s\n'
' %s\n'
'(You can delete those files when you are satisfied that your\n'
'repository is still sane. '
'Running \'hg verify\' is strongly recommended.)\n')
Patrick Mezard
shrink-revlog: add --dry-run option
r10241 % (oldindexfn, olddatafn))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Dirkjan Ochtman
contrib: turn shrink-revlog.py into an extension
r10215 cmdtable = {
'shrink': (shrink,
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 [('', 'revlog', '', _('index (.i) file of the revlog to shrink')),
('n', 'dry-run', None, _('do not shrink, simulate only')),
Patrick Mezard
shrink-revlog: add --dry-run option
r10241 ],
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 _('hg shrink [--revlog PATH]'))
Dirkjan Ochtman
contrib: turn shrink-revlog.py into an extension
r10215 }
Greg Ward
shrink-revlog: help/doc tweaks...
r10236
if __name__ == "__main__":
Matt Mackall
many, many trivial check-code fixups
r10282 print "shrink-revlog.py is now an extension (see hg help extensions)"