##// END OF EJS Templates
changegroup: decompress GZ algorithm in larger chunks for better performance
changegroup: decompress GZ algorithm in larger chunks for better performance

File last commit:

r16306:d76b9abd default
r16557:9dba5536 stable
Show More
shrink-revlog.py
294 lines | 9.5 KiB | text/x-python | PythonLexer
Augie Fackler
shrink-revlog: remove \ from docstring
"""reorder a revlog (the manifest by default) to save space

Specifically, this topologically sorts the revisions in the revlog so that
revisions on the same branch are adjacent as much as possible. This is a
workaround for the fact that Mercurial computes deltas relative to the
previous revision rather than relative to a parent revision.

This is *not* safe to run on a changelog.
"""
# Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
# as a patch to rewrite-log. Cleaned up, refactored, documented, and
# renamed by Greg Ward <greg at gerg.ca>.

# XXX would be nice to have a way to verify the repository after shrinking,
# e.g. by comparing "before" and "after" states of random changesets
# (maybe: export before, shrink, export after, diff).
Greg Ward
shrink-revlog: make pyflakes happy
r16305 import os, errno
Augie Fackler
shrink-revlog: update util.opener to scmutil.opener after d13913355390
r14029 from mercurial import revlog, transaction, node, util, scmutil
Benoit Boissinot
shrink-revlog: improve performance: use changegroup instead of revisions...
r10009 from mercurial import changegroup
Benoit Boissinot
shrink-revlog: add strings for translation / import _ before using it
r10508 from mercurial.i18n import _
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Benoit Boissinot
shrink-revlog: factor out postorder algorithm
def postorder(start, edges):
    """Return the nodes reachable from ``start`` in depth-first postorder.

    ``start`` is an iterable of nodes to begin from; because the work
    list is used as a stack, the last start node is explored first.
    ``edges`` maps each node to the sequence of nodes it points at.

    A node is appended to the result only once every node it points at
    has been appended, and each node appears in the result exactly once.
    """
    result = []
    visit = list(start)
    finished = set()

    while visit:
        cur = visit[-1]
        if cur in finished:
            # Only a start node can already be finished when it reaches
            # the top of the stack: it was listed twice, or it was
            # reached from a start node explored earlier. Drop it rather
            # than emitting it a second time.
            visit.pop()
            continue
        for p in edges[cur]:
            # defend against node.nullrev because it's occasionally
            # possible for a node to have parents (null, something)
            # rather than (something, null)
            if p not in finished and p != node.nullrev:
                visit.append(p)
                break
        else:
            # nothing left to descend into: cur itself is complete
            result.append(cur)
            finished.add(cur)
            visit.pop()

    return result
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Greg Ward
shrink-revlog: add "reverse postorder" and "postorder reverse" toposorts....
def toposort_reversepostorder(ui, rl):
    """Order the revisions of ``rl`` by a postorder walk of the reverse graph.

    Reads the parents of every revision in ``rl`` (reporting progress
    through ``ui``), then runs ``postorder`` over the parent links,
    starting from the head revisions in descending order. Returns the
    resulting list of revision numbers.
    """
    nullrev = node.nullrev
    # rev -> tuple of parent revs (p2 first for merges)
    parentmap = {}
    tips = set()
    ui.status(_('reading revs\n'))
    try:
        for rev in rl:
            ui.progress(_('reading'), rev, total=len(rl))
            p1, p2 = rl.parentrevs(rev)
            if p1 != p2 and p2 != nullrev:
                revparents = (p2, p1)   # merge node
            elif p1 == nullrev:
                revparents = ()         # root node
            else:
                revparents = (p1,)      # normal node
            parentmap[rev] = revparents
            # a rev is a head until something names it as a parent
            tips.add(rev)
            tips.difference_update(revparents)
    finally:
        ui.progress(_('reading'), None)

    startrevs = sorted(tips, reverse=True)

    ui.status(_('sorting revs\n'))
    return postorder(startrevs, parentmap)
Greg Ward
shrink-revlog: add "reverse postorder" and "postorder reverse" toposorts....
r10623
def toposort_postorderreverse(ui, rl):
    """Order the revisions of ``rl`` by a reversed postorder walk.

    Reads the parents of every revision in ``rl`` (reporting progress
    through ``ui``) to build a map from each revision to its children,
    runs ``postorder`` over those child links starting from the root
    revisions in ascending order, and returns that result reversed.
    """
    nullrev = node.nullrev
    # rev -> list of child revs
    childmap = {}
    rootrevs = set()
    ui.status(_('reading revs\n'))
    try:
        for rev in rl:
            ui.progress(_('reading'), rev, total=len(rl))
            p1, p2 = rl.parentrevs(rev)
            if p1 == nullrev and p2 == nullrev:
                rootrevs.add(rev)
            childmap[rev] = []
            # register rev as a child of each real parent
            for parent in (p1, p2):
                if parent != nullrev:
                    childmap[parent].append(rev)
    finally:
        ui.progress(_('reading'), None)

    startrevs = sorted(rootrevs)

    ui.status(_('sorting revs\n'))
    ordered = postorder(startrevs, childmap)
    return ordered[::-1]
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Dirkjan Ochtman
contrib: use ui to write in shrink-revlog.py
def writerevs(ui, r1, r2, order, tr):
    """Copy the revisions of ``r1`` into ``r2`` in the given order.

    ``order`` is a sequence of revision numbers of ``r1``. The revisions
    are turned into a changegroup stream from ``r1`` and added to ``r2``
    under transaction ``tr``; progress is reported through ``ui``.
    """
    ui.status(_('writing revs\n'))

    nodes = [r1.node(rev) for rev in order]

    # A one-element list acts as a counter the nested callback can update.
    written = [0]

    def lookup(revl, x):
        # called while bundling: bump the progress meter and encode the
        # linkrev as a fixed-width decimal string
        written[0] += 1
        ui.progress(_('writing'), written[0], total=len(nodes))
        return "%020d" % revl.linkrev(revl.rev(x))

    def unlookup(x):
        # inverse of the encoding produced by lookup()
        return int(x, 10)

    try:
        bundler = changegroup.bundle10(lookup)
        chunks = util.chunkbuffer(r1.group(nodes, bundler))
        unbundler = changegroup.unbundle10(chunks, "UN")
        r2.addgroup(unbundler, unlookup, tr)
    finally:
        ui.progress(_('writing'), None)
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Benoit Boissinot
shrink: handle all combinations of inline/non-inline revlogs
def report(ui, r1, r2):
    """Write the on-disk sizes of revlogs ``r1`` (old) and ``r2`` (new).

    Each size is the sum of the revlog's index file and data file; a
    file that does not exist counts as zero bytes. The shrinkage line is
    only written when both sizes are non-zero, because the percentage
    and the factor are undefined otherwise (e.g. for an empty revlog).

    Raises OSError if a file cannot be stat'ed for any reason other than
    not existing.
    """
    def getsize(r):
        total = 0
        for fn in (r.indexfile, r.datafile):
            try:
                total += os.stat(fn).st_size
            except OSError as inst:
                # a missing file (presumably the data file of an inline
                # revlog) simply contributes nothing
                if inst.errno != errno.ENOENT:
                    raise
        return total

    oldsize = float(getsize(r1))
    newsize = float(getsize(r2))

    # argh: have to pass an int to %d, because a float >= 2^32
    # blows up under Python 2.5 or earlier
    ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
             % (int(oldsize), oldsize / 1024 / 1024))
    ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
             % (int(newsize), newsize / 1024 / 1024))

    if oldsize and newsize:
        shrink_percent = (oldsize - newsize) / oldsize * 100
        shrink_factor = oldsize / newsize
        ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
                 % (shrink_percent, shrink_factor))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Dirkjan Ochtman
contrib: turn shrink-revlog.py into an extension
def shrink(ui, repo, **opts):
    """shrink a revlog by reordering revisions

    Rewrites all the entries in some revlog of the current repository
    (by default, the manifest log) to save space.

    Different sort algorithms have different performance
    characteristics. Use ``--sort`` to select a sort algorithm so you
    can determine which works best for your data.
    """
    # NOTE(review): the docstring above is presumably shown to users as
    # the command's help text, so implementation notes live in comments.
    #
    # opts read here: 'revlog' (path of the index file, optional),
    # 'sort' (suffix of a module-level toposort_* function) and
    # 'dry_run' (build the new revlog but do not install it).

    if not repo.local():
        raise util.Abort(_('not a local repository: %s') % repo.root)

    fn = opts.get('revlog')
    if not fn:
        indexfn = repo.sjoin('00manifest.i')
    else:
        if not fn.endswith('.i'):
            raise util.Abort(_('--revlog option must specify the revlog index '
                               'file (*.i), not %s') % opts.get('revlog'))

        indexfn = os.path.realpath(fn)
        store = repo.sjoin('')
        if not indexfn.startswith(store):
            raise util.Abort(_('--revlog option must specify a revlog in %s, '
                               'not %s') % (store, indexfn))

    sortname = opts['sort']
    try:
        toposort = globals()['toposort_' + sortname]
    except KeyError:
        raise util.Abort(_('no such toposort algorithm: %s') % sortname)

    if not os.path.exists(indexfn):
        raise util.Abort(_('no such file: %s') % indexfn)
    if '00changelog' in indexfn:
        raise util.Abort(_('shrinking the changelog '
                           'will corrupt your repository'))

    def ignoremissing(func):
        # wrap func so that "no such file" errors are silently ignored
        def f(*args, **kw):
            try:
                return func(*args, **kw)
            except OSError as inst:
                if inst.errno != errno.ENOENT:
                    raise
        return f

    ui.write(_('shrinking %s\n') % indexfn)

    r1 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), indexfn)
    datafn = r1.datafile

    # Refuse to run over leftovers of a previous run *before* creating
    # any temporary file, so that this abort leaves nothing behind.
    oldindexfn = indexfn + '.old'
    olddatafn = datafn + '.old'
    if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
        raise util.Abort(_('one or both of\n'
                           ' %s\n'
                           ' %s\n'
                           'exists from a previous run; please clean up '
                           'before running again') % (oldindexfn, olddatafn))

    # Take the lock before creating the temporary index, so that a held
    # lock does not leave a stray temporary file either.
    lock = repo.lock(wait=False)
    try:
        tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
        r2 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False),
                           tmpindexfn)
        tmpdatafn = r2.datafile

        # Don't use repo.transaction(), because then things get hairy with
        # paths: some need to be relative to .hg, and some need to be
        # absolute. Doing it this way keeps things simple: everything is an
        # absolute path.
        #
        # Created inside the try block so that the lock is released even
        # if setting up the transaction fails.
        tr = transaction.transaction(ui.warn,
                                     open,
                                     repo.sjoin('journal'))
        try:
            order = toposort(ui, r1)

            # count revisions that do not directly follow one of their
            # parents in the new order
            suboptimal = 0
            prevrev = None
            for pos, rev in enumerate(order):
                if pos:
                    parents = [p for p in r1.parentrevs(rev)
                               if p != node.nullrev]
                    if parents and prevrev not in parents:
                        suboptimal += 1
                prevrev = rev
            ui.note(_('%d suboptimal nodes\n') % suboptimal)

            writerevs(ui, r1, r2, order, tr)
            report(ui, r1, r2)
            tr.close()
        except:
            # Deliberately bare: clean up on any exit (including an
            # interrupt) and re-raise.
            # Abort transaction first, so we truncate the files before
            # deleting them.
            tr.abort()
            for tmpfn in (tmpindexfn, tmpdatafn):
                ignoremissing(os.unlink)(tmpfn)
            raise

        if not opts.get('dry_run'):
            # racy, both files cannot be renamed atomically
            # copy files
            util.oslink(indexfn, oldindexfn)
            ignoremissing(util.oslink)(datafn, olddatafn)

            # rename
            util.rename(tmpindexfn, indexfn)

            # Look up the old data file's mode on its own: if the old
            # revlog had no data file but the new one does, the new data
            # file must still be moved into place below.
            try:
                datamode = os.stat(datafn).st_mode
            except OSError as inst:
                if inst.errno != errno.ENOENT:
                    raise
                datamode = None

            try:
                if datamode is not None:
                    os.chmod(tmpdatafn, datamode)
                util.rename(tmpdatafn, datafn)
            except OSError as inst:
                if inst.errno != errno.ENOENT:
                    raise
                # the new revlog has no data file: remove the stale one
                ignoremissing(os.unlink)(datafn)
        else:
            for tmpfn in (tmpindexfn, tmpdatafn):
                ignoremissing(os.unlink)(tmpfn)
    finally:
        lock.release()

    if not opts.get('dry_run'):
        ui.write(
            _('note: old revlog saved in:\n'
              ' %s\n'
              ' %s\n'
              '(You can delete those files when you are satisfied that your\n'
              'repository is still sane. '
              'Running \'hg verify\' is strongly recommended.)\n')
            % (oldindexfn, olddatafn))
Greg Ward
Add script to rewrite revlog to workaround lack of parent deltas....
r9515
Dirkjan Ochtman
contrib: turn shrink-revlog.py into an extension
# Command table for this extension: command name ->
# (function, list of (short flag, long flag, default, help) options, synopsis)
cmdtable = {
    'shrink': (
        shrink,
        [
            # path of the revlog index file to rewrite
            ('', 'revlog', '', _('the revlog to shrink (.i)')),
            # build the new revlog but do not install it
            ('n', 'dry-run', None, _('do not shrink, simulate only')),
            # suffix of the toposort_* function to use
            ('', 'sort', 'reversepostorder',
             _('name of sort algorithm to use')),
        ],
        _('hg shrink [--revlog PATH]'),
    ),
}
Greg Ward
shrink-revlog: help/doc tweaks...
r10236
if __name__ == "__main__":
    # A single parenthesized string prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print("shrink-revlog.py is now an extension (see hg help extensions)")