##// END OF EJS Templates
compression: introduce a `storage.revlog.zstd.level` configuration...
compression: introduce a `storage.revlog.zstd.level` configuration This option controls the zstd compression level used when compressing revlog chunks. The usage of zstd for revlog compression has not graduated from experimental yet, but we intend to fix that soon. The option name for the compression level is more straightforward to pick, so this changeset comes first. Having a dedicated option for each compression engine is useful because they don't support the same range of values. I ran the same measurement as for the zlib compression level (in the parent changesets). The variation in repository size stays mostly in the same (small) range. The "read/write" performance sees smallish variation, but is overall much better than with zlib. Write performance shows the same trend of being better when reaching high-end compression levels. Again, we don't intend to change the default zstd compression level (currently: 3) in this series. However this is worth investigating in the future. The performance comparison of zlib vs zstd is quite impressive. The repository size stays in the same range, but the performance is much better in all situations. 
Comparison summary ================== We are looking at: - performance range for zlib - performance range for zstd - comparison of default zstd (level-3) to default zlib (level 6) - comparison of the slowest zstd time to the fastest zlib time Read performance: ----------------- | zlib | zstd | cmp | f2s mercurial | 0.170159 - 0.189219 | 0.144127 - 0.149624 | 80% | 88% pypy | 2.679217 - 2.768691 | 1.532317 - 1.705044 | 60% | 63% netbeans | 122.477027 - 141.620281 | 72.996346 - 89.731560 | 58% | 73% mozilla | 147.867662 - 170.572118 | 91.700995 - 105.853099 | 56% | 71% Write performance: ------------------ | zlib | zstd | cmp | f2s mercurial | 53.250304 - 56.2936129 | 40.877025 - 45.677286 | 75% | 86% pypy | 460.721984 - 476.589918 | 270.545409 - 301.002219 | 63% | 65% netbeans | 520.560316 - 715.930400 | 370.356311 - 428.329652 | 55% | 82% mozilla | 739.803002 - 987.056093 | 505.152906 - 591.930683 | 57% | 80% Raw data -------- repo alg lvl .hg/store size 00manifest.d read write mercurial zlib 1 49,402,813 5,963,475 0.170159 53.250304 mercurial zlib 6 47,197,397 5,875,730 0.182820 56.264320 mercurial zlib 9 47,121,596 5,849,781 0.189219 56.293612 mercurial zstd 1 49,737,084 5,966,355 0.144127 40.877025 mercurial zstd 3 48,961,867 5,895,208 0.146376 42.268142 mercurial zstd 5 48,200,592 5,938,676 0.149624 43.162875 mercurial zstd 10 47,833,520 5,913,353 0.145185 44.012489 mercurial zstd 15 47,314,604 5,728,679 0.147686 45.677286 mercurial zstd 20 47,330,502 5,830,539 0.145789 45.025407 mercurial zstd 22 47,330,076 5,830,539 0.143996 44.690460 pypy zlib 1 370,830,572 28,462,425 2.679217 460.721984 pypy zlib 6 340,112,317 27,648,747 2.768691 467.537158 pypy zlib 9 338,360,736 27,639,003 2.763495 476.589918 pypy zstd 1 362,377,479 27,916,214 1.532317 270.545409 pypy zstd 3 354,137,693 27,905,988 1.686718 294.951509 pypy zstd 5 342,640,043 27,655,774 1.705044 301.002219 pypy zstd 10 334,224,327 27,164,493 1.567287 285.186239 pypy zstd 15 329,000,363 26,645,965 1.637729 
299.561332 pypy zstd 20 324,534,039 26,199,547 1.526813 302.149827 pypy zstd 22 324,530,595 26,198,932 1.525718 307.821218 netbeans zlib 1 1,281,847,810 165,495,457 122.477027 520.560316 netbeans zlib 6 1,205,284,353 159,161,207 139.876147 715.930400 netbeans zlib 9 1,197,135,671 155,034,586 141.620281 678.297064 netbeans zstd 1 1,259,581,737 160,840,613 72.996346 370.356311 netbeans zstd 3 1,232,978,122 157,691,551 81.622317 396.733087 netbeans zstd 5 1,208,034,075 160,246,880 83.080549 364.342626 netbeans zstd 10 1,188,624,176 156,083,417 79.323935 403.594602 netbeans zstd 15 1,176,973,589 153,859,477 89.731560 428.329652 netbeans zstd 20 1,162,958,258 151,147,535 82.842667 392.335349 netbeans zstd 22 1,162,707,029 151,150,220 82.565695 402.840655 mozilla zlib 1 2,775,497,186 298,527,987 147.867662 751.263721 mozilla zlib 6 2,596,856,420 286,597,671 170.572118 987.056093 mozilla zlib 9 2,587,542,494 287,018,264 163.622338 739.803002 mozilla zstd 1 2,723,159,348 286,617,532 91.700995 570.042751 mozilla zstd 3 2,665,055,001 286,152,013 95.240155 561.412805 mozilla zstd 5 2,607,819,817 288,060,030 101.978048 505.152906 mozilla zstd 10 2,558,761,085 283,967,648 104.113481 497.771202 mozilla zstd 15 2,526,216,060 275,581,300 105.853099 591.930683 mozilla zstd 20 2,485,114,806 266,478,859 95.268795 576.515389 mozilla zstd 22 2,484,869,080 266,456,505 94.429282 572.785537

File last commit:

r41925:aaad36b8 default
r42211:bb271ec2 default
Show More
reposetup.py
393 lines | 17.3 KiB | text/x-python | PythonLexer
# Copyright 2009-2010 Gregory P. Ward
# Copyright 2009-2010 Intelerad Medical Systems Incorporated
# Copyright 2010-2011 Fog Creek Software
# Copyright 2010-2011 Unity Technologies
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
'''setup for largefiles repositories: reposetup'''
from __future__ import absolute_import
import copy
from mercurial.i18n import _
from mercurial import (
error,
localrepo,
match as matchmod,
scmutil,
)
from . import (
lfcommands,
lfutil,
)
def reposetup(ui, repo):
    '''Install the largefiles repository subclass and hooks on ``repo``.

    Returns immediately, changing nothing, when ``repo.local()`` is false.
    '''
    # wire repositories should be given new wireproto functions
    # by "proto.wirereposetup()" via "hg.wirepeersetupfuncs"
    if not repo.local():
        return

    class lfilesrepo(repo.__class__):
        # the mark to examine whether "repo" object enables largefiles or not
        _largefilesenabled = True

        # class-level default; status() and __getitem__() only apply
        # their largefiles-specific behaviour while this is true
        lfstatus = False
def status_nolfiles(self, *args, **kwargs):
    '''Call the parent class's status(), skipping the largefiles override.'''
    plainstatus = super(lfilesrepo, self).status
    return plainstatus(*args, **kwargs)
# When lfstatus is set, return a context that gives the names
# of largefiles instead of their corresponding standins and
# identifies the largefiles as always binary, regardless of
# their actual contents.
def __getitem__(self, changeid):
    '''Return the context for ``changeid``, largefiles-aware if enabled.

    With ``self.lfstatus`` unset the parent's context is returned
    untouched.  Otherwise the context object is re-classed so that:

    - files() lists largefile names in place of standin names,
    - the manifest also "contains" a name whose standin it contains,
    - filectx() falls back to the standin when the path itself cannot be
      looked up, and in that case appends a NUL to the data.
    '''
    ctx = super(lfilesrepo, self).__getitem__(changeid)
    if not self.lfstatus:
        return ctx

    class lfilesctx(ctx.__class__):
        def files(self):
            # standins are mapped back to their largefile; any other
            # name is passed through unchanged
            return [lfutil.splitstandin(name) or name
                    for name in super(lfilesctx, self).files()]

        def manifest(self):
            mf = super(lfilesctx, self).manifest()

            class lfilesmanifest(mf.__class__):
                def __contains__(self, filename):
                    contains = super(lfilesmanifest, self).__contains__
                    found = contains(filename)
                    return found or contains(lfutil.standin(filename))

            mf.__class__ = lfilesmanifest
            return mf

        def filectx(self, path, fileid=None, filelog=None):
            lookup = super(lfilesctx, self).filectx
            # filelog is only forwarded when the caller supplied one
            if filelog is None:
                rest = (fileid,)
            else:
                rest = (fileid, filelog)
            try:
                fctx = lookup(path, *rest)
            except error.LookupError:
                # not tracked under its own name: use the standin
                fctx = lookup(lfutil.standin(path), *rest)
                # Adding a null character will cause Mercurial to
                # identify this as a binary file.
                rawdata = fctx.data
                fctx.data = lambda: rawdata() + '\0'
            return fctx

    ctx.__class__ = lfilesctx
    return ctx
# Figure out the status of big files and insert them into the
# appropriate list in the result. Also removes standin files
# from the listing. Revert to the original status if
# self.lfstatus is False.
# XXX large file status is buggy when used on repo proxy.
# XXX this needs to be investigated.
@localrepo.unfilteredmethod
def status(self, node1='.', node2=None, match=None, ignored=False,
           clean=False, unknown=False, listsubrepos=False):
    '''Return repository status with largefiles folded in when enabled.

    While ``self.lfstatus`` is false this is a plain pass-through to the
    parent class.  The parent is also used directly when ``match`` is
    not an "always" matcher and matches nothing in the largefiles
    dirstate.  Otherwise standins are dropped from the parent's result
    and the largefiles they stand for are reported instead.

    ``self.lfstatus`` is turned off while the status is computed, because
    some calls made here rely on the original behaviour.  It is turned
    back on on every exit path, including early returns and exceptions.

    Returns the parent's result in the pass-through cases and a
    ``scmutil.status`` built from the combined lists otherwise.
    '''
    listignored, listclean, listunknown = ignored, clean, unknown
    orig = super(lfilesrepo, self).status
    if not self.lfstatus:
        return orig(node1, node2, match, listignored, listclean,
                    listunknown, listsubrepos)

    # some calls in this function rely on the old version of status
    self.lfstatus = False
    try:
        ctx1 = self[node1]
        ctx2 = self[node2]
        working = ctx2.rev() is None
        parentworking = working and ctx1 == self['.']

        if match is None:
            match = matchmod.always()

        wlock = None
        try:
            try:
                # updating the dirstate is optional
                # so we don't wait on the lock
                wlock = self.wlock(False)
            except error.LockError:
                pass

            # First check if paths or patterns were specified on the
            # command line. If there were, and they don't match any
            # largefiles, we should just bail here and let super
            # handle it -- thus gaining a big performance boost.
            lfdirstate = lfutil.openlfdirstate(ui, self)
            if not match.always():
                for f in lfdirstate:
                    if match(f):
                        break
                else:
                    return orig(node1, node2, match, listignored,
                                listclean, listunknown, listsubrepos)

            # Create a copy of match that matches standins instead
            # of largefiles.
            def tostandins(files):
                if not working:
                    return files
                newfiles = []
                dirstate = self.dirstate
                for f in files:
                    sf = lfutil.standin(f)
                    if sf in dirstate:
                        newfiles.append(sf)
                    elif dirstate.hasdir(sf):
                        # Directory entries could be regular or
                        # standin, check both
                        newfiles.extend((f, sf))
                    else:
                        newfiles.append(f)
                return newfiles

            m = copy.copy(match)
            m._files = tostandins(m._files)

            result = orig(node1, node2, m, ignored, clean, unknown,
                          listsubrepos)
            if working:

                def sfindirstate(f):
                    sf = lfutil.standin(f)
                    dirstate = self.dirstate
                    return sf in dirstate or dirstate.hasdir(sf)

                # NOTE(review): this rewrites _files on the matcher
                # object itself (not on the copy "m"), so a matcher
                # passed in by the caller is modified -- confirm that
                # callers tolerate this.
                match._files = [f for f in match._files
                                if sfindirstate(f)]
                # Don't waste time getting the ignored and unknown
                # files from lfdirstate
                unsure, s = lfdirstate.status(match, subrepos=[],
                                              ignored=False,
                                              clean=listclean,
                                              unknown=False)
                (modified, added, removed, deleted, clean) = (
                    s.modified, s.added, s.removed, s.deleted, s.clean)
                if parentworking:
                    for lfile in unsure:
                        standin = lfutil.standin(lfile)
                        if standin not in ctx1:
                            # from second parent
                            modified.append(lfile)
                        elif (lfutil.readasstandin(ctx1[standin])
                              != lfutil.hashfile(self.wjoin(lfile))):
                            modified.append(lfile)
                        else:
                            if listclean:
                                clean.append(lfile)
                            lfdirstate.normal(lfile)
                else:
                    tocheck = unsure + modified + added + clean
                    modified, added, clean = [], [], []
                    checkexec = self.dirstate._checkexec

                    for lfile in tocheck:
                        standin = lfutil.standin(lfile)
                        if standin in ctx1:
                            abslfile = self.wjoin(lfile)
                            if ((lfutil.readasstandin(ctx1[standin]) !=
                                 lfutil.hashfile(abslfile)) or
                                (checkexec and
                                 ('x' in ctx1.flags(standin)) !=
                                 bool(lfutil.getexecutable(abslfile)))):
                                modified.append(lfile)
                            elif listclean:
                                clean.append(lfile)
                        else:
                            added.append(lfile)

                    # at this point, 'removed' contains largefiles
                    # marked as 'R' in the working context.
                    # then, largefiles not managed also in the target
                    # context should be excluded from 'removed'.
                    removed = [lfile for lfile in removed
                               if lfutil.standin(lfile) in ctx1]

                # Standins no longer found in lfdirstate have been deleted
                for standin in ctx1.walk(lfutil.getstandinmatcher(self)):
                    lfile = lfutil.splitstandin(standin)
                    if not match(lfile):
                        continue
                    if lfile not in lfdirstate:
                        deleted.append(lfile)
                        # Sync "largefile has been removed" back to the
                        # standin. Removing a file as a side effect of
                        # running status is gross, but the alternatives (if
                        # any) are worse.
                        self.wvfs.unlinkpath(standin, ignoremissing=True)

                # Filter result lists
                result = list(result)

                # Largefiles are not really removed when they're
                # still in the normal dirstate. Likewise, normal
                # files are not really removed if they are still in
                # lfdirstate. This happens in merges where files
                # change type.
                removed = [f for f in removed
                           if f not in self.dirstate]
                result[2] = [f for f in result[2]
                             if f not in lfdirstate]

                lfiles = set(lfdirstate._map)
                # Unknown files
                result[4] = set(result[4]).difference(lfiles)
                # Ignored files
                result[5] = set(result[5]).difference(lfiles)
                # combine normal files and largefiles
                normals = [[fn for fn in filelist
                            if not lfutil.isstandin(fn)]
                           for filelist in result]
                lfstatus = (modified, added, removed, deleted, [], [],
                            clean)
                result = [sorted(list1 + list2)
                          for (list1, list2) in zip(normals, lfstatus)]
            else:  # not against working directory
                result = [[lfutil.splitstandin(f) or f for f in items]
                          for items in result]

            # the largefiles dirstate is only written back when the
            # non-blocking lock attempt above succeeded
            if wlock:
                lfdirstate.write()
        finally:
            if wlock:
                wlock.release()
    finally:
        # Re-enable largefiles status no matter how we leave: normal
        # completion, the early pass-through return, or an exception
        # raised anywhere after the flag was cleared.
        self.lfstatus = True
    return scmutil.status(*result)
def commitctx(self, ctx, *args, **kwargs):
    '''Commit ``ctx`` via the parent class, then hook its markcommitted().

    Once the parent's commitctx() has returned, ``ctx`` is re-classed so
    that markcommitted() goes through ``lfutil.markcommitted``, which is
    given the parent implementation, the context and the node.

    Returns the node returned by the parent's commitctx().
    '''
    newnode = super(lfilesrepo, self).commitctx(ctx, *args, **kwargs)

    class lfilesctx(ctx.__class__):
        def markcommitted(self, node):
            parentmark = super(lfilesctx, self).markcommitted
            return lfutil.markcommitted(parentmark, self, node)

    ctx.__class__ = lfilesctx
    return newnode
# Before commit, largefile standins have not had their
# contents updated to reflect the hash of their largefile.
# Do that here.
def commit(self, text="", user=None, date=None, match=None,
           force=False, editor=False, extra=None):
    '''Commit after the active largefiles hook has adjusted ``match``.

    Standins do not yet reflect the hash of their largefile when this is
    called.  The last entry of ``self._lfcommithooks`` is invoked with
    ``(self, match)`` and whatever it returns is the matcher handed to the
    parent class's commit().  Both steps run while holding the working
    directory lock.

    Returns the parent commit()'s result.
    '''
    parentcommit = super(lfilesrepo, self).commit
    if extra is None:
        extra = {}

    with self.wlock():
        # only the top of the hook stack is consulted
        refreshedmatch = self._lfcommithooks[-1](self, match)
        return parentcommit(text=text, user=user, date=date,
                            match=refreshedmatch, force=force,
                            editor=editor, extra=extra)
def push(self, remote, force=False, revs=None, newbranch=False):
    '''Push to ``remote``, refusing local targets lacking our requirements.

    When ``remote.local()`` is truthy, every entry of
    ``self.requirements`` must be in that repository's ``supported`` set;
    otherwise error.Abort is raised naming the missing features.  In all
    other cases the push is delegated to the parent class.
    '''
    if remote.local():
        unsupported = set(self.requirements) - remote.local().supported
        if unsupported:
            raise error.Abort(
                _("required features are not"
                  " supported in the destination:"
                  " %s") % (', '.join(sorted(unsupported))))

    parentpush = super(lfilesrepo, self).push
    return parentpush(remote, force=force, revs=revs, newbranch=newbranch)
# TODO: _subdirlfs should be moved into "lfutil.py", because
# it is referred only from "lfutil.updatestandinsbymatch"
def _subdirlfs(self, files, lfiles):
    '''
    Adjust matched file list

    If we pass a directory to commit whose only committable files
    are largefiles, the core commit code aborts before finding
    the largefiles.
    So we do the following:
    For directories that only have largefiles as matches,
    we explicitly add the largefiles to the match list and remove
    the directory.
    In other cases, we leave the match list unmodified.

    ``files`` are the names to adjust and ``lfiles`` the largefile names
    to test against each directory.  Returns a new list; neither
    argument is modified.

    Raises error.Abort when an entry of ``files`` is itself a standin
    (as judged by ``lfutil.isstandin(f + '/')``).
    '''
    normalize = self.dirstate.normalize

    dirs = []
    regulars = []
    for f in files:
        if lfutil.isstandin(f + '/'):
            # the hint is user-facing text, so it is marked for
            # translation just like the message itself
            raise error.Abort(
                _('file "%s" is a largefile standin') % f,
                hint=_('commit the largefile itself instead'))
        # Scan directories
        if self.wvfs.isdir(f):
            dirs.append(f)
        else:
            regulars.append(f)

    actualfiles = []
    for f in dirs:
        d = normalize(f) + '/'

        # A normal file was named below this directory: keep the
        # directory itself in the list.
        if any(normalize(mf).startswith(d) for mf in regulars):
            actualfiles.append(f)
            continue

        # If no normal match, manually append any matching largefiles
        matched = [lf for lf in lfiles if normalize(lf).startswith(d)]
        if matched:
            actualfiles.append(matched[0])
            # There may still be normal files in the dir, so
            # add a directory to the list, which
            # forces status/dirstate to walk all files and
            # call the match function on the matcher, even
            # on case sensitive filesystems.
            actualfiles.append('.')
            actualfiles.extend(matched[1:])
        else:
            # Nothing in dir, so readd it
            # and let commit reject it
            actualfiles.append(f)

    # Always add normal files
    actualfiles += regulars
    return actualfiles
# swap the class of the existing repository object in place, so every
# holder of "repo" gets the lfilesrepo methods
repo.__class__ = lfilesrepo

# stack of hooks being executed before committing.
# only last element ("_lfcommithooks[-1]") is used for each committing.
repo._lfcommithooks = [lfutil.updatestandinsbymatch]

# Stack of status writer functions taking "*msg, **opts" arguments
# like "ui.status()". Only last element ("_lfstatuswriters[-1]")
# is used to write status out.
repo._lfstatuswriters = [ui.status]
def prepushoutgoinghook(pushop):
    """Upload the largefiles needed by pushop before revisions are pushed.

    The revisions considered are ``pushop.lfrevs``, or
    ``pushop.outgoing.missing`` when that is None.  Nothing is done when
    the resulting collection is empty.
    """
    revs = pushop.lfrevs
    if revs is None:
        revs = pushop.outgoing.missing
    if not revs:
        return

    hashes = set()

    def collect(fn, lfhash):
        # only the hash is needed for uploading; the file name is ignored
        hashes.add(lfhash)

    lfutil.getlfilestoupload(pushop.repo, revs, collect)
    lfcommands.uploadlfiles(ui, pushop.repo, pushop.remote, hashes)

repo.prepushoutgoinghooks.add("largefiles", prepushoutgoinghook)
def checkrequireslfiles(ui, repo, **kwargs):
    '''Add the 'largefiles' requirement once the store holds standins.

    Does nothing when the requirement is already present.  Otherwise the
    requirement is added and written out as soon as one store data file
    whose name contains ``lfutil.shortname + '/'`` is found.
    '''
    if 'largefiles' in repo.requirements:
        return

    # NOTE(review): this is a substring test, so it presumably also
    # matches unrelated directories whose name merely ends with the
    # standin directory name -- confirm whether that matters.
    marker = lfutil.shortname + '/'
    for entry in repo.store.datafiles():
        if marker in entry[0]:
            repo.requirements.add('largefiles')
            repo._writerequirements()
            return

ui.setconfig('hooks', 'changegroup.lfiles', checkrequireslfiles,
             'largefiles')
ui.setconfig('hooks', 'commit.lfiles', checkrequireslfiles, 'largefiles')