test-remotefilelog-datapack.py
379 lines
| 12.1 KiB
| text/x-python
|
PythonLexer
/ tests / test-remotefilelog-datapack.py
Augie Fackler
|
r40530 | #!/usr/bin/env python | ||
from __future__ import absolute_import, print_function | ||||
import hashlib | ||||
import os | ||||
import random | ||||
import shutil | ||||
import stat | ||||
import struct | ||||
import sys | ||||
import tempfile | ||||
import time | ||||
import unittest | ||||
import silenttestrunner | ||||
# Load the local remotefilelog, not the system one | ||||
sys.path[0:0] = [os.path.join(os.path.dirname(__file__), '..')] | ||||
from mercurial.node import nullid | ||||
from mercurial import ( | ||||
Augie Fackler
|
r41290 | pycompat, | ||
Augie Fackler
|
r40530 | ui as uimod, | ||
) | ||||
from hgext.remotefilelog import ( | ||||
basepack, | ||||
constants, | ||||
datapack, | ||||
) | ||||
class datapacktestsbase(object):
    """Datapack test cases shared by every pack reader implementation.

    This class is a mixin: a concrete subclass combines it with
    ``unittest.TestCase`` (which supplies the ``assert*`` methods used here)
    and chooses the reader under test through ``__init__``.
    """

    def __init__(self, datapackreader, paramsavailable):
        """Record the reader under test.

        datapackreader: callable that takes a pack path and returns a reader.
        paramsavailable: True when the reader exposes ``params``; the fanout
        prefix assertions are only made in that case.
        """
        self.datapackreader = datapackreader
        self.paramsavailable = paramsavailable

    def setUp(self):
        """Start each test with an empty list of temp dirs to clean up."""
        self.tempdirs = []

    def tearDown(self):
        """Remove every directory handed out by makeTempDir()."""
        for d in self.tempdirs:
            shutil.rmtree(d)

    def makeTempDir(self):
        """Create a temp directory that tearDown() will delete."""
        tempdir = tempfile.mkdtemp()
        self.tempdirs.append(tempdir)
        return tempdir

    def getHash(self, content):
        """Return the binary sha1 digest of ``content``."""
        return hashlib.sha1(content).digest()

    def getFakeHash(self):
        """Return 20 random bytes, the same length as a sha1 digest.

        The value is built through ``bytearray`` so that it is ``bytes`` on
        Python 3 as well as on Python 2. Joining ``chr()`` results would
        produce a text string on Python 3, which cannot stand in for the
        binary digests returned by getHash().
        """
        return bytes(bytearray(random.randint(0, 255) for _ in range(20)))

    def createPack(self, revisions=None, packdir=None):
        """Write ``revisions`` into a new pack and return a reader for it.

        revisions: iterable of ``(filename, node, base, content)`` tuples with
        an optional fifth ``metadata`` element. Defaults to a single revision
        with a random node.
        packdir: directory to write the pack into. Defaults to a fresh temp
        directory.
        """
        if revisions is None:
            revisions = [(b"filename", self.getFakeHash(), nullid, b"content")]

        if packdir is None:
            packdir = self.makeTempDir()

        packer = datapack.mutabledatapack(uimod.ui(), packdir, version=2)

        for args in revisions:
            filename, node, base, content = args[0:4]
            # meta is optional
            meta = None
            if len(args) > 4:
                meta = args[4]
            packer.add(filename, node, base, content, metadata=meta)

        path = packer.close()
        return self.datapackreader(path)

    def _testAddSingle(self, content):
        """Test putting a simple blob into a pack and reading it out.
        """
        filename = b"foo"
        node = self.getHash(content)

        revisions = [(filename, node, nullid, content)]
        pack = self.createPack(revisions)
        if self.paramsavailable:
            self.assertEqual(pack.params.fanoutprefix,
                             basepack.SMALLFANOUTPREFIX)

        chain = pack.getdeltachain(filename, node)
        self.assertEqual(content, chain[0][4])

    def testAddSingle(self):
        """Round-trip a single non-empty blob."""
        self._testAddSingle(b'abcdef')

    def testAddSingleEmpty(self):
        """Round-trip a single empty blob."""
        self._testAddSingle(b'')

    def testAddMultiple(self):
        """Test putting multiple unrelated blobs into a pack and reading them
        out.
        """
        revisions = []
        for i in range(10):
            filename = b"foo%d" % i
            content = b"abcdef%d" % i
            node = self.getHash(content)
            revisions.append((filename, node, self.getFakeHash(), content))

        pack = self.createPack(revisions)

        for filename, node, base, content in revisions:
            entry = pack.getdelta(filename, node)
            self.assertEqual((content, filename, base, {}), entry)

            chain = pack.getdeltachain(filename, node)
            self.assertEqual(content, chain[0][4])

    def testAddDeltas(self):
        """Test putting multiple delta blobs into a pack and read the chain.
        """
        revisions = []
        filename = b"foo"
        lastnode = nullid
        for i in range(10):
            content = b"abcdef%d" % i
            node = self.getHash(content)
            revisions.append((filename, node, lastnode, content))
            lastnode = node

        pack = self.createPack(revisions)

        entry = pack.getdelta(filename, revisions[0][1])
        realvalue = (revisions[0][3], filename, revisions[0][2], {})
        self.assertEqual(entry, realvalue)

        # Test that the chain for the final entry has all the others.
        # ``node`` still holds the last node created by the loop above.
        chain = pack.getdeltachain(filename, node)
        for i in range(10):
            content = b"abcdef%d" % i
            self.assertEqual(content, chain[-i - 1][4])

    def testPackMany(self):
        """Pack many related and unrelated objects.
        """
        # Build a random pack file
        revisions = []
        blobs = {}
        random.seed(0)
        for i in range(100):
            filename = b"filename-%d" % i
            filerevs = []
            for j in range(random.randint(1, 100)):
                content = b"content-%d" % j
                node = self.getHash(content)
                lastnode = nullid
                if len(filerevs) > 0:
                    lastnode = filerevs[random.randint(0, len(filerevs) - 1)]
                filerevs.append(node)
                blobs[(filename, node, lastnode)] = content
                revisions.append((filename, node, lastnode, content))

        pack = self.createPack(revisions)

        # Verify the pack contents
        for (filename, node, lastnode), content in sorted(blobs.items()):
            chain = pack.getdeltachain(filename, node)
            for entry in chain:
                expectedcontent = blobs[(entry[0], entry[1], entry[3])]
                self.assertEqual(entry[4], expectedcontent)

    def testPackMetadata(self):
        """Metadata written with each revision is returned by getmeta()."""
        revisions = []
        for i in range(100):
            filename = b'%d.txt' % i
            content = b'put-something-here \n' * i
            node = self.getHash(content)
            meta = {constants.METAKEYFLAG: i ** 4,
                    constants.METAKEYSIZE: len(content),
                    b'Z': b'random_string',
                    b'_': b'\0' * i}
            revisions.append((filename, node, nullid, content, meta))
        pack = self.createPack(revisions)
        for name, node, x, content, origmeta in revisions:
            parsedmeta = pack.getmeta(name, node)
            # flag == 0 should be optimized out
            if origmeta[constants.METAKEYFLAG] == 0:
                del origmeta[constants.METAKEYFLAG]
            self.assertEqual(parsedmeta, origmeta)

    def testGetMissing(self):
        """Test the getmissing() api.
        """
        revisions = []
        filename = b"foo"
        lastnode = nullid
        for i in range(10):
            content = b"abcdef%d" % i
            node = self.getHash(content)
            revisions.append((filename, node, lastnode, content))
            lastnode = node

        pack = self.createPack(revisions)

        missing = pack.getmissing([(b"foo", revisions[0][1])])
        self.assertFalse(missing)

        missing = pack.getmissing([(b"foo", revisions[0][1]),
                                   (b"foo", revisions[1][1])])
        self.assertFalse(missing)

        fakenode = self.getFakeHash()
        missing = pack.getmissing([(b"foo", revisions[0][1]),
                                   (b"foo", fakenode)])
        self.assertEqual(missing, [(b"foo", fakenode)])

    def testAddThrows(self):
        """add() on the reader returned by createPack() must raise."""
        pack = self.createPack()

        with self.assertRaises(RuntimeError):
            pack.add(b'filename', nullid, b'contents')

    def testBadVersionThrows(self):
        """Opening a pack whose first byte was overwritten must raise."""
        pack = self.createPack()
        path = pack.path + b'.datapack'
        # The pack is binary data: read and write it in binary mode so the
        # bytes produced by struct.pack() can be concatenated with it.
        with open(path, 'rb') as f:
            raw = f.read()
        raw = struct.pack('!B', 255) + raw[1:]
        # Add the write bit before rewriting the file in place.
        os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE)
        with open(path, 'wb+') as f:
            f.write(raw)

        with self.assertRaises(RuntimeError):
            self.datapackreader(pack.path)

    def testMissingDeltabase(self):
        """A revision whose delta base is not in the pack yields a chain of
        length one.
        """
        fakenode = self.getFakeHash()
        revisions = [(b"filename", fakenode, self.getFakeHash(), b"content")]
        pack = self.createPack(revisions)
        chain = pack.getdeltachain(b"filename", fakenode)
        self.assertEqual(len(chain), 1)

    def testLargePack(self):
        """Test creating and reading from a large pack with over X entries.
        This causes it to use a 2^16 fanout table instead."""
        revisions = []
        blobs = {}
        total = basepack.SMALLFANOUTCUTOFF + 1
        for i in pycompat.xrange(total):
            filename = b"filename-%d" % i
            content = filename
            node = self.getHash(content)
            blobs[(filename, node)] = content
            revisions.append((filename, node, nullid, content))

        pack = self.createPack(revisions)
        if self.paramsavailable:
            self.assertEqual(pack.params.fanoutprefix,
                             basepack.LARGEFANOUTPREFIX)

        for (filename, node), content in blobs.items():
            actualcontent = pack.getdeltachain(filename, node)[0][4]
            self.assertEqual(actualcontent, content)

    def testPacksCache(self):
        """Test that we remember the most recent packs while fetching the delta
        chain."""

        packdir = self.makeTempDir()
        deltachains = []

        numpacks = 10
        revisionsperpack = 100

        for i in range(numpacks):
            chain = []
            revision = (b'%d' % i, self.getFakeHash(), nullid, b"content")

            for _ in range(revisionsperpack):
                chain.append(revision)
                # Each new revision uses the previous node as its delta base.
                revision = (
                    b'%d' % i,
                    self.getFakeHash(),
                    revision[1],
                    self.getFakeHash()
                )

            self.createPack(chain, packdir)
            deltachains.append(chain)

        class testdatapackstore(datapack.datapackstore):
            # Ensures that we are not keeping everything in the cache.
            # Floor division keeps the size an int on Python 3.
            DEFAULTCACHESIZE = numpacks // 2

        store = testdatapackstore(uimod.ui(), packdir)

        random.shuffle(deltachains)
        for randomchain in deltachains:
            revision = random.choice(randomchain)
            chain = store.getdeltachain(revision[0], revision[1])

            mostrecentpack = next(iter(store.packs), None)
            self.assertEqual(
                mostrecentpack.getdeltachain(revision[0], revision[1]),
                chain
            )

            self.assertEqual(randomchain.index(revision) + 1, len(chain))

    # perf test off by default since it's slow
    def _testIndexPerf(self):
        """Print getmissing() timings for several pack and lookup sizes.

        Always raises RuntimeError at the end so that the runner shows the
        printed output.
        """
        import gc

        random.seed(0)
        print("Multi-get perf test")
        packsizes = [
            100,
            10000,
            100000,
            500000,
            1000000,
            3000000,
        ]
        lookupsizes = [
            10,
            100,
            1000,
            10000,
            100000,
            1000000,
        ]
        for packsize in packsizes:
            revisions = []
            for i in pycompat.xrange(packsize):
                filename = b"filename-%d" % i
                content = b"content-%d" % i
                node = self.getHash(content)
                revisions.append((filename, node, nullid, content))

            path = self.createPack(revisions).path

            # Perf of large multi-get
            gc.disable()
            try:
                pack = self.datapackreader(path)
                for lookupsize in lookupsizes:
                    if lookupsize > packsize:
                        continue
                    random.shuffle(revisions)
                    findnodes = [(rev[0], rev[1]) for rev in revisions]

                    start = time.time()
                    pack.getmissing(findnodes[:lookupsize])
                    elapsed = time.time() - start
                    # Both sizes are pre-formatted, right-justified strings,
                    # so they must be interpolated with %s, not %d.
                    print("%s pack %s lookups = %0.04f" %
                          (('%d' % packsize).rjust(7),
                           ('%d' % lookupsize).rjust(7),
                           elapsed))

                print("")
            finally:
                # Re-enable the collector even if a lookup raised.
                gc.enable()

        # The perf test is meant to produce output, so we always fail the test
        # so the user sees the output.
        raise RuntimeError("perf test always fails")
class datapacktests(datapacktestsbase, unittest.TestCase):
    """Run the shared datapack tests against ``datapack.datapack``."""

    def __init__(self, *args, **kwargs):
        # The two bases take different constructor arguments, so each one is
        # initialised explicitly: first the mixin, then the TestCase.
        datapacktestsbase.__init__(
            self,
            datapackreader=datapack.datapack,
            paramsavailable=True,
        )
        unittest.TestCase.__init__(self, *args, **kwargs)
# TODO: | ||||
# datapack store: | ||||
# - getmissing | ||||
# - GC two packs into one | ||||
if __name__ == '__main__':
    if not pycompat.iswindows:
        silenttestrunner.main(__name__)
    else:
        # Skip on Windows: exit with status 80 instead of running the tests.
        sys.exit(80)