test_data_structures.py
241 lines
| 8.7 KiB
| text/x-python
|
PythonLexer
Gregory Szorc
|
r37513 | import sys | ||
import unittest | ||||
Gregory Szorc
|
r30435 | |||
Gregory Szorc
|
r37513 | import zstandard as zstd | ||
Gregory Szorc
|
r30435 | |||
Gregory Szorc
|
r44446 | from .common import ( | ||
Gregory Szorc
|
r30895 | make_cffi, | ||
Gregory Szorc
|
r44446 | TestCase, | ||
Gregory Szorc
|
r30895 | ) | ||
@make_cffi
class TestCompressionParameters(TestCase):
    """Construction and attribute read-back for ``zstd.ZstdCompressionParameters``."""

    def test_bounds(self):
        # Building parameter objects from the exported *_MIN / *_MAX constants
        # must not raise; nothing beyond successful construction is asserted.
        # NOTE(review): min_match is set one step inside MINMATCH_MIN /
        # MINMATCH_MAX; the reason is not visible in this file.
        smallest = dict(
            window_log=zstd.WINDOWLOG_MIN,
            chain_log=zstd.CHAINLOG_MIN,
            hash_log=zstd.HASHLOG_MIN,
            search_log=zstd.SEARCHLOG_MIN,
            min_match=zstd.MINMATCH_MIN + 1,
            target_length=zstd.TARGETLENGTH_MIN,
            strategy=zstd.STRATEGY_FAST,
        )
        zstd.ZstdCompressionParameters(**smallest)

        largest = dict(
            window_log=zstd.WINDOWLOG_MAX,
            chain_log=zstd.CHAINLOG_MAX,
            hash_log=zstd.HASHLOG_MAX,
            search_log=zstd.SEARCHLOG_MAX,
            min_match=zstd.MINMATCH_MAX - 1,
            target_length=zstd.TARGETLENGTH_MAX,
            strategy=zstd.STRATEGY_BTULTRA2,
        )
        zstd.ZstdCompressionParameters(**largest)

    def test_from_level(self):
        # from_level() returns a parameters instance; levels 1 and -4 are both
        # expected to report a window_log of 19.
        level_one = zstd.ZstdCompressionParameters.from_level(1)
        self.assertIsInstance(level_one, zstd.CompressionParameters)
        self.assertEqual(level_one.window_log, 19)

        negative_level = zstd.ZstdCompressionParameters.from_level(-4)
        self.assertEqual(negative_level.window_log, 19)

    def test_members(self):
        # Each row is (constructor kwargs, ordered (attribute, expected) pairs).
        # Several rows read one setting back under two attribute names
        # (overlap_log / overlap_size_log, ldm_hash_rate_log /
        # ldm_hash_every_log) and ``strategy`` is read back as
        # ``compression_strategy``.
        cases = (
            (
                dict(
                    window_log=10,
                    chain_log=6,
                    hash_log=7,
                    search_log=4,
                    min_match=5,
                    target_length=8,
                    strategy=1,
                ),
                (
                    ("window_log", 10),
                    ("chain_log", 6),
                    ("hash_log", 7),
                    ("search_log", 4),
                    ("min_match", 5),
                    ("target_length", 8),
                    ("compression_strategy", 1),
                ),
            ),
            (dict(compression_level=2), (("compression_level", 2),)),
            (dict(threads=4), (("threads", 4),)),
            (
                dict(threads=2, job_size=1048576, overlap_log=6),
                (
                    ("threads", 2),
                    ("job_size", 1048576),
                    ("overlap_log", 6),
                    ("overlap_size_log", 6),
                ),
            ),
            (dict(compression_level=-1), (("compression_level", -1),)),
            (dict(compression_level=-2), (("compression_level", -2),)),
            # Boolean flags are compared against the integer 1.
            (dict(force_max_window=True), (("force_max_window", 1),)),
            (dict(enable_ldm=True), (("enable_ldm", 1),)),
            (dict(ldm_hash_log=7), (("ldm_hash_log", 7),)),
            (dict(ldm_min_match=6), (("ldm_min_match", 6),)),
            (dict(ldm_bucket_size_log=7), (("ldm_bucket_size_log", 7),)),
            (
                dict(ldm_hash_rate_log=8),
                (("ldm_hash_every_log", 8), ("ldm_hash_rate_log", 8)),
            ),
        )

        for kwargs, expectations in cases:
            built = zstd.ZstdCompressionParameters(**kwargs)
            for attribute, expected in expectations:
                self.assertEqual(getattr(built, attribute), expected)

    def test_estimated_compression_context_size(self):
        params = zstd.ZstdCompressionParameters(
            window_log=20,
            chain_log=16,
            hash_log=17,
            search_log=1,
            min_match=5,
            target_length=16,
            strategy=zstd.STRATEGY_DFAST,
        )
        estimate = params.estimated_compression_context_size()

        # 32-bit has slightly different values from 64-bit, hence the
        # tolerance instead of an exact comparison.
        self.assertAlmostEqual(estimate, 1294464, delta=400)

    def test_strategy(self):
        # Passing both spellings of the strategy argument is rejected.
        with self.assertRaisesRegex(
            ValueError, "cannot specify both compression_strategy"
        ):
            zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0)

        # ``strategy`` alone is accepted and read back as
        # ``compression_strategy``.
        for value in (2, 3):
            params = zstd.ZstdCompressionParameters(strategy=value)
            self.assertEqual(params.compression_strategy, value)

    def test_ldm_hash_rate_log(self):
        # Passing both spellings of the argument is rejected.
        with self.assertRaisesRegex(
            ValueError, "cannot specify both ldm_hash_rate_log"
        ):
            zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4)

        # Either spelling alone is readable through ldm_hash_every_log.
        via_rate = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
        self.assertEqual(via_rate.ldm_hash_every_log, 8)

        via_every = zstd.ZstdCompressionParameters(ldm_hash_every_log=16)
        self.assertEqual(via_every.ldm_hash_every_log, 16)

    def test_overlap_log(self):
        # Passing both spellings of the argument is rejected.
        with self.assertRaisesRegex(ValueError, "cannot specify both overlap_log"):
            zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9)

        # Either spelling alone is readable through both attribute names.
        via_new_name = zstd.ZstdCompressionParameters(overlap_log=2)
        self.assertEqual(via_new_name.overlap_log, 2)
        self.assertEqual(via_new_name.overlap_size_log, 2)

        via_old_name = zstd.ZstdCompressionParameters(overlap_size_log=4)
        self.assertEqual(via_old_name.overlap_log, 4)
        self.assertEqual(via_old_name.overlap_size_log, 4)
Gregory Szorc
|
r30895 | |||
@make_cffi
class TestFrameParameters(TestCase):
    """Checks for ``zstd.get_frame_parameters`` on valid and invalid input."""

    def test_invalid_type(self):
        with self.assertRaises(TypeError):
            zstd.get_frame_parameters(None)

        # Python 3 doesn't appear to convert unicode to Py_buffer.
        # On Python 2, CPython will convert unicode to Py_buffer. But CFFI
        # won't, so only the non-CFFI backend gets far enough to raise
        # ZstdError.
        if sys.version_info[0] >= 3:
            expected_error = TypeError
        elif zstd.backend == "cffi":
            expected_error = TypeError
        else:
            expected_error = zstd.ZstdError

        with self.assertRaises(expected_error):
            zstd.get_frame_parameters(u"foobarbaz")

    def test_invalid_input_sizes(self):
        too_short = "not enough data for frame"

        # Empty input.
        with self.assertRaisesRegex(zstd.ZstdError, too_short):
            zstd.get_frame_parameters(b"")

        # Only the FRAME_HEADER constant, with nothing after it.
        with self.assertRaisesRegex(zstd.ZstdError, too_short):
            zstd.get_frame_parameters(zstd.FRAME_HEADER)

    def test_invalid_frame(self):
        # Input that does not begin with FRAME_HEADER.
        with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"):
            zstd.get_frame_parameters(b"foobarbaz")

    def test_attributes(self):
        unknown = zstd.CONTENTSIZE_UNKNOWN

        # Each row: (bytes appended to FRAME_HEADER, expected content_size,
        # expected window_size, expected dict_id, expected has_checksum).
        cases = (
            # Baseline: nothing set.
            (b"\x00\x00", unknown, 1024, 0, False),
            # Lowest 2 bits indicate a dictionary and length. Here, the dict
            # id is 1 byte.
            (b"\x01\x00\xff", unknown, 1024, 255, False),
            # Lowest 3rd bit indicates if checksum is present.
            (b"\x04\x00", unknown, 1024, 0, True),
            # Upper 2 bits indicate content size.
            (b"\x40\x00\xff\x00", 511, 1024, 0, False),
            # Window descriptor is 2nd byte after frame header.
            (b"\x00\x40", unknown, 262144, 0, False),
            # Set multiple things.
            (b"\x45\x40\x0f\x10\x00", 272, 262144, 15, True),
        )

        for trailer, content_size, window_size, dict_id, has_checksum in cases:
            params = zstd.get_frame_parameters(zstd.FRAME_HEADER + trailer)
            self.assertEqual(params.content_size, content_size)
            self.assertEqual(params.window_size, window_size)
            self.assertEqual(params.dict_id, dict_id)
            # Truthiness check, matching assertTrue / assertFalse semantics.
            if has_checksum:
                self.assertTrue(params.has_checksum)
            else:
                self.assertFalse(params.has_checksum)

    def test_input_types(self):
        # The same header bytes must parse when handed over as a memoryview
        # or a bytearray rather than bytes.
        header = zstd.FRAME_HEADER + b"\x00\x00"

        # A bytearray allocated first and filled afterwards via slice
        # assignment.
        filled_later = bytearray(len(header))
        filled_later[:] = header

        candidates = (
            memoryview(header),
            bytearray(header),
            filled_later,
        )

        for candidate in candidates:
            params = zstd.get_frame_parameters(candidate)
            self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
            self.assertEqual(params.window_size, 1024)
            self.assertEqual(params.dict_id, 0)
            self.assertFalse(params.has_checksum)