copies: move from a copy on branchpoint to a copy on write approach

Before this change, any branch point resulted in a copy of the dictionary
containing the copy information. This can be very costly for branchy history
with little rename information. Instead, we take a "copy on write" approach,
copying the input data only when we are about to update it. In practice we
were already doing the copying in half of these cases (because `_chain` makes
a copy), so we do not add a significant cost here even in the linear case.
However, the speedup in the branchy case is very significant. Here are some
timings from the pypy repository.

revisions: large; added files: large; renames: small; c3b14617fbd7 9ba6ab77fd29
before: ! wall 1.399863 comb 1.400000 user 1.370000 sys 0.030000 (median of 10)
after:  ! wall 0.766453 comb 0.770000 user 0.750000 sys 0.020000 (median of 11)

revisions: large; added files: small; renames: small; c3b14617fbd7 f650a9b140d2
before: ! wall 1.876748 comb 1.890000 user 1.870000 sys 0.020000 (median of 10)
after:  ! wall 1.167223 comb 1.170000 user 1.150000 sys 0.020000 (median of 10)

revisions: large; added files: large; renames: large; 08ea3258278e d9fa043f30c0
before: ! wall 0.242457 comb 0.240000 user 0.240000 sys 0.000000 (median of 39)
after:  ! wall 0.211476 comb 0.210000 user 0.210000 sys 0.000000 (median of 45)

revisions: small; added files: large; renames: large; df6f7a526b60 a83dc6a2d56f
before: ! wall 0.013193 comb 0.020000 user 0.020000 sys 0.000000 (median of 224)
after:  ! wall 0.013290 comb 0.010000 user 0.010000 sys 0.000000 (median of 222)

revisions: small; added files: large; renames: small; 4aa4e1f8e19a 169138063d63
before: ! wall 0.001673 comb 0.000000 user 0.000000 sys 0.000000 (median of 1000)
after:  ! wall 0.001677 comb 0.000000 user 0.000000 sys 0.000000 (median of 1000)

revisions: small; added files: small; renames: small; 4bc173b045a6 964879152e2e
before: ! wall 0.000119 comb 0.000000 user 0.000000 sys 0.000000 (median of 8023)
after:  ! wall 0.000119 comb 0.000000 user 0.000000 sys 0.000000 (median of 7997)

revisions: medium; added files: large; renames: medium; c95f1ced15f2 2c68e87c3efe
before: ! wall 0.201898 comb 0.210000 user 0.200000 sys 0.010000 (median of 48)
after:  ! wall 0.167415 comb 0.170000 user 0.160000 sys 0.010000 (median of 58)

revisions: medium; added files: medium; renames: small; d343da0c55a8 d7746d32bf9d
before: ! wall 0.036820 comb 0.040000 user 0.040000 sys 0.000000 (median of 100)
after:  ! wall 0.035797 comb 0.040000 user 0.040000 sys 0.000000 (median of 100)

The extra cost in the linear case can be reclaimed later with some extra logic.

Differential Revision: https://phab.mercurial-scm.org/D7124
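
To make the idea concrete, here is a minimal copy-on-write sketch in Python.
It assumes a simplified dict-based model; CopyOnWriteDict and its methods are
illustrative only, not Mercurial's actual copies.py code:

    class CopyOnWriteDict(object):
        """Share one dict across branches; clone it only on first write."""

        def __init__(self):
            self._data = {}
            self._shared = False

        def branch(self):
            # A branch point now just shares the underlying dict.
            child = CopyOnWriteDict()
            child._data = self._data
            child._shared = True
            self._shared = True  # parent must also copy before its next write
            return child

        def get(self, key, default=None):
            return self._data.get(key, default)

        def set(self, key, value):
            # Copy the input data only when we are about to update it.
            if self._shared:
                self._data = dict(self._data)
                self._shared = False
            self._data[key] = value

    base = CopyOnWriteDict()
    base.set('renamed', 'original')
    a = base.branch()
    b = base.branch()          # no dict is copied at either branch point
    a.set('moved', 'source')   # the first write clones the dict for 'a' only
    assert b.get('moved') is None
    assert b.get('renamed') == 'original'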

contrib/python-zstandard/tests/test_data_structures_fuzzing.py
import io
import os
import sys
import unittest

try:
    import hypothesis
    import hypothesis.strategies as strategies
except ImportError:
    raise unittest.SkipTest('hypothesis not available')

import zstandard as zstd

from .common import (
    make_cffi,
)
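
# Hypothesis strategies drawing each compression parameter from the full
# valid range advertised by the bindings' *_MIN/*_MAX constants.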
s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
                                  max_value=zstd.WINDOWLOG_MAX)
s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
                                 max_value=zstd.CHAINLOG_MAX)
s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
                                max_value=zstd.HASHLOG_MAX)
s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
                                  max_value=zstd.SEARCHLOG_MAX)
s_minmatch = strategies.integers(min_value=zstd.MINMATCH_MIN,
                                 max_value=zstd.MINMATCH_MAX)
s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
                                     max_value=zstd.TARGETLENGTH_MAX)
s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
                                      zstd.STRATEGY_DFAST,
                                      zstd.STRATEGY_GREEDY,
                                      zstd.STRATEGY_LAZY,
                                      zstd.STRATEGY_LAZY2,
                                      zstd.STRATEGY_BTLAZY2,
                                      zstd.STRATEGY_BTOPT,
                                      zstd.STRATEGY_BTULTRA,
                                      zstd.STRATEGY_BTULTRA2))
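

# hypothesis feeds each test below randomly generated parameter combinations.
# The class only runs when ZSTD_SLOW_TESTS is set, since the search space is
# large; @make_cffi additionally exercises the cffi backend variant.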
@make_cffi
@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
class TestCompressionParametersHypothesis(unittest.TestCase):
    @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
                      s_minmatch, s_targetlength, s_strategy)
    def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
                        minmatch, targetlength, strategy):
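        # Any combination drawn from the valid ranges should construct
        # without raising; successful construction is the whole assertion.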
        zstd.ZstdCompressionParameters(window_log=windowlog,
                                       chain_log=chainlog,
                                       hash_log=hashlog,
                                       search_log=searchlog,
                                       min_match=minmatch,
                                       target_length=targetlength,
                                       strategy=strategy)

    @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
                      s_minmatch, s_targetlength, s_strategy)
    def test_estimated_compression_context_size(self, windowlog, chainlog,
                                                hashlog, searchlog,
                                                minmatch, targetlength,
                                                strategy):
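        # Nudge minmatch off boundary values that zstd handles specially for
        # these strategies; the rationale (that the library would otherwise
        # silently adjust the value) is an assumption from upstream zstd
        # parameter rules.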
        if minmatch == zstd.MINMATCH_MIN and strategy in (zstd.STRATEGY_FAST,
                                                          zstd.STRATEGY_GREEDY):
            minmatch += 1
        elif minmatch == zstd.MINMATCH_MAX and strategy != zstd.STRATEGY_FAST:
            minmatch -= 1

        p = zstd.ZstdCompressionParameters(window_log=windowlog,
                                           chain_log=chainlog,
                                           hash_log=hashlog,
                                           search_log=searchlog,
                                           min_match=minmatch,
                                           target_length=targetlength,
                                           strategy=strategy)
        # The test passes as long as the estimate can be computed without
        # raising; the returned size itself is not asserted on.
        size = p.estimated_compression_context_size()
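
# These fuzz tests are skipped unless ZSTD_SLOW_TESTS is set in the
# environment. One plausible invocation, assuming pytest is installed and the
# package-relative imports resolve (e.g. run from contrib/python-zstandard):
#
#   ZSTD_SLOW_TESTS=1 python -m pytest tests/test_data_structures_fuzzing.py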