##// END OF EJS Templates
phabricator: use True primitive instead of b'true' for phabupdate actions...
phabricator: use True primitive instead of b'true' for phabupdate actions Something I'd missed in the creatediff port. This didn't matter before with the old PHP form style wireformat, but breaks with the new arcanist format. Differential Revision: https://phab.mercurial-scm.org/D7152

File last commit:

r43207:69de49c4 default
r43659:e57bf37e stable
Show More
zstd_opt.c
1246 lines | 56.2 KiB | text/x-c | CLexer
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 /*
* Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "zstd_compress_internal.h"
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 #include "hist.h"
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 #include "zstd_opt.h"
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
#define ZSTD_MAX_PRICE (1<<30)
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
/*-*************************************
* Price functions for optimal parser
***************************************/
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157
#if 0 /* approximation at bit level */
# define BITCOST_ACCURACY 0
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
#elif 0 /* fractional bit accuracy */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
#else /* opt==approx, ultra==accurate */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
}
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 U32 const stat = rawStat + 1;
U32 const hb = ZSTD_highbit32(stat);
U32 const BWeight = hb * BITCOST_MULTIPLIER;
U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
U32 const weight = BWeight + FWeight;
assert(hb + BITCOST_ACCURACY < 31);
return weight;
}
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 #if (DEBUGLEVEL>=2)
/* debugging function,
* @return price in bytes as fractional value
* for debug messages only */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 MEM_STATIC double ZSTD_fCost(U32 price)
{
return (double)price / (BITCOST_MULTIPLIER*8);
}
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 #endif
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
}
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 if (ZSTD_compressedLiterals(optPtr))
optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 /* ZSTD_downscaleStat() :
* reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
* return the resulting sum of elements */
static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 {
U32 s, sum=0;
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 for (s=0; s<lastEltIndex+1; s++) {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
sum += table[s];
}
return sum;
}
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 /* ZSTD_rescaleFreqs() :
* if first block (detected by optPtr->litLengthSum == 0) : init statistics
* take hints from dictionary if there is one
* or init from zero, using src for literals stats, or flat 1 for match symbols
* otherwise downscale existing stats, to be used as seed for next block.
*/
static void
ZSTD_rescaleFreqs(optState_t* const optPtr,
const BYTE* const src, size_t const srcSize,
int const optLevel)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 optPtr->priceType = zop_dynamic;
if (optPtr->litLengthSum == 0) { /* first block : init */
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 optPtr->priceType = zop_predef;
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 }
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157
assert(optPtr->symbolCosts != NULL);
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
/* huffman table presumed generated by dictionary */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 optPtr->priceType = zop_dynamic;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 if (compressedLiterals) {
unsigned lit;
assert(optPtr->litFreq != NULL);
optPtr->litSum = 0;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 for (lit=0; lit<=MaxLit; lit++) {
U32 const scaleLog = 11; /* scale to 2K */
U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
assert(bitCost <= scaleLog);
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
optPtr->litSum += optPtr->litFreq[lit];
} }
{ unsigned ll;
FSE_CState_t llstate;
FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
optPtr->litLengthSum = 0;
for (ll=0; ll<=MaxLL; ll++) {
U32 const scaleLog = 10; /* scale to 1K */
U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
assert(bitCost < scaleLog);
optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
optPtr->litLengthSum += optPtr->litLengthFreq[ll];
} }
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 { unsigned ml;
FSE_CState_t mlstate;
FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
optPtr->matchLengthSum = 0;
for (ml=0; ml<=MaxML; ml++) {
U32 const scaleLog = 10;
U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
assert(bitCost < scaleLog);
optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
} }
{ unsigned of;
FSE_CState_t ofstate;
FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
optPtr->offCodeSum = 0;
for (of=0; of<=MaxOff; of++) {
U32 const scaleLog = 10;
U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
assert(bitCost < scaleLog);
optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
optPtr->offCodeSum += optPtr->offCodeFreq[of];
} }
} else { /* not a dictionary */
assert(optPtr->litFreq != NULL);
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 if (compressedLiterals) {
unsigned lit = MaxLit;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 }
{ unsigned ll;
for (ll=0; ll<=MaxLL; ll++)
optPtr->litLengthFreq[ll] = 1;
}
optPtr->litLengthSum = MaxLL+1;
{ unsigned ml;
for (ml=0; ml<=MaxML; ml++)
optPtr->matchLengthFreq[ml] = 1;
}
optPtr->matchLengthSum = MaxML+1;
{ unsigned of;
for (of=0; of<=MaxOff; of++)
optPtr->offCodeFreq[of] = 1;
}
optPtr->offCodeSum = MaxOff+1;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 } else { /* new block : re-use previous statistics, scaled down */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 if (compressedLiterals)
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 ZSTD_setBasePrices(optPtr, optLevel);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
/* ZSTD_rawLiteralsCost() :
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 * price of literals (only) in specified segment (which length can be 0).
* does not include price of literalLength symbol */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const optState_t* const optPtr,
int optLevel)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
if (litLength == 0) return 0;
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207
if (!ZSTD_compressedLiterals(optPtr))
return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if (optPtr->priceType == zop_predef)
return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 /* dynamic statistics */
{ U32 price = litLength * optPtr->litSumBasePrice;
U32 u;
for (u=0; u < litLength; u++) {
assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
}
return price;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
}
/* ZSTD_litLengthPrice() :
* cost of literalLength symbol */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 /* dynamic statistics */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 { U32 const llCode = ZSTD_LLcode(litLength);
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 return (LL_bits[llCode] * BITCOST_MULTIPLIER)
+ optPtr->litLengthSumBasePrice
- WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
}
/* ZSTD_litLengthContribution() :
* @return ( cost(litlength) - cost(0) )
* this value can then be added to rawLiteralsCost()
* to provide a cost which is directly comparable to a match ending at same position */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 /* dynamic statistics */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 { U32 const llCode = ZSTD_LLcode(litLength);
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
+ (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
- (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 #if 1
return contribution;
#else
return MAX(0, contribution); /* sometimes better, sometimes not ... */
#endif
}
}
/* ZSTD_literalsContribution() :
* creates a fake cost for the literals part of a sequence
* which can be compared to the ending cost of a match
* should a new match start at this position */
static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const optState_t* const optPtr,
int optLevel)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 return contribution;
}
/* ZSTD_getMatchPrice() :
* Provides the cost of the match part (offset + matchLength) of a sequence
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
* optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offset,
U32 const matchLength,
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 const optState_t* const optPtr,
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 int const optLevel)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
U32 price;
U32 const offCode = ZSTD_highbit32(offset+1);
U32 const mlBase = matchLength - MINMATCH;
assert(matchLength >= MINMATCH);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 /* dynamic statistics */
price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
if ((optLevel<2) /*static*/ && offCode >= 20)
price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
/* match Length */
{ U32 const mlCode = ZSTD_MLcode(mlBase);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
return price;
}
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 /* ZSTD_updateStats() :
* assumption : literals + litLengtn <= iend */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 static void ZSTD_updateStats(optState_t* const optPtr,
U32 litLength, const BYTE* literals,
U32 offsetCode, U32 matchLength)
{
/* literals */
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 if (ZSTD_compressedLiterals(optPtr)) {
U32 u;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 for (u=0; u < litLength; u++)
optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
}
/* literal Length */
{ U32 const llCode = ZSTD_LLcode(litLength);
optPtr->litLengthFreq[llCode]++;
optPtr->litLengthSum++;
}
/* match offset code (0-2=>repCode; 3+=>offset+2) */
{ U32 const offCode = ZSTD_highbit32(offsetCode+1);
assert(offCode <= MaxOff);
optPtr->offCodeFreq[offCode]++;
optPtr->offCodeSum++;
}
/* match Length */
{ U32 const mlBase = matchLength - MINMATCH;
U32 const mlCode = ZSTD_MLcode(mlBase);
optPtr->matchLengthFreq[mlCode]++;
optPtr->matchLengthSum++;
}
}
/* ZSTD_readMINMATCH() :
* function safe only for comparisons
* assumption : memPtr must be at least 4 bytes before end of buffer */
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
{
switch (length)
{
default :
case 4 : return MEM_read32(memPtr);
case 3 : if (MEM_isLittleEndian())
return MEM_read32(memPtr)<<8;
else
return MEM_read32(memPtr)>>8;
}
}
/* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
U32* const hashTable3 = ms->hashTable3;
U32 const hashLog3 = ms->hashLog3;
const BYTE* const base = ms->window.base;
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 U32 idx = *nextToUpdate3;
U32 const target = (U32)(ip - base);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
assert(hashLog3 > 0);
while(idx < target) {
hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
idx++;
}
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 *nextToUpdate3 = target;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 return hashTable3[hash3];
}
/*-*************************************
* Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
* ip : assumed <= iend-8 .
* @return : nb of positions added */
static U32 ZSTD_insertBt1(
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 ZSTD_matchState_t* ms,
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 const BYTE* const ip, const BYTE* const iend,
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 U32 const mls, const int extDict)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const ZSTD_compressionParameters* const cParams = &ms->cParams;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 U32* const hashTable = ms->hashTable;
U32 const hashLog = cParams->hashLog;
size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
U32 const btMask = (1 << btLog) - 1;
U32 matchIndex = hashTable[h];
size_t commonLengthSmaller=0, commonLengthLarger=0;
const BYTE* const base = ms->window.base;
const BYTE* const dictBase = ms->window.dictBase;
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* match;
const U32 current = (U32)(ip-base);
const U32 btLow = btMask >= current ? 0 : current - btMask;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = smallerPtr + 1;
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit;
U32 matchEndIdx = current+8+1;
size_t bestLength = 8;
U32 nbCompares = 1U << cParams->searchLog;
#ifdef ZSTD_C_PREDICT
U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
predictedSmall += (predictedSmall>0);
predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
assert(ip <= iend-8); /* required for h calculation */
hashTable[h] = current; /* Update Hash Table */
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 assert(windowLow > 0);
while (nbCompares-- && (matchIndex >= windowLow)) {
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 U32* const nextPtr = bt + 2*(matchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
assert(matchIndex < current);
#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
if (matchIndex == predictedSmall) {
/* no need to check length, result known */
*smallerPtr = matchIndex;
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
predictedSmall = predictPtr[1] + (predictPtr[1]>0);
continue;
}
if (matchIndex == predictedLarge) {
*largerPtr = matchIndex;
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
largerPtr = nextPtr;
matchIndex = nextPtr[0];
predictedLarge = predictPtr[0] + (predictPtr[0]>0);
continue;
}
#endif
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if (!extDict || (matchIndex+matchLength >= dictLimit)) {
assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 match = base + matchIndex;
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
} else {
match = dictBase + matchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
if (matchIndex+matchLength >= dictLimit)
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
}
if (matchLength > bestLength) {
bestLength = matchLength;
if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength;
}
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
}
if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
/* match is smaller than current */
*smallerPtr = matchIndex; /* update smaller idx */
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
} else {
/* match is larger than current */
*largerPtr = matchIndex;
commonLengthLarger = matchLength;
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
largerPtr = nextPtr;
matchIndex = nextPtr[0];
} }
*smallerPtr = *largerPtr = 0;
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 { U32 positions = 0;
if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
assert(matchEndIdx > current + 8);
return MAX(positions, matchEndIdx - (current + 8));
}
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
FORCE_INLINE_TEMPLATE
void ZSTD_updateTree_internal(
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 ZSTD_matchState_t* ms,
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 const BYTE* const ip, const BYTE* const iend,
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const U32 mls, const ZSTD_dictMode_e dictMode)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
const BYTE* const base = ms->window.base;
U32 const target = (U32)(ip - base);
U32 idx = ms->nextToUpdate;
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 idx, target, dictMode);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 while(idx < target) {
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
assert(idx < (U32)(idx + forward));
idx += forward;
}
assert((size_t)(ip - base) <= (size_t)(U32)(-1));
assert((size_t)(iend - base) <= (size_t)(U32)(-1));
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 ms->nextToUpdate = target;
}
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 ZSTD_matchState_t* ms,
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 U32* nextToUpdate3,
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 const U32 rep[ZSTD_REP_NUM],
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
const U32 lengthToBeat,
U32 const mls /* template */)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const ZSTD_compressionParameters* const cParams = &ms->cParams;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const hashLog = cParams->hashLog;
U32 const minMatch = (mls==3) ? 3 : 4;
U32* const hashTable = ms->hashTable;
size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
U32 matchIndex = hashTable[h];
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
U32 const btMask= (1U << btLog) - 1;
size_t commonLengthSmaller=0, commonLengthLarger=0;
const BYTE* const dictBase = ms->window.dictBase;
U32 const dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 U32 const btLow = (btMask >= current) ? 0 : current - btMask;
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 U32 const matchLow = windowLow ? windowLow : 1;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
U32 dummy32; /* to be nullified at the end */
U32 mnum = 0;
U32 nbCompares = 1U << cParams->searchLog;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
const ZSTD_compressionParameters* const dmsCParams =
dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 size_t bestLength = lengthToBeat-1;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
/* check repCode */
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 assert(ll0 <= 1); /* necessarily 1 or 0 */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 { U32 const lastR = ZSTD_REP_NUM + ll0;
U32 repCode;
for (repCode = ll0; repCode < lastR; repCode++) {
U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
U32 const repIndex = current - repOffset;
U32 repLen = 0;
assert(current >= dictLimit);
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
}
} else { /* repIndex < dictLimit || repIndex >= current */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
dmsBase + repIndex - dmsIndexDelta :
dictBase + repIndex;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 assert(current >= windowLow);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if ( dictMode == ZSTD_extDict
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
& (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 }
if (dictMode == ZSTD_dictMatchState
&& ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */
& ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 } }
/* save longer solution */
if (repLen > bestLength) {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
repCode, ll0, repOffset, repLen);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 bestLength = repLen;
matches[mnum].off = repCode - ll0;
matches[mnum].len = (U32)repLen;
mnum++;
if ( (repLen > sufficient_len)
| (ip+repLen == iLimit) ) { /* best possible */
return mnum;
} } } }
/* HC3 match finder */
if ((mls == 3) /*static*/ && (bestLength < mls)) {
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if ((matchIndex3 >= matchLow)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
size_t mlen;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 const BYTE* const match = base + matchIndex3;
mlen = ZSTD_count(ip, match, iLimit);
} else {
const BYTE* const match = dictBase + matchIndex3;
mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
}
/* save best solution */
if (mlen >= mls /* == 3 > bestLength */) {
DEBUGLOG(8, "found small match with hlog3, of length %u",
(U32)mlen);
bestLength = mlen;
assert(current > matchIndex3);
assert(mnum==0); /* no prior solution */
matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
matches[0].len = (U32)mlen;
mnum = 1;
if ( (mlen > sufficient_len) |
(ip+mlen == iLimit) ) { /* best possible length */
ms->nextToUpdate = current+1; /* skip insertion */
return 1;
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 } } }
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 /* no dictMatchState lookup: dicts don't have a populated HC3 table */
}
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
hashTable[h] = current; /* Update Hash Table */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 while (nbCompares-- && (matchIndex >= matchLow)) {
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 U32* const nextPtr = bt + 2*(matchIndex & btMask);
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 const BYTE* match;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
assert(current > matchIndex);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
match = base + matchIndex;
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
} else {
match = dictBase + matchIndex;
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
if (matchIndex+matchLength >= dictLimit)
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 match = base + matchIndex; /* prepare for match[matchLength] read */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
if (matchLength > bestLength) {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
(U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 assert(matchEndIdx > matchIndex);
if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength;
bestLength = matchLength;
matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
matches[mnum].len = (U32)matchLength;
mnum++;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if ( (matchLength > ZSTD_OPT_NUM)
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
break; /* drop, to preserve bt consistency (miss a little bit of compression) */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
}
if (match[matchLength] < ip[matchLength]) {
/* match smaller than current */
*smallerPtr = matchIndex; /* update smaller idx */
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
} else {
*largerPtr = matchIndex;
commonLengthLarger = matchLength;
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
largerPtr = nextPtr;
matchIndex = nextPtr[0];
} }
*smallerPtr = *largerPtr = 0;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if (dictMode == ZSTD_dictMatchState && nbCompares) {
size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
U32 dictMatchIndex = dms->hashTable[dmsH];
const U32* const dmsBt = dms->chainTable;
commonLengthSmaller = commonLengthLarger = 0;
while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
const BYTE* match = dmsBase + dictMatchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
if (dictMatchIndex+matchLength >= dmsHighLimit)
match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
if (matchLength > bestLength) {
matchIndex = dictMatchIndex + dmsIndexDelta;
DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
(U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength;
bestLength = matchLength;
matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
matches[mnum].len = (U32)matchLength;
mnum++;
if ( (matchLength > ZSTD_OPT_NUM)
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
break; /* drop, to guarantee consistency (miss a little bit of compression) */
}
}
if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
if (match[matchLength] < ip[matchLength]) {
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
} else {
/* match is larger than current */
commonLengthLarger = matchLength;
dictMatchIndex = nextPtr[0];
}
}
}
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 assert(matchEndIdx > current+8);
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
return mnum;
}
FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 ZSTD_matchState_t* ms,
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 U32* nextToUpdate3,
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 const U32 rep[ZSTD_REP_NUM],
U32 const ll0,
U32 const lengthToBeat)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const ZSTD_compressionParameters* const cParams = &ms->cParams;
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 U32 const matchLengthSearch = cParams->minMatch;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 DEBUGLOG(8, "ZSTD_BtGetAllMatches");
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 switch(matchLengthSearch)
{
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 default :
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 case 7 :
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
}
/*-*******************************
* Optimal parser
*********************************/
typedef struct repcodes_s {
U32 rep[3];
} repcodes_t;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
repcodes_t newReps;
if (offset >= ZSTD_REP_NUM) { /* full offset */
newReps.rep[2] = rep[1];
newReps.rep[1] = rep[0];
newReps.rep[0] = offset - ZSTD_REP_MOVE;
} else { /* repcode */
U32 const repCode = offset + ll0;
if (repCode > 0) { /* note : if repCode==0, no change */
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
newReps.rep[1] = rep[0];
newReps.rep[0] = currentOffset;
} else { /* repCode == 0 */
memcpy(&newReps, rep, sizeof(newReps));
}
}
return newReps;
}
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 return sol.litlen + sol.mlen;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 #if 0 /* debug */
static void
listStats(const U32* table, int lastEltID)
{
int const nbElts = lastEltID + 1;
int enb;
for (enb=0; enb < nbElts; enb++) {
(void)table;
//RAWLOG(2, "%3i:%3i, ", enb, table[enb]);
RAWLOG(2, "%4i,", table[enb]);
}
RAWLOG(2, " \n");
}
#endif
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 const void* src, size_t srcSize,
const int optLevel,
const ZSTD_dictMode_e dictMode)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
optState_t* const optStatePtr = &ms->opt;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const BYTE* const base = ms->window.base;
const BYTE* const prefixStart = base + ms->window.dictLimit;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const ZSTD_compressionParameters* const cParams = &ms->cParams;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 U32 nextToUpdate3 = ms->nextToUpdate;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 ZSTD_optimal_t lastSequence;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
/* init */
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
(U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 assert(optLevel <= 2);
ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 ip += (ip==prefixStart);
/* Match Loop */
while (ip < ilimit) {
U32 cur, last_pos = 0;
/* find first match */
{ U32 const litlen = (U32)(ip - anchor);
U32 const ll0 = !litlen;
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 if (!nbMatches) { ip++; continue; }
/* initialize opt[0] */
{ U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 opt[0].mlen = 0; /* means is_a_literal */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 opt[0].litlen = litlen;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 opt[0].price = ZSTD_literalsContribution(anchor, litlen, optStatePtr, optLevel);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
/* large match -> immediate encoding */
{ U32 const maxML = matches[nbMatches-1].len;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 U32 const maxOffset = matches[nbMatches-1].off;
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
if (maxML > sufficient_len) {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 lastSequence.litlen = litlen;
lastSequence.mlen = maxML;
lastSequence.off = maxOffset;
DEBUGLOG(6, "large match (%u>%u), immediate encoding",
maxML, sufficient_len);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 cur = 0;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 last_pos = ZSTD_totalLen(lastSequence);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 goto _shortestPath;
} }
/* set prices for first matches starting position == 0 */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 U32 pos;
U32 matchNb;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 for (pos = 1; pos < minMatch; pos++) {
opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offset = matches[matchNb].off;
U32 const end = matches[matchNb].len;
repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
for ( ; pos <= end ; pos++ ) {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
U32 const sequencePrice = literalsPrice + matchPrice;
DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
pos, ZSTD_fCost(sequencePrice));
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 opt[pos].mlen = pos;
opt[pos].off = offset;
opt[pos].litlen = litlen;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 opt[pos].price = sequencePrice;
ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
} }
last_pos = pos-1;
}
}
/* check further positions */
for (cur = 1; cur <= last_pos; cur++) {
const BYTE* const inr = ip + cur;
assert(cur < ZSTD_OPT_NUM);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
/* Fix current position with one literal if cheaper */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
int const price = opt[cur-1].price
+ ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
+ ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
- ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 assert(price < 1000000000); /* overflow check */
if (price <= opt[cur].price) {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
opt[cur].mlen = 0;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 opt[cur].off = 0;
opt[cur].litlen = litlen;
opt[cur].price = price;
memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 } else {
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
}
}
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
/* last match must start at a minimum distance of 8 from oend */
if (inr > ilimit) continue;
if (cur == last_pos) break;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if ( (optLevel==0) /*static_test*/
&& (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 }
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 { U32 const ll0 = (opt[cur].mlen != 0);
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
U32 const previousPrice = opt[cur].price;
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 U32 matchNb;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if (!nbMatches) {
DEBUGLOG(7, "rPos:%u : no match found", cur);
continue;
}
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
{ U32 const maxML = matches[nbMatches-1].len;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
inr-istart, cur, nbMatches, maxML);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
if ( (maxML > sufficient_len)
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 || (cur + maxML >= ZSTD_OPT_NUM) ) {
lastSequence.mlen = maxML;
lastSequence.off = matches[nbMatches-1].off;
lastSequence.litlen = litlen;
cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
last_pos = cur + ZSTD_totalLen(lastSequence);
if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 goto _shortestPath;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 } }
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
/* set prices using matches found at position == cur */
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offset = matches[matchNb].off;
repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
U32 const lastML = matches[matchNb].len;
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
U32 mlen;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 matchNb, matches[matchNb].off, lastML, litlen);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 U32 const pos = cur + mlen;
int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
if ((pos > last_pos) || (price < opt[pos].price)) {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 opt[pos].mlen = mlen;
opt[pos].off = offset;
opt[pos].litlen = litlen;
opt[pos].price = price;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
} else {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
} } }
} /* for (cur = 1; cur <= last_pos; cur++) */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 lastSequence = opt[last_pos];
cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
assert(cur < ZSTD_OPT_NUM); /* control overflow*/
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 assert(opt[0].mlen == 0);
{ U32 const storeEnd = cur + 1;
U32 storeStart = storeEnd;
U32 seqPos = cur;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
last_pos, cur); (void)last_pos;
assert(storeEnd < ZSTD_OPT_NUM);
DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
opt[storeEnd] = lastSequence;
while (seqPos > 0) {
U32 const backDist = ZSTD_totalLen(opt[seqPos]);
storeStart--;
DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
opt[storeStart] = opt[seqPos];
seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
}
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 /* save sequences */
DEBUGLOG(6, "sending selected sequences into seqStore")
{ U32 storePos;
for (storePos=storeStart; storePos <= storeEnd; storePos++) {
U32 const llen = opt[storePos].litlen;
U32 const mlen = opt[storePos].mlen;
U32 const offCode = opt[storePos].off;
U32 const advance = llen + mlen;
DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 anchor - istart, (unsigned)llen, (unsigned)mlen);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157
if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
assert(storePos == storeEnd); /* must be last sequence */
ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
continue; /* will finish */
}
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 /* repcodes update : like ZSTD_updateRep(), but update in place */
if (offCode >= ZSTD_REP_NUM) { /* full offset */
rep[2] = rep[1];
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 rep[1] = rep[0];
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 rep[0] = offCode - ZSTD_REP_MOVE;
} else { /* repcode */
U32 const repCode = offCode + (llen==0);
if (repCode) { /* note : if repCode==0, no change */
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
if (repCode >= 2) rep[2] = rep[1];
rep[1] = rep[0];
rep[0] = currentOffset;
} }
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 assert(anchor + llen <= iend);
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
anchor += advance;
ip = anchor;
} }
ZSTD_setBasePrices(optStatePtr, optLevel);
}
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 } /* while (ip < ilimit) */
/* Return the last literals size */
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 return (size_t)(iend - anchor);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
size_t ZSTD_compressBlock_btopt(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const void* src, size_t srcSize)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
}
/* used in 2-pass strategy */
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 {
U32 s, sum=0;
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 assert(ZSTD_FREQ_DIV+bonus >= 0);
for (s=0; s<lastEltIndex+1; s++) {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 table[s] <<= ZSTD_FREQ_DIV+bonus;
table[s]--;
sum += table[s];
}
return sum;
}
/* used in 2-pass strategy */
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
{
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 if (ZSTD_compressedLiterals(optPtr))
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
}
/* ZSTD_initStats_ultra():
* make a first compression pass, just to seed stats with more accurate starting values.
* only works on first block, with no dictionary and no ldm.
Gregory Szorc
zstandard: vendor python-zstandard 0.12...
r43207 * this function cannot error, hence its contract must be respected.
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 */
static void
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
memcpy(tmpRep, rep, sizeof(tmpRep));
DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
assert(ms->opt.litLengthSum == 0); /* first block */
assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
/* invalidate first scan from history */
ZSTD_resetSeqStore(seqStore);
ms->window.base -= srcSize;
ms->window.dictLimit += (U32)srcSize;
ms->window.lowLimit = ms->window.dictLimit;
ms->nextToUpdate = ms->window.dictLimit;
/* re-inforce weight of collected statistics */
ZSTD_upscaleStats(&ms->opt);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
size_t ZSTD_compressBlock_btultra(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const void* src, size_t srcSize)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
}
size_t ZSTD_compressBlock_btultra2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
U32 const current = (U32)((const BYTE*)src - ms->window.base);
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
/* 2-pass strategy:
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 * this strategy makes a first pass over first block to collect statistics
* and seed next round's statistics with it.
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 * After 1st pass, function forgets everything, and starts a new block.
* Consequently, this can only work if no data has been previously loaded in tables,
* aka, no dictionary, no prefix, no ldm preprocessing.
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 * The compression ratio gain is generally small (~0.5% on first block),
* the cost is 2x cpu time on first block. */
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
if ( (ms->opt.litLengthSum==0) /* first block */
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
&& (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
&& (srcSize > ZSTD_PREDEF_THRESHOLD)
) {
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 }
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
}
size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const void* src, size_t srcSize)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
size_t ZSTD_compressBlock_btultra_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 const void* src, size_t srcSize)
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 }
Gregory Szorc
zstandard: vendor python-zstandard 0.11...
r42237
/* note : no btultra2 variant for extDict nor dictMatchState,
* because btultra2 is not meant to work with dictionaries
* and is only specific for the first block (no prefix) */