##// END OF EJS Templates
convert: use raw string for regular expressions...
convert: use raw string for regular expressions This avoids a SyntaxWarning on Python 3.8. Differential Revision: https://phab.mercurial-scm.org/D5821

File last commit:

r40157:73fef626 default
r41678:fc09aafd default
Show More
fse_compress.c
721 lines | 26.9 KiB | text/x-c | CLexer
Gregory Szorc
zstd: vendor zstd 1.1.1...
/* ******************************************************************
FSE : Finite State Entropy encoder
Copyright (C) 2013-present, Yann Collet.

BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */
/* **************************************************************
* Includes
****************************************************************/
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memcpy, memset */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 #include "compiler.h"
#include "mem.h" /* U32, U16, etc. */
#include "debug.h" /* assert, DEBUGLOG */
#include "hist.h" /* HIST_count_wksp */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 #include "bitstream.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 #include "error_private.h"
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434
/* **************************************************************
* Error Management
****************************************************************/
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 #define FSE_isError ERR_isError
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434
/* **************************************************************
* Templates
****************************************************************/
/*
designed to be included
for type-specific functions (template emulation in C)
Objective is to write these functions only once, for improved maintenance
*/
/* safety checks :
 * this file is a template; the including translation unit must define
 * both macros below before the template code is compiled. */
#ifndef FSE_FUNCTION_EXTENSION
# error "FSE_FUNCTION_EXTENSION must be defined"
#endif
#ifndef FSE_FUNCTION_TYPE
# error "FSE_FUNCTION_TYPE must be defined"
#endif
/* Function names :
 * FSE_CAT pastes its two arguments as-is; the two wrappers below add one
 * level of indirection so that macro arguments are expanded before pasting. */
#define FSE_CAT(X,Y) X##Y
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
/* Function templates */
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822
/* FSE_buildCTable_wksp() :
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
* wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
* workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
*/
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 size_t FSE_buildCTable_wksp(FSE_CTable* ct,
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize)
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 {
U32 const tableSize = 1 << tableLog;
U32 const tableMask = tableSize - 1;
void* const ptr = ct;
U16* const tableU16 = ( (U16*) ptr) + 2;
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
U32 const step = FSE_TABLESTEP(tableSize);
U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 U32 highThreshold = tableSize-1;
/* CTable header */
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 tableU16[-2] = (U16) tableLog;
tableU16[-1] = (U16) maxSymbolValue;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 assert(tableLog < 16); /* required for threshold strategy to work */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434
/* For explanations on how to distribute symbol values over the table :
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
#ifdef __clang_analyzer__
memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
#endif
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434
/* symbol start positions */
{ U32 u;
cumul[0] = 0;
for (u=1; u<=maxSymbolValue+1; u++) {
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
cumul[u] = cumul[u-1] + 1;
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
} else {
cumul[u] = cumul[u-1] + normalizedCounter[u-1];
} }
cumul[maxSymbolValue+1] = tableSize+1;
}
/* Spread symbols */
{ U32 position = 0;
U32 symbol;
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
int nbOccurences;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 int const freq = normalizedCounter[symbol];
for (nbOccurences=0; nbOccurences<freq; nbOccurences++) {
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
position = (position + step) & tableMask;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 while (position > highThreshold)
position = (position + step) & tableMask; /* Low proba area */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 } }
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 assert(position==0); /* Must have initialized all positions */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 }
/* Build table */
{ U32 u; for (u=0; u<tableSize; u++) {
FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */
tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */
} }
/* Build Symbol Transformation Table */
{ unsigned total = 0;
unsigned s;
for (s=0; s<=maxSymbolValue; s++) {
switch (normalizedCounter[s])
{
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 case 0:
/* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
break;
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434
case -1:
case 1:
symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
symbolTT[s].deltaFindState = total - 1;
total ++;
break;
default :
{
U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
symbolTT[s].deltaFindState = total - normalizedCounter[s];
total += normalizedCounter[s];
} } } }
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 #if 0 /* debug : symbol costs */
DEBUGLOG(5, "\n --- table statistics : ");
{ U32 symbol;
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
symbol, normalizedCounter[symbol],
FSE_getMaxNbBits(symbolTT, symbol),
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
}
}
#endif
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 return 0;
}
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
}
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434
#ifndef FSE_COMMONDEFS_ONLY
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 /*-**************************************************************
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 * FSE NCount encoding
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 ****************************************************************/
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
{
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
}
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 static size_t
FSE_writeNCount_generic (void* header, size_t headerBufferSize,
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
unsigned writeIsSafe)
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 {
BYTE* const ostart = (BYTE*) header;
BYTE* out = ostart;
BYTE* const oend = ostart + headerBufferSize;
int nbBits;
const int tableSize = 1 << tableLog;
int remaining;
int threshold;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 U32 bitStream = 0;
int bitCount = 0;
unsigned symbol = 0;
unsigned const alphabetSize = maxSymbolValue + 1;
int previousIs0 = 0;
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434
/* Table Size */
bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
bitCount += 4;
/* Init */
remaining = tableSize+1; /* +1 for extra accuracy */
threshold = tableSize;
nbBits = tableLog+1;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
if (previousIs0) {
unsigned start = symbol;
while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
if (symbol == alphabetSize) break; /* incorrect distribution */
while (symbol >= start+24) {
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 start+=24;
bitStream += 0xFFFFU << bitCount;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if ((!writeIsSafe) && (out > oend-2))
return ERROR(dstSize_tooSmall); /* Buffer overflow */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 out[0] = (BYTE) bitStream;
out[1] = (BYTE)(bitStream>>8);
out+=2;
bitStream>>=16;
}
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 while (symbol >= start+3) {
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 start+=3;
bitStream += 3 << bitCount;
bitCount += 2;
}
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 bitStream += (symbol-start) << bitCount;
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 bitCount += 2;
if (bitCount>16) {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if ((!writeIsSafe) && (out > oend - 2))
return ERROR(dstSize_tooSmall); /* Buffer overflow */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 out[0] = (BYTE)bitStream;
out[1] = (BYTE)(bitStream>>8);
out += 2;
bitStream >>= 16;
bitCount -= 16;
} }
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 { int count = normalizedCounter[symbol++];
int const max = (2*threshold-1) - remaining;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 remaining -= count < 0 ? -count : count;
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 count++; /* +1 for extra accuracy */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if (count>=threshold)
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 bitStream += count << bitCount;
bitCount += nbBits;
bitCount -= (count<max);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 previousIs0 = (count==1);
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 if (remaining<1) return ERROR(GENERIC);
while (remaining<threshold) { nbBits--; threshold>>=1; }
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 }
if (bitCount>16) {
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if ((!writeIsSafe) && (out > oend - 2))
return ERROR(dstSize_tooSmall); /* Buffer overflow */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 out[0] = (BYTE)bitStream;
out[1] = (BYTE)(bitStream>>8);
out += 2;
bitStream >>= 16;
bitCount -= 16;
} }
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if (remaining != 1)
return ERROR(GENERIC); /* incorrect normalized distribution */
assert(symbol <= alphabetSize);
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 /* flush remaining bitStream */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 if ((!writeIsSafe) && (out > oend - 2))
return ERROR(dstSize_tooSmall); /* Buffer overflow */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 out[0] = (BYTE)bitStream;
out[1] = (BYTE)(bitStream>>8);
out+= (bitCount+7) /8;
return (out-ostart);
}
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 size_t FSE_writeNCount (void* buffer, size_t bufferSize,
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 {
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 }
/*-**************************************************************
* FSE Compression Code
****************************************************************/
/* FSE_createCTable() :
 * Allocates (malloc) a CTable sized by FSE_CTABLE_SIZE_U32() for the given
 * parameters; `tableLog` is capped to FSE_TABLELOG_ABSOLUTE_MAX.
 * @return : the allocated table, or NULL if malloc() fails.
 *           Release it with FSE_freeCTable().
 */
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
{
    size_t nbBytes;
    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) {
        tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
    }
    nbBytes = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
    return (FSE_CTable*)malloc(nbBytes);
}
/* FSE_freeCTable() :
 * Releases a table through free(); passing NULL is harmless. */
void FSE_freeCTable (FSE_CTable* ct)
{
    free(ct);
}
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
assert(srcSize > 1); /* Not supported, RLE should be used instead */
return minBits;
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 }
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
{
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 U32 tableLog = maxTableLog;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
assert(srcSize > 1); /* Not supported, RLE should be used instead */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
return tableLog;
}
/* FSE_optimalTableLog() :
 * FSE_optimalTableLog_internal() with `minus` fixed to 2. */
unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    unsigned const minus = 2;
    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, minus);
}
/* FSE_normalizeM2() :
 * Secondary normalization method, to be used when the primary method fails.
 * Pass 1 gives -1 to symbols with count <= total>>tableLog and 1 to symbols
 * with count <= lowOne, removing their counts from `total`; every other
 * present symbol is marked "not yet assigned".
 * The points still available are then shared between the unassigned symbols
 * proportionally to their counts, using 62-bit fixed-point arithmetic.
 * @return : 0 on success, ERROR(GENERIC) if a symbol would receive a weight of 0.
 */
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
{
    short const NOT_YET_ASSIGNED = -2;
    U32 s;
    U32 distributed = 0;
    U32 pointsLeft;

    /* Init */
    U32 const lowThreshold = (U32)(total >> tableLog);
    U32 lowOne = (U32)((total * 3) >> (tableLog + 1));

    /* pass 1 : settle absent and very rare symbols */
    for (s=0; s<=maxSymbolValue; s++) {
        unsigned const c = count[s];
        if (c == 0) {
            norm[s] = 0;
        } else if (c <= lowThreshold) {
            norm[s] = -1;
            distributed++;
            total -= c;
        } else if (c <= lowOne) {
            norm[s] = 1;
            distributed++;
            total -= c;
        } else {
            norm[s] = NOT_YET_ASSIGNED;
        }
    }
    pointsLeft = (1 << tableLog) - distributed;

    if (pointsLeft == 0)
        return 0;

    if ((total / pointsLeft) > lowOne) {
        /* risk of rounding to zero */
        lowOne = (U32)((total * 3) / (pointsLeft * 2));
        for (s=0; s<=maxSymbolValue; s++) {
            if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
                norm[s] = 1;
                distributed++;
                total -= count[s];
            }
        }
        pointsLeft = (1 << tableLog) - distributed;
    }

    if (distributed == maxSymbolValue+1) {
        /* all values are pretty poor;
           probably incompressible data (should have already been detected);
           find max, then give all remaining points to max */
        U32 maxV = 0;
        U32 maxC = 0;
        for (s=0; s<=maxSymbolValue; s++) {
            if (count[s] > maxC) { maxV = s; maxC = count[s]; }
        }
        norm[maxV] += (short)pointsLeft;
        return 0;
    }

    if (total == 0) {
        /* all of the symbols were low enough for the lowOne or lowThreshold :
         * hand out the remaining points one at a time, cycling over the symbols */
        s = 0;
        while (pointsLeft > 0) {
            if (norm[s] > 0) { pointsLeft--; norm[s]++; }
            s = (s+1) % (maxSymbolValue+1);
        }
        return 0;
    }

    /* proportional share for the symbols still unassigned */
    {   U64 const vStepLog = 62 - tableLog;
        U64 const mid = (1ULL << (vStepLog-1)) - 1;
        U64 const rStep = ((((U64)1<<vStepLog) * pointsLeft) + mid) / total;   /* scale on remaining */
        U64 tmpTotal = mid;
        for (s=0; s<=maxSymbolValue; s++) {
            if (norm[s] != NOT_YET_ASSIGNED) continue;
            {   U64 const end = tmpTotal + (count[s] * rStep);
                U32 const sStart = (U32)(tmpTotal >> vStepLog);
                U32 const sEnd = (U32)(end >> vStepLog);
                U32 const weight = sEnd - sStart;
                if (weight < 1)
                    return ERROR(GENERIC);
                norm[s] = (short)weight;
                tmpTotal = end;
            }
        }
    }

    return 0;
}
size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
const unsigned* count, size_t total,
unsigned maxSymbolValue)
{
/* Sanity checks */
if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */
if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 U64 const scale = 62 - tableLog;
U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
U64 const vStep = 1ULL<<(scale-20);
int stillToDistribute = 1<<tableLog;
unsigned s;
unsigned largest=0;
short largestP=0;
U32 lowThreshold = (U32)(total >> tableLog);
for (s=0; s<=maxSymbolValue; s++) {
if (count[s] == total) return 0; /* rle special case */
if (count[s] == 0) { normalizedCounter[s]=0; continue; }
if (count[s] <= lowThreshold) {
normalizedCounter[s] = -1;
stillToDistribute--;
} else {
short proba = (short)((count[s]*step) >> scale);
if (proba<8) {
U64 restToBeat = vStep * rtbTable[proba];
proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
}
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
r37513 if (proba > largestP) { largestP=proba; largest=s; }
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 normalizedCounter[s] = proba;
stillToDistribute -= proba;
} }
if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
/* corner case, need another normalization method */
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 if (FSE_isError(errorCode)) return errorCode;
}
else normalizedCounter[largest] += (short)stillToDistribute;
}
#if 0
{ /* Print Table (debug) */
U32 s;
U32 nTotal = 0;
for (s=0; s<=maxSymbolValue; s++)
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 for (s=0; s<=maxSymbolValue; s++)
nTotal += abs(normalizedCounter[s]);
if (nTotal != (1U<<tableLog))
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 getchar();
}
#endif
return tableLog;
}
/* FSE_buildCTable_raw() :
 * fake FSE_CTable, for raw (uncompressed) input :
 * every one of the 1<<nbBits symbols gets the same cost of `nbBits` bits.
 * @return : 0 on success, ERROR(GENERIC) if nbBits < 1.
 */
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
{
    const unsigned tableSize = 1 << nbBits;
    const unsigned tableMask = tableSize - 1;
    const unsigned maxSymbolValue = tableMask;
    void* const ctBase = ct;
    U16* const stateTable = ((U16*)ctBase) + 2;
    void* const transformBase = ((U32*)ctBase) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*)transformBase;
    unsigned s;

    /* Sanity checks */
    if (nbBits < 1)
        return ERROR(GENERIC);   /* min size */

    /* header */
    stateTable[-2] = (U16) nbBits;
    stateTable[-1] = (U16) maxSymbolValue;

    /* Build table */
    for (s=0; s<tableSize; s++) {
        stateTable[s] = (U16)(tableSize + s);
    }

    /* Build Symbol Transformation Table */
    {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
        for (s=0; s<=maxSymbolValue; s++) {
            symbolTT[s].deltaNbBits = deltaNbBits;
            symbolTT[s].deltaFindState = s-1;
        }
    }

    return 0;
}
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 /* fake FSE_CTable, for rle input (always same symbol) */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
{
void* ptr = ct;
U16* tableU16 = ( (U16*) ptr) + 2;
void* FSCTptr = (U32*)ptr + 2;
FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
/* header */
tableU16[-2] = (U16) 0;
tableU16[-1] = (U16) symbolValue;
/* Build table */
tableU16[0] = 0;
tableU16[1] = 0; /* just in case */
/* Build Symbol Transformation Table */
symbolTT[symbolValue].deltaNbBits = 0;
symbolTT[symbolValue].deltaFindState = 0;
return 0;
}
/* FSE_compress_usingCTable_generic() :
 * Encodes `src` with the table `ct` into `dst`.
 * The input is consumed backwards, from its last byte to its first, and
 * symbols alternate between two encoder states.
 * `fast` selects BIT_flushBitsFast() over BIT_flushBits() for every flush.
 * @return : value of BIT_closeCStream(),
 *           or 0 if srcSize <= 2 or if the bitstream cannot be initialized in `dst`.
 */
static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
                                          const void* src, size_t srcSize,
                                          const FSE_CTable* ct, const unsigned fast)
{
    const BYTE* const srcBegin = (const BYTE*) src;
    const BYTE* const srcEnd = srcBegin + srcSize;
    const BYTE* ip = srcEnd;

    BIT_CStream_t bitC;
    FSE_CState_t stateA, stateB;

    /* init */
    if (srcSize <= 2) return 0;
    {   size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
        if (FSE_isError(initError)) return 0;   /* not enough space available to write a bitstream */
    }

#define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))

    /* the last 2 (even size) or 3 (odd size) bytes initialize the two states */
    if (srcSize & 1) {
        FSE_initCState2(&stateA, ct, *--ip);
        FSE_initCState2(&stateB, ct, *--ip);
        FSE_encodeSymbol(&bitC, &stateA, *--ip);
        FSE_FLUSHBITS(&bitC);
    } else {
        FSE_initCState2(&stateB, ct, *--ip);
        FSE_initCState2(&stateA, ct, *--ip);
    }

    /* join to mod 4 */
    srcSize -= 2;
    if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
        FSE_encodeSymbol(&bitC, &stateB, *--ip);
        FSE_encodeSymbol(&bitC, &stateA, *--ip);
        FSE_FLUSHBITS(&bitC);
    }

    /* 2 or 4 encoding per loop */
    while ( ip > srcBegin ) {

        FSE_encodeSymbol(&bitC, &stateB, *--ip);

        if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
            FSE_FLUSHBITS(&bitC);

        FSE_encodeSymbol(&bitC, &stateA, *--ip);

        if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
            FSE_encodeSymbol(&bitC, &stateB, *--ip);
            FSE_encodeSymbol(&bitC, &stateA, *--ip);
        }

        FSE_FLUSHBITS(&bitC);
    }

    /* final states are written last */
    FSE_flushCState(&bitC, &stateB);
    FSE_flushCState(&bitC, &stateA);
    return BIT_closeCStream(&bitC);
}
/* FSE_compress_usingCTable() :
 * Encodes `src` with `ct`, selecting the fast flushing variant when
 * `dstSize` >= FSE_BLOCKBOUND(srcSize), the checked one otherwise.
 * The flag is passed as a literal constant at each call site.
 * @return : see FSE_compress_usingCTable_generic().
 */
size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
                           const void* src, size_t srcSize,
                           const FSE_CTable* ct)
{
    if (dstSize >= FSE_BLOCKBOUND(srcSize)) {
        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
    }
    return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
}
/* FSE_compressBound() :
 * function form of the FSE_COMPRESSBOUND() macro. */
size_t FSE_compressBound(size_t size)
{
    return FSE_COMPRESSBOUND(size);
}
Gregory Szorc
zstandard: vendor python-zstandard 0.9.0...
/* CHECK_V_F(e, f) :
 * declares `size_t const e` in the current scope, initialized with `f`,
 * and returns `e` from the enclosing function when it is an error code.
 * It expands to a declaration followed by an `if`, so it must be used
 * where a declaration is allowed, and `e` stays visible afterwards. */
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
/* CHECK_F(f) :
 * same check, for calls whose result is only needed for error detection;
 * the temporary is confined to its own block. */
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* `wkspSize` size must be `(1<<tableLog)`.
*/
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 {
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const oend = ostart + dstSize;
U32 count[FSE_MAX_SYMBOL_VALUE+1];
S16 norm[FSE_MAX_SYMBOL_VALUE+1];
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 FSE_CTable* CTable = (FSE_CTable*)workSpace;
size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
void* scratchBuffer = (void*)(CTable + CTableSize);
size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434
/* init conditions */
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
if (srcSize <= 1) return 0; /* Not compressible */
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
/* Scan input and build symbol stats */
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
}
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434
tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434
/* Write table description header */
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
op += nc_err;
}
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434
/* Compress */
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
if (cSize == 0) return 0; /* not enough space for compressed data */
op += cSize;
}
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434
/* check compressibility */
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434
return op-ostart;
}
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 typedef struct {
FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
} fseWkspMax_t;
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
Gregory Szorc
zstd: vendor zstd 1.1.1...
r30434 {
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 fseWkspMax_t scratchBuffer;
Gregory Szorc
zstandard: vendor python-zstandard 0.10.1...
r40157 DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
}
/* FSE_compress() :
 * FSE_compress2() with FSE_MAX_SYMBOL_VALUE and FSE_DEFAULT_TABLELOG. */
size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    unsigned const maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
    unsigned const tableLog = FSE_DEFAULT_TABLELOG;
    return FSE_compress2(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog);
}
#endif /* FSE_COMMONDEFS_ONLY */