entropy_common.c
236 lines
| 9.3 KiB
| text/x-c
|
CLexer
Gregory Szorc
|
r30434 | /* | ||
Common functions of New Generation Entropy library | ||||
Copyright (C) 2016, Yann Collet. | ||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | ||||
Redistribution and use in source and binary forms, with or without | ||||
modification, are permitted provided that the following conditions are | ||||
met: | ||||
* Redistributions of source code must retain the above copyright | ||||
notice, this list of conditions and the following disclaimer. | ||||
* Redistributions in binary form must reproduce the above | ||||
copyright notice, this list of conditions and the following disclaimer | ||||
in the documentation and/or other materials provided with the | ||||
distribution. | ||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
You can contact the author at : | ||||
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy | ||||
- Public forum : https://groups.google.com/forum/#!forum/lz4c | ||||
*************************************************************************** */ | ||||
/* ************************************* | ||||
* Dependencies | ||||
***************************************/ | ||||
#include "mem.h" | ||||
#include "error_private.h" /* ERR_*, ERROR */ | ||||
#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ | ||||
#include "fse.h" | ||||
#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ | ||||
#include "huf.h" | ||||
Gregory Szorc
|
r37513 | /*=== Version ===*/ | ||
unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } | ||||
/*=== Error Management ===*/ | ||||
Gregory Szorc
|
r30434 | unsigned FSE_isError(size_t code) { return ERR_isError(code); } | ||
const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } | ||||
unsigned HUF_isError(size_t code) { return ERR_isError(code); } | ||||
const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } | ||||
/*-************************************************************** | ||||
* FSE NCount encoding-decoding | ||||
****************************************************************/ | ||||
size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, | ||||
const void* headerBuffer, size_t hbSize) | ||||
{ | ||||
const BYTE* const istart = (const BYTE*) headerBuffer; | ||||
const BYTE* const iend = istart + hbSize; | ||||
const BYTE* ip = istart; | ||||
int nbBits; | ||||
int remaining; | ||||
int threshold; | ||||
U32 bitStream; | ||||
int bitCount; | ||||
unsigned charnum = 0; | ||||
int previous0 = 0; | ||||
Gregory Szorc
|
r40157 | if (hbSize < 4) { | ||
/* This function only works when hbSize >= 4 */ | ||||
char buffer[4]; | ||||
memset(buffer, 0, sizeof(buffer)); | ||||
memcpy(buffer, headerBuffer, hbSize); | ||||
{ size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, | ||||
buffer, sizeof(buffer)); | ||||
if (FSE_isError(countSize)) return countSize; | ||||
if (countSize > hbSize) return ERROR(corruption_detected); | ||||
return countSize; | ||||
} } | ||||
assert(hbSize >= 4); | ||||
/* init */ | ||||
memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ | ||||
Gregory Szorc
|
r30434 | bitStream = MEM_readLE32(ip); | ||
nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ | ||||
if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); | ||||
bitStream >>= 4; | ||||
bitCount = 4; | ||||
*tableLogPtr = nbBits; | ||||
remaining = (1<<nbBits)+1; | ||||
threshold = 1<<nbBits; | ||||
nbBits++; | ||||
while ((remaining>1) & (charnum<=*maxSVPtr)) { | ||||
if (previous0) { | ||||
unsigned n0 = charnum; | ||||
while ((bitStream & 0xFFFF) == 0xFFFF) { | ||||
n0 += 24; | ||||
if (ip < iend-5) { | ||||
ip += 2; | ||||
bitStream = MEM_readLE32(ip) >> bitCount; | ||||
} else { | ||||
bitStream >>= 16; | ||||
bitCount += 16; | ||||
} } | ||||
while ((bitStream & 3) == 3) { | ||||
n0 += 3; | ||||
bitStream >>= 2; | ||||
bitCount += 2; | ||||
} | ||||
n0 += bitStream & 3; | ||||
bitCount += 2; | ||||
if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); | ||||
while (charnum < n0) normalizedCounter[charnum++] = 0; | ||||
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { | ||||
Gregory Szorc
|
r40157 | assert((bitCount >> 3) <= 3); /* For first condition to work */ | ||
Gregory Szorc
|
r30434 | ip += bitCount>>3; | ||
bitCount &= 7; | ||||
bitStream = MEM_readLE32(ip) >> bitCount; | ||||
} else { | ||||
bitStream >>= 2; | ||||
} } | ||||
Gregory Szorc
|
r37513 | { int const max = (2*threshold-1) - remaining; | ||
int count; | ||||
Gregory Szorc
|
r30434 | |||
if ((bitStream & (threshold-1)) < (U32)max) { | ||||
Gregory Szorc
|
r37513 | count = bitStream & (threshold-1); | ||
bitCount += nbBits-1; | ||||
Gregory Szorc
|
r30434 | } else { | ||
Gregory Szorc
|
r37513 | count = bitStream & (2*threshold-1); | ||
Gregory Szorc
|
r30434 | if (count >= threshold) count -= max; | ||
Gregory Szorc
|
r37513 | bitCount += nbBits; | ||
Gregory Szorc
|
r30434 | } | ||
count--; /* extra accuracy */ | ||||
Gregory Szorc
|
r37513 | remaining -= count < 0 ? -count : count; /* -1 means +1 */ | ||
normalizedCounter[charnum++] = (short)count; | ||||
Gregory Szorc
|
r30434 | previous0 = !count; | ||
while (remaining < threshold) { | ||||
nbBits--; | ||||
threshold >>= 1; | ||||
} | ||||
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { | ||||
ip += bitCount>>3; | ||||
bitCount &= 7; | ||||
} else { | ||||
bitCount -= (int)(8 * (iend - 4 - ip)); | ||||
ip = iend - 4; | ||||
} | ||||
bitStream = MEM_readLE32(ip) >> (bitCount & 31); | ||||
} } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ | ||||
if (remaining != 1) return ERROR(corruption_detected); | ||||
if (bitCount > 32) return ERROR(corruption_detected); | ||||
*maxSVPtr = charnum-1; | ||||
ip += (bitCount+7)>>3; | ||||
return ip-istart; | ||||
} | ||||
/*! HUF_readStats() : | ||||
Read compact Huffman tree, saved by HUF_writeCTable(). | ||||
`huffWeight` is destination buffer. | ||||
Gregory Szorc
|
r30822 | `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. | ||
Gregory Szorc
|
r30434 | @return : size read from `src` , or an error Code . | ||
Note : Needed by HUF_readCTable() and HUF_readDTableX?() . | ||||
*/ | ||||
size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, | ||||
U32* nbSymbolsPtr, U32* tableLogPtr, | ||||
const void* src, size_t srcSize) | ||||
{ | ||||
U32 weightTotal; | ||||
const BYTE* ip = (const BYTE*) src; | ||||
size_t iSize; | ||||
size_t oSize; | ||||
if (!srcSize) return ERROR(srcSize_wrong); | ||||
iSize = ip[0]; | ||||
/* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ | ||||
if (iSize >= 128) { /* special header */ | ||||
oSize = iSize - 127; | ||||
iSize = ((oSize+1)/2); | ||||
if (iSize+1 > srcSize) return ERROR(srcSize_wrong); | ||||
if (oSize >= hwSize) return ERROR(corruption_detected); | ||||
ip += 1; | ||||
{ U32 n; | ||||
for (n=0; n<oSize; n+=2) { | ||||
huffWeight[n] = ip[n/2] >> 4; | ||||
huffWeight[n+1] = ip[n/2] & 15; | ||||
} } } | ||||
else { /* header compressed with FSE (normal case) */ | ||||
Gregory Szorc
|
r30822 | FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ | ||
Gregory Szorc
|
r30434 | if (iSize+1 > srcSize) return ERROR(srcSize_wrong); | ||
Gregory Szorc
|
r30822 | oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ | ||
Gregory Szorc
|
r30434 | if (FSE_isError(oSize)) return oSize; | ||
} | ||||
/* collect weight stats */ | ||||
Gregory Szorc
|
r30822 | memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); | ||
Gregory Szorc
|
r30434 | weightTotal = 0; | ||
{ U32 n; for (n=0; n<oSize; n++) { | ||||
Gregory Szorc
|
r30822 | if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected); | ||
Gregory Szorc
|
r30434 | rankStats[huffWeight[n]]++; | ||
weightTotal += (1 << huffWeight[n]) >> 1; | ||||
} } | ||||
if (weightTotal == 0) return ERROR(corruption_detected); | ||||
/* get last non-null symbol weight (implied, total must be 2^n) */ | ||||
{ U32 const tableLog = BIT_highbit32(weightTotal) + 1; | ||||
Gregory Szorc
|
r30822 | if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); | ||
Gregory Szorc
|
r30434 | *tableLogPtr = tableLog; | ||
/* determine last weight */ | ||||
{ U32 const total = 1 << tableLog; | ||||
U32 const rest = total - weightTotal; | ||||
U32 const verif = 1 << BIT_highbit32(rest); | ||||
U32 const lastWeight = BIT_highbit32(rest) + 1; | ||||
if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ | ||||
huffWeight[oSize] = (BYTE)lastWeight; | ||||
rankStats[lastWeight]++; | ||||
} } | ||||
/* check tree construction validity */ | ||||
if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ | ||||
/* results */ | ||||
*nbSymbolsPtr = (U32)(oSize+1); | ||||
return iSize+1; | ||||
} | ||||