##// END OF EJS Templates
perf: add command to benchmark bundle reading...
perf: add command to benchmark bundle reading Upcoming commits will be refactoring bundle2 I/O code. This commit establishes a `hg perfbundleread` command that measures how long it takes to read a bundle using various mechanisms. As a baseline, here's output from an uncompressed bundle1 bundle of my Firefox repo (7,098,622,890 bytes): ! read(8k) ! wall 0.763481 comb 0.760000 user 0.160000 sys 0.600000 (best of 6) ! read(16k) ! wall 0.644512 comb 0.640000 user 0.110000 sys 0.530000 (best of 16) ! read(32k) ! wall 0.581172 comb 0.590000 user 0.060000 sys 0.530000 (best of 18) ! read(128k) ! wall 0.535183 comb 0.530000 user 0.010000 sys 0.520000 (best of 19) ! cg1 deltaiter() ! wall 0.873500 comb 0.880000 user 0.840000 sys 0.040000 (best of 12) ! cg1 getchunks() ! wall 6.283797 comb 6.270000 user 5.570000 sys 0.700000 (best of 3) ! cg1 read(8k) ! wall 1.097173 comb 1.100000 user 0.400000 sys 0.700000 (best of 10) ! cg1 read(16k) ! wall 0.810750 comb 0.800000 user 0.200000 sys 0.600000 (best of 13) ! cg1 read(32k) ! wall 0.671215 comb 0.670000 user 0.110000 sys 0.560000 (best of 15) ! cg1 read(128k) ! wall 0.597857 comb 0.600000 user 0.020000 sys 0.580000 (best of 15) And from an uncompressed bundle2 bundle (6,070,036,163 bytes): ! read(8k) ! wall 0.676997 comb 0.680000 user 0.160000 sys 0.520000 (best of 15) ! read(16k) ! wall 0.592706 comb 0.590000 user 0.080000 sys 0.510000 (best of 17) ! read(32k) ! wall 0.529395 comb 0.530000 user 0.050000 sys 0.480000 (best of 16) ! read(128k) ! wall 0.491270 comb 0.490000 user 0.010000 sys 0.480000 (best of 19) ! bundle2 forwardchunks() ! wall 2.997131 comb 2.990000 user 2.270000 sys 0.720000 (best of 4) ! bundle2 iterparts() ! wall 12.247197 comb 10.670000 user 8.170000 sys 2.500000 (best of 3) ! bundle2 part seek() ! wall 11.761675 comb 10.500000 user 8.240000 sys 2.260000 (best of 3) ! bundle2 part read(8k) ! wall 9.116163 comb 9.110000 user 8.240000 sys 0.870000 (best of 3) ! bundle2 part read(16k) ! wall 8.984362 comb 8.970000 user 8.110000 sys 0.860000 (best of 3) ! bundle2 part read(32k) ! wall 8.758364 comb 8.740000 user 7.860000 sys 0.880000 (best of 3) ! bundle2 part read(128k) ! wall 8.749040 comb 8.730000 user 7.830000 sys 0.900000 (best of 3) We already see some interesting data. Notably that bundle2 has significant overhead compared to bundle1. This matters for e.g. stream clone bundles, which can be applied at >1Gbps. Differential Revision: https://phab.mercurial-scm.org/D1385

File last commit:

r30822:b54a2984 default
r35108:e9661304 default
Show More
entropy_common.c
227 lines | 8.8 KiB | text/x-c | CLexer
/*
Common functions of New Generation Entropy library
Copyright (C) 2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
*************************************************************************** */
/* *************************************
* Dependencies
***************************************/
#include "mem.h"
#include "error_private.h" /* ERR_*, ERROR */
#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
#include "huf.h"
/*-****************************************
* FSE Error Management
******************************************/
unsigned FSE_isError(size_t code) { return ERR_isError(code); }
const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
/* **************************************************************
* HUF Error Management
****************************************************************/
unsigned HUF_isError(size_t code) { return ERR_isError(code); }
const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
/*-**************************************************************
* FSE NCount encoding-decoding
****************************************************************/
static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }
size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
const BYTE* const istart = (const BYTE*) headerBuffer;
const BYTE* const iend = istart + hbSize;
const BYTE* ip = istart;
int nbBits;
int remaining;
int threshold;
U32 bitStream;
int bitCount;
unsigned charnum = 0;
int previous0 = 0;
if (hbSize < 4) return ERROR(srcSize_wrong);
bitStream = MEM_readLE32(ip);
nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
bitStream >>= 4;
bitCount = 4;
*tableLogPtr = nbBits;
remaining = (1<<nbBits)+1;
threshold = 1<<nbBits;
nbBits++;
while ((remaining>1) & (charnum<=*maxSVPtr)) {
if (previous0) {
unsigned n0 = charnum;
while ((bitStream & 0xFFFF) == 0xFFFF) {
n0 += 24;
if (ip < iend-5) {
ip += 2;
bitStream = MEM_readLE32(ip) >> bitCount;
} else {
bitStream >>= 16;
bitCount += 16;
} }
while ((bitStream & 3) == 3) {
n0 += 3;
bitStream >>= 2;
bitCount += 2;
}
n0 += bitStream & 3;
bitCount += 2;
if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
while (charnum < n0) normalizedCounter[charnum++] = 0;
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
ip += bitCount>>3;
bitCount &= 7;
bitStream = MEM_readLE32(ip) >> bitCount;
} else {
bitStream >>= 2;
} }
{ short const max = (short)((2*threshold-1)-remaining);
short count;
if ((bitStream & (threshold-1)) < (U32)max) {
count = (short)(bitStream & (threshold-1));
bitCount += nbBits-1;
} else {
count = (short)(bitStream & (2*threshold-1));
if (count >= threshold) count -= max;
bitCount += nbBits;
}
count--; /* extra accuracy */
remaining -= FSE_abs(count);
normalizedCounter[charnum++] = count;
previous0 = !count;
while (remaining < threshold) {
nbBits--;
threshold >>= 1;
}
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
ip += bitCount>>3;
bitCount &= 7;
} else {
bitCount -= (int)(8 * (iend - 4 - ip));
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> (bitCount & 31);
} } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
if (remaining != 1) return ERROR(corruption_detected);
if (bitCount > 32) return ERROR(corruption_detected);
*maxSVPtr = charnum-1;
ip += (bitCount+7)>>3;
return ip-istart;
}
/*! HUF_readStats() :
Read compact Huffman tree, saved by HUF_writeCTable().
`huffWeight` is destination buffer.
`rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
@return : size read from `src` , or an error Code .
Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
*/
size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize)
{
U32 weightTotal;
const BYTE* ip = (const BYTE*) src;
size_t iSize;
size_t oSize;
if (!srcSize) return ERROR(srcSize_wrong);
iSize = ip[0];
/* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
if (iSize >= 128) { /* special header */
oSize = iSize - 127;
iSize = ((oSize+1)/2);
if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
if (oSize >= hwSize) return ERROR(corruption_detected);
ip += 1;
{ U32 n;
for (n=0; n<oSize; n+=2) {
huffWeight[n] = ip[n/2] >> 4;
huffWeight[n+1] = ip[n/2] & 15;
} } }
else { /* header compressed with FSE (normal case) */
FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
if (FSE_isError(oSize)) return oSize;
}
/* collect weight stats */
memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
weightTotal = 0;
{ U32 n; for (n=0; n<oSize; n++) {
if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
rankStats[huffWeight[n]]++;
weightTotal += (1 << huffWeight[n]) >> 1;
} }
if (weightTotal == 0) return ERROR(corruption_detected);
/* get last non-null symbol weight (implied, total must be 2^n) */
{ U32 const tableLog = BIT_highbit32(weightTotal) + 1;
if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
*tableLogPtr = tableLog;
/* determine last weight */
{ U32 const total = 1 << tableLog;
U32 const rest = total - weightTotal;
U32 const verif = 1 << BIT_highbit32(rest);
U32 const lastWeight = BIT_highbit32(rest) + 1;
if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
huffWeight[oSize] = (BYTE)lastWeight;
rankStats[lastWeight]++;
} }
/* check tree construction validity */
if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
/* results */
*nbSymbolsPtr = (U32)(oSize+1);
return iSize+1;
}