diff options
Diffstat (limited to 'thirdparty/zstd/compress/huf_compress.c')
| -rw-r--r-- | thirdparty/zstd/compress/huf_compress.c | 403 |
1 files changed, 234 insertions, 169 deletions
diff --git a/thirdparty/zstd/compress/huf_compress.c b/thirdparty/zstd/compress/huf_compress.c index 2b3d6adc2a..29871877a7 100644 --- a/thirdparty/zstd/compress/huf_compress.c +++ b/thirdparty/zstd/compress/huf_compress.c @@ -1,6 +1,6 @@ /* ****************************************************************** * Huffman encoder, part of New Generation Entropy library - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -29,9 +29,9 @@ #include "hist.h" #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ #include "../common/fse.h" /* header compression */ -#define HUF_STATIC_LINKING_ONLY #include "../common/huf.h" #include "../common/error_private.h" +#include "../common/bits.h" /* ZSTD_highbit32 */ /* ************************************************************** @@ -42,13 +42,67 @@ /* ************************************************************** -* Utils +* Required declarations ****************************************************************/ -unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +typedef struct nodeElt_s { + U32 count; + U16 parent; + BYTE byte; + BYTE nbBits; +} nodeElt; + + +/* ************************************************************** +* Debug Traces +****************************************************************/ + +#if DEBUGLEVEL >= 2 + +static size_t showU32(const U32* arr, size_t size) { - return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); + size_t u; + for (u=0; u<size; u++) { + RAWLOG(6, " %u", arr[u]); (void)arr; + } + RAWLOG(6, " \n"); + return size; } +static size_t HUF_getNbBits(HUF_CElt elt); + +static size_t showCTableBits(const HUF_CElt* ctable, size_t size) +{ + size_t u; + for (u=0; u<size; u++) { + RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable; + } + RAWLOG(6, " \n"); + return size; + +} + +static size_t showHNodeSymbols(const nodeElt* hnode, size_t size) +{ + size_t u; + for (u=0; u<size; u++) { + RAWLOG(6, " %u", hnode[u].byte); (void)hnode; + } + RAWLOG(6, " \n"); + return size; +} + +static size_t showHNodeBits(const nodeElt* hnode, size_t size) +{ + size_t u; + for (u=0; u<size; u++) { + RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode; + } + RAWLOG(6, " \n"); + return size; +} + +#endif + /* ******************************************************* * HUF : Huffman block compression @@ -89,7 +143,10 @@ typedef struct { S16 norm[HUF_TABLELOG_MAX+1]; } HUF_CompressWeightsWksp; -static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize) +static size_t +HUF_compressWeights(void* dst, size_t dstSize, + const void* weightTable, size_t wtSize, + void* workspace, size_t workspaceSize) { BYTE* const ostart = (BYTE*) dst; BYTE* op = ostart; @@ -140,7 +197,7 @@ static size_t HUF_getNbBitsFast(HUF_CElt elt) static size_t HUF_getValue(HUF_CElt elt) { - return elt & ~0xFF; + return elt & ~(size_t)0xFF; } static size_t HUF_getValueFast(HUF_CElt elt) @@ -178,6 +235,8 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, U32 n; HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32)); + HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp)); + /* check conditions */ if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); @@ -207,16 +266,6 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, return ((maxSymbolValue+1)/2) + 1; } -/*! HUF_writeCTable() : - `CTable` : Huffman tree to save, using huf representation. - @return : size of saved CTable */ -size_t HUF_writeCTable (void* dst, size_t maxDstSize, - const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) -{ - HUF_WriteCTableWksp wksp; - return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp)); -} - size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) { @@ -272,68 +321,64 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue) { - const HUF_CElt* ct = CTable + 1; + const HUF_CElt* const ct = CTable + 1; assert(symbolValue <= HUF_SYMBOLVALUE_MAX); return (U32)HUF_getNbBits(ct[symbolValue]); } -typedef struct nodeElt_s { - U32 count; - U16 parent; - BYTE byte; - BYTE nbBits; -} nodeElt; - /** * HUF_setMaxHeight(): - * Enforces maxNbBits on the Huffman tree described in huffNode. + * Try to enforce @targetNbBits on the Huffman tree described in @huffNode. * - * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts - * the tree to so that it is a valid canonical Huffman tree. + * It attempts to convert all nodes with nbBits > @targetNbBits + * to employ @targetNbBits instead. Then it adjusts the tree + * so that it remains a valid canonical Huffman tree. * * @pre The sum of the ranks of each symbol == 2^largestBits, * where largestBits == huffNode[lastNonNull].nbBits. * @post The sum of the ranks of each symbol == 2^largestBits, - * where largestBits is the return value <= maxNbBits. + * where largestBits is the return value (expected <= targetNbBits). * - * @param huffNode The Huffman tree modified in place to enforce maxNbBits. + * @param huffNode The Huffman tree modified in place to enforce targetNbBits. + * It's presumed sorted, from most frequent to rarest symbol. * @param lastNonNull The symbol with the lowest count in the Huffman tree. - * @param maxNbBits The maximum allowed number of bits, which the Huffman tree + * @param targetNbBits The allowed number of bits, which the Huffman tree * may not respect. After this function the Huffman tree will - * respect maxNbBits. - * @return The maximum number of bits of the Huffman tree after adjustment, - * necessarily no more than maxNbBits. + * respect targetNbBits. + * @return The maximum number of bits of the Huffman tree after adjustment. */ -static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) +static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits) { const U32 largestBits = huffNode[lastNonNull].nbBits; - /* early exit : no elt > maxNbBits, so the tree is already valid. */ - if (largestBits <= maxNbBits) return largestBits; + /* early exit : no elt > targetNbBits, so the tree is already valid. */ + if (largestBits <= targetNbBits) return largestBits; + + DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits); /* there are several too large elements (at least >= 2) */ { int totalCost = 0; - const U32 baseCost = 1 << (largestBits - maxNbBits); + const U32 baseCost = 1 << (largestBits - targetNbBits); int n = (int)lastNonNull; - /* Adjust any ranks > maxNbBits to maxNbBits. + /* Adjust any ranks > targetNbBits to targetNbBits. * Compute totalCost, which is how far the sum of the ranks is * we are over 2^largestBits after adjust the offending ranks. */ - while (huffNode[n].nbBits > maxNbBits) { + while (huffNode[n].nbBits > targetNbBits) { totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); - huffNode[n].nbBits = (BYTE)maxNbBits; + huffNode[n].nbBits = (BYTE)targetNbBits; n--; } - /* n stops at huffNode[n].nbBits <= maxNbBits */ - assert(huffNode[n].nbBits <= maxNbBits); - /* n end at index of smallest symbol using < maxNbBits */ - while (huffNode[n].nbBits == maxNbBits) --n; + /* n stops at huffNode[n].nbBits <= targetNbBits */ + assert(huffNode[n].nbBits <= targetNbBits); + /* n end at index of smallest symbol using < targetNbBits */ + while (huffNode[n].nbBits == targetNbBits) --n; - /* renorm totalCost from 2^largestBits to 2^maxNbBits + /* renorm totalCost from 2^largestBits to 2^targetNbBits * note : totalCost is necessarily a multiple of baseCost */ - assert((totalCost & (baseCost - 1)) == 0); - totalCost >>= (largestBits - maxNbBits); + assert(((U32)totalCost & (baseCost - 1)) == 0); + totalCost >>= (largestBits - targetNbBits); assert(totalCost > 0); /* repay normalized cost */ @@ -342,19 +387,19 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) /* Get pos of last (smallest = lowest cum. count) symbol per rank */ ZSTD_memset(rankLast, 0xF0, sizeof(rankLast)); - { U32 currentNbBits = maxNbBits; + { U32 currentNbBits = targetNbBits; int pos; for (pos=n ; pos >= 0; pos--) { if (huffNode[pos].nbBits >= currentNbBits) continue; - currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ - rankLast[maxNbBits-currentNbBits] = (U32)pos; + currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */ + rankLast[targetNbBits-currentNbBits] = (U32)pos; } } while (totalCost > 0) { /* Try to reduce the next power of 2 above totalCost because we * gain back half the rank. */ - U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; + U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1; for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { U32 const highPos = rankLast[nBitsToDecrease]; U32 const lowPos = rankLast[nBitsToDecrease-1]; @@ -394,7 +439,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) rankLast[nBitsToDecrease] = noSymbol; else { rankLast[nBitsToDecrease]--; - if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease) rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ } } /* while (totalCost > 0) */ @@ -406,11 +451,11 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) * TODO. */ while (totalCost < 0) { /* Sometimes, cost correction overshoot */ - /* special case : no rank 1 symbol (using maxNbBits-1); - * let's create one from largest rank 0 (using maxNbBits). + /* special case : no rank 1 symbol (using targetNbBits-1); + * let's create one from largest rank 0 (using targetNbBits). */ if (rankLast[1] == noSymbol) { - while (huffNode[n].nbBits == maxNbBits) n--; + while (huffNode[n].nbBits == targetNbBits) n--; huffNode[n+1].nbBits--; assert(n >= 0); rankLast[1] = (U32)(n+1); @@ -424,7 +469,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) } /* repay normalized cost */ } /* there are several too large elements (at least >= 2) */ - return maxNbBits; + return targetNbBits; } typedef struct { @@ -432,7 +477,7 @@ typedef struct { U16 curr; } rankPos; -typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; +typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)]; /* Number of buckets available for HUF_sort() */ #define RANK_POSITION_TABLE_SIZE 192 @@ -451,8 +496,8 @@ typedef struct { * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing. */ #define RANK_POSITION_MAX_COUNT_LOG 32 -#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */ -#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */ +#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */) +#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */) /* Return the appropriate bucket index for a given count. See definition of * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy. @@ -460,7 +505,7 @@ typedef struct { static U32 HUF_getIndex(U32 const count) { return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF) ? count - : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN; + : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN; } /* Helper swap function for HUF_quickSortPartition() */ @@ -583,7 +628,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy /* Sort each bucket. */ for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) { - U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base; + int const bucketSize = rankPosition[n].curr - rankPosition[n].base; U32 const bucketStartIdx = rankPosition[n].base; if (bucketSize > 1) { assert(bucketStartIdx < maxSymbolValue1); @@ -594,6 +639,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy assert(HUF_isSorted(huffNode, maxSymbolValue1)); } + /** HUF_buildCTable_wksp() : * Same as HUF_buildCTable(), but using externally allocated scratch buffer. * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). @@ -614,6 +660,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue) int lowS, lowN; int nodeNb = STARTNODE; int n, nodeRoot; + DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1); /* init for parents */ nonNullRank = (int)maxSymbolValue; while(huffNode[nonNullRank].count == 0) nonNullRank--; @@ -640,6 +687,8 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue) for (n=0; n<=nonNullRank; n++) huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1)); + return nonNullRank; } @@ -677,28 +726,36 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i CTable[0] = maxNbBits; } -size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) +size_t +HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize) { - HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32)); + HUF_buildCTable_wksp_tables* const wksp_tables = + (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32)); nodeElt* const huffNode0 = wksp_tables->huffNodeTbl; nodeElt* const huffNode = huffNode0+1; int nonNullRank; + HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables)); + + DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1); + /* safety checks */ if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) - return ERROR(workSpace_tooSmall); + return ERROR(workSpace_tooSmall); if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) - return ERROR(maxSymbolValue_tooLarge); + return ERROR(maxSymbolValue_tooLarge); ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable)); /* sort, decreasing order */ HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); + DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1)); /* build tree */ nonNullRank = HUF_buildTree(huffNode, maxSymbolValue); - /* enforce maxTableLog */ + /* determine and enforce maxTableLog */ maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ @@ -807,7 +864,7 @@ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int id #if DEBUGLEVEL >= 1 { size_t const nbBits = HUF_getNbBits(elt); - size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1; + size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1; (void)dirtyBits; /* Middle bits are 0. */ assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0); @@ -887,7 +944,7 @@ static size_t HUF_closeCStream(HUF_CStream_t* bitC) { size_t const nbBits = bitC->bitPos[0] & 0xFF; if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ - return (bitC->ptr - bitC->startPtr) + (nbBits > 0); + return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0); } } @@ -1048,9 +1105,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, static size_t HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, const void* src, size_t srcSize, - const HUF_CElt* CTable, const int bmi2) + const HUF_CElt* CTable, const int flags) { - if (bmi2) { + if (flags & HUF_flags_bmi2) { return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); } return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); @@ -1061,28 +1118,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, static size_t HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, const void* src, size_t srcSize, - const HUF_CElt* CTable, const int bmi2) + const HUF_CElt* CTable, const int flags) { - (void)bmi2; + (void)flags; return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); } #endif -size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) { - return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); -} - -size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) -{ - return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags); } static size_t HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, const void* src, size_t srcSize, - const HUF_CElt* CTable, int bmi2) + const HUF_CElt* CTable, int flags) { size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ const BYTE* ip = (const BYTE*) src; @@ -1096,7 +1148,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, op += 6; /* jumpTable */ assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); if (cSize == 0 || cSize > 65535) return 0; MEM_writeLE16(ostart, (U16)cSize); op += cSize; @@ -1104,7 +1156,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, ip += segmentSize; assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); if (cSize == 0 || cSize > 65535) return 0; MEM_writeLE16(ostart+2, (U16)cSize); op += cSize; @@ -1112,7 +1164,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, ip += segmentSize; assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); if (cSize == 0 || cSize > 65535) return 0; MEM_writeLE16(ostart+4, (U16)cSize); op += cSize; @@ -1121,7 +1173,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, ip += segmentSize; assert(op <= oend); assert(ip <= iend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) ); if (cSize == 0 || cSize > 65535) return 0; op += cSize; } @@ -1129,14 +1181,9 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, return (size_t)(op-ostart); } -size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) -{ - return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); -} - -size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) { - return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags); } typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; @@ -1144,11 +1191,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; static size_t HUF_compressCTable_internal( BYTE* const ostart, BYTE* op, BYTE* const oend, const void* src, size_t srcSize, - HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) + HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags) { size_t const cSize = (nbStreams==HUF_singleStream) ? - HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : - HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); + HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) : + HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags); if (HUF_isError(cSize)) { return cSize; } if (cSize==0) { return 0; } /* uncompressible */ op += cSize; @@ -1171,6 +1218,79 @@ typedef struct { #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096 #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */ +unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue) +{ + unsigned cardinality = 0; + unsigned i; + + for (i = 0; i < maxSymbolValue + 1; i++) { + if (count[i] != 0) cardinality += 1; + } + + return cardinality; +} + +unsigned HUF_minTableLog(unsigned symbolCardinality) +{ + U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1; + return minBitsSymbols; +} + +unsigned HUF_optimalTableLog( + unsigned maxTableLog, + size_t srcSize, + unsigned maxSymbolValue, + void* workSpace, size_t wkspSize, + HUF_CElt* table, + const unsigned* count, + int flags) +{ + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables)); + + if (!(flags & HUF_flags_optimalDepth)) { + /* cheap evaluation, based on FSE */ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); + } + + { BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp); + size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp); + size_t maxBits, hSize, newSize; + const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue); + const unsigned minTableLog = HUF_minTableLog(symbolCardinality); + size_t optSize = ((size_t) ~0) - 1; + unsigned optLog = maxTableLog, optLogGuess; + + DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize); + + /* Search until size increases */ + for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) { + DEBUGLOG(7, "checking for huffLog=%u", optLogGuess); + maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize); + if (ERR_isError(maxBits)) continue; + + if (maxBits < optLogGuess && optLogGuess > minTableLog) break; + + hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize); + + if (ERR_isError(hSize)) continue; + + newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize; + + if (newSize > optSize + 1) { + break; + } + + if (newSize < optSize) { + optSize = newSize; + optLog = optLogGuess; + } + } + assert(optLog <= HUF_TABLELOG_MAX); + return optLog; + } +} + /* HUF_compress_internal() : * `workSpace_align4` must be aligned on 4-bytes boundaries, * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */ @@ -1180,14 +1300,14 @@ HUF_compress_internal (void* dst, size_t dstSize, unsigned maxSymbolValue, unsigned huffLog, HUF_nbStreams_e nbStreams, void* workSpace, size_t wkspSize, - HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, - const int bmi2, unsigned suspectUncompressible) + HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags) { HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t)); BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstSize; BYTE* op = ostart; + DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize); HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE); /* checks & inits */ @@ -1201,16 +1321,17 @@ HUF_compress_internal (void* dst, size_t dstSize, if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; /* Heuristic : If old table is valid, use it for small inputs */ - if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { + if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) { return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, - nbStreams, oldHufTable, bmi2); + nbStreams, oldHufTable, flags); } /* If uncompressible data is suspected, do a smaller sampling first */ DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2); - if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { + if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { size_t largestTotal = 0; + DEBUGLOG(5, "input suspected incompressible : sampling to check"); { unsigned maxSymbolValueBegin = maxSymbolValue; CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); largestTotal += largestBegin; @@ -1227,6 +1348,7 @@ HUF_compress_internal (void* dst, size_t dstSize, if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ } + DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1)); /* Check validity of previous table */ if ( repeat @@ -1235,19 +1357,20 @@ HUF_compress_internal (void* dst, size_t dstSize, *repeat = HUF_repeat_none; } /* Heuristic : use existing table for small inputs */ - if (preferRepeat && repeat && *repeat != HUF_repeat_none) { + if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) { return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, - nbStreams, oldHufTable, bmi2); + nbStreams, oldHufTable, flags); } /* Build Huffman Tree */ - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags); { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, maxSymbolValue, huffLog, &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); CHECK_F(maxBits); huffLog = (U32)maxBits; + DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1)); } /* Zero unused symbols in CTable, so we can check it for validity */ { @@ -1266,7 +1389,7 @@ HUF_compress_internal (void* dst, size_t dstSize, if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, - nbStreams, oldHufTable, bmi2); + nbStreams, oldHufTable, flags); } } /* Use the new huffman table */ @@ -1278,46 +1401,20 @@ HUF_compress_internal (void* dst, size_t dstSize, } return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, - nbStreams, table->CTable, bmi2); -} - - -size_t HUF_compress1X_wksp (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_singleStream, - workSpace, wkspSize, - NULL, NULL, 0, 0 /*bmi2*/, 0); + nbStreams, table->CTable, flags); } size_t HUF_compress1X_repeat (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, - int bmi2, unsigned suspectUncompressible) + HUF_CElt* hufTable, HUF_repeat* repeat, int flags) { + DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize); return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, HUF_singleStream, workSpace, wkspSize, hufTable, - repeat, preferRepeat, bmi2, suspectUncompressible); -} - -/* HUF_compress4X_repeat(): - * compress input using 4 streams. - * provide workspace to generate compression tables */ -size_t HUF_compress4X_wksp (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_fourStreams, - workSpace, wkspSize, - NULL, NULL, 0, 0 /*bmi2*/, 0); + repeat, flags); } /* HUF_compress4X_repeat(): @@ -1328,43 +1425,11 @@ size_t HUF_compress4X_repeat (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible) + HUF_CElt* hufTable, HUF_repeat* repeat, int flags) { + DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize); return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, HUF_fourStreams, workSpace, wkspSize, - hufTable, repeat, preferRepeat, bmi2, suspectUncompressible); + hufTable, repeat, flags); } - -#ifndef ZSTD_NO_UNUSED_FUNCTIONS -/** HUF_buildCTable() : - * @return : maxNbBits - * Note : count is used before tree is written, so they can safely overlap - */ -size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) -{ - HUF_buildCTable_wksp_tables workspace; - return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace)); -} - -size_t HUF_compress1X (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog) -{ - U64 workSpace[HUF_WORKSPACE_SIZE_U64]; - return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); -} - -size_t HUF_compress2 (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog) -{ - U64 workSpace[HUF_WORKSPACE_SIZE_U64]; - return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); -} - -size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); -} -#endif |
