summaryrefslogtreecommitdiffstats
path: root/thirdparty/zstd/compress
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/zstd/compress')
-rw-r--r--thirdparty/zstd/compress/fse_compress.c15
-rw-r--r--thirdparty/zstd/compress/huf_compress.c79
-rw-r--r--thirdparty/zstd/compress/zstd_compress.c443
-rw-r--r--thirdparty/zstd/compress/zstd_compress_internal.h56
-rw-r--r--thirdparty/zstd/compress/zstd_compress_superblock.c337
-rw-r--r--thirdparty/zstd/compress/zstd_cwksp.h32
-rw-r--r--thirdparty/zstd/compress/zstd_double_fast.c22
-rw-r--r--thirdparty/zstd/compress/zstd_double_fast.h11
-rw-r--r--thirdparty/zstd/compress/zstd_fast.c20
-rw-r--r--thirdparty/zstd/compress/zstd_lazy.c216
-rw-r--r--thirdparty/zstd/compress/zstd_lazy.h131
-rw-r--r--thirdparty/zstd/compress/zstd_ldm.c10
-rw-r--r--thirdparty/zstd/compress/zstd_opt.c328
-rw-r--r--thirdparty/zstd/compress/zstd_opt.h38
-rw-r--r--thirdparty/zstd/compress/zstdmt_compress.c173
15 files changed, 1239 insertions, 672 deletions
diff --git a/thirdparty/zstd/compress/fse_compress.c b/thirdparty/zstd/compress/fse_compress.c
index 5d3770808d..1ce3cf16ac 100644
--- a/thirdparty/zstd/compress/fse_compress.c
+++ b/thirdparty/zstd/compress/fse_compress.c
@@ -25,7 +25,7 @@
#include "../common/error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64
-#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
+#include "../common/zstd_deps.h" /* ZSTD_memset */
#include "../common/bits.h" /* ZSTD_highbit32 */
@@ -225,8 +225,8 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+ 4 /* bitCount initialized at 4 */
+ 2 /* first two symbols may use one additional bit each */) / 8)
- + 1 /* round up to whole nb bytes */
- + 2 /* additional two bytes for bitstream flush */;
+ + 1 /* round up to whole nb bytes */
+ + 2 /* additional two bytes for bitstream flush */;
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
}
@@ -255,7 +255,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
/* Init */
remaining = tableSize+1; /* +1 for extra accuracy */
threshold = tableSize;
- nbBits = tableLog+1;
+ nbBits = (int)tableLog+1;
while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
if (previousIs0) {
@@ -274,7 +274,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
}
while (symbol >= start+3) {
start+=3;
- bitStream += 3 << bitCount;
+ bitStream += 3U << bitCount;
bitCount += 2;
}
bitStream += (symbol-start) << bitCount;
@@ -294,7 +294,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
count++; /* +1 for extra accuracy */
if (count>=threshold)
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
- bitStream += count << bitCount;
+ bitStream += (U32)count << bitCount;
bitCount += nbBits;
bitCount -= (count<max);
previousIs0 = (count==1);
@@ -322,7 +322,8 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
out[1] = (BYTE)(bitStream>>8);
out+= (bitCount+7) /8;
- return (out-ostart);
+ assert(out >= ostart);
+ return (size_t)(out-ostart);
}
diff --git a/thirdparty/zstd/compress/huf_compress.c b/thirdparty/zstd/compress/huf_compress.c
index 29871877a7..ea00072320 100644
--- a/thirdparty/zstd/compress/huf_compress.c
+++ b/thirdparty/zstd/compress/huf_compress.c
@@ -220,6 +220,25 @@ static void HUF_setValue(HUF_CElt* elt, size_t value)
}
}
+HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
+{
+ HUF_CTableHeader header;
+ ZSTD_memcpy(&header, ctable, sizeof(header));
+ return header;
+}
+
+static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
+{
+ HUF_CTableHeader header;
+ HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
+ ZSTD_memset(&header, 0, sizeof(header));
+ assert(tableLog < 256);
+ header.tableLog = (BYTE)tableLog;
+ assert(maxSymbolValue < 256);
+ header.maxSymbolValue = (BYTE)maxSymbolValue;
+ ZSTD_memcpy(ctable, &header, sizeof(header));
+}
+
typedef struct {
HUF_CompressWeightsWksp wksp;
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
@@ -237,6 +256,9 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
+ assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
+ assert(HUF_readCTableHeader(CTable).tableLog == huffLog);
+
/* check conditions */
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
@@ -283,7 +305,9 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
- CTable[0] = tableLog;
+ *maxSymbolValuePtr = nbSymbols - 1;
+
+ HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);
/* Prepare base value per rank */
{ U32 n, nextRankStart = 0;
@@ -315,7 +339,6 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
{ U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
}
- *maxSymbolValuePtr = nbSymbols - 1;
return readSize;
}
@@ -323,6 +346,8 @@ U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
const HUF_CElt* const ct = CTable + 1;
assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
+ if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
+ return 0;
return (U32)HUF_getNbBits(ct[symbolValue]);
}
@@ -723,7 +748,8 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
for (n=0; n<alphabetSize; n++)
HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
- CTable[0] = maxNbBits;
+
+ HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
}
size_t
@@ -776,13 +802,20 @@ size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count,
}
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
- HUF_CElt const* ct = CTable + 1;
- int bad = 0;
- int s;
- for (s = 0; s <= (int)maxSymbolValue; ++s) {
- bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
- }
- return !bad;
+ HUF_CTableHeader header = HUF_readCTableHeader(CTable);
+ HUF_CElt const* ct = CTable + 1;
+ int bad = 0;
+ int s;
+
+ assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);
+
+ if (header.maxSymbolValue < maxSymbolValue)
+ return 0;
+
+ for (s = 0; s <= (int)maxSymbolValue; ++s) {
+ bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
+ }
+ return !bad;
}
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
@@ -1024,17 +1057,17 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable)
{
- U32 const tableLog = (U32)CTable[0];
+ U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
HUF_CElt const* ct = CTable + 1;
const BYTE* ip = (const BYTE*) src;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
- BYTE* op = ostart;
HUF_CStream_t bitC;
/* init */
if (dstSize < 8) return 0; /* not enough space to compress */
- { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
+ { BYTE* op = ostart;
+ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
if (HUF_isError(initErr)) return 0; }
if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
@@ -1255,7 +1288,7 @@ unsigned HUF_optimalTableLog(
{ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
- size_t maxBits, hSize, newSize;
+ size_t hSize, newSize;
const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
size_t optSize = ((size_t) ~0) - 1;
@@ -1266,12 +1299,14 @@ unsigned HUF_optimalTableLog(
/* Search until size increases */
for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
- maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
- if (ERR_isError(maxBits)) continue;
- if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
+ { size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
+ if (ERR_isError(maxBits)) continue;
+
+ if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
- hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
+ hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
+ }
if (ERR_isError(hSize)) continue;
@@ -1372,12 +1407,6 @@ HUF_compress_internal (void* dst, size_t dstSize,
huffLog = (U32)maxBits;
DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
}
- /* Zero unused symbols in CTable, so we can check it for validity */
- {
- size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
- size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
- ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
- }
/* Write table description header */
{ CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
@@ -1420,7 +1449,7 @@ size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
/* HUF_compress4X_repeat():
* compress input using 4 streams.
* consider skipping quickly
- * re-use an existing huffman compression table */
+ * reuse an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
diff --git a/thirdparty/zstd/compress/zstd_compress.c b/thirdparty/zstd/compress/zstd_compress.c
index d6133e70b4..9284e2a480 100644
--- a/thirdparty/zstd/compress/zstd_compress.c
+++ b/thirdparty/zstd/compress/zstd_compress.c
@@ -178,6 +178,7 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
+ DEBUGLOG(3, "ZSTD_freeCCtx (address: %p)", (void*)cctx);
if (cctx==NULL) return 0; /* support free on NULL */
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
"not compatible with static CCtx");
@@ -649,10 +650,11 @@ static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
return 0;
}
-#define BOUNDCHECK(cParam, val) { \
- RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
- parameter_outOfBound, "Param out of bounds"); \
-}
+#define BOUNDCHECK(cParam, val) \
+ do { \
+ RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
+ parameter_outOfBound, "Param out of bounds"); \
+ } while (0)
static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
@@ -868,7 +870,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
#else
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
CCtxParams->nbWorkers = value;
- return CCtxParams->nbWorkers;
+ return (size_t)(CCtxParams->nbWorkers);
#endif
case ZSTD_c_jobSize :
@@ -892,7 +894,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
#else
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
CCtxParams->overlapLog = value;
- return CCtxParams->overlapLog;
+ return (size_t)CCtxParams->overlapLog;
#endif
case ZSTD_c_rsyncable :
@@ -902,7 +904,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
#else
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
CCtxParams->rsyncable = value;
- return CCtxParams->rsyncable;
+ return (size_t)CCtxParams->rsyncable;
#endif
case ZSTD_c_enableDedicatedDictSearch :
@@ -939,8 +941,10 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
return CCtxParams->ldmParams.hashRateLog;
case ZSTD_c_targetCBlockSize :
- if (value!=0) /* 0 ==> default */
+ if (value!=0) { /* 0 ==> default */
+ value = MAX(value, ZSTD_TARGETCBLOCKSIZE_MIN);
BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+ }
CCtxParams->targetCBlockSize = (U32)value;
return CCtxParams->targetCBlockSize;
@@ -968,7 +972,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_validateSequences:
BOUNDCHECK(ZSTD_c_validateSequences, value);
CCtxParams->validateSequences = value;
- return CCtxParams->validateSequences;
+ return (size_t)CCtxParams->validateSequences;
case ZSTD_c_useBlockSplitter:
BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
@@ -983,7 +987,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_deterministicRefPrefix:
BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
CCtxParams->deterministicRefPrefix = !!value;
- return CCtxParams->deterministicRefPrefix;
+ return (size_t)CCtxParams->deterministicRefPrefix;
case ZSTD_c_prefetchCDictTables:
BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
@@ -993,7 +997,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_enableSeqProducerFallback:
BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
CCtxParams->enableMatchFinderFallback = value;
- return CCtxParams->enableMatchFinderFallback;
+ return (size_t)CCtxParams->enableMatchFinderFallback;
case ZSTD_c_maxBlockSize:
if (value!=0) /* 0 ==> default */
@@ -1363,7 +1367,6 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
"Reset parameters is only possible during init stage.");
ZSTD_clearAllDicts(cctx);
- ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx));
return ZSTD_CCtxParams_reset(&cctx->requestedParams);
}
return 0;
@@ -1391,11 +1394,12 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams)
{
-# define CLAMP_TYPE(cParam, val, type) { \
- ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \
- if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \
- else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
- }
+# define CLAMP_TYPE(cParam, val, type) \
+ do { \
+ ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \
+ if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \
+ else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
+ } while (0)
# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
CLAMP(ZSTD_c_windowLog, cParams.windowLog);
CLAMP(ZSTD_c_chainLog, cParams.chainLog);
@@ -1467,6 +1471,48 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
assert(ZSTD_checkCParams(cPar)==0);
+ /* Cascade the selected strategy down to the next-highest one built into
+ * this binary. */
+#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_btultra2) {
+ cPar.strategy = ZSTD_btultra;
+ }
+ if (cPar.strategy == ZSTD_btultra) {
+ cPar.strategy = ZSTD_btopt;
+ }
+#endif
+#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_btopt) {
+ cPar.strategy = ZSTD_btlazy2;
+ }
+#endif
+#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_btlazy2) {
+ cPar.strategy = ZSTD_lazy2;
+ }
+#endif
+#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_lazy2) {
+ cPar.strategy = ZSTD_lazy;
+ }
+#endif
+#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_lazy) {
+ cPar.strategy = ZSTD_greedy;
+ }
+#endif
+#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_greedy) {
+ cPar.strategy = ZSTD_dfast;
+ }
+#endif
+#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
+ if (cPar.strategy == ZSTD_dfast) {
+ cPar.strategy = ZSTD_fast;
+ cPar.targetLength = 0;
+ }
+#endif
+
switch (mode) {
case ZSTD_cpm_unknown:
case ZSTD_cpm_noAttachDict:
@@ -1617,8 +1663,8 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+ ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
- + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
- + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+ + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t))
+ + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
? ZSTD_cwksp_aligned_alloc_size(hSize)
: 0;
@@ -1707,7 +1753,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
* be needed. However, we still allocate two 0-sized buffers, which can
* take space under ASAN. */
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
- &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
+ &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
}
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
@@ -1768,7 +1814,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
- ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
+ ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
}
}
@@ -2001,8 +2047,8 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
- ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
- ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+ ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_match_t));
+ ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
}
ms->cParams = *cParams;
@@ -2074,7 +2120,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
{ size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
size_t const blockSize = MIN(params->maxBlockSize, windowSize);
- size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useSequenceProducer);
+ size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, ZSTD_hasExtSeqProd(params));
size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
? ZSTD_compressBound(blockSize) + 1
: 0;
@@ -2091,8 +2137,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
size_t const neededSpace =
ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
- buffInSize, buffOutSize, pledgedSrcSize, params->useSequenceProducer, params->maxBlockSize);
- int resizeWorkspace;
+ buffInSize, buffOutSize, pledgedSrcSize, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
@@ -2101,7 +2146,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
{ /* Check if workspace is large enough, alloc a new one if needed */
int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
- resizeWorkspace = workspaceTooSmall || workspaceWasteful;
+ int resizeWorkspace = workspaceTooSmall || workspaceWasteful;
DEBUGLOG(4, "Need %zu B workspace", neededSpace);
DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
@@ -2176,10 +2221,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
}
/* reserve space for block-level external sequences */
- if (params->useSequenceProducer) {
+ if (ZSTD_hasExtSeqProd(params)) {
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
- zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
- zc->externalMatchCtx.seqBuffer =
+ zc->extSeqBufCapacity = maxNbExternalSeq;
+ zc->extSeqBuf =
(ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
}
@@ -2564,7 +2609,7 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa
assert(size < (1U<<31)); /* can be casted to int */
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
- /* To validate that the table re-use logic is sound, and that we don't
+ /* To validate that the table reuse logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table
* space every time we mark it dirty.
*
@@ -2992,40 +3037,43 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS
static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
{ ZSTD_compressBlock_fast /* default for 0 */,
ZSTD_compressBlock_fast,
- ZSTD_compressBlock_doubleFast,
- ZSTD_compressBlock_greedy,
- ZSTD_compressBlock_lazy,
- ZSTD_compressBlock_lazy2,
- ZSTD_compressBlock_btlazy2,
- ZSTD_compressBlock_btopt,
- ZSTD_compressBlock_btultra,
- ZSTD_compressBlock_btultra2 },
+ ZSTD_COMPRESSBLOCK_DOUBLEFAST,
+ ZSTD_COMPRESSBLOCK_GREEDY,
+ ZSTD_COMPRESSBLOCK_LAZY,
+ ZSTD_COMPRESSBLOCK_LAZY2,
+ ZSTD_COMPRESSBLOCK_BTLAZY2,
+ ZSTD_COMPRESSBLOCK_BTOPT,
+ ZSTD_COMPRESSBLOCK_BTULTRA,
+ ZSTD_COMPRESSBLOCK_BTULTRA2
+ },
{ ZSTD_compressBlock_fast_extDict /* default for 0 */,
ZSTD_compressBlock_fast_extDict,
- ZSTD_compressBlock_doubleFast_extDict,
- ZSTD_compressBlock_greedy_extDict,
- ZSTD_compressBlock_lazy_extDict,
- ZSTD_compressBlock_lazy2_extDict,
- ZSTD_compressBlock_btlazy2_extDict,
- ZSTD_compressBlock_btopt_extDict,
- ZSTD_compressBlock_btultra_extDict,
- ZSTD_compressBlock_btultra_extDict },
+ ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT,
+ ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT,
+ ZSTD_COMPRESSBLOCK_LAZY_EXTDICT,
+ ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT,
+ ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT,
+ ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT,
+ ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT,
+ ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT
+ },
{ ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,
ZSTD_compressBlock_fast_dictMatchState,
- ZSTD_compressBlock_doubleFast_dictMatchState,
- ZSTD_compressBlock_greedy_dictMatchState,
- ZSTD_compressBlock_lazy_dictMatchState,
- ZSTD_compressBlock_lazy2_dictMatchState,
- ZSTD_compressBlock_btlazy2_dictMatchState,
- ZSTD_compressBlock_btopt_dictMatchState,
- ZSTD_compressBlock_btultra_dictMatchState,
- ZSTD_compressBlock_btultra_dictMatchState },
+ ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE,
+ ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE
+ },
{ NULL /* default for 0 */,
NULL,
NULL,
- ZSTD_compressBlock_greedy_dedicatedDictSearch,
- ZSTD_compressBlock_lazy_dedicatedDictSearch,
- ZSTD_compressBlock_lazy2_dedicatedDictSearch,
+ ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH,
+ ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH,
+ ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH,
NULL,
NULL,
NULL,
@@ -3038,18 +3086,26 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS
DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
- { ZSTD_compressBlock_greedy_row,
- ZSTD_compressBlock_lazy_row,
- ZSTD_compressBlock_lazy2_row },
- { ZSTD_compressBlock_greedy_extDict_row,
- ZSTD_compressBlock_lazy_extDict_row,
- ZSTD_compressBlock_lazy2_extDict_row },
- { ZSTD_compressBlock_greedy_dictMatchState_row,
- ZSTD_compressBlock_lazy_dictMatchState_row,
- ZSTD_compressBlock_lazy2_dictMatchState_row },
- { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
- ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
- ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
+ {
+ ZSTD_COMPRESSBLOCK_GREEDY_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY2_ROW
+ },
+ {
+ ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW
+ },
+ {
+ ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW
+ },
+ {
+ ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW,
+ ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW
+ }
};
DEBUGLOG(4, "Selecting a row-based matchfinder");
assert(useRowMatchFinder != ZSTD_ps_auto);
@@ -3192,7 +3248,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
/* External matchfinder + LDM is technically possible, just not implemented yet.
* We need to revisit soon and implement it. */
RETURN_ERROR_IF(
- zc->appliedParams.useSequenceProducer,
+ ZSTD_hasExtSeqProd(&zc->appliedParams),
parameter_combination_unsupported,
"Long-distance matching with external sequence producer enabled is not currently supported."
);
@@ -3211,7 +3267,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
/* External matchfinder + LDM is technically possible, just not implemented yet.
* We need to revisit soon and implement it. */
RETURN_ERROR_IF(
- zc->appliedParams.useSequenceProducer,
+ ZSTD_hasExtSeqProd(&zc->appliedParams),
parameter_combination_unsupported,
"Long-distance matching with external sequence producer enabled is not currently supported."
);
@@ -3230,18 +3286,18 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
zc->appliedParams.useRowMatchFinder,
src, srcSize);
assert(ldmSeqStore.pos == ldmSeqStore.size);
- } else if (zc->appliedParams.useSequenceProducer) {
+ } else if (ZSTD_hasExtSeqProd(&zc->appliedParams)) {
assert(
- zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize)
+ zc->extSeqBufCapacity >= ZSTD_sequenceBound(srcSize)
);
- assert(zc->externalMatchCtx.mFinder != NULL);
+ assert(zc->appliedParams.extSeqProdFunc != NULL);
{ U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog;
- size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)(
- zc->externalMatchCtx.mState,
- zc->externalMatchCtx.seqBuffer,
- zc->externalMatchCtx.seqBufferCapacity,
+ size_t const nbExternalSeqs = (zc->appliedParams.extSeqProdFunc)(
+ zc->appliedParams.extSeqProdState,
+ zc->extSeqBuf,
+ zc->extSeqBufCapacity,
src, srcSize,
NULL, 0, /* dict and dictSize, currently not supported */
zc->appliedParams.compressionLevel,
@@ -3249,21 +3305,21 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
);
size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult(
- zc->externalMatchCtx.seqBuffer,
+ zc->extSeqBuf,
nbExternalSeqs,
- zc->externalMatchCtx.seqBufferCapacity,
+ zc->extSeqBufCapacity,
srcSize
);
/* Return early if there is no error, since we don't need to worry about last literals */
if (!ZSTD_isError(nbPostProcessedSeqs)) {
ZSTD_sequencePosition seqPos = {0,0,0};
- size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs);
+ size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->extSeqBuf, nbPostProcessedSeqs);
RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!");
FORWARD_IF_ERROR(
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
zc, &seqPos,
- zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs,
+ zc->extSeqBuf, nbPostProcessedSeqs,
src, srcSize,
zc->appliedParams.searchForExternalRepcodes
),
@@ -3280,9 +3336,11 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
}
/* Fallback to software matchfinder */
- { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
- zc->appliedParams.useRowMatchFinder,
- dictMode);
+ { ZSTD_blockCompressor const blockCompressor =
+ ZSTD_selectBlockCompressor(
+ zc->appliedParams.cParams.strategy,
+ zc->appliedParams.useRowMatchFinder,
+ dictMode);
ms->ldmSeqStore = NULL;
DEBUGLOG(
5,
@@ -3292,9 +3350,10 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
} }
} else { /* not long range mode and no external matchfinder */
- ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
- zc->appliedParams.useRowMatchFinder,
- dictMode);
+ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(
+ zc->appliedParams.cParams.strategy,
+ zc->appliedParams.useRowMatchFinder,
+ dictMode);
ms->ldmSeqStore = NULL;
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
}
@@ -3304,29 +3363,38 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
return ZSTDbss_compress;
}
-static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
+static size_t ZSTD_copyBlockSequences(SeqCollector* seqCollector, const seqStore_t* seqStore, const U32 prevRepcodes[ZSTD_REP_NUM])
{
- const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
- const seqDef* seqStoreSeqs = seqStore->sequencesStart;
- size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
- size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
- size_t literalsRead = 0;
- size_t lastLLSize;
+ const seqDef* inSeqs = seqStore->sequencesStart;
+ const size_t nbInSequences = seqStore->sequences - inSeqs;
+ const size_t nbInLiterals = (size_t)(seqStore->lit - seqStore->litStart);
- ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
+ ZSTD_Sequence* outSeqs = seqCollector->seqIndex == 0 ? seqCollector->seqStart : seqCollector->seqStart + seqCollector->seqIndex;
+ const size_t nbOutSequences = nbInSequences + 1;
+ size_t nbOutLiterals = 0;
+ repcodes_t repcodes;
size_t i;
- repcodes_t updatedRepcodes;
- assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
- /* Ensure we have enough space for last literals "sequence" */
- assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
- ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
- for (i = 0; i < seqStoreSeqSize; ++i) {
- U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
- outSeqs[i].litLength = seqStoreSeqs[i].litLength;
- outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
+ /* Bounds check that we have enough space for every input sequence
+ * and the block delimiter
+ */
+ assert(seqCollector->seqIndex <= seqCollector->maxSequences);
+ RETURN_ERROR_IF(
+ nbOutSequences > (size_t)(seqCollector->maxSequences - seqCollector->seqIndex),
+ dstSize_tooSmall,
+ "Not enough space to copy sequences");
+
+ ZSTD_memcpy(&repcodes, prevRepcodes, sizeof(repcodes));
+ for (i = 0; i < nbInSequences; ++i) {
+ U32 rawOffset;
+ outSeqs[i].litLength = inSeqs[i].litLength;
+ outSeqs[i].matchLength = inSeqs[i].mlBase + MINMATCH;
outSeqs[i].rep = 0;
+ /* Handle the possible single length >= 64K
+ * There can only be one because we add MINMATCH to every match length,
+ * and blocks are at most 128K.
+ */
if (i == seqStore->longLengthPos) {
if (seqStore->longLengthType == ZSTD_llt_literalLength) {
outSeqs[i].litLength += 0x10000;
@@ -3335,41 +3403,55 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
}
}
- if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
- /* Derive the correct offset corresponding to a repcode */
- outSeqs[i].rep = seqStoreSeqs[i].offBase;
+ /* Determine the raw offset given the offBase, which may be a repcode. */
+ if (OFFBASE_IS_REPCODE(inSeqs[i].offBase)) {
+ const U32 repcode = OFFBASE_TO_REPCODE(inSeqs[i].offBase);
+ assert(repcode > 0);
+ outSeqs[i].rep = repcode;
if (outSeqs[i].litLength != 0) {
- rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
+ rawOffset = repcodes.rep[repcode - 1];
} else {
- if (outSeqs[i].rep == 3) {
- rawOffset = updatedRepcodes.rep[0] - 1;
+ if (repcode == 3) {
+ assert(repcodes.rep[0] > 1);
+ rawOffset = repcodes.rep[0] - 1;
} else {
- rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
+ rawOffset = repcodes.rep[repcode];
}
}
+ } else {
+ rawOffset = OFFBASE_TO_OFFSET(inSeqs[i].offBase);
}
outSeqs[i].offset = rawOffset;
- /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
- so we provide seqStoreSeqs[i].offset - 1 */
- ZSTD_updateRep(updatedRepcodes.rep,
- seqStoreSeqs[i].offBase,
- seqStoreSeqs[i].litLength == 0);
- literalsRead += outSeqs[i].litLength;
+
+ /* Update repcode history for the sequence */
+ ZSTD_updateRep(repcodes.rep,
+ inSeqs[i].offBase,
+ inSeqs[i].litLength == 0);
+
+ nbOutLiterals += outSeqs[i].litLength;
}
/* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
* If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
* for the block boundary, according to the API.
*/
- assert(seqStoreLiteralsSize >= literalsRead);
- lastLLSize = seqStoreLiteralsSize - literalsRead;
- outSeqs[i].litLength = (U32)lastLLSize;
- outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
- seqStoreSeqSize++;
- zc->seqCollector.seqIndex += seqStoreSeqSize;
+ assert(nbInLiterals >= nbOutLiterals);
+ {
+ const size_t lastLLSize = nbInLiterals - nbOutLiterals;
+ outSeqs[nbInSequences].litLength = (U32)lastLLSize;
+ outSeqs[nbInSequences].matchLength = 0;
+ outSeqs[nbInSequences].offset = 0;
+ assert(nbOutSequences == nbInSequences + 1);
+ }
+ seqCollector->seqIndex += nbOutSequences;
+ assert(seqCollector->seqIndex <= seqCollector->maxSequences);
+
+ return 0;
}
size_t ZSTD_sequenceBound(size_t srcSize) {
- return (srcSize / ZSTD_MINMATCH_MIN) + 1;
+ const size_t maxNbSeq = (srcSize / ZSTD_MINMATCH_MIN) + 1;
+ const size_t maxNbDelims = (srcSize / ZSTD_BLOCKSIZE_MAX_MIN) + 1;
+ return maxNbSeq + maxNbDelims;
}
size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
@@ -3378,6 +3460,16 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
const size_t dstCapacity = ZSTD_compressBound(srcSize);
void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
SeqCollector seqCollector;
+ {
+ int targetCBlockSize;
+ FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetCBlockSize, &targetCBlockSize), "");
+ RETURN_ERROR_IF(targetCBlockSize != 0, parameter_unsupported, "targetCBlockSize != 0");
+ }
+ {
+ int nbWorkers;
+ FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers), "");
+ RETURN_ERROR_IF(nbWorkers != 0, parameter_unsupported, "nbWorkers != 0");
+ }
RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
@@ -3387,8 +3479,12 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
seqCollector.maxSequences = outSeqsSize;
zc->seqCollector = seqCollector;
- ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
- ZSTD_customFree(dst, ZSTD_defaultCMem);
+ {
+ const size_t ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
+ ZSTD_customFree(dst, ZSTD_defaultCMem);
+ FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed");
+ }
+ assert(zc->seqCollector.seqIndex <= ZSTD_sequenceBound(srcSize));
return zc->seqCollector.seqIndex;
}
@@ -3981,8 +4077,9 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,
cSeqsSize = 1;
}
+ /* Sequence collection not supported when block splitting */
if (zc->seqCollector.collectSequences) {
- ZSTD_copyBlockSequences(zc);
+ FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, seqStore, dRepOriginal.rep), "copyBlockSequences failed");
ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
return 0;
}
@@ -4204,6 +4301,7 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
if (bss == ZSTDbss_noCompress) {
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+ RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
@@ -4236,11 +4334,15 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
- if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+ if (bss == ZSTDbss_noCompress) {
+ RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
+ cSize = 0;
+ goto out;
+ }
}
if (zc->seqCollector.collectSequences) {
- ZSTD_copyBlockSequences(zc);
+ FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, ZSTD_getSeqStore(zc), zc->blockState.prevCBlock->rep), "copyBlockSequences failed");
ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
return 0;
}
@@ -4553,19 +4655,15 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
}
}
-size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{
- RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
- "wrong cctx stage");
- RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable,
- parameter_unsupported,
- "incompatible with ldm");
+ assert(cctx->stage == ZSTDcs_init);
+ assert(nbSeq == 0 || cctx->appliedParams.ldmParams.enableLdm != ZSTD_ps_enable);
cctx->externSeqStore.seq = seq;
cctx->externSeqStore.size = nbSeq;
cctx->externSeqStore.capacity = nbSeq;
cctx->externSeqStore.pos = 0;
cctx->externSeqStore.posInSequence = 0;
- return 0;
}
@@ -4760,12 +4858,19 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
ZSTD_fillHashTable(ms, iend, dtlm, tfp);
break;
case ZSTD_dfast:
+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp);
+#else
+ assert(0); /* shouldn't be called: cparams should've been adjusted. */
+#endif
break;
case ZSTD_greedy:
case ZSTD_lazy:
case ZSTD_lazy2:
+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR)
assert(srcSize >= HASH_READ_SIZE);
if (ms->dedicatedDictSearch) {
assert(ms->chainTable != NULL);
@@ -4782,14 +4887,23 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
DEBUGLOG(4, "Using chain-based hash table for lazy dict");
}
}
+#else
+ assert(0); /* shouldn't be called: cparams should've been adjusted. */
+#endif
break;
case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
case ZSTD_btopt:
case ZSTD_btultra:
case ZSTD_btultra2:
+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
assert(srcSize >= HASH_READ_SIZE);
ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
+#else
+ assert(0); /* shouldn't be called: cparams should've been adjusted. */
+#endif
break;
default:
@@ -4836,11 +4950,10 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
/* We only set the loaded table as valid if it contains all non-zero
* weights. Otherwise, we set it to check */
- if (!hasZeroWeights)
+ if (!hasZeroWeights && maxSymbolValue == 255)
bs->entropy.huf.repeatMode = HUF_repeat_valid;
RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
- RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
dictPtr += hufHeaderSize;
}
@@ -5107,14 +5220,13 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
BYTE* const ostart = (BYTE*)dst;
BYTE* op = ostart;
- size_t fhSize = 0;
DEBUGLOG(4, "ZSTD_writeEpilogue");
RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
/* special case : empty frame */
if (cctx->stage == ZSTDcs_init) {
- fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
+ size_t fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
dstCapacity -= fhSize;
op += fhSize;
@@ -5124,8 +5236,9 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
if (cctx->stage != ZSTDcs_ending) {
/* write one last empty block, make it the "last" block */
U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
- RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
- MEM_writeLE32(op, cBlockHeader24);
+ ZSTD_STATIC_ASSERT(ZSTD_BLOCKHEADERSIZE == 3);
+ RETURN_ERROR_IF(dstCapacity<3, dstSize_tooSmall, "no room for epilogue");
+ MEM_writeLE24(op, cBlockHeader24);
op += ZSTD_blockHeaderSize;
dstCapacity -= ZSTD_blockHeaderSize;
}
@@ -5455,7 +5568,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced2(
cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
customMem);
- if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+ if (!cdict || ZSTD_isError( ZSTD_initCDict_internal(cdict,
dict, dictSize,
dictLoadMethod, dictContentType,
cctxParams) )) {
@@ -5879,7 +5992,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) {
assert(input->pos >= zcs->stableIn_notConsumed);
input->pos -= zcs->stableIn_notConsumed;
- ip -= zcs->stableIn_notConsumed;
+ if (ip) ip -= zcs->stableIn_notConsumed;
zcs->stableIn_notConsumed = 0;
}
if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
@@ -6138,7 +6251,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
#ifdef ZSTD_MULTITHREAD
/* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */
RETURN_ERROR_IF(
- params.useSequenceProducer == 1 && params.nbWorkers >= 1,
+ ZSTD_hasExtSeqProd(&params) && params.nbWorkers >= 1,
parameter_combination_unsupported,
"External sequence producer isn't supported with nbWorkers >= 1"
);
@@ -6430,7 +6543,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
if (cctx->appliedParams.validateSequences) {
seqPos->posInSrc += litLength + matchLength;
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
- cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
+ cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
"Sequence validation failed");
}
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
@@ -6568,7 +6681,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
if (cctx->appliedParams.validateSequences) {
seqPos->posInSrc += litLength + matchLength;
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
- cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
+ cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
"Sequence validation failed");
}
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
@@ -7014,19 +7127,27 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH
}
void ZSTD_registerSequenceProducer(
- ZSTD_CCtx* zc, void* mState,
- ZSTD_sequenceProducer_F* mFinder
+ ZSTD_CCtx* zc,
+ void* extSeqProdState,
+ ZSTD_sequenceProducer_F extSeqProdFunc
+) {
+ assert(zc != NULL);
+ ZSTD_CCtxParams_registerSequenceProducer(
+ &zc->requestedParams, extSeqProdState, extSeqProdFunc
+ );
+}
+
+void ZSTD_CCtxParams_registerSequenceProducer(
+ ZSTD_CCtx_params* params,
+ void* extSeqProdState,
+ ZSTD_sequenceProducer_F extSeqProdFunc
) {
- if (mFinder != NULL) {
- ZSTD_externalMatchCtx emctx;
- emctx.mState = mState;
- emctx.mFinder = mFinder;
- emctx.seqBuffer = NULL;
- emctx.seqBufferCapacity = 0;
- zc->externalMatchCtx = emctx;
- zc->requestedParams.useSequenceProducer = 1;
+ assert(params != NULL);
+ if (extSeqProdFunc != NULL) {
+ params->extSeqProdFunc = extSeqProdFunc;
+ params->extSeqProdState = extSeqProdState;
} else {
- ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx));
- zc->requestedParams.useSequenceProducer = 0;
+ params->extSeqProdFunc = NULL;
+ params->extSeqProdState = NULL;
}
}
diff --git a/thirdparty/zstd/compress/zstd_compress_internal.h b/thirdparty/zstd/compress/zstd_compress_internal.h
index 10f68d010e..e41d7b78ec 100644
--- a/thirdparty/zstd/compress/zstd_compress_internal.h
+++ b/thirdparty/zstd/compress/zstd_compress_internal.h
@@ -39,7 +39,7 @@ extern "C" {
It's not a big deal though : candidate will just be sorted again.
Additionally, candidate position 1 will be lost.
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
- The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
@@ -159,23 +159,24 @@ typedef struct {
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
typedef struct {
- int price;
- U32 off;
- U32 mlen;
- U32 litlen;
- U32 rep[ZSTD_REP_NUM];
+ int price; /* price from beginning of segment to this position */
+ U32 off; /* offset of previous match */
+ U32 mlen; /* length of previous match */
+ U32 litlen; /* nb of literals since previous match */
+ U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */
} ZSTD_optimal_t;
typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
+#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
typedef struct {
/* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
unsigned* litFreq; /* table of literals statistics, of size 256 */
unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
- ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
- ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
+ ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_SIZE */
+ ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */
U32 litSum; /* nb of literals */
U32 litLengthSum; /* nb of litLength codes */
@@ -228,7 +229,7 @@ struct ZSTD_matchState_t {
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
- U64 hashSalt; /* For row-based matchFinder: salts the hash for re-use of tag table */
+ U64 hashSalt; /* For row-based matchFinder: salts the hash for reuse of tag table */
U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */
U32* hashTable;
@@ -360,10 +361,11 @@ struct ZSTD_CCtx_params_s {
* if the external matchfinder returns an error code. */
int enableMatchFinderFallback;
- /* Indicates whether an external matchfinder has been referenced.
- * Users can't set this externally.
- * It is set internally in ZSTD_registerSequenceProducer(). */
- int useSequenceProducer;
+ /* Parameters for the external sequence producer API.
+ * Users set these parameters through ZSTD_registerSequenceProducer().
+ * It is not possible to set these parameters individually through the public API. */
+ void* extSeqProdState;
+ ZSTD_sequenceProducer_F extSeqProdFunc;
/* Adjust the max block size*/
size_t maxBlockSize;
@@ -401,14 +403,6 @@ typedef struct {
ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;
-/* Context for block-level external matchfinder API */
-typedef struct {
- void* mState;
- ZSTD_sequenceProducer_F* mFinder;
- ZSTD_Sequence* seqBuffer;
- size_t seqBufferCapacity;
-} ZSTD_externalMatchCtx;
-
struct ZSTD_CCtx_s {
ZSTD_compressionStage_e stage;
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
@@ -479,8 +473,9 @@ struct ZSTD_CCtx_s {
/* Workspace for block splitter */
ZSTD_blockSplitCtx blockSplitCtx;
- /* Workspace for external matchfinder */
- ZSTD_externalMatchCtx externalMatchCtx;
+ /* Buffer for output from external sequence producer */
+ ZSTD_Sequence* extSeqBuf;
+ size_t extSeqBufCapacity;
};
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
@@ -1053,7 +1048,9 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
* The least significant cycleLog bits of the indices must remain the same,
* which may be 0. Every index up to maxDist in the past must be valid.
*/
-MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
U32 maxDist, void const* src)
{
/* preemptive overflow correction:
@@ -1246,7 +1243,9 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
* forget about the extDict. Handles overlap of the prefix and extDict.
* Returns non-zero if the segment is contiguous.
*/
-MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_window_update(ZSTD_window_t* window,
void const* src, size_t srcSize,
int forceNonContiguous)
{
@@ -1467,11 +1466,10 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
* This cannot be used when long range matching is enabled.
* Zstd will use these sequences, and pass the literals to a secondary block
* compressor.
- * @return : An error code on failure.
* NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
* access and data corruption.
*/
-size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
/** ZSTD_cycleLog() :
* condition for correct operation : hashLog > 1 */
@@ -1509,6 +1507,10 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
+/* Returns 1 if an external sequence producer is registered, otherwise returns 0. */
+MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
+ return params->extSeqProdFunc != NULL;
+}
/* ===============================================================
* Deprecated definitions that are still used internally to avoid
diff --git a/thirdparty/zstd/compress/zstd_compress_superblock.c b/thirdparty/zstd/compress/zstd_compress_superblock.c
index 638c4acbe7..628a2dccd0 100644
--- a/thirdparty/zstd/compress/zstd_compress_superblock.c
+++ b/thirdparty/zstd/compress/zstd_compress_superblock.c
@@ -76,8 +76,8 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
}
{ int const flags = bmi2 ? HUF_flags_bmi2 : 0;
- const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags)
- : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags);
+ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
+ : HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
op += cSize;
cLitSize += cSize;
if (cSize == 0 || ERR_isError(cSize)) {
@@ -102,7 +102,7 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
- { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
+ { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
@@ -122,30 +122,30 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
}
*entropyWritten = 1;
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
- return op-ostart;
+ return (size_t)(op-ostart);
}
static size_t
ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
- const seqDef* sequences, size_t nbSeq,
- size_t litSize, int lastSequence)
+ const seqDef* sequences, size_t nbSeqs,
+ size_t litSize, int lastSubBlock)
{
- const seqDef* const sstart = sequences;
- const seqDef* const send = sequences + nbSeq;
- const seqDef* sp = sstart;
size_t matchLengthSum = 0;
size_t litLengthSum = 0;
- (void)(litLengthSum); /* suppress unused variable warning on some environments */
- while (send-sp > 0) {
- ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
+ size_t n;
+ for (n=0; n<nbSeqs; n++) {
+ const ZSTD_sequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
litLengthSum += seqLen.litLength;
matchLengthSum += seqLen.matchLength;
- sp++;
}
- assert(litLengthSum <= litSize);
- if (!lastSequence) {
+ DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
+ (unsigned)nbSeqs, (const void*)sequences,
+ (unsigned)litLengthSum, (unsigned)matchLengthSum);
+ if (!lastSubBlock)
assert(litLengthSum == litSize);
- }
+ else
+ assert(litLengthSum <= litSize);
+ (void)litLengthSum;
return matchLengthSum + litSize;
}
@@ -180,14 +180,14 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
/* Sequences Header */
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
dstSize_tooSmall, "");
- if (nbSeq < 0x7F)
+ if (nbSeq < 128)
*op++ = (BYTE)nbSeq;
else if (nbSeq < LONGNBSEQ)
op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
else
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
if (nbSeq==0) {
- return op - ostart;
+ return (size_t)(op - ostart);
}
/* seqHead : flags for FSE encoding type */
@@ -209,7 +209,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
}
{ size_t const bitstreamSize = ZSTD_encodeSequences(
- op, oend - op,
+ op, (size_t)(oend - op),
fseTables->matchlengthCTable, mlCode,
fseTables->offcodeCTable, ofCode,
fseTables->litlengthCTable, llCode,
@@ -253,7 +253,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
#endif
*entropyWritten = 1;
- return op - ostart;
+ return (size_t)(op - ostart);
}
/** ZSTD_compressSubBlock() :
@@ -279,7 +279,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
{ size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
&entropyMetadata->hufMetadata, literals, litSize,
- op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
+ op, (size_t)(oend-op),
+ bmi2, writeLitEntropy, litEntropyWritten);
FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
if (cLitSize == 0) return 0;
op += cLitSize;
@@ -289,18 +290,18 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
sequences, nbSeq,
llCode, mlCode, ofCode,
cctxParams,
- op, oend-op,
+ op, (size_t)(oend-op),
bmi2, writeSeqEntropy, seqEntropyWritten);
FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
if (cSeqSize == 0) return 0;
op += cSeqSize;
}
/* Write block header */
- { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
+ { size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
MEM_writeLE24(ostart, cBlockHeader24);
}
- return op-ostart;
+ return (size_t)(op-ostart);
}
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
@@ -389,7 +390,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
return cSeqSizeEstimate + sequencesSectionHeaderSize;
}
-static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
+typedef struct {
+ size_t estLitSize;
+ size_t estBlockSize;
+} EstimatedBlockSize;
+static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
const BYTE* ofCodeTable,
const BYTE* llCodeTable,
const BYTE* mlCodeTable,
@@ -397,15 +402,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
const ZSTD_entropyCTables_t* entropy,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize,
- int writeLitEntropy, int writeSeqEntropy) {
- size_t cSizeEstimate = 0;
- cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
- &entropy->huf, &entropyMetadata->hufMetadata,
- workspace, wkspSize, writeLitEntropy);
- cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+ int writeLitEntropy, int writeSeqEntropy)
+{
+ EstimatedBlockSize ebs;
+ ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
+ &entropy->huf, &entropyMetadata->hufMetadata,
+ workspace, wkspSize, writeLitEntropy);
+ ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
workspace, wkspSize, writeSeqEntropy);
- return cSizeEstimate + ZSTD_blockHeaderSize;
+ ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
+ return ebs;
}
static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
@@ -419,13 +426,56 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
return 0;
}
+static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount)
+{
+ size_t n, total = 0;
+ assert(sp != NULL);
+ for (n=0; n<seqCount; n++) {
+ total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
+ }
+ DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
+ return total;
+}
+
+#define BYTESCALE 256
+
+static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
+ size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
+ int firstSubBlock)
+{
+ size_t n, budget = 0, inSize=0;
+ /* entropy headers */
+ size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
+ assert(firstSubBlock==0 || firstSubBlock==1);
+ budget += headerSize;
+
+ /* first sequence => at least one sequence*/
+ budget += sp[0].litLength * avgLitCost + avgSeqCost;
+ if (budget > targetBudget) return 1;
+ inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
+
+ /* loop over sequences */
+ for (n=1; n<nbSeqs; n++) {
+ size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
+ budget += currentCost;
+ inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
+ /* stop when sub-block budget is reached */
+ if ( (budget > targetBudget)
+ /* though continue to expand until the sub-block is deemed compressible */
+ && (budget < inSize * BYTESCALE) )
+ break;
+ }
+
+ return n;
+}
+
/** ZSTD_compressSubBlock_multi() :
* Breaks super-block into multiple sub-blocks and compresses them.
- * Entropy will be written to the first block.
- * The following blocks will use repeat mode to compress.
- * All sub-blocks are compressed blocks (no raw or rle blocks).
- * @return : compressed size of the super block (which is multiple ZSTD blocks)
- * Or 0 if it failed to compress. */
+ * Entropy will be written into the first block.
+ * The following blocks use repeat_mode to compress.
+ * Sub-blocks are all compressed, except the last one when beneficial.
+ * @return : compressed size of the super block (which features multiple ZSTD blocks)
+ * or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
const ZSTD_compressedBlockState_t* prevCBlock,
ZSTD_compressedBlockState_t* nextCBlock,
@@ -438,10 +488,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
{
const seqDef* const sstart = seqStorePtr->sequencesStart;
const seqDef* const send = seqStorePtr->sequences;
- const seqDef* sp = sstart;
+ const seqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
+ size_t const nbSeqs = (size_t)(send - sstart);
const BYTE* const lstart = seqStorePtr->litStart;
const BYTE* const lend = seqStorePtr->lit;
const BYTE* lp = lstart;
+ size_t const nbLiterals = (size_t)(lend - lstart);
BYTE const* ip = (BYTE const*)src;
BYTE const* const iend = ip + srcSize;
BYTE* const ostart = (BYTE*)dst;
@@ -450,96 +502,152 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
const BYTE* llCodePtr = seqStorePtr->llCode;
const BYTE* mlCodePtr = seqStorePtr->mlCode;
const BYTE* ofCodePtr = seqStorePtr->ofCode;
- size_t targetCBlockSize = cctxParams->targetCBlockSize;
- size_t litSize, seqCount;
- int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
+ size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
+ size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
+ int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
int writeSeqEntropy = 1;
- int lastSequence = 0;
-
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
- (unsigned)(lend-lp), (unsigned)(send-sstart));
-
- litSize = 0;
- seqCount = 0;
- do {
- size_t cBlockSizeEstimate = 0;
- if (sstart == send) {
- lastSequence = 1;
- } else {
- const seqDef* const sequence = sp + seqCount;
- lastSequence = sequence == send - 1;
- litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
- seqCount++;
- }
- if (lastSequence) {
- assert(lp <= lend);
- assert(litSize <= (size_t)(lend - lp));
- litSize = (size_t)(lend - lp);
+
+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
+ (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
+
+ /* let's start by a general estimation for the full block */
+ if (nbSeqs > 0) {
+ EstimatedBlockSize const ebs =
+ ZSTD_estimateSubBlockSize(lp, nbLiterals,
+ ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
+ &nextCBlock->entropy, entropyMetadata,
+ workspace, wkspSize,
+ writeLitEntropy, writeSeqEntropy);
+ /* quick estimation */
+ size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
+ size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
+ const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
+ size_t n, avgBlockBudget, blockBudgetSupp=0;
+ avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
+ DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
+ (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
+ (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
+ /* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately
+ * this will result in the production of a single uncompressed block covering @srcSize.*/
+ if (ebs.estBlockSize > srcSize) return 0;
+
+ /* compress and write sub-blocks */
+ assert(nbSubBlocks>0);
+ for (n=0; n < nbSubBlocks-1; n++) {
+ /* determine nb of sequences for current sub-block + nbLiterals from next sequence */
+ size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
+ avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
+ /* if reached last sequence : break to last sub-block (simplification) */
+ assert(seqCount <= (size_t)(send-sp));
+ if (sp + seqCount == send) break;
+ assert(seqCount > 0);
+ /* compress sub-block */
+ { int litEntropyWritten = 0;
+ int seqEntropyWritten = 0;
+ size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
+ const size_t decompressedSize =
+ ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
+ size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+ sp, seqCount,
+ lp, litSize,
+ llCodePtr, mlCodePtr, ofCodePtr,
+ cctxParams,
+ op, (size_t)(oend-op),
+ bmi2, writeLitEntropy, writeSeqEntropy,
+ &litEntropyWritten, &seqEntropyWritten,
+ 0);
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+
+ /* check compressibility, update state components */
+ if (cSize > 0 && cSize < decompressedSize) {
+ DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
+ (unsigned)decompressedSize, (unsigned)cSize);
+ assert(ip + decompressedSize <= iend);
+ ip += decompressedSize;
+ lp += litSize;
+ op += cSize;
+ llCodePtr += seqCount;
+ mlCodePtr += seqCount;
+ ofCodePtr += seqCount;
+ /* Entropy only needs to be written once */
+ if (litEntropyWritten) {
+ writeLitEntropy = 0;
+ }
+ if (seqEntropyWritten) {
+ writeSeqEntropy = 0;
+ }
+ sp += seqCount;
+ blockBudgetSupp = 0;
+ } }
+ /* otherwise : do not compress yet, coalesce current sub-block with following one */
}
- /* I think there is an optimization opportunity here.
- * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
- * since it recalculates estimate from scratch.
- * For example, it would recount literal distribution and symbol codes every time.
- */
- cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
- &nextCBlock->entropy, entropyMetadata,
- workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
- if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
- int litEntropyWritten = 0;
- int seqEntropyWritten = 0;
- const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
- const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
- sp, seqCount,
- lp, litSize,
- llCodePtr, mlCodePtr, ofCodePtr,
- cctxParams,
- op, oend-op,
- bmi2, writeLitEntropy, writeSeqEntropy,
- &litEntropyWritten, &seqEntropyWritten,
- lastBlock && lastSequence);
- FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
- if (cSize > 0 && cSize < decompressedSize) {
- DEBUGLOG(5, "Committed the sub-block");
- assert(ip + decompressedSize <= iend);
- ip += decompressedSize;
- sp += seqCount;
- lp += litSize;
- op += cSize;
- llCodePtr += seqCount;
- mlCodePtr += seqCount;
- ofCodePtr += seqCount;
- litSize = 0;
- seqCount = 0;
- /* Entropy only needs to be written once */
- if (litEntropyWritten) {
- writeLitEntropy = 0;
- }
- if (seqEntropyWritten) {
- writeSeqEntropy = 0;
- }
+ } /* if (nbSeqs > 0) */
+
+ /* write last block */
+ DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
+ { int litEntropyWritten = 0;
+ int seqEntropyWritten = 0;
+ size_t litSize = (size_t)(lend - lp);
+ size_t seqCount = (size_t)(send - sp);
+ const size_t decompressedSize =
+ ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
+ size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+ sp, seqCount,
+ lp, litSize,
+ llCodePtr, mlCodePtr, ofCodePtr,
+ cctxParams,
+ op, (size_t)(oend-op),
+ bmi2, writeLitEntropy, writeSeqEntropy,
+ &litEntropyWritten, &seqEntropyWritten,
+ lastBlock);
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+
+ /* update pointers, the nb of literals borrowed from next sequence must be preserved */
+ if (cSize > 0 && cSize < decompressedSize) {
+ DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
+ (unsigned)decompressedSize, (unsigned)cSize);
+ assert(ip + decompressedSize <= iend);
+ ip += decompressedSize;
+ lp += litSize;
+ op += cSize;
+ llCodePtr += seqCount;
+ mlCodePtr += seqCount;
+ ofCodePtr += seqCount;
+ /* Entropy only needs to be written once */
+ if (litEntropyWritten) {
+ writeLitEntropy = 0;
+ }
+ if (seqEntropyWritten) {
+ writeSeqEntropy = 0;
}
+ sp += seqCount;
}
- } while (!lastSequence);
+ }
+
+
if (writeLitEntropy) {
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
+ DEBUGLOG(5, "Literal entropy tables were never written");
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
}
if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
/* If we haven't written our entropy tables, then we've violated our contract and
* must emit an uncompressed block.
*/
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
+ DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
return 0;
}
+
if (ip < iend) {
- size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
+ /* some data left : last part of the block sent uncompressed */
+ size_t const rSize = (size_t)((iend - ip));
+ size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
+ DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
assert(cSize != 0);
op += cSize;
/* We have to regenerate the repcodes because we've skipped some sequences */
if (sp < send) {
- seqDef const* seq;
+ const seqDef* seq;
repcodes_t rep;
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
for (seq = sstart; seq < sp; ++seq) {
@@ -548,14 +656,17 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
}
}
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
- return op-ostart;
+
+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
+ (unsigned)(op-ostart));
+ return (size_t)(op-ostart);
}
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
void* dst, size_t dstCapacity,
- void const* src, size_t srcSize,
- unsigned lastBlock) {
+ const void* src, size_t srcSize,
+ unsigned lastBlock)
+{
ZSTD_entropyCTablesMetadata_t entropyMetadata;
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
diff --git a/thirdparty/zstd/compress/zstd_cwksp.h b/thirdparty/zstd/compress/zstd_cwksp.h
index cc7fb1c715..3eddbd334e 100644
--- a/thirdparty/zstd/compress/zstd_cwksp.h
+++ b/thirdparty/zstd/compress/zstd_cwksp.h
@@ -192,6 +192,7 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
{
intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
(U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
+ (void)offset;
#if defined(ZSTD_MSAN_PRINT)
if(offset!=-1) {
__msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
@@ -433,7 +434,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
/**
* Aligned on 64 bytes. These buffers have the special property that
- * their values remain constrained, allowing us to re-use them without
+ * their values remain constrained, allowing us to reuse them without
* memset()-ing them.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
@@ -525,7 +526,7 @@ MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
- /* To validate that the table re-use logic is sound, and that we don't
+ /* To validate that the table reuse logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table
* space every time we mark it dirty.
* Since tableValidEnd space and initOnce space may overlap we don't poison
@@ -602,9 +603,9 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: clearing!");
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
- /* To validate that the context re-use logic is sound, and that we don't
+ /* To validate that the context reuse logic is sound, and that we don't
* access stuff that this compression hasn't initialized, we re-"poison"
- * the workspace except for the areas in which we expect memory re-use
+ * the workspace except for the areas in which we expect memory reuse
* without initialization (objects, valid tables area and init once
* memory). */
{
@@ -635,6 +636,15 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
ZSTD_cwksp_assert_internal_consistency(ws);
}
+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
+ return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
+}
+
+MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
+ return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+ + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
+}
+
/**
* The provided workspace takes ownership of the buffer [start, start+size).
* Any existing values in the workspace are ignored (the previously managed
@@ -666,6 +676,11 @@ MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem
MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
void *ptr = ws->workspace;
DEBUGLOG(4, "cwksp: freeing workspace");
+#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
+ if (ptr != NULL && customMem.customFree != NULL) {
+ __msan_unpoison(ptr, ZSTD_cwksp_sizeof(ws));
+ }
+#endif
ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
ZSTD_customFree(ptr, customMem);
}
@@ -679,15 +694,6 @@ MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
}
-MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
- return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
-}
-
-MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
- return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
- + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
-}
-
MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
return ws->allocFailed;
}
diff --git a/thirdparty/zstd/compress/zstd_double_fast.c b/thirdparty/zstd/compress/zstd_double_fast.c
index 0ad88ffc7b..a4e9c50d3b 100644
--- a/thirdparty/zstd/compress/zstd_double_fast.c
+++ b/thirdparty/zstd/compress/zstd_double_fast.c
@@ -11,7 +11,11 @@
#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"
-static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
+
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -47,7 +51,9 @@ static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
} }
}
-static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -95,6 +101,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls /* template */)
@@ -305,6 +312,7 @@ _match_stored:
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
@@ -348,8 +356,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
- PREFETCH_AREA(dictHashLong, hashTableBytes)
- PREFETCH_AREA(dictHashSmall, chainTableBytes)
+ PREFETCH_AREA(dictHashLong, hashTableBytes);
+ PREFETCH_AREA(dictHashSmall, chainTableBytes);
}
/* init */
@@ -589,7 +597,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
}
-static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_doubleFast_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls /* template */)
@@ -756,3 +766,5 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
}
}
+
+#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
diff --git a/thirdparty/zstd/compress/zstd_double_fast.h b/thirdparty/zstd/compress/zstd_double_fast.h
index 6f0047c4ba..ce6ed8c97f 100644
--- a/thirdparty/zstd/compress/zstd_double_fast.h
+++ b/thirdparty/zstd/compress/zstd_double_fast.h
@@ -18,9 +18,12 @@ extern "C" {
#include "../common/mem.h" /* U32 */
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
+
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
+
size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
@@ -31,6 +34,14 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT ZSTD_compressBlock_doubleFast_extDict
+#else
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL
+#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
#if defined (__cplusplus)
}
diff --git a/thirdparty/zstd/compress/zstd_fast.c b/thirdparty/zstd/compress/zstd_fast.c
index 5f2c6a2eda..6c4554cfca 100644
--- a/thirdparty/zstd/compress/zstd_fast.c
+++ b/thirdparty/zstd/compress/zstd_fast.c
@@ -11,7 +11,9 @@
#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
#include "zstd_fast.h"
-static void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
@@ -46,7 +48,9 @@ static void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
} } } }
}
-static void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
@@ -139,8 +143,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
*
* This is also the work we do at the beginning to enter the loop initially.
*/
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_compressBlock_fast_noDict_generic(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_fast_noDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls, U32 const hasStep)
@@ -456,6 +461,7 @@ size_t ZSTD_compressBlock_fast(
}
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_fast_dictMatchState_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
@@ -502,7 +508,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
- PREFETCH_AREA(dictHashTable, hashTableBytes)
+ PREFETCH_AREA(dictHashTable, hashTableBytes);
}
/* init */
@@ -681,7 +687,9 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
}
-static size_t ZSTD_compressBlock_fast_extDict_generic(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_fast_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
{
diff --git a/thirdparty/zstd/compress/zstd_lazy.c b/thirdparty/zstd/compress/zstd_lazy.c
index 5ba88e8678..67dd55fdb8 100644
--- a/thirdparty/zstd/compress/zstd_lazy.c
+++ b/thirdparty/zstd/compress/zstd_lazy.c
@@ -12,6 +12,11 @@
#include "zstd_lazy.h"
#include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
+
#define kLazySkippingStep 8
@@ -19,8 +24,9 @@
* Binary Tree search
***************************************/
-static void
-ZSTD_updateDUBT(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_updateDUBT(ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iend,
U32 mls)
{
@@ -63,8 +69,9 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
* sort one already inserted but unsorted position
* assumption : curr >= btlow == (curr - btmask)
* doesn't fail */
-static void
-ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
U32 curr, const BYTE* inputEnd,
U32 nbCompares, U32 btLow,
const ZSTD_dictMode_e dictMode)
@@ -152,8 +159,9 @@ ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
}
-static size_t
-ZSTD_DUBT_findBetterDictMatch (
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_DUBT_findBetterDictMatch (
const ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
size_t* offsetPtr,
@@ -230,8 +238,9 @@ ZSTD_DUBT_findBetterDictMatch (
}
-static size_t
-ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
size_t* offBasePtr,
U32 const mls,
@@ -381,8 +390,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offBasePtr,
const U32 mls /* template */,
@@ -617,7 +627,9 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
/* Update chains up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_insertAndFindFirstIndex_internal(
ZSTD_matchState_t* ms,
const ZSTD_compressionParameters* const cParams,
const BYTE* ip, U32 const mls, U32 const lazySkipping)
@@ -651,6 +663,7 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
/* inlining is important to hardwire a hot branch (template emulation) */
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_HcFindBestMatch(
ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit,
@@ -819,7 +832,9 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* t
* Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
* but not beyond iLimit.
*/
-FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
U32 const rowLog, U32 const mls,
U32 idx, const BYTE* const iLimit)
{
@@ -845,7 +860,9 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
* Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
* base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
*/
-FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
BYTE const* tagTable, BYTE const* base,
U32 idx, U32 const hashLog,
U32 const rowLog, U32 const mls,
@@ -863,10 +880,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTab
/* ZSTD_row_update_internalImpl():
* Updates the hash table with positions starting from updateStartIdx until updateEndIdx.
*/
-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
- U32 updateStartIdx, U32 const updateEndIdx,
- U32 const mls, U32 const rowLog,
- U32 const rowMask, U32 const useCache)
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
+ U32 updateStartIdx, U32 const updateEndIdx,
+ U32 const mls, U32 const rowLog,
+ U32 const rowMask, U32 const useCache)
{
U32* const hashTable = ms->hashTable;
BYTE* const tagTable = ms->tagTable;
@@ -892,9 +911,11 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
* Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate.
* Skips sections of long matches as is necessary.
*/
-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
- U32 const mls, U32 const rowLog,
- U32 const rowMask, U32 const useCache)
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
+ U32 const mls, U32 const rowLog,
+ U32 const rowMask, U32 const useCache)
{
U32 idx = ms->nextToUpdate;
const BYTE* const base = ms->window.base;
@@ -1102,20 +1123,21 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGr
/* The high-level approach of the SIMD row based match finder is as follows:
* - Figure out where to insert the new entry:
- * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag"
- * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines
+ * - Generate a hash for current input posistion and split it into a one byte of tag and `rowHashLog` bits of index.
+ * - The hash is salted by a value that changes on every contex reset, so when the same table is used
+ * we will avoid collisions that would otherwise slow us down by intorducing phantom matches.
+ * - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines
* which row to insert into.
- * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can
- * be considered as a circular buffer with a "head" index that resides in the tagTable.
- * - Also insert the "tag" into the equivalent row and position in the tagTable.
- * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry.
- * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
- * for alignment/performance reasons, leaving some bytes unused.
- * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
+ * - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can
+ * be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes
+ * per row).
+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and
* generate a bitfield that we can cycle through to check the collisions in the hash table.
* - Pick the longest match.
+ * - Insert the tag into the equivalent row and position in the tagTable.
*/
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_RowFindBestMatch(
ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit,
@@ -1489,8 +1511,9 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
* Common parser - lazy strategy
*********************************/
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_compressBlock_lazy_generic(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_lazy_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize,
@@ -1754,152 +1777,163 @@ _storeSequence:
/* Return the last literals size */
return (size_t)(iend - anchor);
}
+#endif /* build exclusions */
-size_t ZSTD_compressBlock_btlazy2(
+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_lazy2(
+size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
}
-size_t ZSTD_compressBlock_lazy(
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
}
-size_t ZSTD_compressBlock_greedy(
+size_t ZSTD_compressBlock_greedy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
}
-size_t ZSTD_compressBlock_lazy2_dictMatchState(
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
}
+#endif
-size_t ZSTD_compressBlock_lazy_dictMatchState(
+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_greedy_dictMatchState(
+size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
}
-
-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
}
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
}
-/* Row-based matchfinder */
-size_t ZSTD_compressBlock_lazy2_row(
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
}
+#endif
-size_t ZSTD_compressBlock_lazy_row(
+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_greedy_row(
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
}
-size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
}
-size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy2_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
}
-
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
}
+#endif
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
}
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
}
+#endif
+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_lazy_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
@@ -2101,8 +2135,9 @@ _storeSequence:
/* Return the last literals size */
return (size_t)(iend - anchor);
}
+#endif /* build exclusions */
-
+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
@@ -2110,48 +2145,55 @@ size_t ZSTD_compressBlock_greedy_extDict(
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
}
-size_t ZSTD_compressBlock_lazy_extDict(
+size_t ZSTD_compressBlock_greedy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
-
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
}
+#endif
-size_t ZSTD_compressBlock_lazy2_extDict(
+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
}
-size_t ZSTD_compressBlock_btlazy2_extDict(
+size_t ZSTD_compressBlock_lazy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
}
+#endif
-size_t ZSTD_compressBlock_greedy_extDict_row(
+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
+
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
}
-size_t ZSTD_compressBlock_lazy_extDict_row(
+size_t ZSTD_compressBlock_lazy2_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
-
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
}
+#endif
-size_t ZSTD_compressBlock_lazy2_extDict_row(
+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
+
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
}
+#endif
diff --git a/thirdparty/zstd/compress/zstd_lazy.h b/thirdparty/zstd/compress/zstd_lazy.h
index 3bde67331e..3635813bdd 100644
--- a/thirdparty/zstd/compress/zstd_lazy.h
+++ b/thirdparty/zstd/compress/zstd_lazy.h
@@ -27,98 +27,173 @@ extern "C" {
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+#endif
-size_t ZSTD_compressBlock_btlazy2(
+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2(
+size_t ZSTD_compressBlock_greedy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy(
+size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy(
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_row(
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_row(
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_row(
+size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+size_t ZSTD_compressBlock_greedy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_dictMatchState(
+
+#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy
+#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row
+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState
+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row
+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch
+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row
+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict
+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row
+#else
+#define ZSTD_COMPRESSBLOCK_GREEDY NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL
+#endif
+
+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dictMatchState(
+size_t ZSTD_compressBlock_lazy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dictMatchState(
+size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+
+#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy
+#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row
+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState
+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row
+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch
+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row
+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict
+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row
+#else
+#define ZSTD_COMPRESSBLOCK_LAZY NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL
+#endif
+
+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+size_t ZSTD_compressBlock_lazy2_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_greedy_extDict(
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_extDict(
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_extDict_row(
+size_t ZSTD_compressBlock_lazy2_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_extDict_row(
+
+#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2
+#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row
+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState
+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row
+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch
+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row
+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict
+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row
+#else
+#define ZSTD_COMPRESSBLOCK_LAZY2 NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL
+#endif
+
+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_extDict_row(
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
+#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2
+#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState
+#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict
+#else
+#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL
+#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL
+#endif
+
#if defined (__cplusplus)
}
diff --git a/thirdparty/zstd/compress/zstd_ldm.c b/thirdparty/zstd/compress/zstd_ldm.c
index 3d74ff19e3..17c069fe1d 100644
--- a/thirdparty/zstd/compress/zstd_ldm.c
+++ b/thirdparty/zstd/compress/zstd_ldm.c
@@ -246,7 +246,11 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
break;
case ZSTD_dfast:
+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
+#else
+ assert(0); /* shouldn't be called: cparams should've been adjusted. */
+#endif
break;
case ZSTD_greedy:
@@ -318,7 +322,9 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
}
}
-static size_t ZSTD_ldm_generateSequences_internal(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_ldm_generateSequences_internal(
ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
ldmParams_t const* params, void const* src, size_t srcSize)
{
@@ -689,7 +695,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
/* maybeSplitSequence updates rawSeqStore->pos */
rawSeq const sequence = maybeSplitSequence(rawSeqStore,
(U32)(iend - ip), minMatch);
- int i;
/* End signal */
if (sequence.offset == 0)
break;
@@ -702,6 +707,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
/* Run the block compressor */
DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
{
+ int i;
size_t const newLitLength =
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
ip += sequence.litLength;
diff --git a/thirdparty/zstd/compress/zstd_opt.c b/thirdparty/zstd/compress/zstd_opt.c
index f02a760946..e63073e5a4 100644
--- a/thirdparty/zstd/compress/zstd_opt.c
+++ b/thirdparty/zstd/compress/zstd_opt.c
@@ -12,6 +12,9 @@
#include "hist.h"
#include "zstd_opt.h"
+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE (1<<30)
@@ -264,6 +267,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
const optState_t* const optPtr,
int optLevel)
{
+ DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
if (litLength == 0) return 0;
if (!ZSTD_compressedLiterals(optPtr))
@@ -402,9 +406,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
/* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
-static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
- U32* nextToUpdate3,
- const BYTE* const ip)
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
+ U32* nextToUpdate3,
+ const BYTE* const ip)
{
U32* const hashTable3 = ms->hashTable3;
U32 const hashLog3 = ms->hashLog3;
@@ -431,7 +437,9 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
* @param ip assumed <= iend-8 .
* @param target The target of ZSTD_updateTree_internal() - we are filling to this position
* @return : nb of positions added */
-static U32 ZSTD_insertBt1(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_insertBt1(
const ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
U32 const target,
@@ -550,6 +558,7 @@ static U32 ZSTD_insertBt1(
}
FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_updateTree_internal(
ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
@@ -558,7 +567,7 @@ void ZSTD_updateTree_internal(
const BYTE* const base = ms->window.base;
U32 const target = (U32)(ip - base);
U32 idx = ms->nextToUpdate;
- DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
+ DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
idx, target, dictMode);
while(idx < target) {
@@ -575,7 +584,9 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}
-FORCE_INLINE_TEMPLATE U32
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32
ZSTD_insertBtAndGetAllMatches (
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
ZSTD_matchState_t* ms,
@@ -816,7 +827,9 @@ typedef U32 (*ZSTD_getAllMatchesFn)(
U32 const ll0,
U32 const lengthToBeat);
-FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_btGetAllMatches_internal(
ZSTD_match_t* matches,
ZSTD_matchState_t* ms,
U32* nextToUpdate3,
@@ -1035,11 +1048,6 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
* Optimal parser
*********************************/
-static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
-{
- return sol.litlen + sol.mlen;
-}
-
#if 0 /* debug */
static void
@@ -1057,7 +1065,13 @@ listStats(const U32* table, int lastEltID)
#endif
-FORCE_INLINE_TEMPLATE size_t
+#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
+#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
+#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
+
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
@@ -1083,10 +1097,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable;
- ZSTD_optimal_t lastSequence;
+ ZSTD_optimal_t lastStretch;
ZSTD_optLdm_t optLdm;
- ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
+ ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
@@ -1108,19 +1122,31 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const ll0 = !litlen;
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
- (U32)(ip-istart), (U32)(iend - ip));
- if (!nbMatches) { ip++; continue; }
+ (U32)(ip-istart), (U32)(iend-ip));
+ if (!nbMatches) {
+ DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
+ ip++;
+ continue;
+ }
+
+ /* Match found: let's store this solution, and eventually find more candidates.
+ * During this forward pass, @opt is used to store stretches,
+ * defined as "a match followed by N literals".
+ * Note how this is different from a Sequence, which is "N literals followed by a match".
+ * Storing stretches allows us to store different match predecessors
+ * for each literal position part of a literals run. */
/* initialize opt[0] */
- { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
- opt[0].mlen = 0; /* means is_a_literal */
+ opt[0].mlen = 0; /* there are only literals so far */
opt[0].litlen = litlen;
- /* We don't need to include the actual price of the literals because
- * it is static for the duration of the forward pass, and is included
- * in every price. We include the literal length to avoid negative
- * prices when we subtract the previous literal length.
+ /* No need to include the actual price of the literals before the first match
+ * because it is static for the duration of the forward pass, and is included
+ * in every subsequent price. But, we include the literal length because
+ * the cost variation of litlen depends on the value of litlen.
*/
- opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+ opt[0].price = LL_PRICE(litlen);
+ ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
+ ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
/* large match -> immediate encoding */
{ U32 const maxML = matches[nbMatches-1].len;
@@ -1129,82 +1155,106 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
if (maxML > sufficient_len) {
- lastSequence.litlen = litlen;
- lastSequence.mlen = maxML;
- lastSequence.off = maxOffBase;
- DEBUGLOG(6, "large match (%u>%u), immediate encoding",
+ lastStretch.litlen = 0;
+ lastStretch.mlen = maxML;
+ lastStretch.off = maxOffBase;
+ DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
maxML, sufficient_len);
cur = 0;
- last_pos = ZSTD_totalLen(lastSequence);
+ last_pos = maxML;
goto _shortestPath;
} }
/* set prices for first matches starting position == 0 */
assert(opt[0].price >= 0);
- { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
- U32 pos;
+ { U32 pos;
U32 matchNb;
for (pos = 1; pos < minMatch; pos++) {
- opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
+ opt[pos].price = ZSTD_MAX_PRICE;
+ opt[pos].mlen = 0;
+ opt[pos].litlen = litlen + pos;
}
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offBase = matches[matchNb].off;
U32 const end = matches[matchNb].len;
for ( ; pos <= end ; pos++ ) {
- U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
- U32 const sequencePrice = literalsPrice + matchPrice;
+ int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
+ int const sequencePrice = opt[0].price + matchPrice;
DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
- pos, ZSTD_fCost((int)sequencePrice));
+ pos, ZSTD_fCost(sequencePrice));
opt[pos].mlen = pos;
opt[pos].off = offBase;
- opt[pos].litlen = litlen;
- opt[pos].price = (int)sequencePrice;
- } }
+ opt[pos].litlen = 0; /* end of match */
+ opt[pos].price = sequencePrice + LL_PRICE(0);
+ }
+ }
last_pos = pos-1;
+ opt[pos].price = ZSTD_MAX_PRICE;
}
}
/* check further positions */
for (cur = 1; cur <= last_pos; cur++) {
const BYTE* const inr = ip + cur;
- assert(cur < ZSTD_OPT_NUM);
- DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
+ assert(cur <= ZSTD_OPT_NUM);
+ DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
/* Fix current position with one literal if cheaper */
- { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
+ { U32 const litlen = opt[cur-1].litlen + 1;
int const price = opt[cur-1].price
- + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
- + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
- - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+ + LIT_PRICE(ip+cur-1)
+ + LL_INCPRICE(litlen);
assert(price < 1000000000); /* overflow check */
if (price <= opt[cur].price) {
+ ZSTD_optimal_t const prevMatch = opt[cur];
DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
- opt[cur].mlen = 0;
- opt[cur].off = 0;
+ opt[cur] = opt[cur-1];
opt[cur].litlen = litlen;
opt[cur].price = price;
+ if ( (optLevel >= 1) /* additional check only for higher modes */
+ && (prevMatch.litlen == 0) /* replace a match */
+ && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
+ && LIKELY(ip + cur < iend)
+ ) {
+ /* check next position, in case it would be cheaper */
+ int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
+ int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
+ DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
+ cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
+ if ( (with1literal < withMoreLiterals)
+ && (with1literal < opt[cur+1].price) ) {
+ /* update offset history - before it disappears */
+ U32 const prev = cur - prevMatch.mlen;
+ repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
+ assert(cur >= prevMatch.mlen);
+ DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
+ ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
+ newReps.rep[0], newReps.rep[1], newReps.rep[2] );
+ opt[cur+1] = prevMatch; /* mlen & offbase */
+ ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
+ opt[cur+1].litlen = 1;
+ opt[cur+1].price = with1literal;
+ if (last_pos < cur+1) last_pos = cur+1;
+ }
+ }
} else {
- DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
- inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
- opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
+ DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
+ inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
}
}
- /* Set the repcodes of the current position. We must do it here
- * because we rely on the repcodes of the 2nd to last sequence being
- * correct to set the next chunks repcodes during the backward
- * traversal.
+ /* Offset history is not updated during match comparison.
+ * Do it here, now that the match is selected and confirmed.
*/
ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
assert(cur >= opt[cur].mlen);
- if (opt[cur].mlen != 0) {
+ if (opt[cur].litlen == 0) {
+ /* just finished a match => alter offset history */
U32 const prev = cur - opt[cur].mlen;
- repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
+ repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
- } else {
- ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
}
/* last match must start at a minimum distance of 8 from oend */
@@ -1214,15 +1264,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
if ( (optLevel==0) /*static_test*/
&& (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
- DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
+ DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
}
assert(opt[cur].price >= 0);
- { U32 const ll0 = (opt[cur].mlen != 0);
- U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
- U32 const previousPrice = (U32)opt[cur].price;
- U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+ { U32 const ll0 = (opt[cur].litlen == 0);
+ int const previousPrice = opt[cur].price;
+ int const basePrice = previousPrice + LL_PRICE(0);
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
U32 matchNb;
@@ -1234,18 +1283,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
continue;
}
- { U32 const maxML = matches[nbMatches-1].len;
- DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
- inr-istart, cur, nbMatches, maxML);
-
- if ( (maxML > sufficient_len)
- || (cur + maxML >= ZSTD_OPT_NUM) ) {
- lastSequence.mlen = maxML;
- lastSequence.off = matches[nbMatches-1].off;
- lastSequence.litlen = litlen;
- cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
- last_pos = cur + ZSTD_totalLen(lastSequence);
- if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
+ { U32 const longestML = matches[nbMatches-1].len;
+ DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
+ inr-istart, cur, nbMatches, longestML);
+
+ if ( (longestML > sufficient_len)
+ || (cur + longestML >= ZSTD_OPT_NUM)
+ || (ip + cur + longestML >= iend) ) {
+ lastStretch.mlen = longestML;
+ lastStretch.off = matches[nbMatches-1].off;
+ lastStretch.litlen = 0;
+ last_pos = cur + longestML;
goto _shortestPath;
} }
@@ -1257,19 +1305,24 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 mlen;
DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
- matchNb, matches[matchNb].off, lastML, litlen);
+ matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
U32 const pos = cur + mlen;
- int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
+ int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
if ((pos > last_pos) || (price < opt[pos].price)) {
DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
- while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
+ while (last_pos < pos) {
+ /* fill empty positions, for future comparisons */
+ last_pos++;
+ opt[last_pos].price = ZSTD_MAX_PRICE;
+ opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */
+ }
opt[pos].mlen = mlen;
opt[pos].off = offset;
- opt[pos].litlen = litlen;
+ opt[pos].litlen = 0;
opt[pos].price = price;
} else {
DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
@@ -1277,47 +1330,81 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
}
} } }
+ opt[last_pos+1].price = ZSTD_MAX_PRICE;
} /* for (cur = 1; cur <= last_pos; cur++) */
- lastSequence = opt[last_pos];
- cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
- assert(cur < ZSTD_OPT_NUM); /* control overflow*/
+ lastStretch = opt[last_pos];
+ assert(cur >= lastStretch.mlen);
+ cur = last_pos - lastStretch.mlen;
_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
assert(opt[0].mlen == 0);
+ assert(last_pos >= lastStretch.mlen);
+ assert(cur == last_pos - lastStretch.mlen);
- /* Set the next chunk's repcodes based on the repcodes of the beginning
- * of the last match, and the last sequence. This avoids us having to
- * update them while traversing the sequences.
- */
- if (lastSequence.mlen != 0) {
- repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
- ZSTD_memcpy(rep, &reps, sizeof(reps));
+ if (lastStretch.mlen==0) {
+ /* no solution : all matches have been converted into literals */
+ assert(lastStretch.litlen == (ip - anchor) + last_pos);
+ ip += last_pos;
+ continue;
+ }
+ assert(lastStretch.off > 0);
+
+ /* Update offset history */
+ if (lastStretch.litlen == 0) {
+ /* finishing on a match : update offset history */
+ repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
+ ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
} else {
- ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
+ ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
+ assert(cur >= lastStretch.litlen);
+ cur -= lastStretch.litlen;
}
- { U32 const storeEnd = cur + 1;
+ /* Let's write the shortest path solution.
+ * It is stored in @opt in reverse order,
+ * starting from @storeEnd (==cur+2),
+ * effectively partially @opt overwriting.
+ * Content is changed too:
+ * - So far, @opt stored stretches, aka a match followed by literals
+ * - Now, it will store sequences, aka literals followed by a match
+ */
+ { U32 const storeEnd = cur + 2;
U32 storeStart = storeEnd;
- U32 seqPos = cur;
+ U32 stretchPos = cur;
DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
last_pos, cur); (void)last_pos;
- assert(storeEnd < ZSTD_OPT_NUM);
- DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
- storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
- opt[storeEnd] = lastSequence;
- while (seqPos > 0) {
- U32 const backDist = ZSTD_totalLen(opt[seqPos]);
+ assert(storeEnd < ZSTD_OPT_SIZE);
+ DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+ storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
+ if (lastStretch.litlen > 0) {
+ /* last "sequence" is unfinished: just a bunch of literals */
+ opt[storeEnd].litlen = lastStretch.litlen;
+ opt[storeEnd].mlen = 0;
+ storeStart = storeEnd-1;
+ opt[storeStart] = lastStretch;
+ } {
+ opt[storeEnd] = lastStretch; /* note: litlen will be fixed */
+ storeStart = storeEnd;
+ }
+ while (1) {
+ ZSTD_optimal_t nextStretch = opt[stretchPos];
+ opt[storeStart].litlen = nextStretch.litlen;
+ DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
+ opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
+ if (nextStretch.mlen == 0) {
+ /* reaching beginning of segment */
+ break;
+ }
storeStart--;
- DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
- seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
- opt[storeStart] = opt[seqPos];
- seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
+ opt[storeStart] = nextStretch; /* note: litlen will be fixed */
+ assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
+ stretchPos -= nextStretch.litlen + nextStretch.mlen;
}
/* save sequences */
- DEBUGLOG(6, "sending selected sequences into seqStore")
+ DEBUGLOG(6, "sending selected sequences into seqStore");
{ U32 storePos;
for (storePos=storeStart; storePos <= storeEnd; storePos++) {
U32 const llen = opt[storePos].litlen;
@@ -1339,6 +1426,9 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
anchor += advance;
ip = anchor;
} }
+ DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
+
+ /* update all costs */
ZSTD_setBasePrices(optStatePtr, optLevel);
}
} /* while (ip < ilimit) */
@@ -1346,21 +1436,27 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
/* Return the last literals size */
return (size_t)(iend - anchor);
}
+#endif /* build exclusions */
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
static size_t ZSTD_compressBlock_opt0(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
}
+#endif
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
static size_t ZSTD_compressBlock_opt2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
}
+#endif
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
@@ -1368,20 +1464,23 @@ size_t ZSTD_compressBlock_btopt(
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}
+#endif
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
/* ZSTD_initStats_ultra():
* make a first compression pass, just to seed stats with more accurate starting values.
* only works on first block, with no dictionary and no ldm.
* this function cannot error out, its narrow contract must be respected.
*/
-static void
-ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
- seqStore_t* seqStore,
- U32 rep[ZSTD_REP_NUM],
- const void* src, size_t srcSize)
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+ seqStore_t* seqStore,
+ U32 rep[ZSTD_REP_NUM],
+ const void* src, size_t srcSize)
{
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
@@ -1425,7 +1524,7 @@ size_t ZSTD_compressBlock_btultra2(
* Consequently, this can only work if no data has been previously loaded in tables,
* aka, no dictionary, no prefix, no ldm preprocessing.
* The compression ratio gain is generally small (~0.5% on first block),
- ** the cost is 2x cpu time on first block. */
+ * the cost is 2x cpu time on first block. */
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
if ( (ms->opt.litLengthSum==0) /* first block */
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
@@ -1438,7 +1537,9 @@ size_t ZSTD_compressBlock_btultra2(
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}
+#endif
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
@@ -1446,18 +1547,20 @@ size_t ZSTD_compressBlock_btopt_dictMatchState(
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}
-size_t ZSTD_compressBlock_btultra_dictMatchState(
+size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
- return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}
+#endif
-size_t ZSTD_compressBlock_btopt_extDict(
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
- return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_btultra_extDict(
@@ -1466,6 +1569,7 @@ size_t ZSTD_compressBlock_btultra_extDict(
{
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}
+#endif
/* note : no btultra2 variant for extDict nor dictMatchState,
* because btultra2 is not meant to work with dictionaries
diff --git a/thirdparty/zstd/compress/zstd_opt.h b/thirdparty/zstd/compress/zstd_opt.h
index 342e5a3112..d4e7113157 100644
--- a/thirdparty/zstd/compress/zstd_opt.h
+++ b/thirdparty/zstd/compress/zstd_opt.h
@@ -17,30 +17,40 @@ extern "C" {
#include "zstd_compress_internal.h"
+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
/* used in ZSTD_loadDictionaryContent() */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
+#endif
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra(
+size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra2(
+size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
+#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt
+#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState
+#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict
+#else
+#define ZSTD_COMPRESSBLOCK_BTOPT NULL
+#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL
+#endif
-size_t ZSTD_compressBlock_btopt_dictMatchState(
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btultra(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_btopt_extDict(
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
- void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
@@ -48,6 +58,20 @@ size_t ZSTD_compressBlock_btultra_extDict(
/* note : no btultra2 variant for extDict nor dictMatchState,
* because btultra2 is not meant to work with dictionaries
* and is only specific for the first block (no prefix) */
+size_t ZSTD_compressBlock_btultra2(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
+
+#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra
+#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState
+#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict
+#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2
+#else
+#define ZSTD_COMPRESSBLOCK_BTULTRA NULL
+#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL
+#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL
+#endif
#if defined (__cplusplus)
}
diff --git a/thirdparty/zstd/compress/zstdmt_compress.c b/thirdparty/zstd/compress/zstdmt_compress.c
index 6786075569..86ccce3184 100644
--- a/thirdparty/zstd/compress/zstdmt_compress.c
+++ b/thirdparty/zstd/compress/zstdmt_compress.c
@@ -15,17 +15,13 @@
#endif
-/* ====== Constants ====== */
-#define ZSTDMT_OVERLAPLOG_DEFAULT 0
-
-
/* ====== Dependencies ====== */
-#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
#include "../common/mem.h" /* MEM_STATIC */
#include "../common/pool.h" /* threadpool */
#include "../common/threading.h" /* mutex */
-#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
#include "zstd_ldm.h"
#include "zstdmt_compress.h"
@@ -44,12 +40,13 @@
# include <unistd.h>
# include <sys/times.h>
-# define DEBUG_PRINTHEX(l,p,n) { \
- unsigned debug_u; \
- for (debug_u=0; debug_u<(n); debug_u++) \
- RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
- RAWLOG(l, " \n"); \
-}
+# define DEBUG_PRINTHEX(l,p,n) \
+ do { \
+ unsigned debug_u; \
+ for (debug_u=0; debug_u<(n); debug_u++) \
+ RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+ RAWLOG(l, " \n"); \
+ } while (0)
static unsigned long long GetCurrentClockTimeMicroseconds(void)
{
@@ -61,25 +58,28 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
} }
#define MUTEX_WAIT_TIME_DLEVEL 6
-#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
- if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
- unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
- ZSTD_pthread_mutex_lock(mutex); \
- { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
- unsigned long long const elapsedTime = (afterTime-beforeTime); \
- if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
- DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
- elapsedTime, #mutex); \
- } } \
- } else { \
- ZSTD_pthread_mutex_lock(mutex); \
- } \
-}
+#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) \
+ do { \
+ if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
+ unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
+ ZSTD_pthread_mutex_lock(mutex); \
+ { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+ unsigned long long const elapsedTime = (afterTime-beforeTime); \
+ if (elapsedTime > 1000) { \
+ /* or whatever threshold you like; I'm using 1 millisecond here */ \
+ DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, \
+ "Thread took %llu microseconds to acquire mutex %s \n", \
+ elapsedTime, #mutex); \
+ } } \
+ } else { \
+ ZSTD_pthread_mutex_lock(mutex); \
+ } \
+ } while (0)
#else
# define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
-# define DEBUG_PRINTHEX(l,p,n) {}
+# define DEBUG_PRINTHEX(l,p,n) do { } while (0)
#endif
@@ -100,18 +100,39 @@ typedef struct ZSTDMT_bufferPool_s {
unsigned totalBuffers;
unsigned nbBuffers;
ZSTD_customMem cMem;
- buffer_t bTable[1]; /* variable size */
+ buffer_t* buffers;
} ZSTDMT_bufferPool;
+static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+{
+ DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
+ if (!bufPool) return; /* compatibility with free on NULL */
+ if (bufPool->buffers) {
+ unsigned u;
+ for (u=0; u<bufPool->totalBuffers; u++) {
+ DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->buffers[u].start);
+ ZSTD_customFree(bufPool->buffers[u].start, bufPool->cMem);
+ }
+ ZSTD_customFree(bufPool->buffers, bufPool->cMem);
+ }
+ ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
+ ZSTD_customFree(bufPool, bufPool->cMem);
+}
+
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
{
- ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
- sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
+ ZSTDMT_bufferPool* const bufPool =
+ (ZSTDMT_bufferPool*)ZSTD_customCalloc(sizeof(ZSTDMT_bufferPool), cMem);
if (bufPool==NULL) return NULL;
if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
ZSTD_customFree(bufPool, cMem);
return NULL;
}
+ bufPool->buffers = (buffer_t*)ZSTD_customCalloc(maxNbBuffers * sizeof(buffer_t), cMem);
+ if (bufPool->buffers==NULL) {
+ ZSTDMT_freeBufferPool(bufPool);
+ return NULL;
+ }
bufPool->bufferSize = 64 KB;
bufPool->totalBuffers = maxNbBuffers;
bufPool->nbBuffers = 0;
@@ -119,32 +140,19 @@ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_cu
return bufPool;
}
-static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
-{
- unsigned u;
- DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
- if (!bufPool) return; /* compatibility with free on NULL */
- for (u=0; u<bufPool->totalBuffers; u++) {
- DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
- ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
- }
- ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
- ZSTD_customFree(bufPool, bufPool->cMem);
-}
-
/* only works at initialization, not during compression */
static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
{
- size_t const poolSize = sizeof(*bufPool)
- + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+ size_t const poolSize = sizeof(*bufPool);
+ size_t const arraySize = bufPool->totalBuffers * sizeof(buffer_t);
unsigned u;
size_t totalBufferSize = 0;
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
for (u=0; u<bufPool->totalBuffers; u++)
- totalBufferSize += bufPool->bTable[u].capacity;
+ totalBufferSize += bufPool->buffers[u].capacity;
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
- return poolSize + totalBufferSize;
+ return poolSize + arraySize + totalBufferSize;
}
/* ZSTDMT_setBufferSize() :
@@ -187,9 +195,9 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
if (bufPool->nbBuffers) { /* try to use an existing buffer */
- buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
+ buffer_t const buf = bufPool->buffers[--(bufPool->nbBuffers)];
size_t const availBufferSize = buf.capacity;
- bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
+ bufPool->buffers[bufPool->nbBuffers] = g_nullBuffer;
if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
/* large enough, but not too much */
DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
@@ -250,14 +258,14 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
if (buf.start == NULL) return; /* compatible with release on NULL */
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
if (bufPool->nbBuffers < bufPool->totalBuffers) {
- bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
+ bufPool->buffers[bufPool->nbBuffers++] = buf; /* stored for later use */
DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
(U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
return;
}
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
- /* Reached bufferPool capacity (should not happen) */
+ /* Reached bufferPool capacity (note: should not happen) */
DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
ZSTD_customFree(buf.start, bufPool->cMem);
}
@@ -350,16 +358,20 @@ typedef struct {
int totalCCtx;
int availCCtx;
ZSTD_customMem cMem;
- ZSTD_CCtx* cctx[1]; /* variable size */
+ ZSTD_CCtx** cctxs;
} ZSTDMT_CCtxPool;
-/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
+/* note : all CCtx borrowed from the pool must be reverted back to the pool _before_ freeing the pool */
static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
{
- int cid;
- for (cid=0; cid<pool->totalCCtx; cid++)
- ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
+ if (!pool) return;
ZSTD_pthread_mutex_destroy(&pool->poolMutex);
+ if (pool->cctxs) {
+ int cid;
+ for (cid=0; cid<pool->totalCCtx; cid++)
+ ZSTD_freeCCtx(pool->cctxs[cid]); /* free compatible with NULL */
+ ZSTD_customFree(pool->cctxs, pool->cMem);
+ }
ZSTD_customFree(pool, pool->cMem);
}
@@ -368,19 +380,24 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
ZSTD_customMem cMem)
{
- ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
- sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
+ ZSTDMT_CCtxPool* const cctxPool =
+ (ZSTDMT_CCtxPool*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtxPool), cMem);
assert(nbWorkers > 0);
if (!cctxPool) return NULL;
if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
ZSTD_customFree(cctxPool, cMem);
return NULL;
}
- cctxPool->cMem = cMem;
cctxPool->totalCCtx = nbWorkers;
+ cctxPool->cctxs = (ZSTD_CCtx**)ZSTD_customCalloc(nbWorkers * sizeof(ZSTD_CCtx*), cMem);
+ if (!cctxPool->cctxs) {
+ ZSTDMT_freeCCtxPool(cctxPool);
+ return NULL;
+ }
+ cctxPool->cMem = cMem;
+ cctxPool->cctxs[0] = ZSTD_createCCtx_advanced(cMem);
+ if (!cctxPool->cctxs[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
- cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
- if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
return cctxPool;
}
@@ -402,16 +419,16 @@ static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
{
ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
{ unsigned const nbWorkers = cctxPool->totalCCtx;
- size_t const poolSize = sizeof(*cctxPool)
- + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
- unsigned u;
+ size_t const poolSize = sizeof(*cctxPool);
+ size_t const arraySize = cctxPool->totalCCtx * sizeof(ZSTD_CCtx*);
size_t totalCCtxSize = 0;
+ unsigned u;
for (u=0; u<nbWorkers; u++) {
- totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+ totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctxs[u]);
}
ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
assert(nbWorkers > 0);
- return poolSize + totalCCtxSize;
+ return poolSize + arraySize + totalCCtxSize;
}
}
@@ -421,7 +438,7 @@ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
if (cctxPool->availCCtx) {
cctxPool->availCCtx--;
- { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
+ { ZSTD_CCtx* const cctx = cctxPool->cctxs[cctxPool->availCCtx];
ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
return cctx;
} }
@@ -435,7 +452,7 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
if (cctx==NULL) return; /* compatibility with release on NULL */
ZSTD_pthread_mutex_lock(&pool->poolMutex);
if (pool->availCCtx < pool->totalCCtx)
- pool->cctx[pool->availCCtx++] = cctx;
+ pool->cctxs[pool->availCCtx++] = cctx;
else {
/* pool overflow : should not happen, since totalCCtx==nbWorkers */
DEBUGLOG(4, "CCtx pool overflow : free cctx");
@@ -601,11 +618,8 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
ZSTD_pthread_mutex_unlock(&serialState->mutex);
if (seqStore.size > 0) {
- size_t const err = ZSTD_referenceExternalSequences(
- jobCCtx, seqStore.seq, seqStore.size);
+ ZSTD_referenceExternalSequences(jobCCtx, seqStore.seq, seqStore.size);
assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
- assert(!ZSTD_isError(err));
- (void)err;
}
}
@@ -657,12 +671,13 @@ typedef struct {
unsigned frameChecksumNeeded; /* used only by mtctx */
} ZSTDMT_jobDescription;
-#define JOB_ERROR(e) { \
- ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
- job->cSize = e; \
- ZSTD_pthread_mutex_unlock(&job->job_mutex); \
- goto _endJob; \
-}
+#define JOB_ERROR(e) \
+ do { \
+ ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
+ job->cSize = e; \
+ ZSTD_pthread_mutex_unlock(&job->job_mutex); \
+ goto _endJob; \
+ } while (0)
/* ZSTDMT_compressionJob() is a POOL_function type */
static void ZSTDMT_compressionJob(void* jobDescription)
@@ -1091,7 +1106,7 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
{ unsigned jobNb;
unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
- mtctx->doneJobID, lastJobNb, mtctx->jobReady)
+ mtctx->doneJobID, lastJobNb, mtctx->jobReady);
for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
unsigned const wJobID = jobNb & mtctx->jobIDMask;
ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];