diff options
Diffstat (limited to 'thirdparty/zstd/decompress')
-rw-r--r-- | thirdparty/zstd/decompress/huf_decompress.c | 314 | ||||
-rw-r--r-- | thirdparty/zstd/decompress/huf_decompress_amd64.S | 57 | ||||
-rw-r--r-- | thirdparty/zstd/decompress/zstd_decompress.c | 116 | ||||
-rw-r--r-- | thirdparty/zstd/decompress/zstd_decompress_block.c | 439 | ||||
-rw-r--r-- | thirdparty/zstd/decompress/zstd_decompress_block.h | 2 | ||||
-rw-r--r-- | thirdparty/zstd/decompress/zstd_decompress_internal.h | 2 |
6 files changed, 544 insertions, 386 deletions
diff --git a/thirdparty/zstd/decompress/huf_decompress.c b/thirdparty/zstd/decompress/huf_decompress.c index 5b217ac586..f85dd0beea 100644 --- a/thirdparty/zstd/decompress/huf_decompress.c +++ b/thirdparty/zstd/decompress/huf_decompress.c @@ -34,6 +34,12 @@ * Macros ****************************************************************/ +#ifdef HUF_DISABLE_FAST_DECODE +# define HUF_ENABLE_FAST_DECODE 0 +#else +# define HUF_ENABLE_FAST_DECODE 1 +#endif + /* These two optional macros force the use one way or another of the two * Huffman decompression implementations. You can't force in both directions * at the same time. @@ -158,17 +164,18 @@ static size_t HUF_initFastDStream(BYTE const* ip) { * op [in/out] - The output pointers, must be updated to reflect what is written. * bits [in/out] - The bitstream containers, must be updated to reflect the current state. * dt [in] - The decoding table. - * ilimit [in] - The input limit, stop when any input pointer is below ilimit. + * ilowest [in] - The beginning of the valid range of the input. Decoders may read + * down to this pointer. It may be below iend[0]. * oend [in] - The end of the output stream. op[3] must not cross oend. * iend [in] - The end of each input stream. ip[i] may cross iend[i], - * as long as it is above ilimit, but that indicates corruption. + * as long as it is above ilowest, but that indicates corruption. */ typedef struct { BYTE const* ip[4]; BYTE* op[4]; U64 bits[4]; void const* dt; - BYTE const* ilimit; + BYTE const* ilowest; BYTE* oend; BYTE const* iend[4]; } HUF_DecompressFastArgs; @@ -186,9 +193,9 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds void const* dt = DTable + 1; U32 const dtLog = HUF_getDTableDesc(DTable).tableLog; - const BYTE* const ilimit = (const BYTE*)src + 6 + 8; + const BYTE* const istart = (const BYTE*)src; - BYTE* const oend = (BYTE*)dst + dstSize; + BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); /* The fast decoding loop assumes 64-bit little-endian. * This condition is false on x32. @@ -196,6 +203,11 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds if (!MEM_isLittleEndian() || MEM_32bits()) return 0; + /* Avoid nullptr addition */ + if (dstSize == 0) + return 0; + assert(dst != NULL); + /* strict minimum : jump table + 1 byte per stream */ if (srcSize < 10) return ERROR(corruption_detected); @@ -209,7 +221,6 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds /* Read the jump table. */ { - const BYTE* const istart = (const BYTE*)src; size_t const length1 = MEM_readLE16(istart); size_t const length2 = MEM_readLE16(istart+2); size_t const length3 = MEM_readLE16(istart+4); @@ -221,10 +232,8 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds /* HUF_initFastDStream() requires this, and this small of an input * won't benefit from the ASM loop anyways. - * length1 must be >= 16 so that ip[0] >= ilimit before the loop - * starts. */ - if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8) + if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8) return 0; if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */ } @@ -256,11 +265,12 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds args->bits[2] = HUF_initFastDStream(args->ip[2]); args->bits[3] = HUF_initFastDStream(args->ip[3]); - /* If ip[] >= ilimit, it is guaranteed to be safe to - * reload bits[]. It may be beyond its section, but is - * guaranteed to be valid (>= istart). - */ - args->ilimit = ilimit; + /* The decoders must be sure to never read beyond ilowest. + * This is lower than iend[0], but allowing decoders to read + * down to ilowest can allow an extra iteration or two in the + * fast loop. + */ + args->ilowest = istart; args->oend = oend; args->dt = dt; @@ -285,13 +295,31 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArg assert(sizeof(size_t) == 8); bit->bitContainer = MEM_readLEST(args->ip[stream]); bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]); - bit->start = (const char*)args->iend[0]; + bit->start = (const char*)args->ilowest; bit->limitPtr = bit->start + sizeof(size_t); bit->ptr = (const char*)args->ip[stream]; return 0; } +/* Calls X(N) for each stream 0, 1, 2, 3. */ +#define HUF_4X_FOR_EACH_STREAM(X) \ + do { \ + X(0); \ + X(1); \ + X(2); \ + X(3); \ + } while (0) + +/* Calls X(N, var) for each stream 0, 1, 2, 3. */ +#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \ + do { \ + X(0, (var)); \ + X(1, (var)); \ + X(2, (var)); \ + X(3, (var)); \ + } while (0) + #ifndef HUF_FORCE_DECOMPRESS_X2 @@ -500,15 +528,19 @@ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog } #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ - *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) + do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0) -#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) +#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ + do { \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \ + } while (0) -#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) +#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ + do { \ + if (MEM_64bits()) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \ + } while (0) HINT_INLINE size_t HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) @@ -546,7 +578,7 @@ HUF_decompress1X1_usingDTable_internal_body( const HUF_DTable* DTable) { BYTE* op = (BYTE*)dst; - BYTE* const oend = op + dstSize; + BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize); const void* dtPtr = DTable + 1; const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; BIT_DStream_t bitD; @@ -574,6 +606,7 @@ HUF_decompress4X1_usingDTable_internal_body( { /* Check */ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ { const BYTE* const istart = (const BYTE*) cSrc; BYTE* const ostart = (BYTE*) dst; @@ -609,7 +642,7 @@ HUF_decompress4X1_usingDTable_internal_body( if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ - if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ + assert(dstSize >= 6); /* validated above */ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); @@ -692,7 +725,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* BYTE* op[4]; U16 const* const dtable = (U16 const*)args->dt; BYTE* const oend = args->oend; - BYTE const* const ilimit = args->ilimit; + BYTE const* const ilowest = args->ilowest; /* Copy the arguments to local variables */ ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); @@ -705,13 +738,12 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* for (;;) { BYTE* olimit; int stream; - int symbol; /* Assert loop preconditions */ #ifndef NDEBUG for (stream = 0; stream < 4; ++stream) { assert(op[stream] <= (stream == 3 ? oend : op[stream + 1])); - assert(ip[stream] >= ilimit); + assert(ip[stream] >= ilowest); } #endif /* Compute olimit */ @@ -721,7 +753,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes * per stream. */ - size_t const iiters = (size_t)(ip[0] - ilimit) / 7; + size_t const iiters = (size_t)(ip[0] - ilowest) / 7; /* We can safely run iters iterations before running bounds checks */ size_t const iters = MIN(oiters, iiters); size_t const symbols = iters * 5; @@ -732,8 +764,8 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* */ olimit = op[3] + symbols; - /* Exit fast decoding loop once we get close to the end. */ - if (op[3] + 20 > olimit) + /* Exit fast decoding loop once we reach the end. */ + if (op[3] == olimit) break; /* Exit the decoding loop if any input pointer has crossed the @@ -752,27 +784,42 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* } #endif +#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol) \ + do { \ + int const index = (int)(bits[(_stream)] >> 53); \ + int const entry = (int)dtable[index]; \ + bits[(_stream)] <<= (entry & 0x3F); \ + op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \ + } while (0) + +#define HUF_4X1_RELOAD_STREAM(_stream) \ + do { \ + int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \ + int const nbBits = ctz & 7; \ + int const nbBytes = ctz >> 3; \ + op[(_stream)] += 5; \ + ip[(_stream)] -= nbBytes; \ + bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \ + bits[(_stream)] <<= nbBits; \ + } while (0) + + /* Manually unroll the loop because compilers don't consistently + * unroll the inner loops, which destroys performance. + */ do { /* Decode 5 symbols in each of the 4 streams */ - for (symbol = 0; symbol < 5; ++symbol) { - for (stream = 0; stream < 4; ++stream) { - int const index = (int)(bits[stream] >> 53); - int const entry = (int)dtable[index]; - bits[stream] <<= (entry & 63); - op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF); - } - } - /* Reload the bitstreams */ - for (stream = 0; stream < 4; ++stream) { - int const ctz = ZSTD_countTrailingZeros64(bits[stream]); - int const nbBits = ctz & 7; - int const nbBytes = ctz >> 3; - op[stream] += 5; - ip[stream] -= nbBytes; - bits[stream] = MEM_read64(ip[stream]) | 1; - bits[stream] <<= nbBits; - } + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4); + + /* Reload each of the 4 the bitstreams */ + HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM); } while (op[3] < olimit); + +#undef HUF_4X1_DECODE_SYMBOL +#undef HUF_4X1_RELOAD_STREAM } _out: @@ -797,8 +844,8 @@ HUF_decompress4X1_usingDTable_internal_fast( HUF_DecompressFastLoopFn loopFn) { void const* dt = DTable + 1; - const BYTE* const iend = (const BYTE*)cSrc + 6; - BYTE* const oend = (BYTE*)dst + dstSize; + BYTE const* const ilowest = (BYTE const*)cSrc; + BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); HUF_DecompressFastArgs args; { size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); FORWARD_IF_ERROR(ret, "Failed to init fast loop args"); @@ -806,18 +853,22 @@ HUF_decompress4X1_usingDTable_internal_fast( return 0; } - assert(args.ip[0] >= args.ilimit); + assert(args.ip[0] >= args.ilowest); loopFn(&args); - /* Our loop guarantees that ip[] >= ilimit and that we haven't + /* Our loop guarantees that ip[] >= ilowest and that we haven't * overwritten any op[]. */ - assert(args.ip[0] >= iend); - assert(args.ip[1] >= iend); - assert(args.ip[2] >= iend); - assert(args.ip[3] >= iend); + assert(args.ip[0] >= ilowest); + assert(args.ip[0] >= ilowest); + assert(args.ip[1] >= ilowest); + assert(args.ip[2] >= ilowest); + assert(args.ip[3] >= ilowest); assert(args.op[3] <= oend); - (void)iend; + + assert(ilowest == args.ilowest); + assert(ilowest + 6 == args.iend[0]); + (void)ilowest; /* finish bit streams one by one. */ { size_t const segmentSize = (dstSize+3) / 4; @@ -868,7 +919,7 @@ static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, } #endif - if (!(flags & HUF_flags_disableFast)) { + if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) { size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); if (ret != 0) return ret; @@ -1239,15 +1290,19 @@ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, c } #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0) -#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + do { \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \ + } while (0) -#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + do { \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \ + } while (0) HINT_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, @@ -1307,7 +1362,7 @@ HUF_decompress1X2_usingDTable_internal_body( /* decode */ { BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; + BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize); const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; DTableDesc const dtd = HUF_getDTableDesc(DTable); @@ -1332,6 +1387,7 @@ HUF_decompress4X2_usingDTable_internal_body( const HUF_DTable* DTable) { if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ { const BYTE* const istart = (const BYTE*) cSrc; BYTE* const ostart = (BYTE*) dst; @@ -1367,7 +1423,7 @@ HUF_decompress4X2_usingDTable_internal_body( if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ - if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ + assert(dstSize >= 6 /* validated above */); CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); @@ -1472,7 +1528,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* BYTE* op[4]; BYTE* oend[4]; HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt; - BYTE const* const ilimit = args->ilimit; + BYTE const* const ilowest = args->ilowest; /* Copy the arguments to local registers. */ ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); @@ -1490,13 +1546,12 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* for (;;) { BYTE* olimit; int stream; - int symbol; /* Assert loop preconditions */ #ifndef NDEBUG for (stream = 0; stream < 4; ++stream) { assert(op[stream] <= oend[stream]); - assert(ip[stream] >= ilimit); + assert(ip[stream] >= ilowest); } #endif /* Compute olimit */ @@ -1509,7 +1564,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* * We also know that each input pointer is >= ip[0]. So we can run * iters loops before running out of input. */ - size_t iters = (size_t)(ip[0] - ilimit) / 7; + size_t iters = (size_t)(ip[0] - ilowest) / 7; /* Each iteration can produce up to 10 bytes of output per stream. * Each output stream my advance at different rates. So take the * minimum number of safe iterations among all the output streams. @@ -1527,8 +1582,8 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* */ olimit = op[3] + (iters * 5); - /* Exit the fast decoding loop if we are too close to the end. */ - if (op[3] + 10 > olimit) + /* Exit the fast decoding loop once we reach the end. */ + if (op[3] == olimit) break; /* Exit the decoding loop if any input pointer has crossed the @@ -1547,54 +1602,58 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* } #endif +#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3) \ + do { \ + if ((_decode3) || (_stream) != 3) { \ + int const index = (int)(bits[(_stream)] >> 53); \ + HUF_DEltX2 const entry = dtable[index]; \ + MEM_write16(op[(_stream)], entry.sequence); \ + bits[(_stream)] <<= (entry.nbBits) & 0x3F; \ + op[(_stream)] += (entry.length); \ + } \ + } while (0) + +#define HUF_4X2_RELOAD_STREAM(_stream) \ + do { \ + HUF_4X2_DECODE_SYMBOL(3, 1); \ + { \ + int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \ + int const nbBits = ctz & 7; \ + int const nbBytes = ctz >> 3; \ + ip[(_stream)] -= nbBytes; \ + bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \ + bits[(_stream)] <<= nbBits; \ + } \ + } while (0) + + /* Manually unroll the loop because compilers don't consistently + * unroll the inner loops, which destroys performance. + */ do { - /* Do 5 table lookups for each of the first 3 streams */ - for (symbol = 0; symbol < 5; ++symbol) { - for (stream = 0; stream < 3; ++stream) { - int const index = (int)(bits[stream] >> 53); - HUF_DEltX2 const entry = dtable[index]; - MEM_write16(op[stream], entry.sequence); - bits[stream] <<= (entry.nbBits); - op[stream] += (entry.length); - } - } - /* Do 1 table lookup from the final stream */ - { - int const index = (int)(bits[3] >> 53); - HUF_DEltX2 const entry = dtable[index]; - MEM_write16(op[3], entry.sequence); - bits[3] <<= (entry.nbBits); - op[3] += (entry.length); - } - /* Do 4 table lookups from the final stream & reload bitstreams */ - for (stream = 0; stream < 4; ++stream) { - /* Do a table lookup from the final stream. - * This is interleaved with the reloading to reduce register - * pressure. This shouldn't be necessary, but compilers can - * struggle with codegen with high register pressure. - */ - { - int const index = (int)(bits[3] >> 53); - HUF_DEltX2 const entry = dtable[index]; - MEM_write16(op[3], entry.sequence); - bits[3] <<= (entry.nbBits); - op[3] += (entry.length); - } - /* Reload the bistreams. The final bitstream must be reloaded - * after the 5th symbol was decoded. - */ - { - int const ctz = ZSTD_countTrailingZeros64(bits[stream]); - int const nbBits = ctz & 7; - int const nbBytes = ctz >> 3; - ip[stream] -= nbBytes; - bits[stream] = MEM_read64(ip[stream]) | 1; - bits[stream] <<= nbBits; - } - } + /* Decode 5 symbols from each of the first 3 streams. + * The final stream will be decoded during the reload phase + * to reduce register pressure. + */ + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + + /* Decode one symbol from the final stream */ + HUF_4X2_DECODE_SYMBOL(3, 1); + + /* Decode 4 symbols from the final stream & reload bitstreams. + * The final stream is reloaded last, meaning that all 5 symbols + * are decoded from the final stream before it is reloaded. + */ + HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM); } while (op[3] < olimit); } +#undef HUF_4X2_DECODE_SYMBOL +#undef HUF_4X2_RELOAD_STREAM + _out: /* Save the final values of each of the state variables back to args. */ @@ -1611,8 +1670,8 @@ HUF_decompress4X2_usingDTable_internal_fast( const HUF_DTable* DTable, HUF_DecompressFastLoopFn loopFn) { void const* dt = DTable + 1; - const BYTE* const iend = (const BYTE*)cSrc + 6; - BYTE* const oend = (BYTE*)dst + dstSize; + const BYTE* const ilowest = (const BYTE*)cSrc; + BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); HUF_DecompressFastArgs args; { size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); @@ -1621,16 +1680,19 @@ HUF_decompress4X2_usingDTable_internal_fast( return 0; } - assert(args.ip[0] >= args.ilimit); + assert(args.ip[0] >= args.ilowest); loopFn(&args); /* note : op4 already verified within main loop */ - assert(args.ip[0] >= iend); - assert(args.ip[1] >= iend); - assert(args.ip[2] >= iend); - assert(args.ip[3] >= iend); + assert(args.ip[0] >= ilowest); + assert(args.ip[1] >= ilowest); + assert(args.ip[2] >= ilowest); + assert(args.ip[3] >= ilowest); assert(args.op[3] <= oend); - (void)iend; + + assert(ilowest == args.ilowest); + assert(ilowest + 6 == args.iend[0]); + (void)ilowest; /* finish bitStreams one by one */ { @@ -1679,7 +1741,7 @@ static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, } #endif - if (!(flags & HUF_flags_disableFast)) { + if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) { size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); if (ret != 0) return ret; diff --git a/thirdparty/zstd/decompress/huf_decompress_amd64.S b/thirdparty/zstd/decompress/huf_decompress_amd64.S index 671624fe34..78da291ee3 100644 --- a/thirdparty/zstd/decompress/huf_decompress_amd64.S +++ b/thirdparty/zstd/decompress/huf_decompress_amd64.S @@ -10,11 +10,32 @@ #include "../common/portability_macros.h" +#if defined(__ELF__) && defined(__GNUC__) /* Stack marking * ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart */ -#if defined(__ELF__) && defined(__GNUC__) .section .note.GNU-stack,"",%progbits + +#if defined(__aarch64__) +/* Mark that this assembly supports BTI & PAC, because it is empty for aarch64. + * See: https://github.com/facebook/zstd/issues/3841 + * See: https://gcc.godbolt.org/z/sqr5T4ffK + * See: https://lore.kernel.org/linux-arm-kernel/20200429211641.9279-8-broonie@kernel.org/ + * See: https://reviews.llvm.org/D62609 + */ +.pushsection .note.gnu.property, "a" +.p2align 3 +.long 4 /* size of the name - "GNU\0" */ +.long 0x10 /* size of descriptor */ +.long 0x5 /* NT_GNU_PROPERTY_TYPE_0 */ +.asciz "GNU" +.long 0xc0000000 /* pr_type - GNU_PROPERTY_AARCH64_FEATURE_1_AND */ +.long 4 /* pr_datasz - 4 bytes */ +.long 3 /* pr_data - GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC */ +.p2align 3 /* pr_padding - bring everything to 8 byte alignment */ +.popsection +#endif + #endif #if ZSTD_ENABLE_ASM_X86_64_BMI2 @@ -131,7 +152,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop: movq 88(%rax), %bits3 movq 96(%rax), %dtable push %rax /* argument */ - push 104(%rax) /* ilimit */ + push 104(%rax) /* ilowest */ push 112(%rax) /* oend */ push %olimit /* olimit space */ @@ -156,11 +177,11 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop: shrq $2, %r15 movq %ip0, %rax /* rax = ip0 */ - movq 40(%rsp), %rdx /* rdx = ilimit */ - subq %rdx, %rax /* rax = ip0 - ilimit */ - movq %rax, %rbx /* rbx = ip0 - ilimit */ + movq 40(%rsp), %rdx /* rdx = ilowest */ + subq %rdx, %rax /* rax = ip0 - ilowest */ + movq %rax, %rbx /* rbx = ip0 - ilowest */ - /* rdx = (ip0 - ilimit) / 7 */ + /* rdx = (ip0 - ilowest) / 7 */ movabsq $2635249153387078803, %rdx mulq %rdx subq %rdx, %rbx @@ -183,9 +204,8 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop: /* If (op3 + 20 > olimit) */ movq %op3, %rax /* rax = op3 */ - addq $20, %rax /* rax = op3 + 20 */ - cmpq %rax, %olimit /* op3 + 20 > olimit */ - jb .L_4X1_exit + cmpq %rax, %olimit /* op3 == olimit */ + je .L_4X1_exit /* If (ip1 < ip0) go to exit */ cmpq %ip0, %ip1 @@ -316,7 +336,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop: /* Restore stack (oend & olimit) */ pop %rax /* olimit */ pop %rax /* oend */ - pop %rax /* ilimit */ + pop %rax /* ilowest */ pop %rax /* arg */ /* Save ip / op / bits */ @@ -387,7 +407,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop: movq 96(%rax), %dtable push %rax /* argument */ push %rax /* olimit */ - push 104(%rax) /* ilimit */ + push 104(%rax) /* ilowest */ movq 112(%rax), %rax push %rax /* oend3 */ @@ -414,9 +434,9 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop: /* We can consume up to 7 input bytes each iteration. */ movq %ip0, %rax /* rax = ip0 */ - movq 40(%rsp), %rdx /* rdx = ilimit */ - subq %rdx, %rax /* rax = ip0 - ilimit */ - movq %rax, %r15 /* r15 = ip0 - ilimit */ + movq 40(%rsp), %rdx /* rdx = ilowest */ + subq %rdx, %rax /* rax = ip0 - ilowest */ + movq %rax, %r15 /* r15 = ip0 - ilowest */ /* rdx = rax / 7 */ movabsq $2635249153387078803, %rdx @@ -426,7 +446,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop: addq %r15, %rdx shrq $2, %rdx - /* r15 = (ip0 - ilimit) / 7 */ + /* r15 = (ip0 - ilowest) / 7 */ movq %rdx, %r15 /* r15 = min(r15, min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) / 10) */ @@ -467,9 +487,8 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop: /* If (op3 + 10 > olimit) */ movq %op3, %rax /* rax = op3 */ - addq $10, %rax /* rax = op3 + 10 */ - cmpq %rax, %olimit /* op3 + 10 > olimit */ - jb .L_4X2_exit + cmpq %rax, %olimit /* op3 == olimit */ + je .L_4X2_exit /* If (ip1 < ip0) go to exit */ cmpq %ip0, %ip1 @@ -537,7 +556,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop: pop %rax /* oend1 */ pop %rax /* oend2 */ pop %rax /* oend3 */ - pop %rax /* ilimit */ + pop %rax /* ilowest */ pop %rax /* olimit */ pop %rax /* arg */ diff --git a/thirdparty/zstd/decompress/zstd_decompress.c b/thirdparty/zstd/decompress/zstd_decompress.c index 7bc2713429..2f03cf7b0c 100644 --- a/thirdparty/zstd/decompress/zstd_decompress.c +++ b/thirdparty/zstd/decompress/zstd_decompress.c @@ -55,18 +55,19 @@ /*-******************************************************* * Dependencies *********************************************************/ -#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ +#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */ +#include "../common/error_private.h" +#include "../common/zstd_internal.h" /* blockProperties_t */ #include "../common/mem.h" /* low level memory routines */ +#include "../common/bits.h" /* ZSTD_highbit32 */ #define FSE_STATIC_LINKING_ONLY #include "../common/fse.h" #include "../common/huf.h" #include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */ -#include "../common/zstd_internal.h" /* blockProperties_t */ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ #include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ -#include "../common/bits.h" /* ZSTD_highbit32 */ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) # include "../legacy/zstd_legacy.h" @@ -245,6 +246,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; dctx->disableHufAsm = 0; + dctx->maxBlockSizeParam = 0; } static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) @@ -265,6 +267,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) #endif dctx->noForwardProgress = 0; dctx->oversizedDuration = 0; + dctx->isFrameDecompression = 1; #if DYNAMIC_BMI2 dctx->bmi2 = ZSTD_cpuSupportsBmi2(); #endif @@ -726,17 +729,17 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) return frameSizeInfo; } -static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) +static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format) { ZSTD_frameSizeInfo frameSizeInfo; ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) + if (format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) return ZSTD_findFrameSizeInfoLegacy(src, srcSize); #endif - if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) + if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE) && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); assert(ZSTD_isError(frameSizeInfo.compressedSize) || @@ -750,7 +753,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize ZSTD_frameHeader zfh; /* Extract Frame Header */ - { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); + { size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format); if (ZSTD_isError(ret)) return ZSTD_errorFrameSizeInfo(ret); if (ret > 0) @@ -793,15 +796,17 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize } } +static size_t ZSTD_findFrameCompressedSize_advanced(const void *src, size_t srcSize, ZSTD_format_e format) { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format); + return frameSizeInfo.compressedSize; +} + /** ZSTD_findFrameCompressedSize() : - * compatible with legacy mode - * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame - * `srcSize` must be at least as large as the frame contained - * @return : the compressed size of the frame starting at `src` */ + * See docs in zstd.h + * Note: compatible with legacy mode */ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) { - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); - return frameSizeInfo.compressedSize; + return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1); } /** ZSTD_decompressBound() : @@ -815,7 +820,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) unsigned long long bound = 0; /* Iterate over each frame */ while (srcSize > 0) { - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1); size_t const compressedSize = frameSizeInfo.compressedSize; unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) @@ -835,7 +840,7 @@ size_t ZSTD_decompressionMargin(void const* src, size_t srcSize) /* Iterate over each frame */ while (srcSize > 0) { - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1); size_t const compressedSize = frameSizeInfo.compressedSize; unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; ZSTD_frameHeader zfh; @@ -971,6 +976,10 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; } + /* Shrink the blockSizeMax if enabled */ + if (dctx->maxBlockSizeParam != 0) + dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam); + /* Loop on each block */ while (1) { BYTE* oBlockEnd = oend; @@ -1003,7 +1012,8 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, switch(blockProperties.blockType) { case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming); + assert(dctx->isFrameDecompression == 1); + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming); break; case bt_raw : /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */ @@ -1016,12 +1026,14 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, default: RETURN_ERROR(corruption_detected, "invalid block type"); } - - if (ZSTD_isError(decodedSize)) return decodedSize; - if (dctx->validateChecksum) + FORWARD_IF_ERROR(decodedSize, "Block decompression failure"); + DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned)decodedSize); + if (dctx->validateChecksum) { XXH64_update(&dctx->xxhState, op, decodedSize); - if (decodedSize != 0) + } + if (decodedSize) /* support dst = NULL,0 */ { op += decodedSize; + } assert(ip != NULL); ip += cBlockSize; remainingSrcSize -= cBlockSize; @@ -1051,7 +1063,9 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, return (size_t)(op-ostart); } -static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, @@ -1071,7 +1085,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, while (srcSize >= ZSTD_startingInputLength(dctx->format)) { #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) { + if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) { size_t decodedSize; size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); if (ZSTD_isError(frameSize)) return frameSize; @@ -1081,6 +1095,15 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); if (ZSTD_isError(decodedSize)) return decodedSize; + { + unsigned long long const expectedSize = ZSTD_getFrameContentSize(src, srcSize); + RETURN_ERROR_IF(expectedSize == ZSTD_CONTENTSIZE_ERROR, corruption_detected, "Corrupted frame header!"); + if (expectedSize != ZSTD_CONTENTSIZE_UNKNOWN) { + RETURN_ERROR_IF(expectedSize != decodedSize, corruption_detected, + "Frame header size does not match decoded size!"); + } + } + assert(decodedSize <= dstCapacity); dst = (BYTE*)dst + decodedSize; dstCapacity -= decodedSize; @@ -1092,7 +1115,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, } #endif - if (srcSize >= 4) { + if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) { U32 const magicNumber = MEM_readLE32(src); DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber); if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { @@ -1319,7 +1342,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { case bt_compressed: DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); - rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming); + assert(dctx->isFrameDecompression == 1); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming); dctx->expected = 0; /* Streaming not supported */ break; case bt_raw : @@ -1388,6 +1412,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_decodeSkippableHeader: assert(src != NULL); assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); + assert(dctx->format != ZSTD_f_zstd1_magicless); ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ dctx->stage = ZSTDds_skipFrame; @@ -1548,6 +1573,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->litEntropy = dctx->fseEntropy = 0; dctx->dictID = 0; dctx->bType = bt_reserved; + dctx->isFrameDecompression = 1; ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ dctx->LLTptr = dctx->entropy.LLTable; @@ -1819,6 +1845,10 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) bounds.lowerBound = 0; bounds.upperBound = 1; return bounds; + case ZSTD_d_maxBlockSize: + bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN; + bounds.upperBound = ZSTD_BLOCKSIZE_MAX; + return bounds; default:; } @@ -1863,6 +1893,9 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value case ZSTD_d_disableHuffmanAssembly: *value = (int)dctx->disableHufAsm; return 0; + case ZSTD_d_maxBlockSize: + *value = dctx->maxBlockSizeParam; + return 0; default:; } RETURN_ERROR(parameter_unsupported, ""); @@ -1900,6 +1933,10 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value); dctx->disableHufAsm = value != 0; return 0; + case ZSTD_d_maxBlockSize: + if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value); + dctx->maxBlockSizeParam = value; + return 0; default:; } RETURN_ERROR(parameter_unsupported, ""); @@ -1911,6 +1948,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) || (reset == ZSTD_reset_session_and_parameters) ) { dctx->streamStage = zdss_init; dctx->noForwardProgress = 0; + dctx->isFrameDecompression = 1; } if ( (reset == ZSTD_reset_parameters) || (reset == ZSTD_reset_session_and_parameters) ) { @@ -1927,11 +1965,17 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) return ZSTD_sizeof_DCtx(dctx); } -size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax) { - size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); - /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/ - unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2); + size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax); + /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block + * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing + * the block at the beginning of the output buffer, and maintain a full window. + * + * We need another blockSize worth of buffer so that we can store split + * literals at the end of the block without overwriting the extDict window. + */ + unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2); unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); size_t const minRBSize = (size_t) neededSize; RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, @@ -1939,6 +1983,11 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long return minRBSize; } +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +{ + return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX); +} + size_t ZSTD_estimateDStreamSize(size_t windowSize) { size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); @@ -2134,12 +2183,12 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN && zds->fParams.frameType != ZSTD_skippableFrame && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { - size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); + size_t const cSize = ZSTD_findFrameCompressedSize_advanced(istart, (size_t)(iend-istart), zds->format); if (cSize <= (size_t)(iend-istart)) { /* shortcut : using single-pass mode */ size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); if (ZSTD_isError(decompressedSize)) return decompressedSize; - DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()"); assert(istart != NULL); ip = istart + cSize; op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */ @@ -2161,7 +2210,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB DEBUGLOG(4, "Consume header"); FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); - if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + if (zds->format == ZSTD_f_zstd1 + && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); zds->stage = ZSTDds_skipFrame; } else { @@ -2177,11 +2227,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, frameParameter_windowTooLarge, ""); + if (zds->maxBlockSizeParam != 0) + zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam); /* Adapt buffer sizes to frame header instructions */ { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered - ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + ? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax) : 0; ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); diff --git a/thirdparty/zstd/decompress/zstd_decompress_block.c b/thirdparty/zstd/decompress/zstd_decompress_block.c index 09896a931e..76d7332e88 100644 --- a/thirdparty/zstd/decompress/zstd_decompress_block.c +++ b/thirdparty/zstd/decompress/zstd_decompress_block.c @@ -51,6 +51,13 @@ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } * Block decoding ***************************************************************/ +static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx) +{ + size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX; + assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX); + return blockSizeMax; +} + /*! ZSTD_getcBlockSize() : * Provides the size of compressed block from block header `src` */ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, @@ -73,41 +80,49 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize, const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately) { - if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) - { - /* room for litbuffer to fit without read faulting */ - dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH; + size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); + assert(litSize <= blockSizeMax); + assert(dctx->isFrameDecompression || streaming == not_streaming); + assert(expectedWriteSize <= blockSizeMax); + if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) { + /* If we aren't streaming, we can just put the literals after the output + * of the current block. We don't need to worry about overwriting the + * extDict of our window, because it doesn't exist. + * So if we have space after the end of the block, just put it there. + */ + dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH; dctx->litBufferEnd = dctx->litBuffer + litSize; dctx->litBufferLocation = ZSTD_in_dst; - } - else if (litSize > ZSTD_LITBUFFEREXTRASIZE) - { - /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ + } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) { + /* Literals fit entirely within the extra buffer, put them there to avoid + * having to split the literals. + */ + dctx->litBuffer = dctx->litExtraBuffer; + dctx->litBufferEnd = dctx->litBuffer + litSize; + dctx->litBufferLocation = ZSTD_not_in_dst; + } else { + assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE); + /* Literals must be split between the output block and the extra lit + * buffer. We fill the extra lit buffer with the tail of the literals, + * and put the rest of the literals at the end of the block, with + * WILDCOPY_OVERLENGTH of buffer room to allow for overreads. + * This MUST not write more than our maxBlockSize beyond dst, because in + * streaming mode, that could overwrite part of our extDict window. + */ if (splitImmediately) { /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE; - } - else { + } else { /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize; dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize; } dctx->litBufferLocation = ZSTD_split; - } - else - { - /* fits entirely within litExtraBuffer, so no split is necessary */ - dctx->litBuffer = dctx->litExtraBuffer; - dctx->litBufferEnd = dctx->litBuffer + litSize; - dctx->litBufferLocation = ZSTD_not_in_dst; + assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize); } } -/* Hidden declaration for fullbench */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize, - void* dst, size_t dstCapacity, const streaming_operation streaming); /*! ZSTD_decodeLiteralsBlock() : * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current @@ -116,7 +131,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, * * @return : nb of bytes read from src (< srcSize ) * note : symbol not declared but exposed for fullbench */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, +static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */ void* dst, size_t dstCapacity, const streaming_operation streaming) { @@ -125,6 +140,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, { const BYTE* const istart = (const BYTE*) src; symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); switch(litEncType) { @@ -140,7 +156,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, U32 const lhlCode = (istart[0] >> 2) & 3; U32 const lhc = MEM_readLE32(istart); size_t hufSuccess; - size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); int const flags = 0 | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0) | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0); @@ -167,7 +183,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; } RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); if (!singleStream) RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong, "Not enough literals (%zu) for the 4-streams mode (min %u)", @@ -214,10 +230,12 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } if (dctx->litBufferLocation == ZSTD_split) { + assert(litSize > ZSTD_LITBUFFEREXTRASIZE); ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE); ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE); dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; dctx->litBufferEnd -= WILDCOPY_OVERLENGTH; + assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax); } RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); @@ -232,7 +250,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, case set_basic: { size_t litSize, lhSize; U32 const lhlCode = ((istart[0]) >> 2) & 3; - size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); switch(lhlCode) { case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ @@ -251,6 +269,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ @@ -279,7 +298,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, case set_rle: { U32 const lhlCode = ((istart[0]) >> 2) & 3; size_t litSize, lhSize; - size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); switch(lhlCode) { case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ @@ -298,7 +317,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; } RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); if (dctx->litBufferLocation == ZSTD_split) @@ -320,6 +339,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } } +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, + const void* src, size_t srcSize, + void* dst, size_t dstCapacity); +size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, + const void* src, size_t srcSize, + void* dst, size_t dstCapacity) +{ + dctx->isFrameDecompression = 0; + return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming); +} + /* Default FSE distribution tables. * These are pre-calculated FSE decoding tables using default distributions as defined in specification : * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions @@ -675,11 +706,6 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, /* SeqHead */ nbSeq = *ip++; - if (!nbSeq) { - *nbSeqPtr=0; - RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); - return 1; - } if (nbSeq > 0x7F) { if (nbSeq == 0xFF) { RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); @@ -692,8 +718,16 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, } *nbSeqPtr = nbSeq; + if (nbSeq == 0) { + /* No sequence : section ends immediately */ + RETURN_ERROR_IF(ip != iend, corruption_detected, + "extraneous data present in the Sequences section"); + return (size_t)(ip - istart); + } + /* FSE table descriptors */ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ + RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); @@ -840,7 +874,7 @@ static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, pt /* ZSTD_safecopyDstBeforeSrc(): * This version allows overlap with dst before src, or handles the non-overlap case with dst after src * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */ -static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) { +static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) { ptrdiff_t const diff = op - ip; BYTE* const oend = op + length; @@ -869,6 +903,7 @@ static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). */ FORCE_NOINLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR size_t ZSTD_execSequenceEnd(BYTE* op, BYTE* const oend, seq_t sequence, const BYTE** litPtr, const BYTE* const litLimit, @@ -916,6 +951,7 @@ size_t ZSTD_execSequenceEnd(BYTE* op, * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case. */ FORCE_NOINLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op, BYTE* const oend, const BYTE* const oend_w, seq_t sequence, const BYTE** litPtr, const BYTE* const litLimit, @@ -961,6 +997,7 @@ size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op, } HINT_INLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR size_t ZSTD_execSequence(BYTE* op, BYTE* const oend, seq_t sequence, const BYTE** litPtr, const BYTE* const litLimit, @@ -1059,6 +1096,7 @@ size_t ZSTD_execSequence(BYTE* op, } HINT_INLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op, BYTE* const oend, const BYTE* const oend_w, seq_t sequence, const BYTE** litPtr, const BYTE* const litLimit, @@ -1181,14 +1219,20 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +/** + * ZSTD_decodeSequence(): + * @p longOffsets : tells the decoder to reload more bit while decoding large offsets + * only used in 32-bit mode + * @return : Sequence (litL + matchL + offset) + */ FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq) { seq_t seq; /* - * ZSTD_seqSymbol is a structure with a total of 64 bits wide. So it can be - * loaded in one operation and extracted its fields by simply shifting or - * bit-extracting on aarch64. + * ZSTD_seqSymbol is a 64 bits wide structure. + * It can be loaded in one operation + * and its fields extracted by simply shifting or bit-extracting on aarch64. * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh * operations that cause performance drop. This can be avoided by using this * ZSTD_memcpy hack. @@ -1261,7 +1305,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) } else { offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[0] = offset = temp; @@ -1288,17 +1332,22 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ + if (!isLastSeq) { + /* don't update FSE state for last Sequence */ + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ + BIT_reloadDStream(&seqState->DStream); + } } return seq; } -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) +#if DEBUGLEVEL >= 1 +static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) { size_t const windowSize = dctx->fParams.windowSize; /* No dictionary used. */ @@ -1312,30 +1361,33 @@ MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefix /* Dictionary is active. */ return 1; } +#endif -MEM_STATIC void ZSTD_assertValidSequence( +static void ZSTD_assertValidSequence( ZSTD_DCtx const* dctx, BYTE const* op, BYTE const* oend, seq_t const seq, BYTE const* prefixStart, BYTE const* virtualStart) { #if DEBUGLEVEL >= 1 - size_t const windowSize = dctx->fParams.windowSize; - size_t const sequenceSize = seq.litLength + seq.matchLength; - BYTE const* const oLitEnd = op + seq.litLength; - DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - assert(op <= oend); - assert((size_t)(oend - op) >= sequenceSize); - assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); - if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { - size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); - /* Offset must be within the dictionary. */ - assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); - assert(seq.offset <= windowSize + dictSize); - } else { - /* Offset must be within our window. */ - assert(seq.offset <= windowSize); + if (dctx->isFrameDecompression) { + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_blockSizeMax(dctx)); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. */ + assert(seq.offset <= windowSize); + } } #else (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; @@ -1351,23 +1403,21 @@ DONT_VECTORIZE ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + maxDstSize; + BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize); BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; const BYTE* litBufferEnd = dctx->litBufferEnd; const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer"); - (void)frame; + DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq); - /* Regen sequences */ + /* Literals are split between internal buffer & output buffer */ if (nbSeq) { seqState_t seqState; dctx->fseEntropy = 1; @@ -1386,8 +1436,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, BIT_DStream_completed < BIT_DStream_overflow); /* decompress without overrunning litPtr begins */ - { - seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + { seq_t sequence = {0,0,0}; /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) */ /* Align the decompression loop to 32 + 16 bytes. * * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression @@ -1449,27 +1498,26 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, #endif /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */ - for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) { - size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); + for ( ; nbSeq; nbSeq--) { + sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); + if (litPtr + sequence.litLength > dctx->litBufferEnd) break; + { size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); #endif - if (UNLIKELY(ZSTD_isError(oneSeqSize))) - return oneSeqSize; - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - op += oneSeqSize; - if (UNLIKELY(!--nbSeq)) - break; - BIT_reloadDStream(&(seqState.DStream)); - sequence = ZSTD_decodeSequence(&seqState, isLongOffset); - } + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + } } + DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)"); /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */ if (nbSeq > 0) { const size_t leftoverLit = dctx->litBufferEnd - litPtr; - if (leftoverLit) - { + DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength); + if (leftoverLit) { RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); sequence.litLength -= leftoverLit; @@ -1478,24 +1526,22 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, litPtr = dctx->litExtraBuffer; litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; dctx->litBufferLocation = ZSTD_not_in_dst; - { - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); + { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); #endif if (UNLIKELY(ZSTD_isError(oneSeqSize))) return oneSeqSize; DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); op += oneSeqSize; - if (--nbSeq) - BIT_reloadDStream(&(seqState.DStream)); } + nbSeq--; } } - if (nbSeq > 0) /* there is remaining lit from extra buffer */ - { + if (nbSeq > 0) { + /* there is remaining lit from extra buffer */ #if defined(__GNUC__) && defined(__x86_64__) __asm__(".p2align 6"); @@ -1514,35 +1560,34 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, # endif #endif - for (; ; ) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + for ( ; nbSeq ; nbSeq--) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); #endif if (UNLIKELY(ZSTD_isError(oneSeqSize))) return oneSeqSize; DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); op += oneSeqSize; - if (UNLIKELY(!--nbSeq)) - break; - BIT_reloadDStream(&(seqState.DStream)); } } /* check if reached exact end */ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq); RETURN_ERROR_IF(nbSeq, corruption_detected, ""); - RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); + DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed); + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); /* save reps for next block */ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } } /* last literal segment */ - if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ - { - size_t const lastLLSize = litBufferEnd - litPtr; + if (dctx->litBufferLocation == ZSTD_split) { + /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ + size_t const lastLLSize = (size_t)(litBufferEnd - litPtr); + DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize); RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); if (op != NULL) { ZSTD_memmove(op, litPtr, lastLLSize); @@ -1552,15 +1597,17 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; dctx->litBufferLocation = ZSTD_not_in_dst; } - { size_t const lastLLSize = litBufferEnd - litPtr; + /* copy last literals from internal buffer */ + { size_t const lastLLSize = (size_t)(litBufferEnd - litPtr); + DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize); RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); if (op != NULL) { ZSTD_memcpy(op, litPtr, lastLLSize); op += lastLLSize; - } - } + } } - return op-ostart; + DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); + return (size_t)(op - ostart); } FORCE_INLINE_TEMPLATE size_t @@ -1568,13 +1615,12 @@ DONT_VECTORIZE ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer; + BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; const BYTE* const litEnd = litPtr + dctx->litSize; @@ -1582,7 +1628,6 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, const BYTE* const vBase = (const BYTE*)(dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd); DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq); - (void)frame; /* Regen sequences */ if (nbSeq) { @@ -1597,11 +1642,6 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); assert(dst != NULL); - ZSTD_STATIC_ASSERT( - BIT_DStream_unfinished < BIT_DStream_completed && - BIT_DStream_endOfBuffer < BIT_DStream_completed && - BIT_DStream_completed < BIT_DStream_overflow); - #if defined(__GNUC__) && defined(__x86_64__) __asm__(".p2align 6"); __asm__("nop"); @@ -1616,73 +1656,70 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, # endif #endif - for ( ; ; ) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + for ( ; nbSeq ; nbSeq--) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); #endif if (UNLIKELY(ZSTD_isError(oneSeqSize))) return oneSeqSize; DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); op += oneSeqSize; - if (UNLIKELY(!--nbSeq)) - break; - BIT_reloadDStream(&(seqState.DStream)); } /* check if reached exact end */ - DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); - RETURN_ERROR_IF(nbSeq, corruption_detected, ""); - RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); + assert(nbSeq == 0); + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); /* save reps for next block */ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } } /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; + { size_t const lastLLSize = (size_t)(litEnd - litPtr); + DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize); RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); if (op != NULL) { ZSTD_memcpy(op, litPtr, lastLLSize); op += lastLLSize; - } - } + } } - return op-ostart; + DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); + return (size_t)(op - ostart); } static size_t ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } static size_t ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { - return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -FORCE_INLINE_TEMPLATE size_t -ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, +FORCE_INLINE_TEMPLATE + +size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, const BYTE* const prefixStart, const BYTE* const dictEnd) { prefetchPos += sequence.litLength; { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart; - const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. - * No consequence though : memory address is only used for prefetching, not for dereferencing */ + /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : memory address is only used for prefetching, not for dereferencing */ + const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset); PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ } return prefetchPos + sequence.matchLength; @@ -1697,20 +1734,18 @@ ZSTD_decompressSequencesLong_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize; + BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize); BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; const BYTE* litBufferEnd = dctx->litBufferEnd; const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - (void)frame; /* Regen sequences */ if (nbSeq) { @@ -1735,20 +1770,17 @@ ZSTD_decompressSequencesLong_body( ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); /* prepare in advance */ - for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + for (seqNb=0; seqNb<seqAdvance; seqNb++) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1); prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); sequences[seqNb] = sequence; } - RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, ""); /* decompress without stomping litBuffer */ - for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) { - seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset); - size_t oneSeqSize; + for (; seqNb < nbSeq; seqNb++) { + seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1); - if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) - { + if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) { /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */ const size_t leftoverLit = dctx->litBufferEnd - litPtr; if (leftoverLit) @@ -1761,26 +1793,26 @@ ZSTD_decompressSequencesLong_body( litPtr = dctx->litExtraBuffer; litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; dctx->litBufferLocation = ZSTD_not_in_dst; - oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); + { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); #endif - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); - sequences[seqNb & STORED_SEQS_MASK] = sequence; - op += oneSeqSize; - } + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); + sequences[seqNb & STORED_SEQS_MASK] = sequence; + op += oneSeqSize; + } } else { /* lit buffer is either wholly contained in first or second split, or not split at all*/ - oneSeqSize = dctx->litBufferLocation == ZSTD_split ? + size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ? ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) : ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); + ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); #endif if (ZSTD_isError(oneSeqSize)) return oneSeqSize; @@ -1789,17 +1821,15 @@ ZSTD_decompressSequencesLong_body( op += oneSeqSize; } } - RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, ""); + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); /* finish queue */ seqNb -= seqAdvance; for ( ; seqNb<nbSeq ; seqNb++) { seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]); - if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) - { + if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) { const size_t leftoverLit = dctx->litBufferEnd - litPtr; - if (leftoverLit) - { + if (leftoverLit) { RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); sequence->litLength -= leftoverLit; @@ -1808,11 +1838,10 @@ ZSTD_decompressSequencesLong_body( litPtr = dctx->litExtraBuffer; litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; dctx->litBufferLocation = ZSTD_not_in_dst; - { - size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); + { size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); + ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); #endif if (ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; @@ -1825,7 +1854,7 @@ ZSTD_decompressSequencesLong_body( ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); + ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); #endif if (ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; @@ -1837,8 +1866,7 @@ ZSTD_decompressSequencesLong_body( } /* last literal segment */ - if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */ - { + if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */ size_t const lastLLSize = litBufferEnd - litPtr; RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); if (op != NULL) { @@ -1856,17 +1884,16 @@ ZSTD_decompressSequencesLong_body( } } - return op-ostart; + return (size_t)(op - ostart); } static size_t ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -1880,20 +1907,18 @@ DONT_VECTORIZE ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } static BMI2_TARGET_ATTRIBUTE size_t DONT_VECTORIZE ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { - return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1902,10 +1927,9 @@ static BMI2_TARGET_ATTRIBUTE size_t ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -1915,37 +1939,34 @@ typedef size_t (*ZSTD_decompressSequences_t)( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame); + const ZSTD_longOffset_e isLongOffset); #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { DEBUGLOG(5, "ZSTD_decompressSequences"); #if DYNAMIC_BMI2 if (ZSTD_DCtx_get_bmi2(dctx)) { - return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif - return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } static size_t ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer"); #if DYNAMIC_BMI2 if (ZSTD_DCtx_get_bmi2(dctx)) { - return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif - return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1960,16 +1981,15 @@ static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) + const ZSTD_longOffset_e isLongOffset) { DEBUGLOG(5, "ZSTD_decompressSequencesLong"); #if DYNAMIC_BMI2 if (ZSTD_DCtx_get_bmi2(dctx)) { - return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif - return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -2051,20 +2071,20 @@ static size_t ZSTD_maxShortOffset(void) size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame, const streaming_operation streaming) + const void* src, size_t srcSize, const streaming_operation streaming) { /* blockType == blockCompressed */ const BYTE* ip = (const BYTE*)src; - DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); + DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize); /* Note : the wording of the specification - * allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX. + * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx). * This generally does not happen, as it makes little sense, * since an uncompressed block would feature same size and have no decompression cost. * Also, note that decoder from reference libzstd before < v1.5.4 * would consider this edge case as an error. - * As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX + * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx) * for broader compatibility with the deployed ecosystem of zstd decoders */ - RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); + RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, ""); /* Decode literals section */ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); @@ -2079,8 +2099,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, /* Compute the maximum block size, which must also work when !frame and fParams are unset. * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t. */ - size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX)); - size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart); + size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx)); + size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart); /* isLongOffset must be true if there are long offsets. * Offsets are long if they are larger than ZSTD_maxShortOffset(). * We don't expect that to be the case in 64-bit mode. @@ -2145,21 +2165,22 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, { #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT - return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); #endif } #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG /* else */ if (dctx->litBufferLocation == ZSTD_split) - return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); else - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); #endif } } +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) { if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ @@ -2176,8 +2197,10 @@ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx, const void* src, size_t srcSize) { size_t dSize; + dctx->isFrameDecompression = 0; ZSTD_checkContinuity(dctx, dst, dstCapacity); - dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming); + FORWARD_IF_ERROR(dSize, ""); dctx->previousDstEnd = (char*)dst + dSize; return dSize; } diff --git a/thirdparty/zstd/decompress/zstd_decompress_block.h b/thirdparty/zstd/decompress/zstd_decompress_block.h index 9d1318882d..ab152404ba 100644 --- a/thirdparty/zstd/decompress/zstd_decompress_block.h +++ b/thirdparty/zstd/decompress/zstd_decompress_block.h @@ -47,7 +47,7 @@ typedef enum { */ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame, const streaming_operation streaming); + const void* src, size_t srcSize, const streaming_operation streaming); /* ZSTD_buildFSETable() : * generate FSE decoding table for one symbol (ll, ml or off) diff --git a/thirdparty/zstd/decompress/zstd_decompress_internal.h b/thirdparty/zstd/decompress/zstd_decompress_internal.h index c2ec5d9fbe..83a7a0115f 100644 --- a/thirdparty/zstd/decompress/zstd_decompress_internal.h +++ b/thirdparty/zstd/decompress/zstd_decompress_internal.h @@ -153,6 +153,7 @@ struct ZSTD_DCtx_s size_t litSize; size_t rleSize; size_t staticSize; + int isFrameDecompression; #if DYNAMIC_BMI2 != 0 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ #endif @@ -166,6 +167,7 @@ struct ZSTD_DCtx_s ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */ int disableHufAsm; + int maxBlockSizeParam; /* streaming */ ZSTD_dStreamStage streamStage; |