diff options
Diffstat (limited to 'thirdparty/astcenc/astcenc_internal.h')
-rw-r--r-- | thirdparty/astcenc/astcenc_internal.h | 144 |
1 files changed, 97 insertions, 47 deletions
diff --git a/thirdparty/astcenc/astcenc_internal.h b/thirdparty/astcenc/astcenc_internal.h index 63bbf8af53..715028ac8f 100644 --- a/thirdparty/astcenc/astcenc_internal.h +++ b/thirdparty/astcenc/astcenc_internal.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2023 Arm Limited +// Copyright 2011-2024 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -29,6 +29,7 @@ #include <cstdio> #endif #include <cstdlib> +#include <limits> #include "astcenc.h" #include "astcenc_mathlib.h" @@ -79,7 +80,7 @@ static constexpr unsigned int BLOCK_MAX_PARTITIONS { 4 }; /** @brief The number of partitionings, per partition count, suported by the ASTC format. */ static constexpr unsigned int BLOCK_MAX_PARTITIONINGS { 1024 }; -/** @brief The maximum number of weights used during partition selection for texel clustering. */ +/** @brief The maximum number of texels used during partition selection for texel clustering. */ static constexpr uint8_t BLOCK_MAX_KMEANS_TEXELS { 64 }; /** @brief The maximum number of weights a block can support. */ @@ -119,11 +120,9 @@ static constexpr unsigned int WEIGHTS_MAX_DECIMATION_MODES { 87 }; static constexpr float ERROR_CALC_DEFAULT { 1e30f }; /** - * @brief The minimum texel count for a block to use the one partition fast path. - * - * This setting skips 4x4 and 5x4 block sizes. + * @brief The minimum tuning setting threshold for the one partition fast path. */ -static constexpr unsigned int TUNE_MIN_TEXELS_MODE0_FASTPATH { 24 }; +static constexpr float TUNE_MIN_SEARCH_MODE0 { 0.85f }; /** * @brief The maximum number of candidate encodings tested for each encoding mode. @@ -137,7 +136,7 @@ static constexpr unsigned int TUNE_MAX_TRIAL_CANDIDATES { 8 }; * * This can be dynamically reduced by the compression quality preset. */ -static constexpr unsigned int TUNE_MAX_PARTITIONING_CANDIDATES { 32 }; +static constexpr unsigned int TUNE_MAX_PARTITIONING_CANDIDATES { 8 }; /** * @brief The maximum quant level using full angular endpoint search method. @@ -386,7 +385,7 @@ struct decimation_info * @brief The bilinear contribution of the N weights that are interpolated for each texel. * Value is between 0 and 1, stored transposed to improve vectorization. */ - alignas(ASTCENC_VECALIGN) float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS]; /** @brief The number of texels that each stored weight contributes to. */ uint8_t weight_texel_count[BLOCK_MAX_WEIGHTS]; @@ -401,7 +400,7 @@ struct decimation_info * @brief The bilinear contribution to the N texels that use each weight. * Value is between 0 and 1, stored transposed to improve vectorization. */ - alignas(ASTCENC_VECALIGN) float weights_texel_contribs_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS]; + ASTCENC_ALIGNAS float weights_texel_contribs_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS]; /** * @brief The bilinear contribution to the Nth texel that uses each weight. @@ -581,7 +580,7 @@ struct block_size_descriptor decimation_mode decimation_modes[WEIGHTS_MAX_DECIMATION_MODES]; /** @brief The active decimation tables, stored in low indices. */ - alignas(ASTCENC_VECALIGN) decimation_info decimation_tables[WEIGHTS_MAX_DECIMATION_MODES]; + ASTCENC_ALIGNAS decimation_info decimation_tables[WEIGHTS_MAX_DECIMATION_MODES]; /** @brief The packed block mode array index, or @c BLOCK_BAD_BLOCK_MODE if not active. */ uint16_t block_mode_packed_index[WEIGHTS_MAX_BLOCK_MODES]; @@ -741,16 +740,16 @@ struct block_size_descriptor struct image_block { /** @brief The input (compress) or output (decompress) data for the red color component. */ - alignas(ASTCENC_VECALIGN) float data_r[BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS float data_r[BLOCK_MAX_TEXELS]; /** @brief The input (compress) or output (decompress) data for the green color component. */ - alignas(ASTCENC_VECALIGN) float data_g[BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS float data_g[BLOCK_MAX_TEXELS]; /** @brief The input (compress) or output (decompress) data for the blue color component. */ - alignas(ASTCENC_VECALIGN) float data_b[BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS float data_b[BLOCK_MAX_TEXELS]; /** @brief The input (compress) or output (decompress) data for the alpha color component. */ - alignas(ASTCENC_VECALIGN) float data_a[BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS float data_a[BLOCK_MAX_TEXELS]; /** @brief The number of texels in the block. */ uint8_t texel_count; @@ -773,6 +772,9 @@ struct image_block /** @brief Is this grayscale block where R == G == B for all texels? */ bool grayscale; + /** @brief Is the eventual decode using decode_unorm8 rounding? */ + bool decode_unorm8; + /** @brief Set to 1 if a texel is using HDR RGB endpoints (decompression only). */ uint8_t rgb_lns[BLOCK_MAX_TEXELS]; @@ -899,10 +901,10 @@ struct endpoints_and_weights endpoints ep; /** @brief The ideal weight for each texel; may be undecimated or decimated. */ - alignas(ASTCENC_VECALIGN) float weights[BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS float weights[BLOCK_MAX_TEXELS]; /** @brief The ideal weight error scaling for each texel; may be undecimated or decimated. */ - alignas(ASTCENC_VECALIGN) float weight_error_scale[BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS float weight_error_scale[BLOCK_MAX_TEXELS]; }; /** @@ -932,7 +934,7 @@ struct encoding_choice_errors /** * @brief Preallocated working buffers, allocated per thread during context creation. */ -struct alignas(ASTCENC_VECALIGN) compression_working_buffers +struct ASTCENC_ALIGNAS compression_working_buffers { /** @brief Ideal endpoints and weights for plane 1. */ endpoints_and_weights ei1; @@ -948,7 +950,7 @@ struct alignas(ASTCENC_VECALIGN) compression_working_buffers * * For two planes, second plane starts at @c WEIGHTS_PLANE2_OFFSET offsets. */ - alignas(ASTCENC_VECALIGN) float dec_weights_ideal[WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS]; + ASTCENC_ALIGNAS float dec_weights_ideal[WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS]; /** * @brief Decimated quantized weight values in the unquantized 0-64 range. @@ -958,7 +960,7 @@ struct alignas(ASTCENC_VECALIGN) compression_working_buffers uint8_t dec_weights_uquant[WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS]; /** @brief Error of the best encoding combination for each block mode. */ - alignas(ASTCENC_VECALIGN) float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES]; + ASTCENC_ALIGNAS float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES]; /** @brief The best color quant for each block mode. */ uint8_t best_quant_levels[WEIGHTS_MAX_BLOCK_MODES]; @@ -1025,13 +1027,13 @@ struct dt_init_working_buffers struct quant_and_transfer_table { /** @brief The unscrambled unquantized value. */ - int8_t quant_to_unquant[32]; + uint8_t quant_to_unquant[32]; /** @brief The scrambling order: scrambled_quant = map[unscrambled_quant]. */ - int8_t scramble_map[32]; + uint8_t scramble_map[32]; /** @brief The unscrambling order: unscrambled_unquant = map[scrambled_quant]. */ - int8_t unscramble_and_unquant_map[32]; + uint8_t unscramble_and_unquant_map[32]; /** * @brief A table of previous-and-next weights, indexed by the current unquantized value. @@ -1060,7 +1062,7 @@ static constexpr uint8_t SYM_BTYPE_NONCONST { 3 }; * @brief A symbolic representation of a compressed block. * * The symbolic representation stores the unpacked content of a single - * @c physical_compressed_block, in a form which is much easier to access for + * physical compressed block, in a form which is much easier to access for * the rest of the compressor code. */ struct symbolic_compressed_block @@ -1123,18 +1125,6 @@ struct symbolic_compressed_block }; /** - * @brief A physical representation of a compressed block. - * - * The physical representation stores the raw bytes of the format in memory. - */ -struct physical_compressed_block -{ - /** @brief The ASTC encoded data for a single block. */ - uint8_t data[16]; -}; - - -/** * @brief Parameter structure for @c compute_pixel_region_variance(). * * This function takes a structure to avoid spilling arguments to the stack on every function @@ -1578,6 +1568,33 @@ unsigned int find_best_partition_candidates( ============================================================================ */ /** + * @brief Get a vector mask indicating lanes decompressing into a UNORM8 value. + * + * @param decode_mode The color profile for LDR_SRGB settings. + * @param blk The image block for output image bitness settings. + * + * @return The component mask vector. + */ +static inline vmask4 get_u8_component_mask( + astcenc_profile decode_mode, + const image_block& blk +) { + vmask4 u8_mask(false); + // Decode mode writing to a unorm8 output value + if (blk.decode_unorm8) + { + u8_mask = vmask4(true); + } + // SRGB writing to a unorm8 RGB value + else if (decode_mode == ASTCENC_PRF_LDR_SRGB) + { + u8_mask = vmask4(true, true, true, false); + } + + return u8_mask; +} + +/** * @brief Setup computation of regional averages in an image. * * This must be done by only a single thread per image, before any thread calls @@ -1830,7 +1847,7 @@ uint8_t pack_color_endpoints( * * Endpoints must be unscrambled and converted into the 0-255 range before calling this functions. * - * @param decode_mode The decode mode (LDR, HDR). + * @param decode_mode The decode mode (LDR, HDR, etc). * @param format The color endpoint mode used. * @param input The raw array of encoded input integers. The length of this array * depends on @c format; it can be safely assumed to be large enough. @@ -1849,6 +1866,34 @@ void unpack_color_endpoints( vint4& output1); /** + * @brief Unpack an LDR RGBA color that uses delta encoding. + * + * @param input0 The packed endpoint 0 color. + * @param input1 The packed endpoint 1 color deltas. + * @param[out] output0 The unpacked endpoint 0 color. + * @param[out] output1 The unpacked endpoint 1 color. + */ +void rgba_delta_unpack( + vint4 input0, + vint4 input1, + vint4& output0, + vint4& output1); + +/** + * @brief Unpack an LDR RGBA color that uses direct encoding. + * + * @param input0 The packed endpoint 0 color. + * @param input1 The packed endpoint 1 color. + * @param[out] output0 The unpacked endpoint 0 color. + * @param[out] output1 The unpacked endpoint 1 color. + */ +void rgba_unpack( + vint4 input0, + vint4 input1, + vint4& output0, + vint4& output1); + +/** * @brief Unpack a set of quantized and decimated weights. * * TODO: Can we skip this for non-decimated weights now that the @c scb is @@ -2007,7 +2052,7 @@ void compute_angular_endpoints_2planes( void compress_block( const astcenc_contexti& ctx, const image_block& blk, - physical_compressed_block& pcb, + uint8_t pcb[16], compression_working_buffers& tmpbuf); /** @@ -2100,12 +2145,12 @@ float compute_symbolic_block_difference_1plane_1partition( * * @param bsd The block size information. * @param scb The symbolic representation. - * @param[out] pcb The binary encoded data. + * @param[out] pcb The physical compressed block output. */ void symbolic_to_physical( const block_size_descriptor& bsd, const symbolic_compressed_block& scb, - physical_compressed_block& pcb); + uint8_t pcb[16]); /** * @brief Convert a binary physical encoding into a symbolic representation. @@ -2114,12 +2159,12 @@ void symbolic_to_physical( * flagged as an error block if the encoding is invalid. * * @param bsd The block size information. - * @param pcb The binary encoded data. + * @param pcb The physical compresesd block input. * @param[out] scb The output symbolic representation. */ void physical_to_symbolic( const block_size_descriptor& bsd, - const physical_compressed_block& pcb, + const uint8_t pcb[16], symbolic_compressed_block& scb); /* ============================================================================ @@ -2128,10 +2173,11 @@ Platform-specific functions. /** * @brief Allocate an aligned memory buffer. * - * Allocated memory must be freed by aligned_free; + * Allocated memory must be freed by aligned_free. * * @param size The desired buffer size. - * @param align The desired buffer alignment; must be 2^N. + * @param align The desired buffer alignment; must be 2^N, may be increased + * by the implementation to a minimum allowable alignment. * * @return The memory buffer pointer or nullptr on allocation failure. */ @@ -2141,10 +2187,14 @@ T* aligned_malloc(size_t size, size_t align) void* ptr; int error = 0; + // Don't allow this to under-align a type + size_t min_align = astc::max(alignof(T), sizeof(void*)); + size_t real_align = astc::max(min_align, align); + #if defined(_WIN32) - ptr = _aligned_malloc(size, align); + ptr = _aligned_malloc(size, real_align); #else - error = posix_memalign(&ptr, align, size); + error = posix_memalign(&ptr, real_align, size); #endif if (error || (!ptr)) @@ -2164,9 +2214,9 @@ template<typename T> void aligned_free(T* ptr) { #if defined(_WIN32) - _aligned_free(reinterpret_cast<void*>(ptr)); + _aligned_free(ptr); #else - free(reinterpret_cast<void*>(ptr)); + free(ptr); #endif } |