summary | refs | log | tree | commit | diff | stats
path: root/thirdparty/astcenc/astcenc_entry.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/astcenc/astcenc_entry.cpp')
-rw-r--r-- thirdparty/astcenc/astcenc_entry.cpp 109
1 files changed, 65 insertions, 44 deletions
diff --git a/thirdparty/astcenc/astcenc_entry.cpp b/thirdparty/astcenc/astcenc_entry.cpp
index e53762c26a..71efe9cec4 100644
--- a/thirdparty/astcenc/astcenc_entry.cpp
+++ b/thirdparty/astcenc/astcenc_entry.cpp
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
-// Copyright 2011-2023 Arm Limited
+// Copyright 2011-2024 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
@@ -55,6 +55,7 @@ struct astcenc_preset_config
float tune_2partition_early_out_limit_factor;
float tune_3partition_early_out_limit_factor;
float tune_2plane_early_out_limit_correlation;
+ float tune_search_mode0_enable;
};
/**
@@ -63,22 +64,22 @@ struct astcenc_preset_config
static const std::array<astcenc_preset_config, 6> preset_configs_high {{
{
ASTCENC_PRE_FASTEST,
- 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f
+ 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 0.0f
}, {
ASTCENC_PRE_FAST,
- 3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f
+ 3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f, 0.0f
}, {
ASTCENC_PRE_MEDIUM,
- 4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f
+ 4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f, 0.0f
}, {
ASTCENC_PRE_THOROUGH,
- 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f
+ 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f, 0.0f
}, {
ASTCENC_PRE_VERYTHOROUGH,
- 4, 256, 128, 64, 98, 4, 6, 20, 14, 8, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
+ 4, 256, 128, 64, 98, 4, 6, 8, 6, 4, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f
}, {
ASTCENC_PRE_EXHAUSTIVE,
- 4, 512, 512, 512, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
+ 4, 512, 512, 512, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f
}
}};
@@ -88,22 +89,22 @@ static const std::array<astcenc_preset_config, 6> preset_configs_high {{
static const std::array<astcenc_preset_config, 6> preset_configs_mid {{
{
ASTCENC_PRE_FASTEST,
- 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f
+ 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f
}, {
ASTCENC_PRE_FAST,
- 3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f
+ 3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f
}, {
ASTCENC_PRE_MEDIUM,
- 4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f
+ 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f, 1.0f
}, {
ASTCENC_PRE_THOROUGH,
- 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f
+ 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f, 0.0f
}, {
ASTCENC_PRE_VERYTHOROUGH,
- 4, 256, 128, 64, 98, 4, 6, 12, 8, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
+ 4, 256, 128, 64, 98, 4, 6, 8, 6, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f
}, {
ASTCENC_PRE_EXHAUSTIVE,
- 4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
+ 4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f
}
}};
@@ -113,22 +114,22 @@ static const std::array<astcenc_preset_config, 6> preset_configs_mid {{
static const std::array<astcenc_preset_config, 6> preset_configs_low {{
{
ASTCENC_PRE_FASTEST,
- 2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f
+ 2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f
}, {
ASTCENC_PRE_FAST,
- 2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f
+ 2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f
}, {
ASTCENC_PRE_MEDIUM,
- 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f
+ 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f, 1.0f
}, {
ASTCENC_PRE_THOROUGH,
- 4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f
+ 4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f, 1.0f
}, {
ASTCENC_PRE_VERYTHOROUGH,
- 4, 256, 128, 64, 98, 4, 6, 9, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
+ 4, 256, 128, 64, 98, 4, 6, 8, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 1.0f
}, {
ASTCENC_PRE_EXHAUSTIVE,
- 4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
+ 4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 1.0f
}
}};
@@ -216,11 +217,13 @@ static astcenc_error validate_block_size(
/**
* @brief Validate flags.
*
- * @param flags The flags to check.
+ * @param profile The profile to check.
+ * @param flags The flags to check.
*
* @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
*/
static astcenc_error validate_flags(
+ astcenc_profile profile,
unsigned int flags
) {
// Flags field must not contain any unknown flag bits
@@ -238,6 +241,14 @@ static astcenc_error validate_flags(
return ASTCENC_ERR_BAD_FLAGS;
}
+ // Decode_unorm8 must only be used with an LDR profile
+ bool is_unorm8 = flags & ASTCENC_FLG_USE_DECODE_UNORM8;
+ bool is_hdr = (profile == ASTCENC_PRF_HDR) || (profile == ASTCENC_PRF_HDR_RGB_LDR_A);
+ if (is_unorm8 && is_hdr)
+ {
+ return ASTCENC_ERR_BAD_DECODE_MODE;
+ }
+
return ASTCENC_SUCCESS;
}
@@ -363,7 +374,7 @@ static astcenc_error validate_config(
return status;
}
- status = validate_flags(config.flags);
+ status = validate_flags(config.profile, config.flags);
if (status != ASTCENC_SUCCESS)
{
return status;
@@ -504,10 +515,10 @@ astcenc_error astcenc_config_init(
config.tune_4partition_index_limit = (*preset_configs)[start].tune_4partition_index_limit;
config.tune_block_mode_limit = (*preset_configs)[start].tune_block_mode_limit;
config.tune_refinement_limit = (*preset_configs)[start].tune_refinement_limit;
- config.tune_candidate_limit = astc::min((*preset_configs)[start].tune_candidate_limit, TUNE_MAX_TRIAL_CANDIDATES);
- config.tune_2partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_2partitioning_candidate_limit, TUNE_MAX_PARTITIONING_CANDIDATES);
- config.tune_3partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_3partitioning_candidate_limit, TUNE_MAX_PARTITIONING_CANDIDATES);
- config.tune_4partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_4partitioning_candidate_limit, TUNE_MAX_PARTITIONING_CANDIDATES);
+ config.tune_candidate_limit = (*preset_configs)[start].tune_candidate_limit;
+ config.tune_2partitioning_candidate_limit = (*preset_configs)[start].tune_2partitioning_candidate_limit;
+ config.tune_3partitioning_candidate_limit = (*preset_configs)[start].tune_3partitioning_candidate_limit;
+ config.tune_4partitioning_candidate_limit = (*preset_configs)[start].tune_4partitioning_candidate_limit;
config.tune_db_limit = astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels,
(*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels);
@@ -516,6 +527,7 @@ astcenc_error astcenc_config_init(
config.tune_2partition_early_out_limit_factor = (*preset_configs)[start].tune_2partition_early_out_limit_factor;
config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor;
config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation;
+ config.tune_search_mode0_enable = (*preset_configs)[start].tune_search_mode0_enable;
}
// Start and end node are not the same - so interpolate between them
else
@@ -542,14 +554,10 @@ astcenc_error astcenc_config_init(
config.tune_4partition_index_limit = LERPI(tune_4partition_index_limit);
config.tune_block_mode_limit = LERPI(tune_block_mode_limit);
config.tune_refinement_limit = LERPI(tune_refinement_limit);
- config.tune_candidate_limit = astc::min(LERPUI(tune_candidate_limit),
- TUNE_MAX_TRIAL_CANDIDATES);
- config.tune_2partitioning_candidate_limit = astc::min(LERPUI(tune_2partitioning_candidate_limit),
- BLOCK_MAX_PARTITIONINGS);
- config.tune_3partitioning_candidate_limit = astc::min(LERPUI(tune_3partitioning_candidate_limit),
- BLOCK_MAX_PARTITIONINGS);
- config.tune_4partitioning_candidate_limit = astc::min(LERPUI(tune_4partitioning_candidate_limit),
- BLOCK_MAX_PARTITIONINGS);
+ config.tune_candidate_limit = LERPUI(tune_candidate_limit);
+ config.tune_2partitioning_candidate_limit = LERPUI(tune_2partitioning_candidate_limit);
+ config.tune_3partitioning_candidate_limit = LERPUI(tune_3partitioning_candidate_limit);
+ config.tune_4partitioning_candidate_limit = LERPUI(tune_4partitioning_candidate_limit);
config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels,
LERP(tune_db_limit_b_base) - 19 * ltexels);
@@ -558,6 +566,7 @@ astcenc_error astcenc_config_init(
config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor);
config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor);
config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation);
+ config.tune_search_mode0_enable = LERP(tune_search_mode0_enable);
#undef LERP
#undef LERPI
#undef LERPUI
@@ -585,13 +594,14 @@ astcenc_error astcenc_config_init(
case ASTCENC_PRF_HDR_RGB_LDR_A:
case ASTCENC_PRF_HDR:
config.tune_db_limit = 999.0f;
+ config.tune_search_mode0_enable = 0.0f;
break;
default:
return ASTCENC_ERR_BAD_PROFILE;
}
// Flags field must not contain any unknown flag bits
- status = validate_flags(flags);
+ status = validate_flags(profile, flags);
if (status != ASTCENC_SUCCESS)
{
return status;
@@ -689,6 +699,12 @@ astcenc_error astcenc_context_alloc(
}
ctx->bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN);
+ if (!ctx->bsd)
+ {
+ delete ctxo;
+ return ASTCENC_ERR_OUT_OF_MEM;
+ }
+
bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY);
init_block_size_descriptor(config.block_x, config.block_y, config.block_z,
can_omit_modes,
@@ -698,7 +714,7 @@ astcenc_error astcenc_context_alloc(
#if !defined(ASTCENC_DECOMPRESS_ONLY)
// Do setup only needed by compression
- if (!(status & ASTCENC_FLG_DECOMPRESS_ONLY))
+ if (!(ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY))
{
// Turn a dB limit into a per-texel error for faster use later
if ((ctx->config.profile == ASTCENC_PRF_LDR) || (ctx->config.profile == ASTCENC_PRF_LDR_SRGB))
@@ -712,7 +728,7 @@ astcenc_error astcenc_context_alloc(
size_t worksize = sizeof(compression_working_buffers) * thread_count;
ctx->working_buffers = aligned_malloc<compression_working_buffers>(worksize, ASTCENC_VECALIGN);
- static_assert((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == 0,
+ static_assert((ASTCENC_VECALIGN == 0) || ((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == 0),
"compression_working_buffers size must be multiple of vector alignment");
if (!ctx->working_buffers)
{
@@ -802,6 +818,8 @@ static void compress_image(
int row_blocks = xblocks;
int plane_blocks = xblocks * yblocks;
+ blk.decode_unorm8 = ctxo.context.config.flags & ASTCENC_FLG_USE_DECODE_UNORM8;
+
// Populate the block channel weights
blk.channel_weight = vfloat4(ctx.config.cw_r_weight,
ctx.config.cw_g_weight,
@@ -812,7 +830,7 @@ static void compress_image(
auto& temp_buffers = ctx.working_buffers[thread_index];
// Only the first thread actually runs the initializer
- ctxo.manage_compress.init(block_count);
+ ctxo.manage_compress.init(block_count, ctx.config.progress_callback);
// Determine if we can use an optimized load function
bool needs_swz = (swizzle.r != ASTCENC_SWZ_R) || (swizzle.g != ASTCENC_SWZ_G) ||
@@ -914,8 +932,7 @@ static void compress_image(
int offset = ((z * yblocks + y) * xblocks + x) * 16;
uint8_t *bp = buffer + offset;
- physical_compressed_block* pcb = reinterpret_cast<physical_compressed_block*>(bp);
- compress_block(ctx, blk, *pcb, temp_buffers);
+ compress_block(ctx, blk, bp, temp_buffers);
}
ctxo.manage_compress.complete_task_assignment(count);
@@ -1138,6 +1155,7 @@ astcenc_error astcenc_decompress_image(
unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x;
unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y;
unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z;
+ unsigned int block_count = zblocks * yblocks * xblocks;
int row_blocks = xblocks;
int plane_blocks = xblocks * yblocks;
@@ -1152,6 +1170,9 @@ astcenc_error astcenc_decompress_image(
image_block blk;
blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
+ // Decode mode inferred from the output data type
+ blk.decode_unorm8 = image_out.data_type == ASTCENC_TYPE_U8;
+
// If context thread count is one then implicitly reset
if (ctx->thread_count == 1)
{
@@ -1159,7 +1180,7 @@ astcenc_error astcenc_decompress_image(
}
// Only the first thread actually runs the initializer
- ctxo->manage_decompress.init(zblocks * yblocks * xblocks);
+ ctxo->manage_decompress.init(block_count, nullptr);
// All threads run this processing loop until there is no work remaining
while (true)
@@ -1182,10 +1203,9 @@ astcenc_error astcenc_decompress_image(
unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16;
const uint8_t* bp = data + offset;
- const physical_compressed_block& pcb = *reinterpret_cast<const physical_compressed_block*>(bp);
symbolic_compressed_block scb;
- physical_to_symbolic(*ctx->bsd, pcb, scb);
+ physical_to_symbolic(*ctx->bsd, bp, scb);
decompress_symbolic_block(ctx->config.profile, *ctx->bsd,
x * block_x, y * block_y, z * block_z,
@@ -1224,9 +1244,8 @@ astcenc_error astcenc_get_block_info(
astcenc_contexti* ctx = &ctxo->context;
// Decode the compressed data into a symbolic form
- const physical_compressed_block&pcb = *reinterpret_cast<const physical_compressed_block*>(data);
symbolic_compressed_block scb;
- physical_to_symbolic(*ctx->bsd, pcb, scb);
+ physical_to_symbolic(*ctx->bsd, data, scb);
// Fetch the appropriate partition and decimation tables
block_size_descriptor& bsd = *ctx->bsd;
@@ -1359,6 +1378,8 @@ const char* astcenc_get_error_string(
return "ASTCENC_ERR_BAD_CONTEXT";
case ASTCENC_ERR_NOT_IMPLEMENTED:
return "ASTCENC_ERR_NOT_IMPLEMENTED";
+ case ASTCENC_ERR_BAD_DECODE_MODE:
+ return "ASTCENC_ERR_BAD_DECODE_MODE";
#if defined(ASTCENC_DIAGNOSTICS)
case ASTCENC_ERR_DTRACE_FAILURE:
return "ASTCENC_ERR_DTRACE_FAILURE";