diff options
Diffstat (limited to 'thirdparty/astcenc/astcenc_color_quantize.cpp')
-rw-r--r-- | thirdparty/astcenc/astcenc_color_quantize.cpp | 668 |
1 files changed, 379 insertions, 289 deletions
diff --git a/thirdparty/astcenc/astcenc_color_quantize.cpp b/thirdparty/astcenc/astcenc_color_quantize.cpp index b0fec7a74c..df17cac3c7 100644 --- a/thirdparty/astcenc/astcenc_color_quantize.cpp +++ b/thirdparty/astcenc/astcenc_color_quantize.cpp @@ -41,6 +41,27 @@ #include "astcenc_internal.h" /** + * @brief Compute the error of an LDR RGB or RGBA encoding. + * + * @param uquant0 The original endpoint 0 color. + * @param uquant1 The original endpoint 1 color. + * @param quant0 The unpacked quantized endpoint 0 color. + * @param quant1 The unpacked quantized endpoint 1 color. + * + * @return The MSE of the encoding. + */ +static float get_rgba_encoding_error( + vfloat4 uquant0, + vfloat4 uquant1, + vint4 quant0, + vint4 quant1 +) { + vfloat4 error0 = uquant0 - int_to_float(quant0); + vfloat4 error1 = uquant1 - int_to_float(quant1); + return hadd_s(error0 * error0 + error1 * error1); +} + +/** * @brief Determine the quantized value given a quantization level. * * @param quant_level The quantization level to use. @@ -57,6 +78,26 @@ static inline uint8_t quant_color( } /** + * @brief Determine the quantized value given a quantization level. + * + * @param quant_level The quantization level to use. + * @param value The value to convert. This must be in the 0-255 range. + * + * @return The unpacked quantized value, returned in 0-255 range. + */ +static inline vint4 quant_color3( + quant_method quant_level, + vint4 value +) { + vint4 index = value * 2 + 1; + return vint4( + color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<0>()], + color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<1>()], + color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<2>()], + 0); +} + +/** * @brief Determine the quantized value given a quantization level and residual. * * @param quant_level The quantization level to use. @@ -84,6 +125,35 @@ static inline uint8_t quant_color( } /** + * @brief Determine the quantized value given a quantization level and residual. + * + * @param quant_level The quantization level to use. + * @param value The value to convert. This must be in the 0-255 range. + * @param valuef The original value before rounding, used to compute a residual. + * + * @return The unpacked quantized value, returned in 0-255 range. + */ +static inline vint4 quant_color3( + quant_method quant_level, + vint4 value, + vfloat4 valuef +) { + vint4 index = value * 2; + + // Compute the residual to determine if we should round down or up ties. + // Test should be residual >= 0, but empirical testing shows small bias helps. + vfloat4 residual = valuef - int_to_float(value); + vmask4 mask = residual >= vfloat4(-0.1f); + index = select(index, index + 1, mask); + + return vint4( + color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<0>()], + color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<1>()], + color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<2>()], + 0); +} + +/** * @brief Quantize an LDR RGB color. * * Since this is a fall-back encoding, we cannot actually fail but must produce a sensible result. @@ -92,47 +162,33 @@ static inline uint8_t quant_color( * * @param color0 The input unquantized color0 endpoint. * @param color1 The input unquantized color1 endpoint. - * @param[out] output The output endpoints, returned as (r0, r1, g0, g1, b0, b1). + * @param[out] color0_out The output quantized color0 endpoint. + * @param[out] color1_out The output quantized color1 endpoint. * @param quant_level The quantization level to use. */ static void quantize_rgb( vfloat4 color0, vfloat4 color1, - uint8_t output[6], + vint4& color0_out, + vint4& color1_out, quant_method quant_level ) { - float scale = 1.0f / 257.0f; - - float r0 = astc::clamp255f(color0.lane<0>() * scale); - float g0 = astc::clamp255f(color0.lane<1>() * scale); - float b0 = astc::clamp255f(color0.lane<2>() * scale); + vint4 color0i, color1i; + vfloat4 nudge(0.2f); - float r1 = astc::clamp255f(color1.lane<0>() * scale); - float g1 = astc::clamp255f(color1.lane<1>() * scale); - float b1 = astc::clamp255f(color1.lane<2>() * scale); - - int ri0, gi0, bi0, ri1, gi1, bi1; - float rgb0_addon = 0.0f; - float rgb1_addon = 0.0f; do { - ri0 = quant_color(quant_level, astc::max(astc::flt2int_rtn(r0 + rgb0_addon), 0), r0 + rgb0_addon); - gi0 = quant_color(quant_level, astc::max(astc::flt2int_rtn(g0 + rgb0_addon), 0), g0 + rgb0_addon); - bi0 = quant_color(quant_level, astc::max(astc::flt2int_rtn(b0 + rgb0_addon), 0), b0 + rgb0_addon); - ri1 = quant_color(quant_level, astc::min(astc::flt2int_rtn(r1 + rgb1_addon), 255), r1 + rgb1_addon); - gi1 = quant_color(quant_level, astc::min(astc::flt2int_rtn(g1 + rgb1_addon), 255), g1 + rgb1_addon); - bi1 = quant_color(quant_level, astc::min(astc::flt2int_rtn(b1 + rgb1_addon), 255), b1 + rgb1_addon); - - rgb0_addon -= 0.2f; - rgb1_addon += 0.2f; - } while (ri0 + gi0 + bi0 > ri1 + gi1 + bi1); - - output[0] = static_cast<uint8_t>(ri0); - output[1] = static_cast<uint8_t>(ri1); - output[2] = static_cast<uint8_t>(gi0); - output[3] = static_cast<uint8_t>(gi1); - output[4] = static_cast<uint8_t>(bi0); - output[5] = static_cast<uint8_t>(bi1); + vint4 color0q = max(float_to_int_rtn(color0), vint4(0)); + color0i = quant_color3(quant_level, color0q, color0); + color0 = color0 - nudge; + + vint4 color1q = min(float_to_int_rtn(color1), vint4(255)); + color1i = quant_color3(quant_level, color1q, color1); + color1 = color1 + nudge; + } while (hadd_rgb_s(color0i) > hadd_rgb_s(color1i)); + + color0_out = color0i; + color1_out = color1i; } /** @@ -145,24 +201,24 @@ static void quantize_rgb( * * @param color0 The input unquantized color0 endpoint. * @param color1 The input unquantized color1 endpoint. - * @param[out] output The output endpoints, returned as (r0, r1, g0, g1, b0, b1, a0, a1). + * @param[out] color0_out The output quantized color0 endpoint. + * @param[out] color1_out The output quantized color1 endpoint. * @param quant_level The quantization level to use. */ static void quantize_rgba( vfloat4 color0, vfloat4 color1, - uint8_t output[8], + vint4& color0_out, + vint4& color1_out, quant_method quant_level ) { - float scale = 1.0f / 257.0f; - - float a0 = astc::clamp255f(color0.lane<3>() * scale); - float a1 = astc::clamp255f(color1.lane<3>() * scale); + quantize_rgb(color0, color1, color0_out, color1_out, quant_level); - output[6] = quant_color(quant_level, astc::flt2int_rtn(a0), a0); - output[7] = quant_color(quant_level, astc::flt2int_rtn(a1), a1); + float a0 = color0.lane<3>(); + float a1 = color1.lane<3>(); - quantize_rgb(color0, color1, output, quant_level); + color0_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a0), a0)); + color1_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a1), a1)); } /** @@ -172,7 +228,8 @@ static void quantize_rgba( * * @param color0 The input unquantized color0 endpoint. * @param color1 The input unquantized color1 endpoint. - * @param[out] output The output endpoints, returned as (r1, r0, g1, g0, b1, b0). + * @param[out] color0_out The output quantized color0 endpoint. + * @param[out] color1_out The output quantized color1 endpoint. * @param quant_level The quantization level to use. * * @return Returns @c false on failure, @c true on success. @@ -180,54 +237,35 @@ static void quantize_rgba( static bool try_quantize_rgb_blue_contract( vfloat4 color0, vfloat4 color1, - uint8_t output[6], + vint4& color0_out, + vint4& color1_out, quant_method quant_level ) { - float scale = 1.0f / 257.0f; - - float r0 = color0.lane<0>() * scale; - float g0 = color0.lane<1>() * scale; - float b0 = color0.lane<2>() * scale; - - float r1 = color1.lane<0>() * scale; - float g1 = color1.lane<1>() * scale; - float b1 = color1.lane<2>() * scale; - - // Apply inverse blue-contraction. This can produce an overflow; which means BC cannot be used. - r0 += (r0 - b0); - g0 += (g0 - b0); - r1 += (r1 - b1); - g1 += (g1 - b1); - - if (r0 < 0.0f || r0 > 255.0f || g0 < 0.0f || g0 > 255.0f || b0 < 0.0f || b0 > 255.0f || - r1 < 0.0f || r1 > 255.0f || g1 < 0.0f || g1 > 255.0f || b1 < 0.0f || b1 > 255.0f) + // Apply inverse blue-contraction + color0 += color0 - color0.swz<2, 2, 2, 3>(); + color1 += color1 - color1.swz<2, 2, 2, 3>(); + + // If anything overflows BC cannot be used + vmask4 color0_error = (color0 < vfloat4(0.0f)) | (color0 > vfloat4(255.0f)); + vmask4 color1_error = (color1 < vfloat4(0.0f)) | (color1 > vfloat4(255.0f)); + if (any(color0_error | color1_error)) { return false; } - // Quantize the inverse-blue-contracted color - int ri0 = quant_color(quant_level, astc::flt2int_rtn(r0), r0); - int gi0 = quant_color(quant_level, astc::flt2int_rtn(g0), g0); - int bi0 = quant_color(quant_level, astc::flt2int_rtn(b0), b0); - - int ri1 = quant_color(quant_level, astc::flt2int_rtn(r1), r1); - int gi1 = quant_color(quant_level, astc::flt2int_rtn(g1), g1); - int bi1 = quant_color(quant_level, astc::flt2int_rtn(b1), b1); + // Quantize the inverse blue-contracted color + vint4 color0i = quant_color3(quant_level, float_to_int_rtn(color0), color0); + vint4 color1i = quant_color3(quant_level, float_to_int_rtn(color1), color1); - // If color #1 is not larger than color #0 then blue-contraction cannot be used. Note that - // blue-contraction and quantization change this order, which is why we must test afterwards. - if (ri1 + gi1 + bi1 <= ri0 + gi0 + bi0) + // If color #1 is not larger than color #0 then blue-contraction cannot be used + // We must test afterwards because quantization can change the order + if (hadd_rgb_s(color1i) <= hadd_rgb_s(color0i)) { return false; } - output[0] = static_cast<uint8_t>(ri1); - output[1] = static_cast<uint8_t>(ri0); - output[2] = static_cast<uint8_t>(gi1); - output[3] = static_cast<uint8_t>(gi0); - output[4] = static_cast<uint8_t>(bi1); - output[5] = static_cast<uint8_t>(bi0); - + color0_out = color1i; + color1_out = color0i; return true; } @@ -238,7 +276,8 @@ static bool try_quantize_rgb_blue_contract( * * @param color0 The input unquantized color0 endpoint. * @param color1 The input unquantized color1 endpoint. - * @param[out] output The output endpoints, returned as (r1, r0, g1, g0, b1, b0, a1, a0). + * @param[out] color0_out The output quantized color0 endpoint. + * @param[out] color1_out The output quantized color1 endpoint. * @param quant_level The quantization level to use. * * @return Returns @c false on failure, @c true on success. @@ -246,18 +285,22 @@ static bool try_quantize_rgb_blue_contract( static bool try_quantize_rgba_blue_contract( vfloat4 color0, vfloat4 color1, - uint8_t output[8], + vint4& color0_out, + vint4& color1_out, quant_method quant_level ) { - float scale = 1.0f / 257.0f; + if (try_quantize_rgb_blue_contract(color0, color1, color0_out, color1_out, quant_level)) + { + float a0 = color0.lane<3>(); + float a1 = color1.lane<3>(); - float a0 = astc::clamp255f(color0.lane<3>() * scale); - float a1 = astc::clamp255f(color1.lane<3>() * scale); + color0_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a1), a1)); + color1_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a0), a0)); - output[6] = quant_color(quant_level, astc::flt2int_rtn(a1), a1); - output[7] = quant_color(quant_level, astc::flt2int_rtn(a0), a0); + return true; + } - return try_quantize_rgb_blue_contract(color0, color1, output, quant_level); + return false; } /** @@ -269,7 +312,8 @@ static bool try_quantize_rgba_blue_contract( * * @param color0 The input unquantized color0 endpoint. * @param color1 The input unquantized color1 endpoint. - * @param[out] output The output endpoints, returned as (r0, r1, g0, g1, b0, b1). + * @param[out] color0_out The output quantized color0 endpoint. + * @param[out] color1_out The output quantized color1 endpoint. * @param quant_level The quantization level to use. * * @return Returns @c false on failure, @c true on success. @@ -277,85 +321,54 @@ static bool try_quantize_rgba_blue_contract( static bool try_quantize_rgb_delta( vfloat4 color0, vfloat4 color1, - uint8_t output[6], + vint4& color0_out, + vint4& color1_out, quant_method quant_level ) { - float scale = 1.0f / 257.0f; - - float r0 = astc::clamp255f(color0.lane<0>() * scale); - float g0 = astc::clamp255f(color0.lane<1>() * scale); - float b0 = astc::clamp255f(color0.lane<2>() * scale); - - float r1 = astc::clamp255f(color1.lane<0>() * scale); - float g1 = astc::clamp255f(color1.lane<1>() * scale); - float b1 = astc::clamp255f(color1.lane<2>() * scale); - - // Transform r0 to unorm9 - int r0a = astc::flt2int_rtn(r0); - int g0a = astc::flt2int_rtn(g0); - int b0a = astc::flt2int_rtn(b0); - - r0a <<= 1; - g0a <<= 1; - b0a <<= 1; + // Transform color0 to unorm9 + vint4 color0a = float_to_int_rtn(color0); + color0.set_lane<3>(0.0f); + color0a = lsl<1>(color0a); // Mask off the top bit - int r0b = r0a & 0xFF; - int g0b = g0a & 0xFF; - int b0b = b0a & 0xFF; + vint4 color0b = color0a & 0xFF; // Quantize then unquantize in order to get a value that we take differences against - int r0be = quant_color(quant_level, r0b); - int g0be = quant_color(quant_level, g0b); - int b0be = quant_color(quant_level, b0b); - - r0b = r0be | (r0a & 0x100); - g0b = g0be | (g0a & 0x100); - b0b = b0be | (b0a & 0x100); + vint4 color0be = quant_color3(quant_level, color0b); + color0b = color0be | (color0a & 0x100); // Get hold of the second value - int r1d = astc::flt2int_rtn(r1); - int g1d = astc::flt2int_rtn(g1); - int b1d = astc::flt2int_rtn(b1); - - r1d <<= 1; - g1d <<= 1; - b1d <<= 1; + vint4 color1d = float_to_int_rtn(color1); + color1d = lsl<1>(color1d); // ... and take differences - r1d -= r0b; - g1d -= g0b; - b1d -= b0b; + color1d = color1d - color0b; + color1d.set_lane<3>(0); // Check if the difference is too large to be encodable - if (r1d > 63 || g1d > 63 || b1d > 63 || r1d < -64 || g1d < -64 || b1d < -64) + if (any((color1d > vint4(63)) | (color1d < vint4(-64)))) { return false; } // Insert top bit of the base into the offset - r1d &= 0x7F; - g1d &= 0x7F; - b1d &= 0x7F; - - r1d |= (r0b & 0x100) >> 1; - g1d |= (g0b & 0x100) >> 1; - b1d |= (b0b & 0x100) >> 1; + color1d = color1d & 0x7F; + color1d = color1d | lsr<1>(color0b & 0x100); // Then quantize and unquantize; if this causes either top two bits to flip, then encoding fails // since we have then corrupted either the top bit of the base or the sign bit of the offset - int r1de = quant_color(quant_level, r1d); - int g1de = quant_color(quant_level, g1d); - int b1de = quant_color(quant_level, b1d); + vint4 color1de = quant_color3(quant_level, color1d); - if (((r1d ^ r1de) | (g1d ^ g1de) | (b1d ^ b1de)) & 0xC0) + vint4 color_flips = (color1d ^ color1de) & 0xC0; + color_flips.set_lane<3>(0); + if (any(color_flips != vint4::zero())) { return false; } // If the sum of offsets triggers blue-contraction then encoding fails - vint4 ep0(r0be, g0be, b0be, 0); - vint4 ep1(r1de, g1de, b1de, 0); + vint4 ep0 = color0be; + vint4 ep1 = color1de; bit_transfer_signed(ep1, ep0); if (hadd_rgb_s(ep1) < 0) { @@ -369,111 +382,90 @@ static bool try_quantize_rgb_delta( return false; } - output[0] = static_cast<uint8_t>(r0be); - output[1] = static_cast<uint8_t>(r1de); - output[2] = static_cast<uint8_t>(g0be); - output[3] = static_cast<uint8_t>(g1de); - output[4] = static_cast<uint8_t>(b0be); - output[5] = static_cast<uint8_t>(b1de); - + color0_out = color0be; + color1_out = color1de; return true; } +/** + * @brief Try to quantize an LDR RGB color using delta encoding and blue-contraction. + * + * Blue-contraction is only usable if encoded color 1 RGB is larger than color 0 RGB. + * + * @param color0 The input unquantized color0 endpoint. + * @param color1 The input unquantized color1 endpoint. + * @param[out] color0_out The output quantized color0 endpoint. + * @param[out] color1_out The output quantized color1 endpoint. + * @param quant_level The quantization level to use. + * + * @return Returns @c false on failure, @c true on success. + */ static bool try_quantize_rgb_delta_blue_contract( vfloat4 color0, vfloat4 color1, - uint8_t output[6], + vint4& color0_out, + vint4& color1_out, quant_method quant_level ) { // Note: Switch around endpoint colors already at start - float scale = 1.0f / 257.0f; + std::swap(color0, color1); - float r1 = color0.lane<0>() * scale; - float g1 = color0.lane<1>() * scale; - float b1 = color0.lane<2>() * scale; + // Apply inverse blue-contraction + color0 += color0 - color0.swz<2, 2, 2, 3>(); + color1 += color1 - color1.swz<2, 2, 2, 3>(); - float r0 = color1.lane<0>() * scale; - float g0 = color1.lane<1>() * scale; - float b0 = color1.lane<2>() * scale; - - // Apply inverse blue-contraction. This can produce an overflow; which means BC cannot be used. - r0 += (r0 - b0); - g0 += (g0 - b0); - r1 += (r1 - b1); - g1 += (g1 - b1); - - if (r0 < 0.0f || r0 > 255.0f || g0 < 0.0f || g0 > 255.0f || b0 < 0.0f || b0 > 255.0f || - r1 < 0.0f || r1 > 255.0f || g1 < 0.0f || g1 > 255.0f || b1 < 0.0f || b1 > 255.0f) + // If anything overflows BC cannot be used + vmask4 color0_error = (color0 < vfloat4(0.0f)) | (color0 > vfloat4(255.0f)); + vmask4 color1_error = (color1 < vfloat4(0.0f)) | (color1 > vfloat4(255.0f)); + if (any(color0_error | color1_error)) { return false; } - // Transform r0 to unorm9 - int r0a = astc::flt2int_rtn(r0); - int g0a = astc::flt2int_rtn(g0); - int b0a = astc::flt2int_rtn(b0); - r0a <<= 1; - g0a <<= 1; - b0a <<= 1; + // Transform color0 to unorm9 + vint4 color0a = float_to_int_rtn(color0); + color0.set_lane<3>(0.0f); + color0a = lsl<1>(color0a); // Mask off the top bit - int r0b = r0a & 0xFF; - int g0b = g0a & 0xFF; - int b0b = b0a & 0xFF; - - // Quantize, then unquantize in order to get a value that we take differences against. - int r0be = quant_color(quant_level, r0b); - int g0be = quant_color(quant_level, g0b); - int b0be = quant_color(quant_level, b0b); + vint4 color0b = color0a & 0xFF; - r0b = r0be | (r0a & 0x100); - g0b = g0be | (g0a & 0x100); - b0b = b0be | (b0a & 0x100); + // Quantize then unquantize in order to get a value that we take differences against + vint4 color0be = quant_color3(quant_level, color0b); + color0b = color0be | (color0a & 0x100); // Get hold of the second value - int r1d = astc::flt2int_rtn(r1); - int g1d = astc::flt2int_rtn(g1); - int b1d = astc::flt2int_rtn(b1); - - r1d <<= 1; - g1d <<= 1; - b1d <<= 1; + vint4 color1d = float_to_int_rtn(color1); + color1d = lsl<1>(color1d); - // .. and take differences! - r1d -= r0b; - g1d -= g0b; - b1d -= b0b; + // ... and take differences + color1d = color1d - color0b; + color1d.set_lane<3>(0); // Check if the difference is too large to be encodable - if (r1d > 63 || g1d > 63 || b1d > 63 || r1d < -64 || g1d < -64 || b1d < -64) + if (any((color1d > vint4(63)) | (color1d < vint4(-64)))) { return false; } // Insert top bit of the base into the offset - r1d &= 0x7F; - g1d &= 0x7F; - b1d &= 0x7F; - - r1d |= (r0b & 0x100) >> 1; - g1d |= (g0b & 0x100) >> 1; - b1d |= (b0b & 0x100) >> 1; - - // Then quantize and unquantize; if this causes any of the top two bits to flip, - // then encoding fails, since we have then corrupted either the top bit of the base - // or the sign bit of the offset. - int r1de = quant_color(quant_level, r1d); - int g1de = quant_color(quant_level, g1d); - int b1de = quant_color(quant_level, b1d); - - if (((r1d ^ r1de) | (g1d ^ g1de) | (b1d ^ b1de)) & 0xC0) + color1d = color1d & 0x7F; + color1d = color1d | lsr<1>(color0b & 0x100); + + // Then quantize and unquantize; if this causes either top two bits to flip, then encoding fails + // since we have then corrupted either the top bit of the base or the sign bit of the offset + vint4 color1de = quant_color3(quant_level, color1d); + + vint4 color_flips = (color1d ^ color1de) & 0xC0; + color_flips.set_lane<3>(0); + if (any(color_flips != vint4::zero())) { return false; } // If the sum of offsets does not trigger blue-contraction then encoding fails - vint4 ep0(r0be, g0be, b0be, 0); - vint4 ep1(r1de, g1de, b1de, 0); + vint4 ep0 = color0be; + vint4 ep1 = color1de; bit_transfer_signed(ep1, ep0); if (hadd_rgb_s(ep1) >= 0) { @@ -487,13 +479,8 @@ static bool try_quantize_rgb_delta_blue_contract( return false; } - output[0] = static_cast<uint8_t>(r0be); - output[1] = static_cast<uint8_t>(r1de); - output[2] = static_cast<uint8_t>(g0be); - output[3] = static_cast<uint8_t>(g1de); - output[4] = static_cast<uint8_t>(b0be); - output[5] = static_cast<uint8_t>(b1de); - + color0_out = color0be; + color1_out = color1de; return true; } @@ -508,7 +495,8 @@ static bool try_quantize_rgb_delta_blue_contract( * * @param color0 The input unquantized color0 endpoint. * @param color1 The input unquantized color1 endpoint. - * @param[out] output The output endpoints, returned as (x, x, x, x, x, x, a0, a1). + * @param[out] color0_out The output quantized color0 endpoint; must preserve lane 0/1/2. + * @param[out] color1_out The output quantized color1 endpoint; must preserve lane 0/1/2. * @param quant_level The quantization level to use. * * @return Returns @c false on failure, @c true on success. @@ -516,13 +504,12 @@ static bool try_quantize_rgb_delta_blue_contract( static bool try_quantize_alpha_delta( vfloat4 color0, vfloat4 color1, - uint8_t output[8], + vint4& color0_out, + vint4& color1_out, quant_method quant_level ) { - float scale = 1.0f / 257.0f; - - float a0 = astc::clamp255f(color0.lane<3>() * scale); - float a1 = astc::clamp255f(color1.lane<3>() * scale); + float a0 = color0.lane<3>(); + float a1 = color1.lane<3>(); int a0a = astc::flt2int_rtn(a0); a0a <<= 1; @@ -561,8 +548,8 @@ static bool try_quantize_alpha_delta( return false; } - output[6] = static_cast<uint8_t>(a0be); - output[7] = static_cast<uint8_t>(a1de); + color0_out.set_lane<3>(a0be); + color1_out.set_lane<3>(a1de); return true; } @@ -589,13 +576,11 @@ static bool try_quantize_luminance_alpha_delta( uint8_t output[4], quant_method quant_level ) { - float scale = 1.0f / 257.0f; - - float l0 = astc::clamp255f(hadd_rgb_s(color0) * ((1.0f / 3.0f) * scale)); - float l1 = astc::clamp255f(hadd_rgb_s(color1) * ((1.0f / 3.0f) * scale)); + float l0 = hadd_rgb_s(color0) * (1.0f / 3.0f); + float l1 = hadd_rgb_s(color1) * (1.0f / 3.0f); - float a0 = astc::clamp255f(color0.lane<3>() * scale); - float a1 = astc::clamp255f(color1.lane<3>() * scale); + float a0 = color0.lane<3>(); + float a1 = color1.lane<3>(); int l0a = astc::flt2int_rtn(l0); int a0a = astc::flt2int_rtn(a0); @@ -693,7 +678,8 @@ static bool try_quantize_luminance_alpha_delta( * * @param color0 The input unquantized color0 endpoint. * @param color1 The input unquantized color1 endpoint. - * @param[out] output The output endpoints, returned as (r0, r1, b0, b1, g0, g1, a0, a1). + * @param[out] color0_out The output quantized color0 endpoint + * @param[out] color1_out The output quantized color1 endpoint * @param quant_level The quantization level to use. * * @return Returns @c false on failure, @c true on success. @@ -701,14 +687,14 @@ static bool try_quantize_luminance_alpha_delta( static bool try_quantize_rgba_delta( vfloat4 color0, vfloat4 color1, - uint8_t output[8], + vint4& color0_out, + vint4& color1_out, quant_method quant_level ) { - return try_quantize_rgb_delta(color0, color1, output, quant_level) && - try_quantize_alpha_delta(color0, color1, output, quant_level); + return try_quantize_rgb_delta(color0, color1, color0_out, color1_out, quant_level) && + try_quantize_alpha_delta(color0, color1, color0_out, color1_out, quant_level); } - /** * @brief Try to quantize an LDR RGBA color using delta and blue contract encoding. * @@ -720,7 +706,8 @@ static bool try_quantize_rgba_delta( * * @param color0 The input unquantized color0 endpoint. * @param color1 The input unquantized color1 endpoint. - * @param[out] output The output endpoints, returned as (r0, r1, b0, b1, g0, g1, a0, a1). + * @param[out] color0_out The output quantized color0 endpoint + * @param[out] color1_out The output quantized color1 endpoint * @param quant_level The quantization level to use. * * @return Returns @c false on failure, @c true on success. @@ -728,12 +715,13 @@ static bool try_quantize_rgba_delta( static bool try_quantize_rgba_delta_blue_contract( vfloat4 color0, vfloat4 color1, - uint8_t output[8], + vint4& color0_out, + vint4& color1_out, quant_method quant_level ) { // Note that we swap the color0 and color1 ordering for alpha to match RGB blue-contract - return try_quantize_rgb_delta_blue_contract(color0, color1, output, quant_level) && - try_quantize_alpha_delta(color1, color0, output, quant_level); + return try_quantize_rgb_delta_blue_contract(color0, color1, color0_out, color1_out, quant_level) && + try_quantize_alpha_delta(color1, color0, color0_out, color1_out, quant_level); } /** @@ -774,6 +762,8 @@ static void quantize_rgbs( /** * @brief Quantize an LDR RGBA color using scale encoding. * + * @param color0 The input unquantized color0 alpha endpoint. + * @param color1 The input unquantized color1 alpha endpoint. * @param color The input unquantized color endpoint and scale factor. * @param[out] output The output endpoints, returned as (r0, g0, b0, s, a0, a1). * @param quant_level The quantization level to use. @@ -785,10 +775,8 @@ static void quantize_rgbs_alpha( uint8_t output[6], quant_method quant_level ) { - float scale = 1.0f / 257.0f; - - float a0 = astc::clamp255f(color0.lane<3>() * scale); - float a1 = astc::clamp255f(color1.lane<3>() * scale); + float a0 = color0.lane<3>(); + float a1 = color1.lane<3>(); output[4] = quant_color(quant_level, astc::flt2int_rtn(a0), a0); output[5] = quant_color(quant_level, astc::flt2int_rtn(a1), a1); @@ -810,13 +798,8 @@ static void quantize_luminance( uint8_t output[2], quant_method quant_level ) { - float scale = 1.0f / 257.0f; - - color0 = color0 * scale; - color1 = color1 * scale; - - float lum0 = astc::clamp255f(hadd_rgb_s(color0) * (1.0f / 3.0f)); - float lum1 = astc::clamp255f(hadd_rgb_s(color1) * (1.0f / 3.0f)); + float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f); + float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f); if (lum0 > lum1) { @@ -843,16 +826,11 @@ static void quantize_luminance_alpha( uint8_t output[4], quant_method quant_level ) { - float scale = 1.0f / 257.0f; - - color0 = color0 * scale; - color1 = color1 * scale; - - float lum0 = astc::clamp255f(hadd_rgb_s(color0) * (1.0f / 3.0f)); - float lum1 = astc::clamp255f(hadd_rgb_s(color1) * (1.0f / 3.0f)); + float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f); + float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f); - float a0 = astc::clamp255f(color0.lane<3>()); - float a1 = astc::clamp255f(color1.lane<3>()); + float a0 = color0.lane<3>(); + float a1 = color1.lane<3>(); output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0), lum0); output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1), lum1); @@ -1939,58 +1917,170 @@ uint8_t pack_color_endpoints( ) { assert(QUANT_6 <= quant_level && quant_level <= QUANT_256); - // We do not support negative colors - color0 = max(color0, 0.0f); - color1 = max(color1, 0.0f); + // Clamp colors to a valid LDR range + // Note that HDR has a lower max, handled in the conversion functions + color0 = clamp(0.0f, 65535.0f, color0); + color1 = clamp(0.0f, 65535.0f, color1); + + // Pre-scale the LDR value we need to the 0-255 quantizable range + vfloat4 color0_ldr = color0 * (1.0f / 257.0f); + vfloat4 color1_ldr = color1 * (1.0f / 257.0f); uint8_t retval = 0; + float best_error = ERROR_CALC_DEFAULT; + vint4 color0_out, color1_out; + vint4 color0_out2, color1_out2; switch (format) { case FMT_RGB: if (quant_level <= QUANT_160) { - if (try_quantize_rgb_delta_blue_contract(color0, color1, output, quant_level)) + if (try_quantize_rgb_delta_blue_contract(color0_ldr, color1_ldr, color0_out, color1_out, quant_level)) { + vint4 color0_unpack; + vint4 color1_unpack; + rgba_delta_unpack(color0_out, color1_out, color0_unpack, color1_unpack); + retval = FMT_RGB_DELTA; - break; + best_error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack); } - if (try_quantize_rgb_delta(color0, color1, output, quant_level)) + + if (try_quantize_rgb_delta(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level)) { - retval = FMT_RGB_DELTA; - break; + vint4 color0_unpack; + vint4 color1_unpack; + rgba_delta_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack); + + float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack); + if (error < best_error) + { + retval = FMT_RGB_DELTA; + best_error = error; + color0_out = color0_out2; + color1_out = color1_out2; + } } } - if (quant_level < QUANT_256 && try_quantize_rgb_blue_contract(color0, color1, output, quant_level)) + + if (quant_level < QUANT_256) { - retval = FMT_RGB; - break; + if (try_quantize_rgb_blue_contract(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level)) + { + vint4 color0_unpack; + vint4 color1_unpack; + rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack); + + float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack); + if (error < best_error) + { + retval = FMT_RGB; + best_error = error; + color0_out = color0_out2; + color1_out = color1_out2; + } + } } - quantize_rgb(color0, color1, output, quant_level); - retval = FMT_RGB; + + { + quantize_rgb(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level); + + vint4 color0_unpack; + vint4 color1_unpack; + rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack); + + float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack); + if (error < best_error) + { + retval = FMT_RGB; + color0_out = color0_out2; + color1_out = color1_out2; + } + } + + // TODO: Can we vectorize this? + output[0] = static_cast<uint8_t>(color0_out.lane<0>()); + output[1] = static_cast<uint8_t>(color1_out.lane<0>()); + output[2] = static_cast<uint8_t>(color0_out.lane<1>()); + output[3] = static_cast<uint8_t>(color1_out.lane<1>()); + output[4] = static_cast<uint8_t>(color0_out.lane<2>()); + output[5] = static_cast<uint8_t>(color1_out.lane<2>()); break; case FMT_RGBA: if (quant_level <= QUANT_160) { - if (try_quantize_rgba_delta_blue_contract(color0, color1, output, quant_level)) + if (try_quantize_rgba_delta_blue_contract(color0_ldr, color1_ldr, color0_out, color1_out, quant_level)) { + vint4 color0_unpack; + vint4 color1_unpack; + rgba_delta_unpack(color0_out, color1_out, color0_unpack, color1_unpack); + retval = FMT_RGBA_DELTA; - break; + best_error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack); } - if (try_quantize_rgba_delta(color0, color1, output, quant_level)) + + if (try_quantize_rgba_delta(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level)) { - retval = FMT_RGBA_DELTA; - break; + vint4 color0_unpack; + vint4 color1_unpack; + rgba_delta_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack); + + float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack); + if (error < best_error) + { + retval = FMT_RGBA_DELTA; + best_error = error; + color0_out = color0_out2; + color1_out = color1_out2; + } } } - if (quant_level < QUANT_256 && try_quantize_rgba_blue_contract(color0, color1, output, quant_level)) + + if (quant_level < QUANT_256) { - retval = FMT_RGBA; - break; + if (try_quantize_rgba_blue_contract(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level)) + { + vint4 color0_unpack; + vint4 color1_unpack; + rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack); + + float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack); + if (error < best_error) + { + retval = FMT_RGBA; + best_error = error; + color0_out = color0_out2; + color1_out = color1_out2; + } + } } - quantize_rgba(color0, color1, output, quant_level); - retval = FMT_RGBA; + + { + quantize_rgba(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level); + + vint4 color0_unpack; + vint4 color1_unpack; + rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack); + + float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack); + if (error < best_error) + { + retval = FMT_RGBA; + color0_out = color0_out2; + color1_out = color1_out2; + } + } + + // TODO: Can we vectorize this? + output[0] = static_cast<uint8_t>(color0_out.lane<0>()); + output[1] = static_cast<uint8_t>(color1_out.lane<0>()); + output[2] = static_cast<uint8_t>(color0_out.lane<1>()); + output[3] = static_cast<uint8_t>(color1_out.lane<1>()); + output[4] = static_cast<uint8_t>(color0_out.lane<2>()); + output[5] = static_cast<uint8_t>(color1_out.lane<2>()); + output[6] = static_cast<uint8_t>(color0_out.lane<3>()); + output[7] = static_cast<uint8_t>(color1_out.lane<3>()); break; case FMT_RGB_SCALE: @@ -2009,7 +2099,7 @@ uint8_t pack_color_endpoints( break; case FMT_RGB_SCALE_ALPHA: - quantize_rgbs_alpha(color0, color1, rgbs_color, output, quant_level); + quantize_rgbs_alpha(color0_ldr, color1_ldr, rgbs_color, output, quant_level); retval = FMT_RGB_SCALE_ALPHA; break; @@ -2025,20 +2115,20 @@ uint8_t pack_color_endpoints( break; case FMT_LUMINANCE: - quantize_luminance(color0, color1, output, quant_level); + quantize_luminance(color0_ldr, color1_ldr, output, quant_level); retval = FMT_LUMINANCE; break; case FMT_LUMINANCE_ALPHA: if (quant_level <= 18) { - if (try_quantize_luminance_alpha_delta(color0, color1, output, quant_level)) + if (try_quantize_luminance_alpha_delta(color0_ldr, color1_ldr, output, quant_level)) { retval = FMT_LUMINANCE_ALPHA_DELTA; break; } } - quantize_luminance_alpha(color0, color1, output, quant_level); + quantize_luminance_alpha(color0_ldr, color1_ldr, output, quant_level); retval = FMT_LUMINANCE_ALPHA; break; |