summary refs log tree commit diff stats
path: root/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/basis_universal/transcoder/basisu_transcoder.cpp')
-rw-r--r-- thirdparty/basis_universal/transcoder/basisu_transcoder.cpp 2057
1 files changed, 2024 insertions, 33 deletions
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
index c698861f3b..32018cd282 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
@@ -1,5 +1,5 @@
// basisu_transcoder.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -17,6 +17,11 @@
#include <limits.h>
#include "basisu_containers_impl.h"
+#define BASISU_ASTC_HELPERS_IMPLEMENTATION
+#include "basisu_astc_helpers.h"
+
+#include "basisu_astc_hdr_core.h"
+
#ifndef BASISD_IS_BIG_ENDIAN
// TODO: This doesn't work on OSX. How can this be so difficult?
//#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN)
@@ -139,6 +144,10 @@
#endif
#endif
+#ifndef BASISD_SUPPORT_UASTC_HDR
+ #define BASISD_SUPPORT_UASTC_HDR 1
+#endif
+
#define BASISD_WRITE_NEW_BC7_MODE5_TABLES 0
#define BASISD_WRITE_NEW_DXT1_TABLES 0
#define BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES 0
@@ -1908,17 +1917,24 @@ namespace basist
void basisu_transcoder_init()
{
if (g_transcoder_initialized)
- {
- BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");
+ {
+ BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");
return;
- }
+ }
- BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");
+ BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");
#if BASISD_SUPPORT_UASTC
uastc_init();
#endif
+#if BASISD_SUPPORT_UASTC_HDR
+ // TODO: Examine this, optimize for startup time/mem utilization.
+ astc_helpers::init_tables(false);
+
+ astc_hdr_core_init();
+#endif
+
#if BASISD_SUPPORT_ASTC
transcoder_init_astc();
#endif
@@ -2027,6 +2043,10 @@ namespace basist
transcoder_init_pvrtc2();
#endif
+#if BASISD_SUPPORT_UASTC_HDR
+ bc6h_enc_init();
+#endif
+
g_transcoder_initialized = true;
}
@@ -6928,7 +6948,7 @@ namespace basist
// Returns x multiplied by itself.
static inline int sq(int x)
{
    const int value = x;
    return value * value;
}
- // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0.
+ // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0.
// This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha!
// And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it.
static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook)
@@ -7515,6 +7535,8 @@ namespace basist
}
#endif // BASISD_SUPPORT_PVRTC2
+ //------------------------------------------------------------------------------------------------
+
basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder() :
m_pGlobal_codebook(nullptr),
m_selector_history_buf_size(0)
@@ -8620,7 +8642,7 @@ namespace basist
// Now make sure the output buffer is large enough, or we'll overwrite memory.
if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels))
{
- BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
+ BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
return false;
}
}
@@ -8632,7 +8654,7 @@ namespace basist
if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1)
{
- BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
+ BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
return false;
}
}
@@ -8640,7 +8662,7 @@ namespace basist
{
if (output_blocks_buf_size_in_blocks_or_pixels < total_slice_blocks)
{
- BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n");
+ BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n");
return false;
}
}
@@ -9242,13 +9264,17 @@ namespace basist
return status;
}
+
+ //------------------------------------------------------------------------------------------------
// Default constructor. The body is empty and there is no member-initializer list,
// so construction performs no work of its own here.
basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder()
{
}
- bool basisu_lowlevel_uastc_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
- uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
+ bool basisu_lowlevel_uastc_transcoder::transcode_slice(
+ void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+ uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
+ const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
{
BASISU_NOTE_UNUSED(pState);
@@ -9784,6 +9810,317 @@ namespace basist
return status;
}
+
+ //------------------------------------------------------------------------------------------------
+
+ basisu_lowlevel_uastc_hdr_transcoder::basisu_lowlevel_uastc_hdr_transcoder() // Default constructor; no member-initializer list.
+ { // Intentionally empty: construction performs no work of its own here.
+ }
+
+ // Transcodes one slice of UASTC HDR 4x4 blocks (stored as astc_blk) to the block/pixel format `fmt`.
+ //
+ // pDst_blocks: destination buffer. Block formats are written block by block; uncompressed
+ //   formats (RGB_9E5, RGBA_HALF, RGB_HALF) are written as rows of pixels.
+ // num_blocks_x, num_blocks_y: slice dimensions in 4x4 blocks.
+ // pImage_data, image_data_size: source blocks; must hold at least num_blocks_x * num_blocks_y blocks.
+ // fmt: cUASTC_HDR_4x4 / cASTC_HDR_4x4 (straight block copy), cBC6H, cRGB_9E5, cRGBA_HALF or cRGB_HALF.
+ // output_block_or_pixel_stride_in_bytes: byte step between consecutive destination blocks
+ //   (used for the block-format destination pointer only).
+ // orig_width, orig_height: defaults for the row pitch / row count of uncompressed output.
+ // output_row_pitch_in_blocks_or_pixels: 0 selects orig_width (uncompressed) or num_blocks_x (block formats).
+ // output_rows_in_pixels: 0 selects orig_height for uncompressed formats.
+ // pState, bc1_allow_threecolor_blocks, has_alpha, channel0, channel1, decode_flags: not used by this transcoder.
+ //
+ // Returns true when every block was written. Returns false if the transcoder was not globally
+ // initialized, the source buffer is too small, `fmt` is not handled, or a block fails to decode.
+ bool basisu_lowlevel_uastc_hdr_transcoder::transcode_slice(
+     void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+     uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
+     const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
+     basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
+ {
+     BASISU_NOTE_UNUSED(pState);
+     BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
+     BASISU_NOTE_UNUSED(has_alpha);
+     BASISU_NOTE_UNUSED(channel0);
+     BASISU_NOTE_UNUSED(channel1);
+     BASISU_NOTE_UNUSED(decode_flags);
+
+     assert(g_transcoder_initialized);
+     if (!g_transcoder_initialized)
+     {
+         BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder not globally initialized.\n");
+         return false;
+     }
+
+#if BASISD_SUPPORT_UASTC_HDR
+     const bool is_uncompressed = basis_block_format_is_uncompressed(fmt);
+
+     if (!output_row_pitch_in_blocks_or_pixels)
+         output_row_pitch_in_blocks_or_pixels = is_uncompressed ? orig_width : num_blocks_x;
+
+     if (is_uncompressed && !output_rows_in_pixels)
+         output_rows_in_pixels = orig_height;
+
+     // Do the size math in 64 bits: a 32-bit product can wrap for hostile block counts,
+     // which would let an undersized source buffer pass the check below.
+     const uint64_t total_blocks = (uint64_t)num_blocks_x * num_blocks_y;
+     const uint64_t total_expected_block_bytes = (uint64_t)sizeof(astc_blk) * total_blocks;
+     if (image_data_size < total_expected_block_bytes)
+     {
+         BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
+         return false;
+     }
+
+     const astc_blk* pSource_block = reinterpret_cast<const astc_blk*>(pImage_data);
+     uint8_t* const pDst_bytes = static_cast<uint8_t*>(pDst_blocks);
+
+     // TODO: Optimize pure memcpy() case.
+
+     for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
+     {
+         uint8_t* pDst_block = pDst_bytes + (size_t)block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
+
+         for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block += output_block_or_pixel_stride_in_bytes)
+         {
+             bool status = false;
+
+             // For uncompressed output: offset (in pixels) of this block's top-left texel, and how much
+             // of the 4x4 block fits inside the destination. The clamps are kept signed so a block that
+             // lies entirely outside the destination copies nothing instead of wrapping to a huge count.
+             const size_t dst_pixel_ofs = (size_t)block_x * 4 + (size_t)block_y * 4 * output_row_pitch_in_blocks_or_pixels;
+             const int max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
+             const int max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
+
+             switch (fmt)
+             {
+             case block_format::cUASTC_HDR_4x4:
+             case block_format::cASTC_HDR_4x4:
+             {
+                 // Nothing to do, UASTC HDR is just ASTC.
+                 memcpy(pDst_block, pSource_block, sizeof(astc_blk));
+                 status = true;
+                 break;
+             }
+             case block_format::cBC6H:
+             {
+                 status = astc_hdr_transcode_to_bc6h(*pSource_block, *reinterpret_cast<bc6h_block*>(pDst_block));
+                 break;
+             }
+             case block_format::cRGB_9E5:
+             {
+                 astc_helpers::log_astc_block log_blk;
+                 status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
+                 if (status)
+                 {
+                     uint32_t blk_texels[4][4];
+                     status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeRGB9E5);
+
+                     if (status && (max_x > 0))
+                     {
+                         // One 32-bit word per pixel.
+                         uint32_t* pDst_pixels = reinterpret_cast<uint32_t*>(pDst_bytes + dst_pixel_ofs * sizeof(uint32_t));
+
+                         for (int y = 0; y < max_y; y++)
+                         {
+                             memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * (size_t)max_x);
+
+                             pDst_pixels += output_row_pitch_in_blocks_or_pixels;
+                         } // y
+                     }
+                 }
+
+                 break;
+             }
+             case block_format::cRGBA_HALF:
+             {
+                 astc_helpers::log_astc_block log_blk;
+                 status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
+                 if (status)
+                 {
+                     half_float blk_texels[4][4][4];
+                     status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
+
+                     if (status && (max_x > 0))
+                     {
+                         // Four half floats per pixel.
+                         half_float* pDst_pixels = reinterpret_cast<half_float*>(pDst_bytes + dst_pixel_ofs * sizeof(half_float) * 4);
+
+                         for (int y = 0; y < max_y; y++)
+                         {
+                             for (int x = 0; x < max_x; x++)
+                             {
+                                 pDst_pixels[0 + 4 * x] = blk_texels[y][x][0];
+                                 pDst_pixels[1 + 4 * x] = blk_texels[y][x][1];
+                                 pDst_pixels[2 + 4 * x] = blk_texels[y][x][2];
+                                 pDst_pixels[3 + 4 * x] = blk_texels[y][x][3];
+                             } // x
+
+                             pDst_pixels += (size_t)output_row_pitch_in_blocks_or_pixels * 4;
+                         } // y
+                     }
+                 }
+
+                 break;
+             }
+             case block_format::cRGB_HALF:
+             {
+                 astc_helpers::log_astc_block log_blk;
+                 status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
+                 if (status)
+                 {
+                     half_float blk_texels[4][4][4];
+                     status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
+
+                     if (status && (max_x > 0))
+                     {
+                         // Three half floats per pixel; the decoded fourth component is dropped.
+                         half_float* pDst_pixels = reinterpret_cast<half_float*>(pDst_bytes + dst_pixel_ofs * sizeof(half_float) * 3);
+
+                         for (int y = 0; y < max_y; y++)
+                         {
+                             for (int x = 0; x < max_x; x++)
+                             {
+                                 pDst_pixels[0 + 3 * x] = blk_texels[y][x][0];
+                                 pDst_pixels[1 + 3 * x] = blk_texels[y][x][1];
+                                 pDst_pixels[2 + 3 * x] = blk_texels[y][x][2];
+                             } // x
+
+                             pDst_pixels += (size_t)output_row_pitch_in_blocks_or_pixels * 3;
+                         } // y
+                     }
+                 }
+
+                 break;
+             }
+             default:
+                 // Unsupported target format: status stays false and is reported below.
+                 assert(0);
+                 break;
+
+             }
+
+             if (!status)
+             {
+                 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder failed to unpack a UASTC HDR block - this is a bug, or the data was corrupted\n");
+                 return false;
+             }
+
+         } // block_x
+
+     } // block_y
+
+     return true;
+#else
+     BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: UASTC_HDR is unsupported\n");
+
+     // The remaining parameters are only consumed by the UASTC HDR path above.
+     BASISU_NOTE_UNUSED(orig_width);
+     BASISU_NOTE_UNUSED(orig_height);
+     BASISU_NOTE_UNUSED(output_rows_in_pixels);
+     BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
+     BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
+     BASISU_NOTE_UNUSED(fmt);
+     BASISU_NOTE_UNUSED(image_data_size);
+     BASISU_NOTE_UNUSED(pImage_data);
+     BASISU_NOTE_UNUSED(num_blocks_x);
+     BASISU_NOTE_UNUSED(num_blocks_y);
+     BASISU_NOTE_UNUSED(pDst_blocks);
+
+     return false;
+#endif
+ }
+
+ // Container-independent entry point: transcodes one UASTC HDR image (a single slice) to `target_format`.
+ //
+ // target_format: cTFASTC_HDR_4x4_RGBA, cTFBC6H, cTFRGB_HALF, cTFRGBA_HALF or cTFRGB_9E5; anything else fails.
+ // pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels: destination buffer and its capacity,
+ //   checked with basis_validate_output_buffer_size() before any block is written.
+ // pCompressed_data, compressed_data_length: source buffer; [slice_offset, slice_offset + slice_length)
+ //   must lie inside it.
+ // num_blocks_x, num_blocks_y, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState,
+ //   output_rows_in_pixels, has_alpha, channel0, channel1: forwarded to transcode_slice()
+ //   (channel0/channel1 only for the ASTC HDR and BC6H targets).
+ // level_index, decode_flags, is_video: not used here.
+ //
+ // Returns the result of transcode_slice(), or false if validation fails or the format is unsupported.
+ bool basisu_lowlevel_uastc_hdr_transcoder::transcode_image(
+     transcoder_texture_format target_format,
+     void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
+     const uint8_t* pCompressed_data, uint32_t compressed_data_length,
+     uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
+     uint32_t slice_offset, uint32_t slice_length,
+     uint32_t decode_flags,
+     bool has_alpha,
+     bool is_video,
+     uint32_t output_row_pitch_in_blocks_or_pixels,
+     basisu_transcoder_state* pState,
+     uint32_t output_rows_in_pixels,
+     int channel0, int channel1)
+ {
+     BASISU_NOTE_UNUSED(is_video);
+     BASISU_NOTE_UNUSED(level_index);
+     BASISU_NOTE_UNUSED(decode_flags);
+
+     // 64-bit sum so slice_offset + slice_length cannot wrap.
+     if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
+     {
+         BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: source data buffer too small\n");
+         return false;
+     }
+
+     const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
+
+     // Reject block counts that do not fit in 32 bits; a wrapped product would make the
+     // output buffer validation below compare against a far too small block total.
+     const uint64_t total_slice_blocks64 = (uint64_t)num_blocks_x * num_blocks_y;
+     if (total_slice_blocks64 > 0xFFFFFFFFull)
+     {
+         BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: total block count overflows 32 bits\n");
+         return false;
+     }
+     const uint32_t total_slice_blocks = (uint32_t)total_slice_blocks64;
+
+     if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks))
+     {
+         BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: output buffer size too small\n");
+         return false;
+     }
+
+     bool status = false;
+
+     switch (target_format)
+     {
+     case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+     {
+         status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_4x4,
+             bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
+
+         if (!status)
+         {
+             BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n");
+         }
+         break;
+     }
+     case transcoder_texture_format::cTFBC6H:
+     {
+         status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H,
+             bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
+         if (!status)
+         {
+             BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to BC6H failed\n");
+         }
+         break;
+     }
+     case transcoder_texture_format::cTFRGB_HALF:
+     {
+         status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF,
+             bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+         if (!status)
+         {
+             BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n");
+         }
+         break;
+     }
+     case transcoder_texture_format::cTFRGBA_HALF:
+     {
+         status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF,
+             bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+         if (!status)
+         {
+             BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
+         }
+         break;
+     }
+     case transcoder_texture_format::cTFRGB_9E5:
+     {
+         status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5,
+             bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+         if (!status)
+         {
+             // Message names the format actually being produced (was a copy of the RGBA_HALF text).
+             BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGB_9E5 failed\n");
+         }
+         break;
+     }
+     default:
+     {
+         assert(0);
+         BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: Invalid format\n");
+         break;
+     }
+     }
+
+     return status;
+ }
+
+ //------------------------------------------------------------------------------------------------
basisu_transcoder::basisu_transcoder() :
m_ready_to_transcode(false)
@@ -10390,7 +10727,7 @@ namespace basist
}
else
{
- // Nothing special to do for UASTC.
+ // Nothing special to do for UASTC/UASTC HDR.
if (m_lowlevel_etc1s_decoder.m_local_endpoints.size())
{
m_lowlevel_etc1s_decoder.clear();
@@ -10510,7 +10847,14 @@ namespace basist
return false;
}
- if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
+ if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
+ {
+ return m_lowlevel_uastc_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
+ pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
+ fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
+ output_rows_in_pixels, channel0, channel1, decode_flags);
+ }
+ else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
{
return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
@@ -10742,7 +11086,18 @@ namespace basist
memset(static_cast<uint8_t*>(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel);
}
- if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
+ if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
+ {
+ const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
+
+ // Use the container independent image transcode method.
+ status = m_lowlevel_uastc_hdr_decoder.transcode_image(fmt,
+ pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
+ (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
+ pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
+ decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+ }
+ else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
{
const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
@@ -10808,20 +11163,27 @@ namespace basist
return 8;
case transcoder_texture_format::cTFBC7_RGBA:
case transcoder_texture_format::cTFBC7_ALT:
+ case transcoder_texture_format::cTFBC6H:
case transcoder_texture_format::cTFETC2_RGBA:
case transcoder_texture_format::cTFBC3_RGBA:
case transcoder_texture_format::cTFBC5_RG:
case transcoder_texture_format::cTFASTC_4x4_RGBA:
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
case transcoder_texture_format::cTFATC_RGBA:
case transcoder_texture_format::cTFFXT1_RGB:
case transcoder_texture_format::cTFETC2_EAC_RG11:
return 16;
case transcoder_texture_format::cTFRGBA32:
+ case transcoder_texture_format::cTFRGB_9E5:
return sizeof(uint32_t);
case transcoder_texture_format::cTFRGB565:
case transcoder_texture_format::cTFBGR565:
case transcoder_texture_format::cTFRGBA4444:
return sizeof(uint16_t);
+ case transcoder_texture_format::cTFRGB_HALF:
+ return sizeof(half_float) * 3;
+ case transcoder_texture_format::cTFRGBA_HALF:
+ return sizeof(half_float) * 4;
default:
assert(0);
BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
@@ -10845,17 +11207,22 @@ namespace basist
case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA";
case transcoder_texture_format::cTFBC5_RG: return "BC5_RG";
case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA";
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return "ASTC_HDR_RGBA";
case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB";
case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA";
case transcoder_texture_format::cTFRGBA32: return "RGBA32";
case transcoder_texture_format::cTFRGB565: return "RGB565";
case transcoder_texture_format::cTFBGR565: return "BGR565";
case transcoder_texture_format::cTFRGBA4444: return "RGBA4444";
+ case transcoder_texture_format::cTFRGBA_HALF: return "RGBA_HALF";
+ case transcoder_texture_format::cTFRGB_9E5: return "RGB_9E5";
+ case transcoder_texture_format::cTFRGB_HALF: return "RGB_HALF";
case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB";
case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB";
case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11";
case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11";
+ case transcoder_texture_format::cTFBC6H: return "BC6H";
default:
assert(0);
BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
@@ -10881,7 +11248,13 @@ namespace basist
case block_format::cRGB565: return "RGB565";
case block_format::cBGR565: return "BGR565";
case block_format::cRGBA4444: return "RGBA4444";
+ case block_format::cRGBA_HALF: return "RGBA_HALF";
+ case block_format::cRGB_HALF: return "RGB_HALF";
+ case block_format::cRGB_9E5: return "RGB_9E5";
case block_format::cUASTC_4x4: return "UASTC_4x4";
+ case block_format::cUASTC_HDR_4x4: return "UASTC_HDR_4x4";
+ case block_format::cBC6H: return "BC6H";
+ case block_format::cASTC_HDR_4x4: return "ASTC_HDR_4x4";
case block_format::cFXT1_RGB: return "FXT1_RGB";
case block_format::cPVRTC2_4_RGB: return "PVRTC2_4_RGB";
case block_format::cPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
@@ -10914,11 +11287,13 @@ namespace basist
bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt)
{
+ // TODO: Technically ASTC_HDR does support alpha, but UASTC_HDR doesn't yet support it. Unsure what to do here.
switch (fmt)
{
case transcoder_texture_format::cTFETC2_RGBA:
case transcoder_texture_format::cTFBC3_RGBA:
case transcoder_texture_format::cTFASTC_4x4_RGBA:
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
case transcoder_texture_format::cTFBC7_RGBA:
case transcoder_texture_format::cTFBC7_ALT:
case transcoder_texture_format::cTFPVRTC1_4_RGBA:
@@ -10926,6 +11301,23 @@ namespace basist
case transcoder_texture_format::cTFATC_RGBA:
case transcoder_texture_format::cTFRGBA32:
case transcoder_texture_format::cTFRGBA4444:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ return true;
+ default:
+ break;
+ }
+ return false;
+ }
+
+ bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt)
+ {
+ switch (fmt)
+ {
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+ case transcoder_texture_format::cTFBC6H:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ case transcoder_texture_format::cTFRGB_HALF:
+ case transcoder_texture_format::cTFRGB_9E5:
return true;
default:
break;
@@ -10947,13 +11339,18 @@ namespace basist
case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA;
case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3;
case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5;
- case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC4x4;
+ case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC_LDR_4x4;
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return basisu::texture_format::cASTC_HDR_4x4;
+ case transcoder_texture_format::cTFBC6H: return basisu::texture_format::cBC6HUnsigned;
case transcoder_texture_format::cTFATC_RGB: return basisu::texture_format::cATC_RGB;
case transcoder_texture_format::cTFATC_RGBA: return basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA;
case transcoder_texture_format::cTFRGBA32: return basisu::texture_format::cRGBA32;
case transcoder_texture_format::cTFRGB565: return basisu::texture_format::cRGB565;
case transcoder_texture_format::cTFBGR565: return basisu::texture_format::cBGR565;
case transcoder_texture_format::cTFRGBA4444: return basisu::texture_format::cRGBA4444;
+ case transcoder_texture_format::cTFRGBA_HALF: return basisu::texture_format::cRGBA_HALF;
+ case transcoder_texture_format::cTFRGB_9E5: return basisu::texture_format::cRGB_9E5;
+ case transcoder_texture_format::cTFRGB_HALF: return basisu::texture_format::cRGB_HALF;
case transcoder_texture_format::cTFFXT1_RGB: return basisu::texture_format::cFXT1_RGB;
case transcoder_texture_format::cTFPVRTC2_4_RGB: return basisu::texture_format::cPVRTC2_4_RGBA;
case transcoder_texture_format::cTFPVRTC2_4_RGBA: return basisu::texture_format::cPVRTC2_4_RGBA;
@@ -10975,6 +11372,9 @@ namespace basist
case transcoder_texture_format::cTFRGB565:
case transcoder_texture_format::cTFBGR565:
case transcoder_texture_format::cTFRGBA4444:
+ case transcoder_texture_format::cTFRGB_HALF:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ case transcoder_texture_format::cTFRGB_9E5:
return true;
default:
break;
@@ -10995,6 +11395,9 @@ namespace basist
case block_format::cRGBA4444_COLOR:
case block_format::cRGBA4444_ALPHA:
case block_format::cRGBA4444_COLOR_OPAQUE:
+ case block_format::cRGBA_HALF:
+ case block_format::cRGB_HALF:
+ case block_format::cRGB_9E5:
return true;
default:
break;
@@ -11007,11 +11410,16 @@ namespace basist
switch (fmt)
{
case transcoder_texture_format::cTFRGBA32:
+ case transcoder_texture_format::cTFRGB_9E5:
return sizeof(uint32_t);
case transcoder_texture_format::cTFRGB565:
case transcoder_texture_format::cTFBGR565:
case transcoder_texture_format::cTFRGBA4444:
return sizeof(uint16_t);
+ case transcoder_texture_format::cTFRGB_HALF:
+ return sizeof(half_float) * 3;
+ case transcoder_texture_format::cTFRGBA_HALF:
+ return sizeof(half_float) * 4;
default:
break;
}
@@ -11038,8 +11446,26 @@ namespace basist
bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt)
{
- if (fmt == basis_tex_format::cUASTC4x4)
+ if (fmt == basis_tex_format::cUASTC_HDR_4x4)
+ {
+ // UASTC HDR
+#if BASISD_SUPPORT_UASTC_HDR
+ switch (tex_type)
+ {
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+ case transcoder_texture_format::cTFBC6H:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ case transcoder_texture_format::cTFRGB_HALF:
+ case transcoder_texture_format::cTFRGB_9E5:
+ return true;
+ default:
+ break;
+ }
+#endif
+ }
+ else if (fmt == basis_tex_format::cUASTC4x4)
{
+ // UASTC LDR
#if BASISD_SUPPORT_UASTC
switch (tex_type)
{
@@ -11049,6 +11475,12 @@ namespace basist
case transcoder_texture_format::cTFATC_RGB:
case transcoder_texture_format::cTFATC_RGBA:
case transcoder_texture_format::cTFFXT1_RGB:
+ // UASTC LDR doesn't support transcoding to HDR formats
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+ case transcoder_texture_format::cTFBC6H:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ case transcoder_texture_format::cTFRGB_HALF:
+ case transcoder_texture_format::cTFRGB_9E5:
return false;
default:
return true;
@@ -11057,6 +11489,7 @@ namespace basist
}
else
{
+ // ETC1S
switch (tex_type)
{
// ETC1 and uncompressed are always supported.
@@ -11812,7 +12245,7 @@ namespace basist
// Encodes 3 values to output, usable for any range that uses quints and bits
static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
{
- // First extract the trits and the bits from the 5 input values
+ // First extract the quints and the bits from the 3 input values
int quints = 0, bits[3];
const uint32_t bit_mask = (1 << n) - 1;
for (int i = 0; i < 3; i++)
@@ -12131,11 +12564,13 @@ namespace basist
return bits & ((1U << codesize) - 1U);
}
-
- uint32_t byte_bit_offset = bit_offset & 7U;
- const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]);
- bit_offset += codesize;
- return (w >> byte_bit_offset)& ((1U << codesize) - 1U);
+ else
+ {
+ uint32_t byte_bit_offset = bit_offset & 7U;
+ const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]);
+ bit_offset += codesize;
+ return (w >> byte_bit_offset) & ((1U << codesize) - 1U);
+ }
}
bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints)
@@ -12170,6 +12605,7 @@ namespace basist
return false;
unpacked.m_mode = mode;
+ unpacked.m_common_pattern = 0;
uint32_t bit_ofs = g_uastc_mode_huff_codes[mode][1];
@@ -16663,10 +17099,12 @@ namespace basist
memcpy(&m_header, pData, sizeof(m_header));
- // We only support UASTC and ETC1S
- if (m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED)
+ // We only support UASTC LDR, UASTC HDR and ETC1S.
+ // Note the DFD's contents are what we are guided by for decoding the KTX2 file, not this format field (currently).
+ if ((m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) &&
+ (m_header.m_vk_format != basist::KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK))
{
- BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC format\n");
+ BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC LDR/HDR format\n");
return false;
}
@@ -16890,6 +17328,16 @@ namespace basist
// We're assuming "DATA" means RGBA so it has alpha.
m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
}
+ else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_HDR)
+ {
+ m_format = basist::basis_tex_format::cUASTC_HDR_4x4;
+
+ m_dfd_samples = 1;
+ m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
+
+ // We're assuming "DATA" means RGBA so it has alpha.
+ m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
+ }
else
{
// Unsupported DFD color model.
@@ -17167,7 +17615,8 @@ namespace basist
return false;
}
}
- else if (m_format == basist::basis_tex_format::cUASTC4x4)
+ else if ((m_format == basist::basis_tex_format::cUASTC4x4) ||
+ (m_format == basist::basis_tex_format::cUASTC_HDR_4x4))
{
// Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices.
assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length);
@@ -17188,14 +17637,29 @@ namespace basist
return false;
}
- if (!m_uastc_transcoder.transcode_image(fmt,
- pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
- (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
- 0, (uint32_t)total_2D_image_size,
- decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
+ if (m_format == basist::basis_tex_format::cUASTC_HDR_4x4)
{
- BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
- return false;
+ if (!m_uastc_hdr_transcoder.transcode_image(fmt,
+ pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
+ (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
+ 0, (uint32_t)total_2D_image_size,
+ decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
+ {
+ BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
+ return false;
+ }
+ }
+ else
+ {
+ if (!m_uastc_transcoder.transcode_image(fmt,
+ pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
+ (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
+ 0, (uint32_t)total_2D_image_size,
+ decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
+ {
+ BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
+ return false;
+ }
}
}
else
@@ -17476,4 +17940,1531 @@ namespace basist
#endif
}
+ //-------------------------------
+
+#ifdef BASISD_SUPPORT_UASTC_HDR
+ // Converts a 32-bit float to a 16-bit half float, returned as raw bits.
+ // This float->half conversion matches how "F32TO16" works on Intel GPU's.
+ // - Inf, and finite values too large for a half, become +/-Inf. NaN becomes a NaN with mantissa 1 (the payload is not kept).
+ // - Float zeros and float denormals become +/-0. Values below the smallest normal half are rounded to half denormals.
+ // - Rounding is done by lrintf(), so it follows the current floating point rounding mode.
+ basist::half_float float_to_half(float val)
+ {
+     // Fetch the float's bit pattern with memcpy(): reading the inactive member of a union is undefined behavior in C++.
+     static_assert(sizeof(float) == sizeof(uint32_t), "float_to_half expects a 32-bit float");
+     uint32_t bits;
+     memcpy(&bits, &val, sizeof(bits));
+
+     // Unsigned extraction avoids relying on how a negative signed value is shifted right.
+     const int flt_m = (int)(bits & 0x7FFFFF), flt_e = (int)((bits >> 23) & 0xFF), flt_s = (int)(bits >> 31);
+     int s = flt_s, e = 0, m = 0;
+
+     // inf/NaN
+     if (flt_e == 0xff)
+     {
+         e = 31;
+         if (flt_m != 0) // NaN
+             m = 1;
+     }
+     // not zero or denormal
+     else if (flt_e != 0)
+     {
+         int new_exp = flt_e - 127;
+         if (new_exp > 15)
+             e = 31; // too large for a half: becomes Inf
+         else if (new_exp < -14)
+             m = (int)lrintf((1 << 24) * fabsf(val)); // half denormal: the mantissa counts units of 2^-24
+         else
+         {
+             e = new_exp + 15;
+             m = (int)lrintf(flt_m * (1.0f / ((float)(1 << 13)))); // drop the low 13 mantissa bits, with rounding
+         }
+     }
+
+     // Rounding can carry out of the 10-bit mantissa, which bumps the exponent (possibly all the way to Inf).
+     assert((0 <= m) && (m <= 1024));
+     if (m == 1024)
+     {
+         e++;
+         m = 0;
+     }
+
+     assert((s >= 0) && (s <= 1));
+     assert((e >= 0) && (e <= 31));
+     assert((m >= 0) && (m <= 1023));
+
+     basist::half_float result = (basist::half_float)((s << 15) | (e << 10) | m);
+     return result;
+ }
+
+ //------------------------------------------------------------------------------------------------
+ // HDR support
+ //
+ // Originally from bc6h_enc.cpp
+ // BC6H decoder fuzzed vs. DirectXTex's for unsigned/signed
+
+ const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4] = // [mode][0] = base endpoint bits, [mode][1..3] = bits of the remaining r, g, b endpoint values (deltas unless noted below); see the sanity checks in pack_bc6h_block()
+ {
+ // 2 subsets
+ { 10, 5, 5, 5, }, // 0, mode 1 in MS/D3D docs
+ { 7, 6, 6, 6, }, // 1
+ { 11, 5, 4, 4, }, // 2
+ { 11, 4, 5, 4, }, // 3
+ { 11, 4, 4, 5, }, // 4
+ { 9, 5, 5, 5, }, // 5
+ { 8, 6, 5, 5, }, // 6
+ { 8, 5, 6, 5, }, // 7
+ { 8, 5, 5, 6, }, // 8
+ { 6, 6, 6, 6, }, // 9, endpoints not delta encoded, mode 10 in MS/D3D docs
+ // 1 subset
+ { 10, 10, 10, 10, }, // 10, endpoints not delta encoded, mode 11 in MS/D3D docs
+ { 11, 9, 9, 9, }, // 11
+ { 12, 8, 8, 8, }, // 12
+ { 16, 4, 4, 4, } // 13, also useful for solid blocks
+ };
+
+ const int8_t g_bc6h_mode_lookup[32] = { 0, 1, 2, 10, 0, 1, 3, 11, 0, 1, 4, 12, 0, 1, 5, 13, 0, 1, 6, -1, 0, 1, 7, -1, 0, 1, 8, -1, 0, 1, 9, -1 }; // Indexed by a 5-bit mode code, gives the mode index (-1 = code used by no mode). Agrees with s_mode_bits in pack_bc6h_block(); modes 0/1 only have 2-bit codes, so they repeat at every index whose low 2 bits are 00/01.
+
+ const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX] =
+ {
+ // Each entry: comp_index, subset*2+lh_index, last_bit, first_bit. As consumed by pack_bc6h_block(): entries are written in order after the mode bits; comp_index 3 selects the partition pattern instead of an endpoint, comp_index -1 ends the list; first_bit == -1 writes only bit last_bit; first_bit > last_bit writes that bit range most significant bit first.
+ //------------------------ mode 0: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (10.555, 10.555, 10.555), delta
+ { { 1, 2, 4, -1 }, { 2, 2, 4, -1 }, { 2, 3, 4, -1 }, { 0, 0, 9, 0 }, { 1, 0, 9, 0 }, { 2, 0, 9, 0 }, { 0, 1, 4, 0 },
+ { 1, 3, 4, -1 }, { 1, 2, 3, 0 }, { 1, 1, 4, 0 }, { 2, 3, 0, -1 }, { 1, 3, 3, 0 }, { 2, 1, 4, 0 }, { 2, 3, 1, -1 },
+ { 2, 2, 3, 0 }, { 0, 2, 4, 0 }, { 2, 3, 2, -1 }, { 0, 3, 4, 0 }, { 2, 3, 3, -1 }, { 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 1: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (7.666, 7.666, 7.666), delta
+ { { 1, 2, 5, -1 },{ 1, 3, 4, -1 },{ 1, 3, 5, -1 },{ 0, 0, 6, 0 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },
+ { 1, 0, 6, 0 },{ 2, 2, 5, -1 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 6, 0 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },
+ { 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },
+ { 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 2: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.555, 11.444, 11.444), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 4, 0 },{ 0, 0, 10, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },{ 1, 0, 10, -1 },
+ { 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },
+ { 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 3: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.555, 11.444), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },
+ { 1, 0, 10, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 0, -1 },
+ { 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 1, 2, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 4: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.444, 11.555), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 2, 2, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },
+ { 1, 0, 10, -1 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 0, 10, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 1, -1 },
+ { 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 2, 3, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 5: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (9.555, 9.555, 9.555), delta
+ { { 0, 0, 8, 0 },{ 2, 2, 4, -1 },{ 1, 0, 8, 0 },{ 1, 2, 4, -1 },{ 2, 0, 8, 0 },{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },
+ { 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },
+ { 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 6: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.666, 8.555, 8.555), delta
+ { { 0, 0, 7, 0 },{ 1, 3, 4, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 3, -1 },
+ { 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
+ { 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 7: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.555, 8.666, 8.555), delta
+ { { 0, 0, 7, 0 },{ 2, 3, 0, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 1, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 1, 3, 5, -1 },
+ { 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
+ { 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 8: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.555, 8.555, 8.666), delta
+ { { 0, 0, 7, 0 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 5, -1 },
+ { 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },
+ { 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 9: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (6.6.6.6, 6.6.6.6, 6.6.6.6), NO delta
+ { { 0, 0, 5, 0 },{ 1, 3, 4, -1 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 5, 0 },{ 1, 2, 5, -1 },{ 2, 2, 5, -1 },
+ { 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 5, 0 },{ 1, 3, 5, -1 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },
+ { 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 10: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (10.10, 10.10, 10.10), NO delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 9, 0 },{ 1, 1, 9, 0 },{ 2, 1, 9, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 11: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (11.9, 11.9, 11.9), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 8, 0 },{ 0, 0, 10, -1 },{ 1, 1, 8, 0 },{ 1, 0, 10, -1 },{ 2, 1, 8, 0 },{ 2, 0, 10, -1 }, {-1, 0, 0, 0} },
+ //------------------------ mode 12: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (12.8, 12.8, 12.8), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 7, 0 },{ 0, 0, 10, 11 },{ 1, 1, 7, 0 },{ 1, 0, 10, 11 },{ 2, 1, 7, 0 },{ 2, 0, 10, 11 }, {-1, 0, 0, 0} },
+ //------------------------ mode 13: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (16.4, 16.4, 16.4), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, 15 },{ 1, 1, 3, 0 },{ 1, 0, 10, 15 },{ 2, 1, 3, 0 },{ 2, 0, 10, 15 }, {-1, 0, 0, 0} }
+ };
+
+ // The same as the first 32 2-subset patterns in BC7. Bits 0-6 of each entry hold the texel's subset index (0 or 1).
+ // Bit 7 is a flag indicating that the weight uses 1 less bit than usual (pack_bc6h_block() writes one bit fewer for these texels, and the 2-subset encoders use them to decide whether a subset's endpoints must be swapped).
+ const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4] = // [pat][y][x]
+ {
+ { {0x80, 0, 1, 1}, { 0, 0, 1, 1 }, { 0, 0, 1, 1 }, { 0, 0, 1, 0x81 }}, { {0x80, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0x81} },
+ { {0x80, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 0x81} }, { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} },
+ { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 0x81} },
+ { {0x80, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 1, 0}, {1, 1, 1, 0x81} }, { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 1, 0x81, 1}, {0, 0, 1, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} },
+ { {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 1, 0, 0}, {1, 1, 1, 0} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} }, { {0x80, 1, 1, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 0, 0, 0x81} },
+ { {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} },
+ { {0x80, 1, 0x81, 0}, {0, 1, 1, 0}, {0, 1, 1, 0}, {0, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {0, 1, 1, 0}, {0, 1, 1, 0}, {1, 1, 0, 0} },
+ { {0x80, 0, 0, 1}, {0, 1, 1, 1}, {0x81, 1, 1, 0}, {1, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {0x81, 1, 1, 1}, {0, 0, 0, 0} },
+ { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {1, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {1, 0, 0, 1}, {1, 0, 0, 1}, {1, 1, 0, 0} }
+ };
+
+ const uint8_t g_bc6h_weight3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; // One value in 0..64 per 3-bit weight index. Not referenced in this part of the file; presumably the interpolation weights used when unpacking - confirm against the decoder.
+ const uint8_t g_bc6h_weight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; // Same, for 4-bit weight indices.
+
+ // Unpacked ("logical") description of a BC6H block. pack_bc6h_block() turns one of these into the 16 byte physical block.
+ struct bc6h_logical_block
+ {
+     // Mode index, must be less than NUM_BC6H_MODES.
+     uint32_t m_mode;
+
+     // 2-subset partition pattern index. Must be 0 if 1 subset.
+     uint32_t m_partition_pattern;
+
+     // Indexed [comp][subset*2+lh_index]. The values must be already properly packed for m_mode.
+     uint32_t m_endpoints[3][4];
+
+     // One weight per texel. Weights must be of the proper size, taking into account skipped MSB's which must be 0.
+     uint8_t m_weights[16];
+
+     // Resets the whole object through basisu::clear_obj().
+     void clear() { basisu::clear_obj(*this); }
+ };
+
+ // Ors the low num_bits bits of val into the 128-bit value (h:l) starting at bit position bit_pos, then advances bit_pos.
+ // l holds bits 0-63 and h holds bits 64-127. num_bits must be 1-63, and val must fit in num_bits bits.
+ static inline void write_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
+ {
+     assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
+     assert(val < (1ULL << num_bits));
+
+     const uint32_t end_pos = bit_pos + num_bits;
+
+     if (bit_pos >= 64)
+     {
+         // Entirely inside the high qword.
+         h |= (val << (bit_pos - 64));
+     }
+     else
+     {
+         l |= (val << bit_pos);
+
+         // The field straddles the qword boundary: its upper bits spill into the bottom of h.
+         if (end_pos > 64)
+             h |= (val >> (64 - bit_pos));
+     }
+
+     bit_pos = end_pos;
+     assert(bit_pos <= 128);
+ }
+
+ // Like write_bits(), but the num_bits bits of val are written in reverse order: the most significant bit goes out first.
+ static inline void write_rev_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
+ {
+     assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
+     assert(val < (1ULL << num_bits));
+
+     // Walk from the top bit of the field down to bit 0, emitting one bit at a time.
+     for (uint32_t src_bit = num_bits; src_bit-- > 0; )
+         write_bits((val >> src_bit) & 1, 1, bit_pos, l, h);
+ }
+
+ // Packs a logical BC6H block into its 128-bit physical form.
+ // dst_blk receives the 16 packed bytes, written as four little endian dwords.
+ // log_blk is only read. Its endpoints and weights must already be packed/sized for log_blk.m_mode (only checked by asserts).
+ static void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk)
+ {
+     // Mode codes, written at bit 0: 2 bits for modes 0 and 1, 5 bits for all other modes.
+     // Static const (like the s_mode_order tables used by the encoders) so the table isn't rebuilt for every block.
+     static const uint8_t s_mode_bits[NUM_BC6H_MODES] = { 0b00, 0b01, 0b00010, 0b00110, 0b01010, 0b01110, 0b10010, 0b10110, 0b11010, 0b11110, 0b00011, 0b00111, 0b01011, 0b01111 };
+
+     const uint32_t mode = log_blk.m_mode;
+     assert(mode < NUM_BC6H_MODES);
+
+     const uint32_t num_subsets = (mode >= BC6H_FIRST_1SUBSET_MODE_INDEX) ? 1 : 2;
+
+     assert(((num_subsets == 2) && (log_blk.m_partition_pattern < TOTAL_BC6H_PARTITION_PATTERNS)) ||
+         ((num_subsets == 1) && (!log_blk.m_partition_pattern)));
+
+     // Sanity checks
+     for (uint32_t c = 0; c < 3; c++)
+     {
+         assert(log_blk.m_endpoints[c][0] < (1u << g_bc6h_mode_sig_bits[mode][0])); // 1st subset l, base bits
+         assert(log_blk.m_endpoints[c][1] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 1st subset h, these are deltas except for modes 9,10
+         assert(log_blk.m_endpoints[c][2] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset l
+         assert(log_blk.m_endpoints[c][3] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset h
+     }
+
+     // The block is accumulated in two qwords: l = bits 0-63, h = bits 64-127.
+     uint64_t l = s_mode_bits[mode], h = 0;
+     uint32_t bit_pos = (mode >= 2) ? 5 : 2;
+
+     // Endpoint and partition bits, in the order given by the mode's layout table (terminated by m_comp == -1).
+     for (const bc6h_bit_layout* pLayout = &g_bc6h_bit_layouts[mode][0]; pLayout->m_comp != -1; pLayout++)
+     {
+         // m_comp == 3 selects the partition pattern index, otherwise it's an endpoint component.
+         uint32_t v = (pLayout->m_comp == 3) ? log_blk.m_partition_pattern : log_blk.m_endpoints[pLayout->m_comp][pLayout->m_index];
+
+         if (pLayout->m_first_bit == -1)
+         {
+             // Single bit entry: only bit m_last_bit of the value is written.
+             write_bits((v >> pLayout->m_last_bit) & 1, 1, bit_pos, l, h);
+             continue;
+         }
+
+         const uint32_t total_bits = basisu::iabs(pLayout->m_last_bit - pLayout->m_first_bit) + 1;
+
+         v >>= basisu::minimum(pLayout->m_first_bit, pLayout->m_last_bit);
+         v &= ((1 << total_bits) - 1);
+
+         // A range whose first bit is above its last bit is stored bit-reversed.
+         if (pLayout->m_first_bit > pLayout->m_last_bit)
+             write_rev_bits(v, total_bits, bit_pos, l, h);
+         else
+             write_bits(v, total_bits, bit_pos, l, h);
+     }
+
+     // Weights: 4 bits each with 1 subset, 3 bits each with 2 subsets. Each subset's flagged texel gets one bit less.
+     const uint32_t num_mode_sel_bits = (num_subsets == 1) ? 4 : 3;
+     const uint8_t* pPat = &g_bc6h_2subset_patterns[log_blk.m_partition_pattern][0][0];
+
+     for (uint32_t i = 0; i < 16; i++)
+     {
+         const uint32_t sel = log_blk.m_weights[i];
+
+         uint32_t num_bits = num_mode_sel_bits;
+         if (num_subsets == 2)
+         {
+             // Bit 7 of the pattern entry flags the texels whose weight MSB isn't stored.
+             const uint32_t subset_index = pPat[i];
+             num_bits -= (subset_index >> 7);
+         }
+         else if (!i)
+         {
+             // With 1 subset only the first texel drops its MSB.
+             num_bits--;
+         }
+
+         assert(sel < (1u << num_bits));
+
+         write_bits(sel, num_bits, bit_pos, l, h);
+     }
+
+     // Every mode must fill the block exactly.
+     assert(bit_pos == 128);
+
+     basisu::write_le_dword(&dst_blk.m_bytes[0], (uint32_t)l);
+     basisu::write_le_dword(&dst_blk.m_bytes[4], (uint32_t)(l >> 32u));
+     basisu::write_le_dword(&dst_blk.m_bytes[8], (uint32_t)h);
+     basisu::write_le_dword(&dst_blk.m_bytes[12], (uint32_t)(h >> 32u));
+ }
+
+#if 0 // Compiled out. Expands a bits_per_comp-bit endpoint value to the 16-bit range: 0 and the all-ones value map to 0 and 0xFFFF, values of 15 or more bits pass through unchanged.
+ static inline uint32_t bc6h_blog_dequantize_to_blog16(uint32_t comp, uint32_t bits_per_comp)
+ {
+ int unq;
+
+ if (bits_per_comp >= 15)
+ unq = comp;
+ else if (comp == 0)
+ unq = 0;
+ else if (comp == ((1u << bits_per_comp) - 1u))
+ unq = 0xFFFFu;
+ else
+ unq = ((comp << 16u) + 0x8000u) >> bits_per_comp;
+
+ return unq;
+ }
+#endif
+
+ // Quantizes a non-negative half float (as raw bits, at most MAX_BC6H_HALF_FLOAT_AS_UINT) to a num_bits-bit endpoint value.
+ // Suboptimal, but very close.
+ static inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits)
+ {
+     assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
+
+     // Scale by 64/31 (the +30 biases the division), then drop the (16 - num_bits) low bits.
+     const uint32_t numerator = h * 64 + 30;
+     const uint32_t denominator = 31 * (1 << (16 - num_bits));
+     return numerator / denominator;
+ }
+
+ // Endpoint precisions accepted by half_to_blog_tab(), besides 16: 6,7,8,9,10,11,12
+ const uint32_t BC6H_BLOG_TAB_MIN = 6;
+ const uint32_t BC6H_BLOG_TAB_MAX = 12;
+ //const uint32_t BC6H_BLOG_TAB_NUM = BC6H_BLOG_TAB_MAX - BC6H_BLOG_TAB_MIN + 1;
+
+ // Quantizes a half float (as raw bits) to num_bits bits. Handles 16, or 6-12 bits. Others assert.
+ static inline uint32_t half_to_blog_tab(half_float h, uint32_t num_bits)
+ {
+     // The two constants are only referenced by the assert below.
+     BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MIN);
+     BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MAX);
+
+     assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
+     assert((num_bits == 16) || ((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX)));
+
+     // Note: This used to be done using a table lookup, but it required ~224KB of tables. This isn't quite as accurate, but the error is very slight (+-1 half values as ints).
+     return bc6h_half_to_blog(h, num_bits);
+ }
+
+ // Set by bc6h_enc_init(). The bc6h_enc_block_*() functions assert on it.
+ bool g_bc6h_enc_initialized;
+
+ // Marks the BC6H encoder as initialized. Calling it again is harmless.
+ void bc6h_enc_init()
+ {
+     if (!g_bc6h_enc_initialized)
+         g_bc6h_enc_initialized = true;
+ }
+
+ // Packs a 1 subset block using mode 10 (direct, non-delta 10-bit endpoints) and 4-bit weights.
+ // pEndpoints is [comp][lh_index] (half floats), pWeights holds 16 weights in the 0-15 range.
+ void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+ {
+     assert(g_bc6h_enc_initialized);
+
+     bc6h_logical_block log_blk;
+     log_blk.clear();
+     log_blk.m_mode = BC6H_FIRST_1SUBSET_MODE_INDEX;
+
+     // The first weight is stored without its MSB. If that bit is set, invert every weight and swap the endpoints instead.
+     const bool flip = (pWeights[0] & 8) != 0;
+
+     for (uint32_t i = 0; i < 16; i++)
+     {
+         assert(pWeights[i] <= 15);
+         log_blk.m_weights[i] = (uint8_t)(flip ? (15 - pWeights[i]) : pWeights[i]);
+     }
+
+     // Convert half endpoints to blog10 (mode 10 doesn't use delta encoding)
+     for (uint32_t c = 0; c < 3; c++)
+     {
+         const uint32_t lo = half_to_blog_tab(pEndpoints[c][0], 10);
+         const uint32_t hi = half_to_blog_tab(pEndpoints[c][1], 10);
+
+         log_blk.m_endpoints[c][0] = flip ? hi : lo;
+         log_blk.m_endpoints[c][1] = flip ? lo : hi;
+     }
+
+     pack_bc6h_block(*pPacked_block, log_blk);
+ }
+
+ // Packs a 1 subset block with 4-bit weights.
+ // Tries modes 11-13 (delta endpoint) encoding, starting at BC6H_LAST_MODE_INDEX and working downwards, and uses the first mode
+ // whose endpoint deltas fit. Falls back to mode 10 only when necessary.
+ // pEndpoints is [comp][lh_index] (half floats), pWeights holds 16 weights in the 0-15 range.
+ void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+ {
+     assert(g_bc6h_enc_initialized);
+
+     bc6h_logical_block log_blk;
+     log_blk.clear();
+
+     // Ensure first weight MSB is 0: when it's set, invert the weights and swap the quantized endpoints.
+     // This only depends on the weights, so the weights are prepared once for all modes.
+     const bool flip = (pWeights[0] & 8) != 0;
+
+     for (uint32_t i = 0; i < 16; i++)
+     {
+         assert(pWeights[i] <= 15);
+         log_blk.m_weights[i] = (uint8_t)(flip ? (15 - pWeights[i]) : pWeights[i]);
+     }
+
+     for (uint32_t mode = BC6H_LAST_MODE_INDEX; mode > BC6H_FIRST_1SUBSET_MODE_INDEX; mode--)
+     {
+         const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
+         const uint32_t num_delta_bits = g_bc6h_mode_sig_bits[mode][1];
+
+         assert(num_delta_bits < num_base_bits);
+         assert((num_delta_bits == g_bc6h_mode_sig_bits[mode][2]) && (num_delta_bits == g_bc6h_mode_sig_bits[mode][3]));
+
+         const int base_bitmask = (1 << num_base_bits) - 1;
+         BASISU_NOTE_UNUSED(base_bitmask);
+
+         // Deltas are stored as num_delta_bits-bit two's complement values.
+         const int delta_bitmask = (1 << num_delta_bits) - 1;
+         const int max_delta = (1 << (num_delta_bits - 1)) - 1;
+         const int min_delta = -(max_delta + 1);
+         assert((max_delta - min_delta) == delta_bitmask);
+
+         bool fits = true;
+         for (uint32_t c = 0; (c < 3) && fits; c++)
+         {
+             // Convert half endpoints to blog 16, 12, or 11
+             uint32_t lo = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
+             uint32_t hi = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
+             assert(((int)lo <= base_bitmask) && ((int)hi <= base_bitmask));
+
+             if (flip)
+                 std::swap(lo, hi);
+
+             log_blk.m_endpoints[c][0] = lo;
+
+             const int delta = (int)hi - (int)lo;
+             if ((delta >= min_delta) && (delta <= max_delta))
+                 log_blk.m_endpoints[c][1] = delta & delta_bitmask;
+             else
+                 fits = false; // this mode can't represent the endpoint pair, try the next one
+         }
+
+         if (fits)
+         {
+             log_blk.m_mode = mode;
+             pack_bc6h_block(*pPacked_block, log_blk);
+             return;
+         }
+     }
+
+     // Worst case fall back to mode 10, which can handle any endpoints
+     bc6h_enc_block_mode10(pPacked_block, pEndpoints, pWeights);
+ }
+
+ // Mode 9 (direct endpoint encoding), 3-bit weights, but only 1 subset.
+ // Mode 9 is a 2 subset mode, so partition pattern 0 is used with the same endpoint pair placed in both subsets.
+ // pEndpoints is [comp][lh_index] (half floats), pWeights holds 16 weights in the 0-7 range.
+ void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+ {
+     assert(g_bc6h_enc_initialized);
+
+     const uint32_t pat_index = 0;
+     const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+     bc6h_logical_block log_blk;
+     log_blk.clear();
+     log_blk.m_mode = 9;
+     log_blk.m_partition_pattern = pat_index;
+
+     // In pattern 0 the texels stored without their weight MSB are texel 0 (subset 0) and texel 15 (subset 1).
+     // If one of them has its MSB set, that subset's weights are inverted and its endpoints are swapped.
+     const bool swap_subset[2] = { (pWeights[0] & 4) != 0, (pWeights[15] & 4) != 0 };
+
+     for (uint32_t i = 0; i < 16; i++)
+     {
+         assert(pWeights[i] <= 7);
+
+         const uint32_t subset = pPat[i] & 0x7F;
+         log_blk.m_weights[i] = (uint8_t)(swap_subset[subset] ? (7 - pWeights[i]) : pWeights[i]);
+     }
+
+     // Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
+     for (uint32_t c = 0; c < 3; c++)
+     {
+         const uint32_t lo = half_to_blog_tab(pEndpoints[c][0], 6);
+         const uint32_t hi = half_to_blog_tab(pEndpoints[c][1], 6);
+
+         for (uint32_t subset = 0; subset < 2; subset++)
+         {
+             log_blk.m_endpoints[c][subset * 2 + 0] = swap_subset[subset] ? hi : lo;
+             log_blk.m_endpoints[c][subset * 2 + 1] = swap_subset[subset] ? lo : hi;
+         }
+     }
+
+     pack_bc6h_block(*pPacked_block, log_blk);
+ }
+
+ // Packs a 1 subset block with 3-bit weights using the 2 subset modes: partition pattern 0, same endpoint pair in both subsets.
+ // Tries modes 0-8 (delta endpoints, ordered from largest base bits to least) and uses the first one whose deltas fit. Falls back to mode 9.
+ // pEndpoints is [comp][lh_index] (half floats), pWeights holds 16 weights in the 0-7 range.
+ void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+ {
+     assert(g_bc6h_enc_initialized);
+
+     for (uint32_t i = 0; i < 16; i++)
+     {
+         assert(pWeights[i] <= 7);
+     }
+
+     bc6h_logical_block log_blk;
+     log_blk.clear();
+
+     const uint32_t pat_index = 0;
+     const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+     // The weights don't depend on the mode, so they are prepared once instead of once per mode.
+     // In pattern 0 the texels stored without their weight MSB are texel 0 (subset 0) and texel 15 (subset 1).
+     // If one of them has its MSB set, that subset's weights are inverted and its endpoints get swapped below.
+     const bool swap_flags[2] = { (pWeights[0] & 4) != 0, (pWeights[15] & 4) != 0 };
+
+     for (uint32_t i = 0; i < 16; i++)
+         log_blk.m_weights[i] = (uint8_t)(swap_flags[pPat[i] & 0x7F] ? (7 - pWeights[i]) : pWeights[i]);
+
+     for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
+     {
+         static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least
+         const uint32_t mode = s_mode_order[mode_iter];
+
+         const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
+         const int base_bitmask = (1 << num_base_bits) - 1;
+         BASISU_NOTE_UNUSED(base_bitmask);
+
+         const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
+         const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
+
+         uint32_t blog_endpoints[3][4];
+
+         // Convert half endpoints to blog 7-11, placing the (possibly swapped) pair into both subsets
+         for (uint32_t c = 0; c < 3; c++)
+         {
+             const uint32_t lo = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
+             assert((int)lo <= base_bitmask);
+
+             const uint32_t hi = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
+             assert((int)hi <= base_bitmask);
+
+             for (uint32_t s = 0; s < 2; s++)
+             {
+                 blog_endpoints[c][s * 2 + 0] = swap_flags[s] ? hi : lo;
+                 blog_endpoints[c][s * 2 + 1] = swap_flags[s] ? lo : hi;
+             }
+         }
+
+         // Try packing the endpoints: the other 3 values of each component are stored as signed deltas from the first.
+         bool failed_flag = false;
+
+         for (uint32_t c = 0; c < 3; c++)
+         {
+             const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
+
+             const int min_delta = -(max_delta + 1);
+             assert((max_delta - min_delta) == delta_bitmasks[c]);
+
+             log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
+
+             int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
+             int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
+             int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
+
+             if ((delta0 < min_delta) || (delta0 > max_delta) ||
+                 (delta1 < min_delta) || (delta1 > max_delta) ||
+                 (delta2 < min_delta) || (delta2 > max_delta))
+             {
+                 failed_flag = true;
+                 break;
+             }
+
+             log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
+             log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
+             log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
+         }
+
+         // A delta didn't fit in this mode, try the next one.
+         if (failed_flag)
+             continue;
+
+         log_blk.m_mode = mode;
+         log_blk.m_partition_pattern = pat_index;
+         pack_bc6h_block(*pPacked_block, log_blk);
+
+         return;
+
+     } // mode_iter
+
+     bc6h_enc_block_1subset_mode9_3bit_weights(pPacked_block, pEndpoints, pWeights);
+ }
+
+ // Packs a 2 subset block using mode 9 (direct, non-delta 6-bit endpoints) and 3-bit weights.
+ // common_part_index indexes basist::g_astc_bc7_common_partitions2, which supplies the BC pattern index and an invert flag.
+ // pEndpoints[subset][comp][lh_index] (half floats), pWeights holds 16 weights in the 0-7 range.
+ void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
+ {
+     assert(g_bc6h_enc_initialized);
+     assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2);
+
+     //const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc;
+     const uint32_t pat_index = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
+     assert(pat_index < 32);
+     const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+     // When the invert flag is set, the two subsets trade places in the output.
+     const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
+
+     // The texels flagged with bit 7 in the pattern are stored without their weight MSB. If such a texel has its MSB set,
+     // its subset (bit 0 of the pattern entry) has its weights inverted and its endpoints swapped.
+     bool swap_flags[2] = { false, false };
+     for (uint32_t i = 0; i < 16; i++)
+     {
+         assert(pWeights[i] <= 7);
+
+         if ((pPat[i] & 0x80) && (pWeights[i] & 4))
+             swap_flags[pPat[i] & 1] = true;
+     }
+
+     bc6h_logical_block log_blk;
+     log_blk.clear();
+     log_blk.m_mode = 9;
+     log_blk.m_partition_pattern = pat_index;
+
+     for (uint32_t i = 0; i < 16; i++)
+         log_blk.m_weights[i] = (uint8_t)(swap_flags[pPat[i] & 0x7F] ? (7 - pWeights[i]) : pWeights[i]);
+
+     // Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
+     for (uint32_t s = 0; s < 2; s++)
+     {
+         const uint32_t dst_subset = invert_flag ? (1 - s) : s;
+
+         for (uint32_t c = 0; c < 3; c++)
+         {
+             uint32_t lo = half_to_blog_tab(pEndpoints[s][c][0], 6);
+             uint32_t hi = half_to_blog_tab(pEndpoints[s][c][1], 6);
+
+             if (swap_flags[dst_subset])
+                 std::swap(lo, hi);
+
+             log_blk.m_endpoints[c][dst_subset * 2 + 0] = lo;
+             log_blk.m_endpoints[c][dst_subset * 2 + 1] = hi;
+         }
+     }
+
+     pack_bc6h_block(*pPacked_block, log_blk);
+ }
+
+ // Packs a 2 subset block with 3-bit weights.
+ // Tries modes 0-8 (delta endpoints, ordered from largest base bits to least) and uses the first one whose deltas fit. Falls back to mode 9.
+ // common_part_index indexes basist::g_astc_bc7_common_partitions2, which supplies the BC pattern index and an invert flag.
+ // pEndpoints[subset][comp][lh_index] (half floats), pWeights holds 16 weights in the 0-7 range.
+ void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
+ {
+     assert(g_bc6h_enc_initialized);
+     // Same bounds check as bc6h_enc_block_2subset_mode9_3bit_weights(): common_part_index is used as a table index below.
+     assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2);
+
+     for (uint32_t i = 0; i < 16; i++)
+     {
+         assert(pWeights[i] <= 7);
+     }
+
+     bc6h_logical_block log_blk;
+     log_blk.clear();
+
+     // The partition and weights don't depend on the mode, so they are prepared once instead of once per mode.
+     //const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc;
+     const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
+
+     // When the invert flag is set, the two subsets trade places in the output.
+     const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
+
+     const uint32_t pat_index = bc7_pattern;
+     assert(pat_index < 32);
+     const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+     // The texels flagged with bit 7 in the pattern are stored without their weight MSB. If such a texel has its MSB set,
+     // its subset (bit 0 of the pattern entry) has its weights inverted and its endpoints swapped.
+     bool swap_flags[2] = { false, false };
+     for (uint32_t i = 0; i < 16; i++)
+     {
+         if ((pPat[i] & 0x80) == 0)
+             continue;
+
+         if (pWeights[i] & 4)
+         {
+             const uint32_t p = pPat[i] & 1;
+             swap_flags[p] = true;
+         }
+     }
+
+     for (uint32_t i = 0; i < 16; i++)
+         log_blk.m_weights[i] = (uint8_t)(swap_flags[pPat[i] & 0x7F] ? (7 - pWeights[i]) : pWeights[i]);
+
+     for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
+     {
+         static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least
+         const uint32_t mode = s_mode_order[mode_iter];
+
+         const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
+         const int base_bitmask = (1 << num_base_bits) - 1;
+         BASISU_NOTE_UNUSED(base_bitmask);
+
+         const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
+         const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
+
+         uint32_t blog_endpoints[3][4];
+
+         // Convert half endpoints to blog 7-11, applying the subset inversion and the per-subset endpoint swaps
+         for (uint32_t s = 0; s < 2; s++)
+         {
+             const uint32_t dst_subset = invert_flag ? (1 - s) : s;
+
+             for (uint32_t c = 0; c < 3; c++)
+             {
+                 uint32_t lo = half_to_blog_tab(pEndpoints[s][c][0], num_base_bits);
+                 uint32_t hi = half_to_blog_tab(pEndpoints[s][c][1], num_base_bits);
+
+                 if (swap_flags[dst_subset])
+                     std::swap(lo, hi);
+
+                 blog_endpoints[c][dst_subset * 2 + 0] = lo;
+                 blog_endpoints[c][dst_subset * 2 + 1] = hi;
+             }
+         }
+
+         // Try packing the endpoints: the other 3 values of each component are stored as signed deltas from the first.
+         bool failed_flag = false;
+
+         for (uint32_t c = 0; c < 3; c++)
+         {
+             const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
+
+             const int min_delta = -(max_delta + 1);
+             assert((max_delta - min_delta) == delta_bitmasks[c]);
+
+             log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
+
+             int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
+             int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
+             int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
+
+             if ((delta0 < min_delta) || (delta0 > max_delta) ||
+                 (delta1 < min_delta) || (delta1 > max_delta) ||
+                 (delta2 < min_delta) || (delta2 > max_delta))
+             {
+                 failed_flag = true;
+                 break;
+             }
+
+             log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
+             log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
+             log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
+         }
+
+         // A delta didn't fit in this mode, try the next one.
+         if (failed_flag)
+             continue;
+
+         log_blk.m_mode = mode;
+         log_blk.m_partition_pattern = pat_index;
+         pack_bc6h_block(*pPacked_block, log_blk);
+
+         //half_float blk[16 * 3];
+         //unpack_bc6h(pPacked_block, blk, false);
+
+         return;
+     }
+
+     bc6h_enc_block_2subset_mode9_3bit_weights(pPacked_block, common_part_index, pEndpoints, pWeights);
+ }
+
+ // Encodes a BC6H block in which every texel has the same color.
+ // pColor holds the R, G, B values as half floats. Returns false (leaving the output block untouched)
+ // if any component has its sign bit set; otherwise writes the block and returns true.
+ bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3])
+ {
+     assert(g_bc6h_enc_initialized);
+
+     // Bit 15 of a half float is the sign bit: reject if any component is negative.
+     const bool any_sign_bit_set = ((pColor[0] | pColor[1] | pColor[2]) & 0x8000) != 0;
+     if (any_sign_bit_set)
+         return false;
+
+     // ASTC block unpacker won't allow Inf/NaN's to come through, so they are not checked for here.
+
+     // Low and high endpoints of each component are both set to the solid color.
+     half_float endpoints[3][2];
+     for (uint32_t comp = 0; comp < 3; ++comp)
+     {
+         endpoints[comp][0] = pColor[comp];
+         endpoints[comp][1] = pColor[comp];
+     }
+
+     // With identical endpoints any selector gives the same result, so all 16 weights are zero.
+     uint8_t weights[16] = {};
+
+     bc6h_enc_block_1subset_4bit_weights(pPacked_block, endpoints, weights);
+
+     return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+ // basisu_astc_hdr_core.cpp
+
+ // Set to true by astc_hdr_core_init() once the lookup table below has been filled in.
+ static bool g_astc_hdr_core_initialized;
+ // Indexed by ASTC partition pattern id (0-1023). Each entry is the index of the matching row in
+ // basist::g_astc_bc7_common_partitions2, or -1 (every byte is preset to 0xFF) if the pattern has no common equivalent.
+ static int8_t g_astc_partition_id_to_common_bc7_pat_index[1024];
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // One-time setup for the ASTC HDR core: builds the ASTC partition id -> common partition index lookup table.
+ // Calling it again after it has completed is a no-op.
+ void astc_hdr_core_init()
+ {
+     if (!g_astc_hdr_core_initialized)
+     {
+         // 0xFF in every int8_t slot marks "no common pattern" (reads back as a negative value).
+         memset(g_astc_partition_id_to_common_bc7_pat_index, 0xFF, sizeof(g_astc_partition_id_to_common_bc7_pat_index));
+
+         // Record, for each ASTC pattern present in the common table, which table row it came from.
+         for (uint32_t common_index = 0; common_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; common_index++)
+         {
+             const uint32_t astc_pat = basist::g_astc_bc7_common_partitions2[common_index].m_astc;
+             assert(astc_pat < 1024);
+
+             g_astc_partition_id_to_common_bc7_pat_index[astc_pat] = (int8_t)common_index;
+         }
+
+         g_astc_hdr_core_initialized = true;
+     }
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Sign-extends the low num_src_bits bits of src to a full int.
+ // Bit (num_src_bits - 1) is treated as the sign bit: if set, all higher bits of the result are set;
+ // otherwise all higher bits are cleared. num_src_bits must be in [2, 31].
+ static inline int astc_hdr_sign_extend(int src, int num_src_bits)
+ {
+     assert(basisu::in_range(num_src_bits, 2, 31));
+
+     // Build the mask with unsigned arithmetic: (1 << 31) - 1 on a signed int overflows, which is
+     // undefined behaviour, yet num_src_bits == 31 is allowed by the assert above.
+     // The resulting mask is at most 0x7FFFFFFF, so it always fits in an int.
+     const int value_mask = (int)((1U << num_src_bits) - 1U);
+     const int sign_bit = 1 << (num_src_bits - 1);
+
+     if (src & sign_bit)
+         return src | ~value_mask;
+
+     return src & value_mask;
+ }
+
+ // ORs a single bit of src_val (bit index src_bit, default 0) into dst at bit index dst_bit.
+ // Bits already set in dst are never cleared.
+ static inline void astc_hdr_pack_bit(
+     int& dst, int dst_bit,
+     int src_val, int src_bit = 0)
+ {
+     assert((dst_bit >= 0) && (dst_bit <= 31));
+
+     const int extracted = basisu::get_bit(src_val, src_bit);
+     dst = dst | (extracted << dst_bit);
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Decodes one ASTC HDR color endpoint mode 7 (RGB base + scale) endpoint pair.
+ // pEndpoints: the 4 endpoint bytes v0..v3, already in the 0-255 range (callers use this directly for the 256-level ISE range).
+ // e: receives the decoded endpoints as e[low/high][r,g,b], each clamped to [0, 0xFFF]. e[1] is the base color, e[0] is base minus scale.
+ // pScale: optional (may be null); receives the shifted scale value that was subtracted to form e[0].
+ void decode_mode7_to_qlog12_ise20(
+ const uint8_t* pEndpoints,
+ int e[2][3],
+ int* pScale)
+ {
+ assert(g_astc_hdr_core_initialized);
+
+ // NOTE: trivially true because the inputs are uint8_t.
+ for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
+ {
+ assert(pEndpoints[i] <= 255);
+ }
+
+ const int v0 = pEndpoints[0], v1 = pEndpoints[1], v2 = pEndpoints[2], v3 = pEndpoints[3];
+
+ // Extract mode bits and unpack to major component and mode.
+ // modeval bits 1:0 come from v0 bits 7:6, bit 2 from v1 bit 7, bit 3 from v2 bit 7.
+ const int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
+
+ int majcomp, mode;
+ if ((modeval & 0xC) != 0xC)
+ {
+ // Upper two bits select the major component (0-2), lower two the mode (0-3).
+ majcomp = modeval >> 2;
+ mode = modeval & 3;
+ }
+ else if (modeval != 0xF)
+ {
+ // Upper two bits are both set: mode 4, with the major component in the lower two bits.
+ majcomp = modeval & 3;
+ mode = 4;
+ }
+ else
+ {
+ // All four bits set: mode 5, in which the minor components are not stored as differences (see below).
+ majcomp = 0;
+ mode = 5;
+ }
+
+ // Extract low-order bits of r, g, b, and s.
+ int red = v0 & 0x3f;
+ int green = v1 & 0x1f;
+ int blue = v2 & 0x1f;
+ int scale = v3 & 0x1f;
+
+ // Extract high-order bits, which may be assigned depending on mode
+ int x0 = (v1 >> 6) & 1;
+ int x1 = (v1 >> 5) & 1;
+ int x2 = (v2 >> 6) & 1;
+ int x3 = (v2 >> 5) & 1;
+ int x4 = (v3 >> 7) & 1;
+ int x5 = (v3 >> 6) & 1;
+ int x6 = (v3 >> 5) & 1;
+
+ // Now move the high-order xs into the right place.
+ // ohm is a one-hot mask of the mode, so each constant below is the set of modes in which that bit assignment applies.
+ const int ohm = 1 << mode;
+ if (ohm & 0x30) green |= x0 << 6;
+ if (ohm & 0x3A) green |= x1 << 5;
+ if (ohm & 0x30) blue |= x2 << 6;
+ if (ohm & 0x3A) blue |= x3 << 5;
+ if (ohm & 0x3D) scale |= x6 << 5;
+ if (ohm & 0x2D) scale |= x5 << 6;
+ if (ohm & 0x04) scale |= x4 << 7;
+ if (ohm & 0x3B) red |= x4 << 6;
+ if (ohm & 0x04) red |= x3 << 6;
+ if (ohm & 0x10) red |= x5 << 7;
+ if (ohm & 0x0F) red |= x2 << 7;
+ if (ohm & 0x05) red |= x1 << 8;
+ if (ohm & 0x0A) red |= x0 << 8;
+ if (ohm & 0x05) red |= x0 << 9;
+ if (ohm & 0x02) red |= x6 << 9;
+ if (ohm & 0x01) red |= x3 << 10;
+ if (ohm & 0x02) red |= x5 << 10;
+
+ // Shift the bits to the top of the 12-bit result.
+ // The shift amount is looked up per mode (0-5).
+ static const int s_shamts[6] = { 1,1,2,3,4,5 };
+
+ const int shamt = s_shamts[mode];
+ red <<= shamt;
+ green <<= shamt;
+ blue <<= shamt;
+ scale <<= shamt;
+
+ // Minor components are stored as differences
+ if (mode != 5)
+ {
+ green = red - green;
+ blue = red - blue;
+ }
+
+ // Swizzle major component into place
+ // (up to here "red" held the major component; majcomp 0 needs no swap).
+ if (majcomp == 1)
+ std::swap(red, green);
+
+ if (majcomp == 2)
+ std::swap(red, blue);
+
+ // Clamp output values, set alpha to 1.0
+ // (only RGB is written here; no alpha value is produced by this function).
+ e[1][0] = basisu::clamp(red, 0, 0xFFF);
+ e[1][1] = basisu::clamp(green, 0, 0xFFF);
+ e[1][2] = basisu::clamp(blue, 0, 0xFFF);
+
+ // The low endpoint is the base color with the scale subtracted from every channel.
+ e[0][0] = basisu::clamp(red - scale, 0, 0xFFF);
+ e[0][1] = basisu::clamp(green - scale, 0, 0xFFF);
+ e[0][2] = basisu::clamp(blue - scale, 0, 0xFFF);
+
+ if (pScale)
+ *pScale = scale;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Decodes an ASTC HDR mode 7 endpoint pair stored at an arbitrary endpoint ISE range.
+ // pEndpoints: NUM_MODE7_ENDPOINTS ISE-encoded endpoint values.
+ // e: receives the decoded endpoints as e[low/high][r,g,b].
+ // pScale: optional (may be null); receives the decoded scale value.
+ // ise_endpoint_range: the endpoint ISE range the values were quantized with; must be a valid endpoint range.
+ // Returns false if any decoded component exceeds MAX_QLOG12 (e has still been written in that case).
+ bool decode_mode7_to_qlog12(
+     const uint8_t* pEndpoints,
+     int e[2][3],
+     int* pScale,
+     uint32_t ise_endpoint_range)
+ {
+     assert(g_astc_hdr_core_initialized);
+     // Same precondition that decode_mode11_to_qlog12() checks: the range is used to select a dequantization table below.
+     assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
+
+     if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
+     {
+         // 256-level endpoints are already plain 0-255 values, no table lookup required.
+         decode_mode7_to_qlog12_ise20(pEndpoints, e, pScale);
+     }
+     else
+     {
+         // Map each ISE symbol to its 0-255 value first. The table depends only on the range, so fetch it once.
+         const auto& endpoint_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range);
+
+         uint8_t dequantized_endpoints[NUM_MODE7_ENDPOINTS];
+         for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
+             dequantized_endpoints[i] = endpoint_tab.m_ISE_to_val[pEndpoints[i]];
+
+         decode_mode7_to_qlog12_ise20(dequantized_endpoints, e, pScale);
+     }
+
+     // Reject endpoints above the largest permitted qlog12 value.
+     for (uint32_t i = 0; i < 2; i++)
+     {
+         for (uint32_t c = 0; c < 3; c++)
+         {
+             if (e[i][c] > (int)MAX_QLOG12)
+                 return false;
+         }
+     }
+
+     return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Decodes one ASTC HDR color endpoint mode 11 (HDR RGB) endpoint pair.
+ // pEndpoints: the 6 endpoint bytes v0..v5, already in the 0-255 range (callers use this directly for the 256-level ISE range).
+ // e: receives the decoded endpoints as e[low/high][r,g,b]. In the non-direct path every value is clamped to [0, 0xFFF].
+ void decode_mode11_to_qlog12_ise20(
+ const uint8_t* pEndpoints,
+ int e[2][3])
+ {
+#ifdef _DEBUG
+ // NOTE: trivially true because the inputs are uint8_t.
+ for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++)
+ {
+ assert(pEndpoints[i] <= 255);
+ }
+#endif
+
+ // The major component index is stored in the top bits of v4 (low bit) and v5 (high bit).
+ const uint32_t maj_comp = basisu::get_bit(pEndpoints[4], 7) | (basisu::get_bit(pEndpoints[5], 7) << 1);
+
+ if (maj_comp == 3)
+ {
+ // Direct, qlog8 and qlog7
+ // Channels 0 and 1 use all 8 bits (shifted up by 4); channel 2 uses the low 7 bits of v4/v5 (shifted up by 5).
+ e[0][0] = pEndpoints[0] << 4;
+ e[1][0] = pEndpoints[1] << 4;
+
+ e[0][1] = pEndpoints[2] << 4;
+ e[1][1] = pEndpoints[3] << 4;
+
+ e[0][2] = (pEndpoints[4] & 127) << 5;
+ e[1][2] = (pEndpoints[5] & 127) << 5;
+ }
+ else
+ {
+ int v0 = pEndpoints[0];
+ int v1 = pEndpoints[1];
+ int v2 = pEndpoints[2];
+ int v3 = pEndpoints[3];
+ int v4 = pEndpoints[4];
+ int v5 = pEndpoints[5];
+
+ // 3-bit mode (0-7), assembled from the top bits of v1, v2 and v3.
+ int mode = 0;
+ astc_hdr_pack_bit(mode, 0, v1, 7);
+ astc_hdr_pack_bit(mode, 1, v2, 7);
+ astc_hdr_pack_bit(mode, 2, v3, 7);
+
+ // va starts as v0 with bit 6 of v1 as its bit 8.
+ int va = v0;
+ astc_hdr_pack_bit(va, 8, v1, 6);
+
+ int vb0 = v2 & 63;
+ int vb1 = v3 & 63;
+ int vc = v1 & 63;
+
+ int vd0 = v4 & 0x7F; // this takes more bits than is sometimes needed
+ int vd1 = v5 & 0x7F; // this takes more bits than is sometimes needed
+ // Number of bits actually used by vd0/vd1 in each mode; the values are signed, so sign-extend from that width.
+ static const int8_t dbitstab[8] = { 7,6,7,6,5,6,5,6 };
+ vd0 = astc_hdr_sign_extend(vd0, dbitstab[mode]);
+ vd1 = astc_hdr_sign_extend(vd1, dbitstab[mode]);
+
+ // Spare bits whose destination depends on the mode.
+ int x0 = basisu::get_bit(v2, 6);
+ int x1 = basisu::get_bit(v3, 6);
+ int x2 = basisu::get_bit(v4, 6);
+ int x3 = basisu::get_bit(v5, 6);
+ int x4 = basisu::get_bit(v4, 5);
+ int x5 = basisu::get_bit(v5, 5);
+
+ // ohm is a one-hot mask of the mode, so each constant below is the set of modes in which that bit assignment applies.
+ const uint32_t ohm = 1U << mode;
+ if (ohm & 0xA4) va |= (x0 << 9);
+ if (ohm & 0x08) va |= (x2 << 9);
+ if (ohm & 0x50) va |= (x4 << 9);
+ if (ohm & 0x50) va |= (x5 << 10);
+ if (ohm & 0xA0) va |= (x1 << 10);
+ if (ohm & 0xC0) va |= (x2 << 11);
+ if (ohm & 0x04) vc |= (x1 << 6);
+ if (ohm & 0xE8) vc |= (x3 << 6);
+ if (ohm & 0x20) vc |= (x2 << 7);
+ if (ohm & 0x5B) vb0 |= (x0 << 6);
+ if (ohm & 0x5B) vb1 |= (x1 << 6);
+ if (ohm & 0x12) vb0 |= (x2 << 7);
+ if (ohm & 0x12) vb1 |= (x3 << 7);
+
+ // Per-mode left shift: 3 for modes 0-1, 2 for modes 2-3, 1 for modes 4-5, 0 for modes 6-7.
+ const int shamt = (mode >> 1) ^ 3;
+
+ // The shifts are done on uint32_t; vd0/vd1 may be negative here, presumably this avoids left-shifting a negative int.
+ va = (uint32_t)va << shamt;
+ vb0 = (uint32_t)vb0 << shamt;
+ vb1 = (uint32_t)vb1 << shamt;
+ vc = (uint32_t)vc << shamt;
+ vd0 = (uint32_t)vd0 << shamt;
+ vd1 = (uint32_t)vd1 << shamt;
+
+ // qlog12
+ // High endpoint: va is the major component, the other two channels are va minus vb0/vb1.
+ e[1][0] = basisu::clamp<int>(va, 0, 0xFFF);
+ e[1][1] = basisu::clamp<int>(va - vb0, 0, 0xFFF);
+ e[1][2] = basisu::clamp<int>(va - vb1, 0, 0xFFF);
+
+ // Low endpoint: the high endpoint minus vc, with vd0/vd1 additionally subtracted from the two minor channels.
+ e[0][0] = basisu::clamp<int>(va - vc, 0, 0xFFF);
+ e[0][1] = basisu::clamp<int>(va - vb0 - vc - vd0, 0, 0xFFF);
+ e[0][2] = basisu::clamp<int>(va - vb1 - vc - vd1, 0, 0xFFF);
+
+ // Channel 0 holds the major component so far; move it to its real position (1 or 2) in both endpoints.
+ if (maj_comp)
+ {
+ std::swap(e[0][0], e[0][maj_comp]);
+ std::swap(e[1][0], e[1][maj_comp]);
+ }
+ }
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Decodes an ASTC HDR mode 11 endpoint pair stored at an arbitrary (valid) endpoint ISE range.
+ // pEndpoints holds NUM_MODE11_ENDPOINTS ISE-encoded values; e receives the result as e[low/high][r,g,b].
+ // Returns false if any decoded component exceeds MAX_QLOG12 (e has still been written in that case).
+ bool decode_mode11_to_qlog12(
+     const uint8_t* pEndpoints,
+     int e[2][3],
+     uint32_t ise_endpoint_range)
+ {
+     assert(g_astc_hdr_core_initialized);
+     assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
+
+     // 256-level endpoints are decoded as-is; any other range is first mapped to 0-255 values via the dequantization table.
+     uint8_t unquantized[NUM_MODE11_ENDPOINTS];
+     const uint8_t* pDecoder_input = pEndpoints;
+
+     if (ise_endpoint_range != astc_helpers::BISE_256_LEVELS)
+     {
+         for (uint32_t idx = 0; idx < NUM_MODE11_ENDPOINTS; ++idx)
+             unquantized[idx] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[idx]];
+
+         pDecoder_input = unquantized;
+     }
+
+     decode_mode11_to_qlog12_ise20(pDecoder_input, e);
+
+     // Every component of both endpoints must be within the permitted qlog12 range.
+     for (uint32_t endpoint = 0; endpoint < 2; ++endpoint)
+     {
+         for (uint32_t comp = 0; comp < 3; ++comp)
+         {
+             if (e[endpoint][comp] > (int)MAX_QLOG12)
+                 return false;
+         }
+     }
+
+     return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Encodes a single-subset BC6H block from already-decoded half float endpoints and the ASTC block's weights.
+ // h_e: endpoints as [comp][low/high]. best_blk: supplies the 16 weights and their ISE range (1-8).
+ // ASTC weight indices are remapped to BC6H selectors; returns false only for an unsupported weight range.
+ bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
+ {
+     assert(g_astc_hdr_core_initialized);
+     assert((best_blk.m_weight_ise_range >= 1) && (best_blk.m_weight_ise_range <= 8));
+
+     // ASTC weight index -> 4-bit BC6H selector, one table per weight ISE range.
+     static const uint8_t s_ise1_to_bc6h[3] = { 0, 8, 15 };                                   // 3 levels
+     static const uint8_t s_ise2_to_bc6h[4] = { 0, 5, 10, 15 };                               // 4 levels
+     static const uint8_t s_ise3_to_bc6h[5] = { 0, 4, 7, 11, 15 };                            // 5 levels
+     static const uint8_t s_ise4_to_bc6h[6] = { 0, 15, 3, 12, 6, 9 };                         // 6 levels
+     static const uint8_t s_ise6_to_bc6h[10] = { 0, 15, 2, 13, 3, 12, 5, 10, 6, 9 };          // 10 levels
+     static const uint8_t s_ise7_to_bc6h[12] = { 0, 15, 4, 11, 1, 14, 5, 10, 2, 13, 6, 9 };   // 12 levels
+
+     const uint8_t* pRemap = nullptr;
+
+     switch (best_blk.m_weight_ise_range)
+     {
+     case 1: pRemap = s_ise1_to_bc6h; break;
+     case 2: pRemap = s_ise2_to_bc6h; break;
+     case 3: pRemap = s_ise3_to_bc6h; break;
+     case 4: pRemap = s_ise4_to_bc6h; break;
+     case 5:
+         // Use 3-bit BC6H weights which are a perfect match for 3-bit ASTC weights, but encode 1-subset as 2 equal subsets
+         bc6h_enc_block_1subset_3bit_weights(&transcoded_bc6h_blk, h_e, best_blk.m_weights);
+         return true;
+     case 6: pRemap = s_ise6_to_bc6h; break;
+     case 7: pRemap = s_ise7_to_bc6h; break;
+     case 8:
+         // 16 levels: the weights are copied unchanged below.
+         break;
+     default:
+         assert(0);
+         return false;
+     }
+
+     uint8_t bc6h_weights[16];
+     if (pRemap)
+     {
+         for (uint32_t texel = 0; texel < 16; ++texel)
+             bc6h_weights[texel] = pRemap[best_blk.m_weights[texel]];
+     }
+     else
+     {
+         memcpy(bc6h_weights, best_blk.m_weights, 16);
+     }
+
+     bc6h_enc_block_1subset_4bit_weights(&transcoded_bc6h_blk, h_e, bc6h_weights);
+
+     return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Encodes a 2-subset BC6H block from a 2-partition UASTC HDR logical block.
+ // common_part_index: index of the shared partition pattern in basist::g_astc_bc7_common_partitions2.
+ // Returns false if the block does not use one of the accepted CEM / weight range / endpoint range combinations,
+ // or if an endpoint fails to decode or converts to a half float Inf/NaN.
+ bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
+ {
+     assert(g_astc_hdr_core_initialized);
+     assert(best_blk.m_num_partitions == 2);
+     assert(common_part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+
+     // UASTC HDR checks
+     // Both CEM's must be equal in 2-subset UASTC HDR, and must be mode 7 or mode 11.
+     if (best_blk.m_color_endpoint_modes[1] != best_blk.m_color_endpoint_modes[0])
+         return false;
+
+     const bool uses_mode7 = (best_blk.m_color_endpoint_modes[0] == 7);
+     const bool uses_mode11 = (best_blk.m_color_endpoint_modes[0] == 11);
+     if (!uses_mode7 && !uses_mode11)
+         return false;
+
+     // Accepted (weight ISE range, endpoint ISE range) pairs for each CEM.
+     struct ise_range_pair
+     {
+         uint8_t m_weight_range;
+         uint8_t m_endpoint_range;
+     };
+     static const ise_range_pair s_mode7_ranges[5] = { { 1, 20 }, { 2, 20 }, { 3, 19 }, { 4, 17 }, { 5, 15 } };
+     static const ise_range_pair s_mode11_ranges[2] = { { 1, 14 }, { 2, 12 } };
+
+     const ise_range_pair* pAccepted = uses_mode7 ? s_mode7_ranges : s_mode11_ranges;
+     const uint32_t total_accepted = uses_mode7 ? 5 : 2;
+
+     bool ranges_accepted = false;
+     for (uint32_t k = 0; k < total_accepted; ++k)
+     {
+         if ((best_blk.m_weight_ise_range == pAccepted[k].m_weight_range) &&
+             (best_blk.m_endpoint_ise_range == pAccepted[k].m_endpoint_range))
+         {
+             ranges_accepted = true;
+             break;
+         }
+     }
+     if (!ranges_accepted)
+         return false;
+
+     // Decode each subset's endpoints to qlog12 and convert them to half float.
+     half_float bc6h_endpoints[2][3][2]; // [subset][comp][lh_index]
+
+     for (uint32_t subset = 0; subset < 2; ++subset)
+     {
+         int qlog_e[2][3];
+
+         const bool decoded = uses_mode7 ?
+             decode_mode7_to_qlog12(best_blk.m_endpoints + subset * NUM_MODE7_ENDPOINTS, qlog_e, nullptr, best_blk.m_endpoint_ise_range) :
+             decode_mode11_to_qlog12(best_blk.m_endpoints + subset * NUM_MODE11_ENDPOINTS, qlog_e, best_blk.m_endpoint_ise_range);
+         if (!decoded)
+             return false;
+
+         for (uint32_t comp = 0; comp < 3; ++comp)
+         {
+             for (uint32_t lh = 0; lh < 2; ++lh)
+             {
+                 const half_float h = qlog_to_half_slow(qlog_e[lh][comp], 12);
+                 bc6h_endpoints[subset][comp][lh] = h;
+
+                 if (is_half_inf_or_nan(h))
+                     return false;
+             }
+         }
+     }
+
+     // ASTC weight index -> 3-bit BC6H selector, one table per weight ISE range. Range 5 is copied unchanged.
+     static const uint8_t s_ise1_to_bc6h[3] = { 0, 4, 7 };
+     static const uint8_t s_ise2_to_bc6h[4] = { 0, 2, 5, 7 };
+     static const uint8_t s_ise3_to_bc6h[5] = { 0, 2, 4, 5, 7 };
+     static const uint8_t s_ise4_to_bc6h[6] = { 0, 7, 1, 6, 3, 4 };
+
+     const uint8_t* pRemap = nullptr;
+     switch (best_blk.m_weight_ise_range)
+     {
+     case 1: pRemap = s_ise1_to_bc6h; break;
+     case 2: pRemap = s_ise2_to_bc6h; break;
+     case 3: pRemap = s_ise3_to_bc6h; break;
+     case 4: pRemap = s_ise4_to_bc6h; break;
+     case 5: break;
+     default:
+         assert(0);
+         return false;
+     }
+
+     uint8_t bc6h_weights[16];
+     if (pRemap)
+     {
+         for (uint32_t texel = 0; texel < 16; ++texel)
+             bc6h_weights[texel] = pRemap[best_blk.m_weights[texel]];
+     }
+     else
+     {
+         memcpy(bc6h_weights, best_blk.m_weights, 16);
+     }
+
+     bc6h_enc_block_2subset_3bit_weights(&transcoded_bc6h_blk, common_part_index, bc6h_endpoints, bc6h_weights);
+
+     return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+ // Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails.
+ // Physical-block overload: unpacks a 4x4 ASTC block into its logical form and forwards it to the
+ // logical-block overload. Returns false if the core has not been initialized, if the ASTC data cannot be
+ // unpacked, or if the logical-block transcode fails.
+ bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk)
+ {
+     // Debug builds stop here; release builds fall through to the graceful failure below.
+     assert(g_astc_hdr_core_initialized);
+     if (!g_astc_hdr_core_initialized)
+         return false;
+
+     astc_helpers::log_astc_block unpacked_blk;
+
+     const bool unpacked_ok = astc_helpers::unpack_block(&src_blk, unpacked_blk, 4, 4);
+     if (unpacked_ok)
+         return astc_hdr_transcode_to_bc6h(unpacked_blk, dst_blk);
+
+     // Failed unpacking ASTC data
+     return false;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+ // Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails.
+ // Transcodes a logical UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails.
+ // Accepted input: HDR solid color blocks, or 4x4-grid, single-plane blocks with 1 or 2 partitions using
+ // color endpoint mode 7 or 11 and the weight/endpoint ISE range combinations checked below.
+ // Returns false, without any guarantee about dst_blk's contents, for anything else.
+ bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk)
+ {
+     assert(g_astc_hdr_core_initialized);
+     if (!g_astc_hdr_core_initialized)
+     {
+         assert(0);
+         return false;
+     }
+
+     if (log_blk.m_solid_color_flag_ldr)
+     {
+         // Don't support LDR solid colors.
+         return false;
+     }
+
+     if (log_blk.m_solid_color_flag_hdr)
+     {
+         // Solid color HDR block
+         return bc6h_enc_block_solid_color(&dst_blk, log_blk.m_solid_color);
+     }
+
+     // Only support 4x4 grid sizes
+     if ((log_blk.m_grid_width != 4) || (log_blk.m_grid_height != 4))
+         return false;
+
+     // Don't support dual plane encoding
+     if (log_blk.m_dual_plane)
+         return false;
+
+     if (log_blk.m_num_partitions == 1)
+     {
+         // Handle 1 partition (or subset)
+
+         // UASTC HDR checks
+         if ((log_blk.m_weight_ise_range < 1) || (log_blk.m_weight_ise_range > 8))
+             return false;
+
+         int e[2][3];
+         bool success;
+
+         if (log_blk.m_color_endpoint_modes[0] == 7)
+         {
+             // Mode 7 single-subset blocks must use the 256-level endpoint range (ISE range 20).
+             if (log_blk.m_endpoint_ise_range != 20)
+                 return false;
+
+             success = decode_mode7_to_qlog12(log_blk.m_endpoints, e, nullptr, log_blk.m_endpoint_ise_range);
+         }
+         else if (log_blk.m_color_endpoint_modes[0] == 11)
+         {
+             // UASTC HDR checks
+             // Weight ranges 1-7 require endpoint range 20; weight range 8 requires endpoint range 19.
+             if (log_blk.m_weight_ise_range <= 7)
+             {
+                 if (log_blk.m_endpoint_ise_range != 20)
+                     return false;
+             }
+             else if (log_blk.m_endpoint_ise_range != 19)
+             {
+                 return false;
+             }
+
+             success = decode_mode11_to_qlog12(log_blk.m_endpoints, e, log_blk.m_endpoint_ise_range);
+         }
+         else
+         {
+             return false;
+         }
+
+         if (!success)
+             return false;
+
+         // Transform endpoints to half float
+         // (transposed from e[low/high][comp] to h_e[comp][low/high]).
+         half_float h_e[3][2] =
+         {
+             { qlog_to_half_slow(e[0][0], 12), qlog_to_half_slow(e[1][0], 12) },
+             { qlog_to_half_slow(e[0][1], 12), qlog_to_half_slow(e[1][1], 12) },
+             { qlog_to_half_slow(e[0][2], 12), qlog_to_half_slow(e[1][2], 12) }
+         };
+
+         // Sanity check for NaN/Inf
+         for (uint32_t i = 0; i < 2; i++)
+             if (is_half_inf_or_nan(h_e[0][i]) || is_half_inf_or_nan(h_e[1][i]) || is_half_inf_or_nan(h_e[2][i]))
+                 return false;
+
+         // Transcode to bc6h
+         if (!transcode_bc6h_1subset(h_e, log_blk, dst_blk))
+             return false;
+     }
+     else if (log_blk.m_num_partitions == 2)
+     {
+         // Handle 2 partition (or subset)
+
+         // The logical block may have been built by the caller rather than unpacked from ASTC bits,
+         // so make sure the partition id cannot index past the end of the lookup table.
+         const uint32_t total_partition_ids = (uint32_t)(sizeof(g_astc_partition_id_to_common_bc7_pat_index) / sizeof(g_astc_partition_id_to_common_bc7_pat_index[0]));
+         if ((uint32_t)log_blk.m_partition_id >= total_partition_ids)
+             return false;
+
+         // A negative entry means this ASTC partition pattern has no common equivalent, so it cannot be transcoded.
+         int common_bc7_pat_index = g_astc_partition_id_to_common_bc7_pat_index[log_blk.m_partition_id];
+         if (common_bc7_pat_index < 0)
+             return false;
+
+         assert(common_bc7_pat_index < (int)basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+
+         if (!transcode_bc6h_2subsets(common_bc7_pat_index, log_blk, dst_blk))
+             return false;
+     }
+     else
+     {
+         // Only supports 1 or 2 partitions (or subsets)
+         return false;
+     }
+
+     return true;
+ }
+#endif // BASISD_SUPPORT_UASTC_HDR
+
} // namespace basist