summaryrefslogtreecommitdiffstats
path: root/thirdparty/basis_universal/transcoder
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/basis_universal/transcoder')
-rw-r--r--thirdparty/basis_universal/transcoder/basisu.h105
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h102
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_astc_helpers.h3587
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_containers.h62
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_containers_impl.h47
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_file_headers.h5
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_transcoder.cpp2057
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_transcoder.h80
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h216
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc2
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc2
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h1
12 files changed, 6173 insertions, 93 deletions
diff --git a/thirdparty/basis_universal/transcoder/basisu.h b/thirdparty/basis_universal/transcoder/basisu.h
index 1230b59ec6..939ee79e62 100644
--- a/thirdparty/basis_universal/transcoder/basisu.h
+++ b/thirdparty/basis_universal/transcoder/basisu.h
@@ -1,5 +1,5 @@
// basisu.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -117,13 +117,26 @@ namespace basisu
typedef basisu::vector<uint64_t> uint64_vec;
typedef basisu::vector<int> int_vec;
typedef basisu::vector<bool> bool_vec;
+ typedef basisu::vector<float> float_vec;
void enable_debug_printf(bool enabled);
void debug_printf(const char *pFmt, ...);
-
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+#endif
+
template <typename T> inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); }
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
+
template <typename T0, typename T1> inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; }
template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; }
@@ -162,10 +175,45 @@ namespace basisu
template<typename T> inline T open_range_check(T v, T minv, T maxv) { assert(v >= minv && v < maxv); BASISU_NOTE_UNUSED(minv); BASISU_NOTE_UNUSED(maxv); return v; }
template<typename T> inline T open_range_check(T v, T maxv) { assert(v < maxv); BASISU_NOTE_UNUSED(maxv); return v; }
+ // Open interval
+ inline bool in_bounds(int v, int l, int h)
+ {
+ return (v >= l) && (v < h);
+ }
+
+ // Closed interval
+ inline bool in_range(int v, int l, int h)
+ {
+ return (v >= l) && (v <= h);
+ }
+
inline uint32_t total_bits(uint32_t v) { uint32_t l = 0; for ( ; v > 0U; ++l) v >>= 1; return l; }
template<typename T> inline T saturate(T val) { return clamp(val, 0.0f, 1.0f); }
+ inline uint32_t get_bit(uint32_t src, int ndx)
+ {
+ assert(in_bounds(ndx, 0, 32));
+ return (src >> ndx) & 1;
+ }
+
+ inline bool is_bit_set(uint32_t src, int ndx)
+ {
+ return get_bit(src, ndx) != 0;
+ }
+
+ inline uint32_t get_bits(uint32_t val, int low, int high)
+ {
+ const int num_bits = (high - low) + 1;
+ assert(in_range(num_bits, 1, 32));
+
+ val >>= low;
+ if (num_bits != 32)
+ val &= ((1u << num_bits) - 1);
+
+ return val;
+ }
+
template<typename T, typename R> inline void append_vector(T &vec, const R *pObjs, size_t n)
{
if (n)
@@ -267,6 +315,11 @@ namespace basisu
return true;
}
+ static inline uint32_t read_le_word(const uint8_t* pBytes)
+ {
+ return (pBytes[1] << 8U) | (pBytes[0]);
+ }
+
static inline uint32_t read_le_dword(const uint8_t *pBytes)
{
return (pBytes[3] << 24U) | (pBytes[2] << 16U) | (pBytes[1] << 8U) | (pBytes[0]);
@@ -303,6 +356,10 @@ namespace basisu
return *this;
}
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
inline operator uint32_t() const
{
switch (NumBytes)
@@ -354,6 +411,9 @@ namespace basisu
}
}
}
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
};
enum eZero { cZero };
@@ -402,8 +462,11 @@ namespace basisu
cBC3, // DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block)
cBC4, // DXT5A
cBC5, // 3DC/DXN (two BC4/DXT5A blocks)
+ cBC6HSigned, // HDR
+ cBC6HUnsigned, // HDR
cBC7,
- cASTC4x4, // LDR only
+ cASTC_LDR_4x4, // ASTC 4x4 LDR only
+ cASTC_HDR_4x4, // ASTC 4x4 HDR only (but may use LDR ASTC blocks internally)
cPVRTC1_4_RGB,
cPVRTC1_4_RGBA,
cATC_RGB,
@@ -413,17 +476,22 @@ namespace basisu
cETC2_R11_EAC,
cETC2_RG11_EAC,
cUASTC4x4,
+ cUASTC_HDR_4x4,
cBC1_NV,
cBC1_AMD,
-
+
// Uncompressed/raw pixels
cRGBA32,
cRGB565,
cBGR565,
cRGBA4444,
- cABGR4444
+ cABGR4444,
+ cRGBA_HALF,
+ cRGB_HALF,
+ cRGB_9E5
};
+ // This is bytes per block for GPU formats, or bytes per texel for uncompressed formats.
inline uint32_t get_bytes_per_block(texture_format fmt)
{
switch (fmt)
@@ -443,13 +511,27 @@ namespace basisu
case texture_format::cETC2_R11_EAC:
return 8;
case texture_format::cRGBA32:
- return sizeof(uint32_t) * 16;
+ case texture_format::cRGB_9E5:
+ return sizeof(uint32_t);
+ case texture_format::cRGB_HALF:
+ return sizeof(uint16_t) * 3;
+ case texture_format::cRGBA_HALF:
+ return sizeof(uint16_t) * 4;
+ case texture_format::cRGB565:
+ case texture_format::cBGR565:
+ case texture_format::cRGBA4444:
+ case texture_format::cABGR4444:
+ return sizeof(uint16_t);
+
default:
break;
}
+
+ // Everything else is 16 bytes/block.
return 16;
}
+ // This is qwords per block for GPU formats, or not valid for uncompressed formats.
inline uint32_t get_qwords_per_block(texture_format fmt)
{
return get_bytes_per_block(fmt) >> 3;
@@ -473,6 +555,17 @@ namespace basisu
BASISU_NOTE_UNUSED(fmt);
return 4;
}
+
+ inline bool is_hdr_texture_format(texture_format fmt)
+ {
+ if (fmt == texture_format::cASTC_HDR_4x4)
+ return true;
+ if (fmt == texture_format::cUASTC_HDR_4x4)
+ return true;
+ if ((fmt == texture_format::cBC6HSigned) || (fmt == texture_format::cBC6HUnsigned))
+ return true;
+ return false;
+ }
} // namespace basisu
diff --git a/thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h b/thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h
new file mode 100644
index 0000000000..82dcd2bfe1
--- /dev/null
+++ b/thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h
@@ -0,0 +1,102 @@
+// File: basisu_astc_hdr_core.h
+#pragma once
+#include "basisu_astc_helpers.h"
+
+namespace basist
+{
+ struct astc_blk
+ {
+ uint8_t m_vals[16];
+ };
+
+ // ASTC_HDR_MAX_VAL is the maximum color component value that can be encoded.
+ // If the input has values higher than this, they need to be linearly scaled so all values are between [0,ASTC_HDR_MAX_VAL], and the linear scaling inverted in the shader.
+ const float ASTC_HDR_MAX_VAL = 65216.0f; // actually MAX_QLOG12_VAL
+
+ // Maximum usable QLOG encodings, and their floating point equivalent values, that don't result in NaN/Inf's.
+ const uint32_t MAX_QLOG7 = 123;
+ //const float MAX_QLOG7_VAL = 55296.0f;
+
+ const uint32_t MAX_QLOG8 = 247;
+ //const float MAX_QLOG8_VAL = 60416.0f;
+
+ const uint32_t MAX_QLOG9 = 495;
+ //const float MAX_QLOG9_VAL = 62976.0f;
+
+ const uint32_t MAX_QLOG10 = 991;
+ //const float MAX_QLOG10_VAL = 64256.0f;
+
+ const uint32_t MAX_QLOG11 = 1983;
+ //const float MAX_QLOG11_VAL = 64896.0f;
+
+ const uint32_t MAX_QLOG12 = 3967;
+ //const float MAX_QLOG12_VAL = 65216.0f;
+
+ const uint32_t MAX_QLOG16 = 63487;
+ const float MAX_QLOG16_VAL = 65504.0f;
+
+ const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4;
+
+ // Notes:
+ // qlog16_to_half(half_to_qlog16(half_val_as_int)) == half_val_as_int (is lossless)
+ // However, this is not lossless in the general sense.
+ inline half_float qlog16_to_half_slow(uint32_t qlog16)
+ {
+ assert(qlog16 <= 0xFFFF);
+
+ int C = qlog16;
+
+ int E = (C & 0xF800) >> 11;
+ int M = C & 0x7FF;
+
+ int Mt;
+ if (M < 512)
+ Mt = 3 * M;
+ else if (M >= 1536)
+ Mt = 5 * M - 2048;
+ else
+ Mt = 4 * M - 512;
+
+ int Cf = (E << 10) + (Mt >> 3);
+ return (half_float)Cf;
+ }
+
+ // This is not lossless
+ inline half_float qlog_to_half_slow(uint32_t qlog, uint32_t bits)
+ {
+ assert((bits >= 7U) && (bits <= 16U));
+ assert(qlog < (1U << bits));
+
+ int C = qlog << (16 - bits);
+ return qlog16_to_half_slow(C);
+ }
+
+ void astc_hdr_core_init();
+
+ void decode_mode7_to_qlog12_ise20(
+ const uint8_t* pEndpoints,
+ int e[2][3],
+ int* pScale);
+
+ bool decode_mode7_to_qlog12(
+ const uint8_t* pEndpoints,
+ int e[2][3],
+ int* pScale,
+ uint32_t ise_endpoint_range);
+
+ void decode_mode11_to_qlog12_ise20(
+ const uint8_t* pEndpoints,
+ int e[2][3]);
+
+ bool decode_mode11_to_qlog12(
+ const uint8_t* pEndpoints,
+ int e[2][3],
+ uint32_t ise_endpoint_range);
+
+ bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk);
+ bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk);
+
+ bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk);
+ bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk);
+
+} // namespace basist
diff --git a/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h b/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h
new file mode 100644
index 0000000000..09a234b2ae
--- /dev/null
+++ b/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h
@@ -0,0 +1,3587 @@
+// basisu_astc_helpers.h
+// Be sure to define ASTC_HELPERS_IMPLEMENTATION somewhere to get the implementation, otherwise you only get the header.
+#pragma once
+#ifndef BASISU_ASTC_HELPERS_HEADER
+#define BASISU_ASTC_HELPERS_HEADER
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <math.h>
+#include <fenv.h>
+
+namespace astc_helpers
+{
+ const uint32_t MAX_WEIGHT_VALUE = 64; // grid texel weights must range from [0,64]
+ const uint32_t MIN_GRID_DIM = 2; // the minimum dimension of a block's weight grid
+ const uint32_t MIN_BLOCK_DIM = 4, MAX_BLOCK_DIM = 12; // the valid block dimensions in texels
+ const uint32_t MAX_GRID_WEIGHTS = 64; // a block may have a maximum of 64 weight grid values
+
+ static const uint32_t NUM_ASTC_BLOCK_SIZES = 14;
+ extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2];
+
+ // The Color Endpoint Modes (CEM's)
+ enum cems
+ {
+ CEM_LDR_LUM_DIRECT = 0,
+ CEM_LDR_LUM_BASE_PLUS_OFS = 1,
+ CEM_HDR_LUM_LARGE_RANGE = 2,
+ CEM_HDR_LUM_SMALL_RANGE = 3,
+ CEM_LDR_LUM_ALPHA_DIRECT = 4,
+ CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS = 5,
+ CEM_LDR_RGB_BASE_SCALE = 6,
+ CEM_HDR_RGB_BASE_SCALE = 7,
+ CEM_LDR_RGB_DIRECT = 8,
+ CEM_LDR_RGB_BASE_PLUS_OFFSET = 9,
+ CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A = 10,
+ CEM_HDR_RGB = 11,
+ CEM_LDR_RGBA_DIRECT = 12,
+ CEM_LDR_RGBA_BASE_PLUS_OFFSET = 13,
+ CEM_HDR_RGB_LDR_ALPHA = 14,
+ CEM_HDR_RGB_HDR_ALPHA = 15
+ };
+
+ // All Bounded Integer Sequence Coding (BISE or ISE) ranges.
+ // Weights: Ranges [0,11] are valid.
+ // Endpoints: Ranges [4,20] are valid.
+ enum bise_levels
+ {
+ BISE_2_LEVELS = 0,
+ BISE_3_LEVELS = 1,
+ BISE_4_LEVELS = 2,
+ BISE_5_LEVELS = 3,
+ BISE_6_LEVELS = 4,
+ BISE_8_LEVELS = 5,
+ BISE_10_LEVELS = 6,
+ BISE_12_LEVELS = 7,
+ BISE_16_LEVELS = 8,
+ BISE_20_LEVELS = 9,
+ BISE_24_LEVELS = 10,
+ BISE_32_LEVELS = 11,
+ BISE_40_LEVELS = 12,
+ BISE_48_LEVELS = 13,
+ BISE_64_LEVELS = 14,
+ BISE_80_LEVELS = 15,
+ BISE_96_LEVELS = 16,
+ BISE_128_LEVELS = 17,
+ BISE_160_LEVELS = 18,
+ BISE_192_LEVELS = 19,
+ BISE_256_LEVELS = 20
+ };
+
+ const uint32_t TOTAL_ISE_RANGES = 21;
+
+ // Valid endpoint ISE ranges
+ const uint32_t FIRST_VALID_ENDPOINT_ISE_RANGE = BISE_6_LEVELS; // 4
+ const uint32_t LAST_VALID_ENDPOINT_ISE_RANGE = BISE_256_LEVELS; // 20
+ const uint32_t TOTAL_ENDPOINT_ISE_RANGES = LAST_VALID_ENDPOINT_ISE_RANGE - FIRST_VALID_ENDPOINT_ISE_RANGE + 1;
+
+ // Valid weight ISE ranges
+ const uint32_t FIRST_VALID_WEIGHT_ISE_RANGE = BISE_2_LEVELS; // 0
+ const uint32_t LAST_VALID_WEIGHT_ISE_RANGE = BISE_32_LEVELS; // 11
+ const uint32_t TOTAL_WEIGHT_ISE_RANGES = LAST_VALID_WEIGHT_ISE_RANGE - FIRST_VALID_WEIGHT_ISE_RANGE + 1;
+
+ // The ISE range table.
+ extern const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3]; // 0=bits (0 to 8), 1=trits (0 or 1), 2=quints (0 or 1)
+
+ // Possible Color Component Select values, used in dual plane mode.
+ // The CCS component will be interpolated using the 2nd weight plane.
+ enum ccs
+ {
+ CCS_GBA_R = 0,
+ CCS_RBA_G = 1,
+ CCS_RGA_B = 2,
+ CCS_RGB_A = 3
+ };
+
+ struct astc_block
+ {
+ uint32_t m_vals[4];
+ };
+
+ const uint32_t MAX_PARTITIONS = 4; // Max # of partitions or subsets for single plane mode
+ const uint32_t MAX_DUAL_PLANE_PARTITIONS = 3; // Max # of partitions or subsets for dual plane mode
+ const uint32_t NUM_PARTITION_PATTERNS = 1024; // Total # of partition pattern seeds (10-bits)
+ const uint32_t MAX_ENDPOINTS = 18; // Maximum # of endpoint values in a block
+
+ struct log_astc_block
+ {
+ bool m_error_flag;
+
+ bool m_solid_color_flag_ldr, m_solid_color_flag_hdr;
+ uint16_t m_solid_color[4];
+
+ // Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr
+ uint32_t m_grid_width, m_grid_height; // weight grid dimensions, not the dimension of the block
+
+ bool m_dual_plane;
+
+ uint32_t m_weight_ise_range; // 0-11
+ uint32_t m_endpoint_ise_range; // 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking
+
+ uint32_t m_color_component_selector; // 0-3, 0=GBA R, 1=RBA G, 2=RGA B, 3=RGB A, only used in dual plane mode
+
+ uint32_t m_num_partitions; // or the # of subsets, 1-4 (1-3 if dual plane mode)
+ uint32_t m_partition_id; // 10-bits, must be 0 if m_num_partitions==1
+
+ uint32_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's
+
+ // ISE weight grid values. In dual plane mode, the order is p0,p1, p0,p1, etc.
+ uint8_t m_weights[MAX_GRID_WEIGHTS];
+
+ // ISE endpoint values
+ // Endpoint order examples:
+ // 1 subset LA : LL0 LH0 AL0 AH0
+ // 1 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0
+ // 1 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0
+ // 2 subset LA : LL0 LH0 AL0 AH0 LL1 LH1 AL1 AH1
+ // 2 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 RL1 RH1 GL1 GH1 BL1 BH1
+ // 2 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 RL1 RH1 GL1 GH1 BL1 BH1 AL1 AH1
+ uint8_t m_endpoints[MAX_ENDPOINTS];
+
+ void clear()
+ {
+ memset(this, 0, sizeof(*this));
+ }
+ };
+
+ // Open interval
+ inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
+ inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
+
+ inline uint32_t get_bits(uint32_t val, int low, int high)
+ {
+ const int num_bits = (high - low) + 1;
+ assert((num_bits >= 1) && (num_bits <= 32));
+
+ val >>= low;
+ if (num_bits != 32)
+ val &= ((1u << num_bits) - 1);
+
+ return val;
+ }
+
+ // Returns the number of levels in the given ISE range.
+ inline uint32_t get_ise_levels(uint32_t ise_range)
+ {
+ assert(ise_range < TOTAL_ISE_RANGES);
+ return (1 + 2 * g_ise_range_table[ise_range][1] + 4 * g_ise_range_table[ise_range][2]) << g_ise_range_table[ise_range][0];
+ }
+
+ inline int get_ise_sequence_bits(int count, int range)
+ {
+ // See 18.22 Data Size Determination
+ int total_bits = g_ise_range_table[range][0] * count;
+ total_bits += (g_ise_range_table[range][1] * 8 * count + 4) / 5;
+ total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3;
+ return total_bits;
+ }
+
+ inline uint32_t weight_interpolate(uint32_t l, uint32_t h, uint32_t w)
+ {
+ assert(w <= MAX_WEIGHT_VALUE);
+ return (l * (64 - w) + h * w + 32) >> 6;
+ }
+
+ void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range);
+
+ // Packs a logical to physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions.
+ bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr);
+
+ // Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component.
+ void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a);
+
+ // Pack HDR void extent (16-bit values are FP16/half floats - no NaN/Inf's)
+ void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah);
+
+ // These helpers are all quite slow, but are useful for table preparation.
+
+ // Dequantizes ISE encoded endpoint val to [0,255]
+ uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range); // ISE ranges 4-11
+
+ // Dequantizes ISE encoded weight val to [0,64]
+ uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range); // ISE ranges 0-10
+
+ uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range);
+ uint32_t find_nearest_bise_weight(int v, uint32_t ise_range);
+
+ void create_quant_tables(
+ uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
+ uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
+ uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
+ uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
+ uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights
+ bool weight_flag); // false if block endpoints, true if weights
+
+ // True if the CEM is LDR.
+ bool is_cem_ldr(uint32_t mode);
+ inline bool is_cem_hdr(uint32_t mode) { return !is_cem_ldr(mode); }
+
+ // True if the passed in dimensions are a valid ASTC block size. There are 14 supported configs, from 4x4 (8bpp) to 12x12 (.89bpp).
+ bool is_valid_block_size(uint32_t w, uint32_t h);
+
+ bool block_has_any_hdr_cems(const log_astc_block& log_blk);
+ bool block_has_any_ldr_cems(const log_astc_block& log_blk);
+
+ // Returns the # of endpoint values for the given CEM.
+ inline uint32_t get_num_cem_values(uint32_t cem) { assert(cem <= 15); return 2 + 2 * (cem >> 2); }
+
+ struct dequant_table
+ {
+ basisu::vector<uint8_t> m_val_to_ise; // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
+ basisu::vector<uint8_t> m_ISE_to_val; // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
+ basisu::vector<uint8_t> m_ISE_to_rank; // returns the level rank index given an ISE symbol, [levels]
+ basisu::vector<uint8_t> m_rank_to_ISE; // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
+
+ void init(bool weight_flag, uint32_t num_levels, bool init_rank_tabs)
+ {
+ m_val_to_ise.resize(weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256);
+ m_ISE_to_val.resize(num_levels);
+ if (init_rank_tabs)
+ {
+ m_ISE_to_rank.resize(num_levels);
+ m_rank_to_ISE.resize(num_levels);
+ }
+ }
+ };
+
+ struct dequant_tables
+ {
+ dequant_table m_weights[TOTAL_WEIGHT_ISE_RANGES];
+ dequant_table m_endpoints[TOTAL_ENDPOINT_ISE_RANGES];
+
+ const dequant_table& get_weight_tab(uint32_t range) const
+ {
+ assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
+ return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
+ }
+
+ dequant_table& get_weight_tab(uint32_t range)
+ {
+ assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
+ return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
+ }
+
+ const dequant_table& get_endpoint_tab(uint32_t range) const
+ {
+ assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
+ return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
+ }
+
+ dequant_table& get_endpoint_tab(uint32_t range)
+ {
+ assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
+ return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
+ }
+
+ void init(bool init_rank_tabs)
+ {
+ for (uint32_t range = FIRST_VALID_WEIGHT_ISE_RANGE; range <= LAST_VALID_WEIGHT_ISE_RANGE; range++)
+ {
+ const uint32_t num_levels = get_ise_levels(range);
+ dequant_table& tab = get_weight_tab(range);
+
+ tab.init(true, num_levels, init_rank_tabs);
+
+ create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, true);
+ }
+
+ for (uint32_t range = FIRST_VALID_ENDPOINT_ISE_RANGE; range <= LAST_VALID_ENDPOINT_ISE_RANGE; range++)
+ {
+ const uint32_t num_levels = get_ise_levels(range);
+ dequant_table& tab = get_endpoint_tab(range);
+
+ tab.init(false, num_levels, init_rank_tabs);
+
+ create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, false);
+ }
+ }
+ };
+
+ extern dequant_tables g_dequant_tables;
+ void init_tables(bool init_rank_tabs);
+
+ // Procedurally returns the texel partition/subset index given the block coordinate and config.
+ int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block);
+
+ void blue_contract(
+ int r, int g, int b, int a,
+ int& dr, int& dg, int& db, int& da);
+
+ void bit_transfer_signed(int& a, int& b);
+
+ void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t* pE);
+
+ typedef uint16_t half_float;
+ half_float float_to_half(float val, bool toward_zero);
+ float half_to_float(half_float hval);
+
+ const int MAX_RGB9E5 = 0xff80;
+ void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b);
+ uint32_t pack_rgb9e5(float r, float g, float b);
+
+ enum decode_mode
+ {
+ cDecodeModeSRGB8 = 0, // returns uint8_t's, not valid on HDR blocks
+ cDecodeModeLDR8 = 1, // returns uint8_t's, not valid on HDR blocks
+ cDecodeModeHDR16 = 2, // returns uint16_t's (half floats), valid on all LDR/HDR blocks
+ cDecodeModeRGB9E5 = 3 // returns uint32_t's, packed as RGB 9E5 (shared exponent), see https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
+ };
+
+ // Decodes logical block to output pixels.
+ // pPixels must point to either 32-bit pixel values (SRGB8/LDR8/9E5) or 64-bit pixel values (HDR16)
+ bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode);
+
+ void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t *pBits128, uint32_t bit_ofs);
+
+ // Unpack a physical ASTC encoded GPU texture block to a logical block description.
+ bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height);
+
+} // namespace astc_helpers
+
+#endif // BASISU_ASTC_HELPERS_HEADER
+
+//------------------------------------------------------------------
+
+#ifdef BASISU_ASTC_HELPERS_IMPLEMENTATION
+
+namespace astc_helpers
+{
+ template<typename T> inline T my_min(T a, T b) { return (a < b) ? a : b; }
+ template<typename T> inline T my_max(T a, T b) { return (a > b) ? a : b; }
+
+ const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] = {
+ { 4, 4 }, { 5, 4 }, { 5, 5 }, { 6, 5 },
+ { 6, 6 }, { 8, 5 }, { 8, 6 }, { 10, 5 },
+ { 10, 6 }, { 8, 8 }, { 10, 8 }, { 10, 10 },
+ { 12, 10 }, { 12, 12 }
+ };
+
+ const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3] =
+ {
+ //b t q
+ //2 3 5 // rng ise_index notes
+ { 1, 0, 0 }, // 0..1 0
+ { 0, 1, 0 }, // 0..2 1
+ { 2, 0, 0 }, // 0..3 2
+ { 0, 0, 1 }, // 0..4 3
+ { 1, 1, 0 }, // 0..5 4 min endpoint ISE index
+ { 3, 0, 0 }, // 0..7 5
+ { 1, 0, 1 }, // 0..9 6
+ { 2, 1, 0 }, // 0..11 7
+ { 4, 0, 0 }, // 0..15 8
+ { 2, 0, 1 }, // 0..19 9
+ { 3, 1, 0 }, // 0..23 10
+ { 5, 0, 0 }, // 0..31 11 max weight ISE index
+ { 3, 0, 1 }, // 0..39 12
+ { 4, 1, 0 }, // 0..47 13
+ { 6, 0, 0 }, // 0..63 14
+ { 4, 0, 1 }, // 0..79 15
+ { 5, 1, 0 }, // 0..95 16
+ { 7, 0, 0 }, // 0..127 17
+ { 5, 0, 1 }, // 0..159 18
+ { 6, 1, 0 }, // 0..191 19
+ { 8, 0, 0 }, // 0..255 20
+ };
+
+ static inline void astc_set_bits_1_to_9(uint32_t* pDst, uint32_t& bit_offset, uint32_t code, uint32_t codesize)
+ {
+ uint8_t* pBuf = reinterpret_cast<uint8_t*>(pDst);
+
+ assert(codesize <= 9);
+ if (codesize)
+ {
+ uint32_t byte_bit_offset = bit_offset & 7;
+ uint32_t val = code << byte_bit_offset;
+
+ uint32_t index = bit_offset >> 3;
+ pBuf[index] |= (uint8_t)val;
+
+ if (codesize > (8 - byte_bit_offset))
+ pBuf[index + 1] |= (uint8_t)(val >> 8);
+
+ bit_offset += codesize;
+ }
+ }
+
+ static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high)
+ {
+ return (bits >> low) & ((1 << (high - low + 1)) - 1);
+ }
+
+ // Writes bits to output in an endian safe way
+ static inline void astc_set_bits(uint32_t* pOutput, uint32_t& bit_pos, uint32_t value, uint32_t total_bits)
+ {
+ assert(total_bits <= 31);
+ assert(value < (1u << total_bits));
+
+ uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
+
+ while (total_bits)
+ {
+ const uint32_t bits_to_write = my_min<int>(total_bits, 8 - (bit_pos & 7));
+
+ pBytes[bit_pos >> 3] |= static_cast<uint8_t>(value << (bit_pos & 7));
+
+ bit_pos += bits_to_write;
+ total_bits -= bits_to_write;
+ value >>= bits_to_write;
+ }
+ }
+
+ static const uint8_t g_astc_quint_encode[125] =
+ {
+ 0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57,
+ 58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104,
+ 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54,
+ 126, 127, 94, 95, 62, 39, 47, 55, 63, 7 /*31 - results in the same decode as 7*/
+ };
+
+ // Encodes 3 values to output, usable for any range that uses quints and bits
+ static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n)
+ {
+ // First extract the quints and the bits from the 3 input values
+ int quints = 0, bits[3];
+ const uint32_t bit_mask = (1 << n) - 1;
+ for (int i = 0; i < 3; i++)
+ {
+ static const int s_muls[3] = { 1, 5, 25 };
+
+ const int t = pValues[i] >> n;
+
+ quints += t * s_muls[i];
+ bits[i] = pValues[i] & bit_mask;
+ }
+
+ // Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits.
+ // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
+
+ assert(quints < 125);
+ const int T = g_astc_quint_encode[quints];
+
+ // Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96.
+ astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) |
+ (bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3);
+ }
+
+ static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39,
+ 43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154,
+ 131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202,
+ 208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224,
+ 225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159,
+ 191, 223, 124, 125, 126 };
+
+ // Encodes 5 values to output, usable for any range that uses trits and bits
+ static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n)
+ {
+ // First extract the trits and the bits from the 5 input values
+ int trits = 0, bits[5];
+ const uint32_t bit_mask = (1 << n) - 1;
+ for (int i = 0; i < 5; i++)
+ {
+ static const int s_muls[5] = { 1, 3, 9, 27, 81 };
+
+ const int t = pValues[i] >> n;
+
+ trits += t * s_muls[i];
+ bits[i] = pValues[i] & bit_mask;
+ }
+
+ // Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits.
+ // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
+
+ assert(trits < 243);
+ const int T = g_astc_trit_encode[trits];
+
+ // Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94.
+ astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2);
+
+ astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) |
+ (bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6);
+ }
+
+ // Packs values using ASTC's BISE to output buffer.
+ void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range)
+ {
+ uint32_t temp[5] = { 0 };
+
+ const int num_bits = g_ise_range_table[range][0];
+
+ int group_size = 0;
+ if (g_ise_range_table[range][1])
+ group_size = 5;
+ else if (g_ise_range_table[range][2])
+ group_size = 3;
+
+#ifndef NDEBUG
+ const uint32_t num_levels = get_ise_levels(range);
+ for (int i = 0; i < num_vals; i++)
+ {
+ assert(pSrc_vals[i] < num_levels);
+ }
+#endif
+
+ if (group_size)
+ {
+ // Range has trits or quints - pack each group of 5 or 3 values
+ const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3);
+
+ for (int group_index = 0; group_index < total_groups; group_index++)
+ {
+ uint8_t vals[5] = { 0 };
+
+ const int limit = my_min(group_size, num_vals - group_index * group_size);
+ for (int i = 0; i < limit; i++)
+ vals[i] = pSrc_vals[group_index * group_size + i];
+
+ if (group_size == 5)
+ astc_encode_trits(temp, vals, bit_pos, num_bits);
+ else
+ astc_encode_quints(temp, vals, bit_pos, num_bits);
+ }
+ }
+ else
+ {
+ for (int i = 0; i < num_vals; i++)
+ astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits);
+ }
+
+ // TODO: Could this write too many bits on incomplete blocks?
+ pDst[0] |= temp[0]; pDst[1] |= temp[1];
+ pDst[2] |= temp[2]; pDst[3] |= temp[3];
+ }
+
+ inline uint32_t rev_dword(uint32_t bits)
+ {
+ uint32_t v = (bits << 16) | (bits >> 16);
+ v = ((v & 0x00ff00ff) << 8) | ((v & 0xff00ff00) >> 8); v = ((v & 0x0f0f0f0f) << 4) | ((v & 0xf0f0f0f0) >> 4);
+ v = ((v & 0x33333333) << 2) | ((v & 0xcccccccc) >> 2); v = ((v & 0x55555555) << 1) | ((v & 0xaaaaaaaa) >> 1);
+ return v;
+ }
+
+ static inline bool is_packable(int value, int num_bits) { assert((num_bits >= 1) && (num_bits < 31)); return (value >= 0) && (value < (1 << num_bits)); }
+
+ static bool get_config_bits(const log_astc_block &log_block, uint32_t &config_bits)
+ {
+ config_bits = 0;
+
+ const int W = log_block.m_grid_width, H = log_block.m_grid_height;
+
+ const uint32_t P = log_block.m_weight_ise_range >= 6; // high precision
+ const uint32_t Dp_P = (log_block.m_dual_plane << 1) | P; // pack dual plane+high precision bits
+
+ // See Tables 81-82
+ // Compute p from weight range
+ uint32_t p = 2 + log_block.m_weight_ise_range - (P ? 6 : 0);
+
+ // Rearrange p's bits to p0 p2 p1
+ p = (p >> 1) + ((p & 1) << 2);
+
+ // Try encoding each row of table 82.
+
+ // W+4 H+2
+ if (is_packable(W - 4, 2) && is_packable(H - 2, 2))
+ {
+ config_bits = (Dp_P << 9) | ((W - 4) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | (p & 3);
+ return true;
+ }
+
+ // W+8 H+2
+ if (is_packable(W - 8, 2) && is_packable(H - 2, 2))
+ {
+ config_bits = (Dp_P << 9) | ((W - 8) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 4 | (p & 3);
+ return true;
+ }
+
+ // W+2 H+8
+ if (is_packable(W - 2, 2) && is_packable(H - 8, 2))
+ {
+ config_bits = (Dp_P << 9) | ((H - 8) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 8 | (p & 3);
+ return true;
+ }
+
+ // W+2 H+6
+ if (is_packable(W - 2, 2) && is_packable(H - 6, 1))
+ {
+ config_bits = (Dp_P << 9) | ((H - 6) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3);
+ return true;
+ }
+
+ // W+2 H+2
+ if (is_packable(W - 2, 1) && is_packable(H - 2, 2))
+ {
+ config_bits = (Dp_P << 9) | ((W) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3);
+ return true;
+ }
+
+ // 12 H+2
+ if ((W == 12) && is_packable(H - 2, 2))
+ {
+ config_bits = (Dp_P << 9) | ((H - 2) << 5) | (p << 2);
+ return true;
+ }
+
+ // W+2 12
+ if ((H == 12) && is_packable(W - 2, 2))
+ {
+ config_bits = (Dp_P << 9) | (1 << 7) | ((W - 2) << 5) | (p << 2);
+ return true;
+ }
+
+ // 6 10
+ if ((W == 6) && (H == 10))
+ {
+ config_bits = (Dp_P << 9) | (3 << 7) | (p << 2);
+ return true;
+ }
+
+ // 10 6
+ if ((W == 10) && (H == 6))
+ {
+ config_bits = (Dp_P << 9) | (0b1101 << 5) | (p << 2);
+ return true;
+ }
+
+ // W+6 H+6 (no dual plane or high prec)
+ if ((!Dp_P) && is_packable(W - 6, 2) && is_packable(H - 6, 2))
+ {
+ config_bits = ((H - 6) << 9) | 256 | ((W - 6) << 5) | (p << 2);
+ return true;
+ }
+
+ // Failed: unsupported weight grid dimensions or config.
+ return false;
+ }
+
+ bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range)
+ {
+ memset(&phys_block, 0, sizeof(phys_block));
+
+ if (pExpected_endpoint_range)
+ *pExpected_endpoint_range = -1;
+
+ assert(!log_block.m_error_flag);
+ if (log_block.m_error_flag)
+ return false;
+
+ if (log_block.m_solid_color_flag_ldr)
+ {
+ pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]);
+ return true;
+ }
+ else if (log_block.m_solid_color_flag_hdr)
+ {
+ pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]);
+ return true;
+ }
+
+ if ((log_block.m_num_partitions < 1) || (log_block.m_num_partitions > MAX_PARTITIONS))
+ return false;
+
+ // Max usable weight range is 11
+ if (log_block.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE)
+ return false;
+
+ // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
+ if ((log_block.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_block.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))
+ return false;
+
+ if (log_block.m_color_component_selector > 3)
+ return false;
+
+ uint32_t config_bits = 0;
+ if (!get_config_bits(log_block, config_bits))
+ return false;
+
+ uint32_t bit_pos = 0;
+ astc_set_bits(&phys_block.m_vals[0], bit_pos, config_bits, 11);
+
+ const uint32_t total_grid_weights = (log_block.m_dual_plane ? 2 : 1) * (log_block.m_grid_width * log_block.m_grid_height);
+ const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);
+
+ // 18.24 Illegal Encodings
+ if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))
+ return false;
+
+ uint32_t total_extra_bits = 0;
+
+ astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_num_partitions - 1, 2);
+
+ if (log_block.m_num_partitions > 1)
+ {
+ if (log_block.m_partition_id >= NUM_PARTITION_PATTERNS)
+ return false;
+
+ astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_partition_id, 10);
+
+ uint32_t highest_cem = 0, lowest_cem = UINT32_MAX;
+ for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
+ {
+ highest_cem = my_max(highest_cem, log_block.m_color_endpoint_modes[j]);
+ lowest_cem = my_min(lowest_cem, log_block.m_color_endpoint_modes[j]);
+ }
+
+ if (highest_cem > 15)
+ return false;
+
+ // Ensure CEM range is contiguous
+ if (((highest_cem >> 2) > (1 + (lowest_cem >> 2))))
+ return false;
+
+ // See tables 79/80
+ uint32_t encoded_cem = log_block.m_color_endpoint_modes[0] << 2;
+ if (lowest_cem != highest_cem)
+ {
+ encoded_cem = my_min<uint32_t>(3, 1 + (lowest_cem >> 2));
+
+ // See tables at 23.11 Color Endpoint Mode
+ for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
+ {
+ const int M = log_block.m_color_endpoint_modes[j] & 3;
+
+ const int C = (log_block.m_color_endpoint_modes[j] >> 2) - ((encoded_cem & 3) - 1);
+ if ((C & 1) != C)
+ return false;
+
+ encoded_cem |= (C << (2 + j)) | (M << (2 + log_block.m_num_partitions + 2 * j));
+ }
+
+ total_extra_bits = 3 * log_block.m_num_partitions - 4;
+
+ if ((total_weight_bits + total_extra_bits) > 128)
+ return false;
+
+ uint32_t cem_bit_pos = 128 - total_weight_bits - total_extra_bits;
+ astc_set_bits(&phys_block.m_vals[0], cem_bit_pos, encoded_cem >> 6, total_extra_bits);
+ }
+
+ astc_set_bits(&phys_block.m_vals[0], bit_pos, encoded_cem & 0x3f, 6);
+ }
+ else
+ {
+ if (log_block.m_partition_id)
+ return false;
+ if (log_block.m_color_endpoint_modes[0] > 15)
+ return false;
+
+ astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_color_endpoint_modes[0], 4);
+ }
+
+ if (log_block.m_dual_plane)
+ {
+ if (log_block.m_num_partitions > 3)
+ return false;
+
+ total_extra_bits += 2;
+
+ uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits;
+ astc_set_bits(&phys_block.m_vals[0], ccs_bit_pos, log_block.m_color_component_selector, 2);
+ }
+
+ const uint32_t total_config_bits = bit_pos + total_extra_bits;
+ const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;
+ if (num_remaining_bits < 0)
+ return false;
+
+ uint32_t total_cem_vals = 0;
+ for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
+ total_cem_vals += 2 + 2 * (log_block.m_color_endpoint_modes[j] >> 2);
+
+ if (total_cem_vals > MAX_ENDPOINTS)
+ return false;
+
+ int endpoint_ise_range = -1;
+ for (int k = 20; k > 0; k--)
+ {
+ int bits = get_ise_sequence_bits(total_cem_vals, k);
+ if (bits <= num_remaining_bits)
+ {
+ endpoint_ise_range = k;
+ break;
+ }
+ }
+
+ // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
+ if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
+ return false;
+
+ // Ensure the caller utilized the right endpoint ISE range.
+ if ((int)log_block.m_endpoint_ise_range != endpoint_ise_range)
+ {
+ if (pExpected_endpoint_range)
+ *pExpected_endpoint_range = endpoint_ise_range;
+ return false;
+ }
+
+ // Pack endpoints forwards
+ encode_bise(&phys_block.m_vals[0], log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range);
+
+ // Pack weights backwards
+ uint32_t weight_data[4] = { 0 };
+ encode_bise(weight_data, log_block.m_weights, 0, total_grid_weights, log_block.m_weight_ise_range);
+
+ for (uint32_t i = 0; i < 4; i++)
+ phys_block.m_vals[i] |= rev_dword(weight_data[3 - i]);
+
+ return true;
+ }
+
+ static inline uint32_t bit_replication_scale(uint32_t src, int num_src_bits, int num_dst_bits)
+ {
+ assert(num_src_bits <= num_dst_bits);
+ assert((src & ((1 << num_src_bits) - 1)) == src);
+
+ uint32_t dst = 0;
+ for (int shift = num_dst_bits - num_src_bits; shift > -num_src_bits; shift -= num_src_bits)
+ dst |= (shift >= 0) ? (src << shift) : (src >> -shift);
+
+ return dst;
+ }
+
+ uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range)
+ {
+ assert((ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE));
+ assert(val < get_ise_levels(ise_range));
+
+ uint32_t u = 0;
+
+ switch (ise_range)
+ {
+ case 5:
+ {
+ u = bit_replication_scale(val, 3, 8);
+ break;
+ }
+ case 8:
+ {
+ u = bit_replication_scale(val, 4, 8);
+ break;
+ }
+ case 11:
+ {
+ u = bit_replication_scale(val, 5, 8);
+ break;
+ }
+ case 14:
+ {
+ u = bit_replication_scale(val, 6, 8);
+ break;
+ }
+ case 17:
+ {
+ u = bit_replication_scale(val, 7, 8);
+ break;
+ }
+ case 20:
+ {
+ u = val;
+ break;
+ }
+ case 4:
+ case 6:
+ case 7:
+ case 9:
+ case 10:
+ case 12:
+ case 13:
+ case 15:
+ case 16:
+ case 18:
+ case 19:
+ {
+ const uint32_t num_bits = g_ise_range_table[ise_range][0];
+ const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
+ const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
+
+ // compute Table 103 row index
+ const int range_index = (num_bits * 2 + (num_quints ? 1 : 0)) - 2;
+
+ assert(range_index >= 0 && range_index <= 10);
+
+ uint32_t bits = val & ((1 << num_bits) - 1);
+ uint32_t tval = val >> num_bits;
+
+ assert(tval < (num_trits ? 3U : 5U));
+
+ uint32_t a = bits & 1;
+ uint32_t b = (bits >> 1) & 1;
+ uint32_t c = (bits >> 2) & 1;
+ uint32_t d = (bits >> 3) & 1;
+ uint32_t e = (bits >> 4) & 1;
+ uint32_t f = (bits >> 5) & 1;
+
+ uint32_t A = a ? 511 : 0;
+ uint32_t B = 0;
+
+ switch (range_index)
+ {
+ case 2:
+ {
+ // 876543210
+ // b000b0bb0
+ B = (b << 1) | (b << 2) | (b << 4) | (b << 8);
+ break;
+ }
+ case 3:
+ {
+ // 876543210
+ // b0000bb00
+ B = (b << 2) | (b << 3) | (b << 8);
+ break;
+ }
+ case 4:
+ {
+ // 876543210
+ // cb000cbcb
+ B = b | (c << 1) | (b << 2) | (c << 3) | (b << 7) | (c << 8);
+ break;
+ }
+ case 5:
+ {
+ // 876543210
+ // cb0000cbc
+ B = c | (b << 1) | (c << 2) | (b << 7) | (c << 8);
+ break;
+ }
+ case 6:
+ {
+ // 876543210
+ // dcb000dcb
+ B = b | (c << 1) | (d << 2) | (b << 6) | (c << 7) | (d << 8);
+ break;
+ }
+ case 7:
+ {
+ // 876543210
+ // dcb0000dc
+ B = c | (d << 1) | (b << 6) | (c << 7) | (d << 8);
+ break;
+ }
+ case 8:
+ {
+ // 876543210
+ // edcb000ed
+ B = d | (e << 1) | (b << 5) | (c << 6) | (d << 7) | (e << 8);
+ break;
+ }
+ case 9:
+ {
+ // 876543210
+ // edcb0000e
+ B = e | (b << 5) | (c << 6) | (d << 7) | (e << 8);
+ break;
+ }
+ case 10:
+ {
+ // 876543210
+ // fedcb000f
+ B = f | (b << 4) | (c << 5) | (d << 6) | (e << 7) | (f << 8);
+ break;
+ }
+ default:
+ break;
+ }
+
+ static uint8_t C_vals[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 };
+ uint32_t C = C_vals[range_index];
+ uint32_t D = tval;
+
+ u = D * C + B;
+ u = u ^ A;
+ u = (A & 0x80) | (u >> 2);
+
+ break;
+ }
+ default:
+ {
+ assert(0);
+ break;
+ }
+ }
+
+ return u;
+ }
+
+ uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range)
+ {
+ assert(val < get_ise_levels(ise_range));
+
+ uint32_t u = 0;
+ switch (ise_range)
+ {
+ case 0:
+ {
+ u = val ? 63 : 0;
+ break;
+ }
+ case 1: // 0-2
+ {
+ const uint8_t s_tab_0_2[3] = { 0, 32, 63 };
+ u = s_tab_0_2[val];
+ break;
+ }
+ case 2: // 0-3
+ {
+ u = bit_replication_scale(val, 2, 6);
+ break;
+ }
+ case 3: // 0-4
+ {
+ const uint8_t s_tab_0_4[5] = { 0, 16, 32, 47, 63 };
+ u = s_tab_0_4[val];
+ break;
+ }
+ case 5: // 0-7
+ {
+ u = bit_replication_scale(val, 3, 6);
+ break;
+ }
+ case 8: // 0-15
+ {
+ u = bit_replication_scale(val, 4, 6);
+ break;
+ }
+ case 11: // 0-31
+ {
+ u = bit_replication_scale(val, 5, 6);
+ break;
+ }
+ case 4: // 0-5
+ case 6: // 0-9
+ case 7: // 0-11
+ case 9: // 0-19
+ case 10: // 0-23
+ {
+ const uint32_t num_bits = g_ise_range_table[ise_range][0];
+ const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
+ const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
+
+ // compute Table 103 row index
+ const int range_index = num_bits * 2 + (num_quints ? 1 : 0);
+
+ // Extract bits and tris/quints from value
+ const uint32_t bits = val & ((1u << num_bits) - 1);
+ const uint32_t D = val >> num_bits;
+
+ assert(D < (num_trits ? 3U : 5U));
+
+ // Now dequantize
+ // See Table 103. ASTC weight unquantization parameters
+ static const uint32_t C_table[5] = { 50, 28, 23, 13, 11 };
+
+ const uint32_t a = bits & 1, b = (bits >> 1) & 1, c = (bits >> 2) & 1;
+
+ const uint32_t A = (a == 0) ? 0 : 0x7F;
+
+ uint32_t B = 0;
+ if (range_index == 4)
+ B = ((b << 6) | (b << 2) | (b << 0));
+ else if (range_index == 5)
+ B = ((b << 6) | (b << 1));
+ else if (range_index == 6)
+ B = ((c << 6) | (b << 5) | (c << 1) | (b << 0));
+
+ const uint32_t C = C_table[range_index - 2];
+
+ u = D * C + B;
+ u = u ^ A;
+ u = (A & 0x20) | (u >> 2);
+ break;
+ }
+ default:
+ assert(0);
+ break;
+ }
+
+ if (u > 32)
+ u++;
+
+ return u;
+ }
+
+ // Returns the nearest ISE symbol given a [0,255] endpoint value.
+ uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range)
+ {
+ assert(ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE && ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE);
+
+ const uint32_t total_levels = get_ise_levels(ise_range);
+ int best_e = INT_MAX, best_index = 0;
+ for (uint32_t i = 0; i < total_levels; i++)
+ {
+ const int qv = dequant_bise_endpoint(i, ise_range);
+ int e = labs(v - qv);
+ if (e < best_e)
+ {
+ best_e = e;
+ best_index = i;
+ if (!best_e)
+ break;
+ }
+ }
+ return best_index;
+ }
+
+ // Returns the nearest ISE weight given a [0,64] endpoint value.
+ uint32_t find_nearest_bise_weight(int v, uint32_t ise_range)
+ {
+ assert(ise_range >= FIRST_VALID_WEIGHT_ISE_RANGE && ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
+ assert(v <= (int)MAX_WEIGHT_VALUE);
+
+ const uint32_t total_levels = get_ise_levels(ise_range);
+ int best_e = INT_MAX, best_index = 0;
+ for (uint32_t i = 0; i < total_levels; i++)
+ {
+ const int qv = dequant_bise_weight(i, ise_range);
+ int e = labs(v - qv);
+ if (e < best_e)
+ {
+ best_e = e;
+ best_index = i;
+ if (!best_e)
+ break;
+ }
+ }
+ return best_index;
+ }
+
+ void create_quant_tables(
+ uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
+ uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
+ uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
+ uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
+ uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights
+ bool weight_flag) // false if block endpoints, true if weights
+ {
+ const uint32_t num_dequant_vals = weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256;
+
+ for (uint32_t i = 0; i < num_dequant_vals; i++)
+ {
+ uint32_t bise_index = weight_flag ? astc_helpers::find_nearest_bise_weight(i, ise_range) : astc_helpers::find_nearest_bise_endpoint(i, ise_range);
+
+ if (pVal_to_ise)
+ pVal_to_ise[i] = (uint8_t)bise_index;
+
+ if (pISE_to_val)
+ pISE_to_val[bise_index] = weight_flag ? (uint8_t)astc_helpers::dequant_bise_weight(bise_index, ise_range) : (uint8_t)astc_helpers::dequant_bise_endpoint(bise_index, ise_range);
+ }
+
+ if (pISE_to_rank || pRank_to_ISE)
+ {
+ const uint32_t num_levels = get_ise_levels(ise_range);
+
+ if (!g_ise_range_table[ise_range][1] && !g_ise_range_table[ise_range][2])
+ {
+ // Only bits
+ for (uint32_t i = 0; i < num_levels; i++)
+ {
+ if (pISE_to_rank)
+ pISE_to_rank[i] = (uint8_t)i;
+
+ if (pRank_to_ISE)
+ pRank_to_ISE[i] = (uint8_t)i;
+ }
+ }
+ else
+ {
+ // Range has trits or quints
+ uint32_t vals[256];
+ for (uint32_t i = 0; i < num_levels; i++)
+ {
+ uint32_t v = weight_flag ? astc_helpers::dequant_bise_weight(i, ise_range) : astc_helpers::dequant_bise_endpoint(i, ise_range);
+
+ // Low=ISE value
+ // High=dequantized value
+ vals[i] = (v << 16) | i;
+ }
+
+ // Sorts by dequantized value
+ std::sort(vals, vals + num_levels);
+
+ for (uint32_t rank = 0; rank < num_levels; rank++)
+ {
+ uint32_t ise_val = (uint8_t)vals[rank];
+
+ if (pISE_to_rank)
+ pISE_to_rank[ise_val] = (uint8_t)rank;
+
+ if (pRank_to_ISE)
+ pRank_to_ISE[rank] = (uint8_t)ise_val;
+ }
+ }
+ }
+ }
+
+ void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah)
+ {
+ uint8_t* pDst = (uint8_t*)&blk.m_vals[0];
+ memset(pDst, 0xFF, 16);
+
+ pDst[0] = 0b11111100;
+ pDst[1] = 0b11111101;
+
+ pDst[8] = (uint8_t)rh;
+ pDst[9] = (uint8_t)(rh >> 8);
+ pDst[10] = (uint8_t)gh;
+ pDst[11] = (uint8_t)(gh >> 8);
+ pDst[12] = (uint8_t)bh;
+ pDst[13] = (uint8_t)(bh >> 8);
+ pDst[14] = (uint8_t)ah;
+ pDst[15] = (uint8_t)(ah >> 8);
+ }
+
+ // rh-ah are half-floats
+ void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah)
+ {
+ uint8_t* pDst = (uint8_t*)&blk.m_vals[0];
+ memset(pDst, 0xFF, 16);
+
+ pDst[0] = 0b11111100;
+
+ pDst[8] = (uint8_t)rh;
+ pDst[9] = (uint8_t)(rh >> 8);
+ pDst[10] = (uint8_t)gh;
+ pDst[11] = (uint8_t)(gh >> 8);
+ pDst[12] = (uint8_t)bh;
+ pDst[13] = (uint8_t)(bh >> 8);
+ pDst[14] = (uint8_t)ah;
+ pDst[15] = (uint8_t)(ah >> 8);
+ }
+
+ bool is_cem_ldr(uint32_t mode)
+ {
+ switch (mode)
+ {
+ case CEM_LDR_LUM_DIRECT:
+ case CEM_LDR_LUM_BASE_PLUS_OFS:
+ case CEM_LDR_LUM_ALPHA_DIRECT:
+ case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
+ case CEM_LDR_RGB_BASE_SCALE:
+ case CEM_LDR_RGB_DIRECT:
+ case CEM_LDR_RGB_BASE_PLUS_OFFSET:
+ case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
+ case CEM_LDR_RGBA_DIRECT:
+ case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+ }
+
+ bool is_valid_block_size(uint32_t w, uint32_t h)
+ {
+ assert((w >= MIN_BLOCK_DIM) && (w <= MAX_BLOCK_DIM));
+ assert((h >= MIN_BLOCK_DIM) && (h <= MAX_BLOCK_DIM));
+
+#define SIZECHK(x, y) if ((w == (x)) && (h == (y))) return true;
+ SIZECHK(4, 4);
+ SIZECHK(5, 4);
+
+ SIZECHK(5, 5);
+
+ SIZECHK(6, 5);
+ SIZECHK(6, 6);
+
+ SIZECHK(8, 5);
+ SIZECHK(8, 6);
+ SIZECHK(10, 5);
+ SIZECHK(10, 6);
+
+ SIZECHK(8, 8);
+ SIZECHK(10, 8);
+ SIZECHK(10, 10);
+
+ SIZECHK(12, 10);
+ SIZECHK(12, 12);
+#undef SIZECHK
+
+ return false;
+ }
+
+ bool block_has_any_hdr_cems(const log_astc_block& log_blk)
+ {
+ assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
+
+ for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
+ if (is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
+ return true;
+
+ return false;
+ }
+
+ bool block_has_any_ldr_cems(const log_astc_block& log_blk)
+ {
+ assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
+
+ for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
+ if (!is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
+ return true;
+
+ return false;
+ }
+
+ dequant_tables g_dequant_tables;
+
+ void precompute_texel_partitions_4x4();
+
+ void init_tables(bool init_rank_tabs)
+ {
+ g_dequant_tables.init(init_rank_tabs);
+
+ precompute_texel_partitions_4x4();
+ }
+
+ struct weighted_sample
+ {
+ uint8_t m_src_x;
+ uint8_t m_src_y;
+ uint8_t m_weights[2][2]; // [y][x], scaled by 16, round by adding 8
+ };
+
+ static void compute_upsample_weights(
+ int block_width, int block_height,
+ int weight_grid_width, int weight_grid_height,
+ weighted_sample* pWeights) // there will be block_width * block_height bilinear samples
+ {
+ const uint32_t scaleX = (1024 + block_width / 2) / (block_width - 1);
+ const uint32_t scaleY = (1024 + block_height / 2) / (block_height - 1);
+
+ for (int texelY = 0; texelY < block_height; texelY++)
+ {
+ for (int texelX = 0; texelX < block_width; texelX++)
+ {
+ const uint32_t gX = (scaleX * texelX * (weight_grid_width - 1) + 32) >> 6;
+ const uint32_t gY = (scaleY * texelY * (weight_grid_height - 1) + 32) >> 6;
+ const uint32_t jX = gX >> 4;
+ const uint32_t jY = gY >> 4;
+ const uint32_t fX = gX & 0xf;
+ const uint32_t fY = gY & 0xf;
+ const uint32_t w11 = (fX * fY + 8) >> 4;
+ const uint32_t w10 = fY - w11;
+ const uint32_t w01 = fX - w11;
+ const uint32_t w00 = 16 - fX - fY + w11;
+
+ weighted_sample& s = pWeights[texelX + texelY * block_width];
+ s.m_src_x = (uint8_t)jX;
+ s.m_src_y = (uint8_t)jY;
+ s.m_weights[0][0] = (uint8_t)w00;
+ s.m_weights[0][1] = (uint8_t)w01;
+ s.m_weights[1][0] = (uint8_t)w10;
+ s.m_weights[1][1] = (uint8_t)w11;
+ }
+ }
+ }
+
+ // Should be dequantized [0,64] weights
+ static void upsample_weight_grid(
+ uint32_t bx, uint32_t by, // destination/to dimension
+ uint32_t wx, uint32_t wy, // source/from dimension
+ const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
+ uint8_t* pDst_weights) // [by][bx]
+ {
+ assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12));
+ assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by));
+
+ const uint32_t total_src_weights = wx * wy;
+ const uint32_t total_dst_weights = bx * by;
+
+ if (total_src_weights == total_dst_weights)
+ {
+ memcpy(pDst_weights, pSrc_weights, total_src_weights);
+ return;
+ }
+
+ weighted_sample weights[12 * 12];
+ compute_upsample_weights(bx, by, wx, wy, weights);
+
+ const weighted_sample* pS = weights;
+
+ for (uint32_t y = 0; y < by; y++)
+ {
+ for (uint32_t x = 0; x < bx; x++, ++pS)
+ {
+ const uint32_t w00 = pS->m_weights[0][0];
+ const uint32_t w01 = pS->m_weights[0][1];
+ const uint32_t w10 = pS->m_weights[1][0];
+ const uint32_t w11 = pS->m_weights[1][1];
+
+ assert(w00 || w01 || w10 || w11);
+
+ const uint32_t sx = pS->m_src_x, sy = pS->m_src_y;
+
+ uint32_t total = 8;
+ if (w00) total += pSrc_weights[bounds_check(sx + sy * wx, 0U, total_src_weights)] * w00;
+ if (w01) total += pSrc_weights[bounds_check(sx + 1 + sy * wx, 0U, total_src_weights)] * w01;
+ if (w10) total += pSrc_weights[bounds_check(sx + (sy + 1) * wx, 0U, total_src_weights)] * w10;
+ if (w11) total += pSrc_weights[bounds_check(sx + 1 + (sy + 1) * wx, 0U, total_src_weights)] * w11;
+
+ pDst_weights[x + y * bx] = (uint8_t)(total >> 4);
+ }
+ }
+ }
+
+ inline uint32_t hash52(uint32_t v)
+ {
+ uint32_t p = v;
+ p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4;
+ p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3;
+ p ^= p << 6; p ^= p >> 17;
+ return p;
+ }
+
+ int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block)
+ {
+ assert(zIn == 0);
+
+ const uint32_t x = small_block ? xIn << 1 : xIn;
+ const uint32_t y = small_block ? yIn << 1 : yIn;
+ const uint32_t z = small_block ? zIn << 1 : zIn;
+ const uint32_t seed = seedIn + 1024 * (num_partitions - 1);
+ const uint32_t rnum = hash52(seed);
+
+ uint8_t seed1 = (uint8_t)(rnum & 0xf);
+ uint8_t seed2 = (uint8_t)((rnum >> 4) & 0xf);
+ uint8_t seed3 = (uint8_t)((rnum >> 8) & 0xf);
+ uint8_t seed4 = (uint8_t)((rnum >> 12) & 0xf);
+ uint8_t seed5 = (uint8_t)((rnum >> 16) & 0xf);
+ uint8_t seed6 = (uint8_t)((rnum >> 20) & 0xf);
+ uint8_t seed7 = (uint8_t)((rnum >> 24) & 0xf);
+ uint8_t seed8 = (uint8_t)((rnum >> 28) & 0xf);
+ uint8_t seed9 = (uint8_t)((rnum >> 18) & 0xf);
+ uint8_t seed10 = (uint8_t)((rnum >> 22) & 0xf);
+ uint8_t seed11 = (uint8_t)((rnum >> 26) & 0xf);
+ uint8_t seed12 = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf);
+
+ seed1 = (uint8_t)(seed1 * seed1);
+ seed2 = (uint8_t)(seed2 * seed2);
+ seed3 = (uint8_t)(seed3 * seed3);
+ seed4 = (uint8_t)(seed4 * seed4);
+ seed5 = (uint8_t)(seed5 * seed5);
+ seed6 = (uint8_t)(seed6 * seed6);
+ seed7 = (uint8_t)(seed7 * seed7);
+ seed8 = (uint8_t)(seed8 * seed8);
+ seed9 = (uint8_t)(seed9 * seed9);
+ seed10 = (uint8_t)(seed10 * seed10);
+ seed11 = (uint8_t)(seed11 * seed11);
+ seed12 = (uint8_t)(seed12 * seed12);
+
+ const int shA = (seed & 2) != 0 ? 4 : 5;
+ const int shB = (num_partitions == 3) ? 6 : 5;
+ const int sh1 = (seed & 1) != 0 ? shA : shB;
+ const int sh2 = (seed & 1) != 0 ? shB : shA;
+ const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2;
+
+ seed1 = (uint8_t)(seed1 >> sh1);
+ seed2 = (uint8_t)(seed2 >> sh2);
+ seed3 = (uint8_t)(seed3 >> sh1);
+ seed4 = (uint8_t)(seed4 >> sh2);
+ seed5 = (uint8_t)(seed5 >> sh1);
+ seed6 = (uint8_t)(seed6 >> sh2);
+ seed7 = (uint8_t)(seed7 >> sh1);
+ seed8 = (uint8_t)(seed8 >> sh2);
+ seed9 = (uint8_t)(seed9 >> sh3);
+ seed10 = (uint8_t)(seed10 >> sh3);
+ seed11 = (uint8_t)(seed11 >> sh3);
+ seed12 = (uint8_t)(seed12 >> sh3);
+
+ const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14));
+ const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10));
+ const int c = (num_partitions >= 3) ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0;
+ const int d = (num_partitions >= 4) ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0;
+
+ return (a >= b && a >= c && a >= d) ? 0
+ : (b >= c && b >= d) ? 1
+ : (c >= d) ? 2
+ : 3;
+ }
+
+ static uint32_t g_texel_partitions_4x4[1024][2];
+
+ void precompute_texel_partitions_4x4()
+ {
+ for (uint32_t p = 0; p < 1024; p++)
+ {
+ uint32_t v2 = 0, v3 = 0;
+
+ for (uint32_t y = 0; y < 4; y++)
+ {
+ for (uint32_t x = 0; x < 4; x++)
+ {
+ const uint32_t shift = x * 2 + y * 8;
+ v2 |= (compute_texel_partition(p, x, y, 0, 2, true) << shift);
+ v3 |= (compute_texel_partition(p, x, y, 0, 3, true) << shift);
+ }
+ }
+
+ g_texel_partitions_4x4[p][0] = v2;
+ g_texel_partitions_4x4[p][1] = v3;
+ }
+ }
+
+ static inline int get_precompute_texel_partitions_4x4(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions)
+ {
+ assert(g_texel_partitions_4x4[1][0]);
+ assert(seed < 1024);
+ assert((x <= 3) && (y <= 3));
+ assert((num_partitions >= 2) && (num_partitions <= 3));
+
+ const uint32_t shift = x * 2 + y * 8;
+ return (g_texel_partitions_4x4[seed][num_partitions - 2] >> shift) & 3;
+ }
+
+ void blue_contract(
+ int r, int g, int b, int a,
+ int &dr, int &dg, int &db, int &da)
+ {
+ dr = (r + b) >> 1;
+ dg = (g + b) >> 1;
+ db = b;
+ da = a;
+ }
+
+ inline void bit_transfer_signed(int& a, int& b)
+ {
+ b >>= 1;
+ b |= (a & 0x80);
+ a >>= 1;
+ a &= 0x3F;
+ if ((a & 0x20) != 0)
+ a -= 0x40;
+ }
+
+ static inline int clamp(int a, int l, int h)
+ {
+ if (a < l)
+ a = l;
+ else if (a > h)
+ a = h;
+ return a;
+ }
+
+ static inline float clampf(float a, float l, float h)
+ {
+ if (a < l)
+ a = l;
+ else if (a > h)
+ a = h;
+ return a;
+ }
+
+ inline int sign_extend(int src, int num_src_bits)
+ {
+ assert((num_src_bits >= 2) && (num_src_bits <= 31));
+
+ const bool negative = (src & (1 << (num_src_bits - 1))) != 0;
+ if (negative)
+ return src | ~((1 << num_src_bits) - 1);
+ else
+ return src & ((1 << num_src_bits) - 1);
+ }
+
+ // endpoints is [4][2]
+ void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t *pE)
+ {
+ assert(cem_index <= CEM_HDR_RGB_HDR_ALPHA);
+
+ int v0 = pE[0], v1 = pE[1];
+
+ int& e0_r = pEndpoints[0][0], &e0_g = pEndpoints[1][0], &e0_b = pEndpoints[2][0], &e0_a = pEndpoints[3][0];
+ int& e1_r = pEndpoints[0][1], &e1_g = pEndpoints[1][1], &e1_b = pEndpoints[2][1], &e1_a = pEndpoints[3][1];
+
+ switch (cem_index)
+ {
+ case CEM_LDR_LUM_DIRECT:
+ {
+ e0_r = v0; e1_r = v1;
+ e0_g = v0; e1_g = v1;
+ e0_b = v0; e1_b = v1;
+ e0_a = 0xFF; e1_a = 0xFF;
+ break;
+ }
+ case CEM_LDR_LUM_BASE_PLUS_OFS:
+ {
+ int l0 = (v0 >> 2) | (v1 & 0xc0);
+ int l1 = l0 + (v1 & 0x3f);
+
+ if (l1 > 0xFF)
+ l1 = 0xFF;
+
+ e0_r = l0; e1_r = l1;
+ e0_g = l0; e1_g = l1;
+ e0_b = l0; e1_b = l1;
+ e0_a = 0xFF; e1_a = 0xFF;
+ break;
+ }
+ case CEM_LDR_LUM_ALPHA_DIRECT:
+ {
+ int v2 = pE[2], v3 = pE[3];
+
+ e0_r = v0; e1_r = v1;
+ e0_g = v0; e1_g = v1;
+ e0_b = v0; e1_b = v1;
+ e0_a = v2; e1_a = v3;
+ break;
+ }
+ case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
+ {
+ int v2 = pE[2], v3 = pE[3];
+
+ bit_transfer_signed(v1, v0);
+ bit_transfer_signed(v3, v2);
+
+ e0_r = v0; e1_r = v0 + v1;
+ e0_g = v0; e1_g = v0 + v1;
+ e0_b = v0; e1_b = v0 + v1;
+ e0_a = v2; e1_a = v2 + v3;
+
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
+ pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
+ }
+
+ break;
+ }
+ case CEM_LDR_RGB_BASE_SCALE:
+ {
+ int v2 = pE[2], v3 = pE[3];
+
+ e0_r = (v0 * v3) >> 8; e1_r = v0;
+ e0_g = (v1 * v3) >> 8; e1_g = v1;
+ e0_b = (v2 * v3) >> 8; e1_b = v2;
+ e0_a = 0xFF; e1_a = 0xFF;
+
+ break;
+ }
+ case CEM_LDR_RGB_DIRECT:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
+
+ if ((v1 + v3 + v5) >= (v0 + v2 + v4))
+ {
+ e0_r = v0; e1_r = v1;
+ e0_g = v2; e1_g = v3;
+ e0_b = v4; e1_b = v5;
+ e0_a = 0xFF; e1_a = 0xFF;
+ }
+ else
+ {
+ blue_contract(v1, v3, v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
+ blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
+ }
+
+ break;
+ }
+ case CEM_LDR_RGB_BASE_PLUS_OFFSET:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
+
+ bit_transfer_signed(v1, v0);
+ bit_transfer_signed(v3, v2);
+ bit_transfer_signed(v5, v4);
+
+ if ((v1 + v3 + v5) >= 0)
+ {
+ e0_r = v0; e1_r = v0 + v1;
+ e0_g = v2; e1_g = v2 + v3;
+ e0_b = v4; e1_b = v4 + v5;
+ e0_a = 0xFF; e1_a = 0xFF;
+ }
+ else
+ {
+ blue_contract(v0 + v1, v2 + v3, v4 + v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
+ blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
+ }
+
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
+ pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
+ }
+
+ break;
+ }
+ case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
+
+ e0_r = (v0 * v3) >> 8; e1_r = v0;
+ e0_g = (v1 * v3) >> 8; e1_g = v1;
+ e0_b = (v2 * v3) >> 8; e1_b = v2;
+ e0_a = v4; e1_a = v5;
+
+ break;
+ }
+ case CEM_LDR_RGBA_DIRECT:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
+
+ if ((v1 + v3 + v5) >= (v0 + v2 + v4))
+ {
+ e0_r = v0; e1_r = v1;
+ e0_g = v2; e1_g = v3;
+ e0_b = v4; e1_b = v5;
+ e0_a = v6; e1_a = v7;
+ }
+ else
+ {
+ blue_contract(v1, v3, v5, v7, e0_r, e0_g, e0_b, e0_a);
+ blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
+ }
+
+ break;
+ }
+ case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
+
+ bit_transfer_signed(v1, v0);
+ bit_transfer_signed(v3, v2);
+ bit_transfer_signed(v5, v4);
+ bit_transfer_signed(v7, v6);
+
+ if ((v1 + v3 + v5) >= 0)
+ {
+ e0_r = v0; e1_r = v0 + v1;
+ e0_g = v2; e1_g = v2 + v3;
+ e0_b = v4; e1_b = v4 + v5;
+ e0_a = v6; e1_a = v6 + v7;
+ }
+ else
+ {
+ blue_contract(v0 + v1, v2 + v3, v4 + v5, v6 + v7, e0_r, e0_g, e0_b, e0_a);
+ blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
+ }
+
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
+ pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
+ }
+
+ break;
+ }
+ case CEM_HDR_LUM_LARGE_RANGE:
+ {
+ int y0, y1;
+ if (v1 >= v0)
+ {
+ y0 = (v0 << 4);
+ y1 = (v1 << 4);
+ }
+ else
+ {
+ y0 = (v1 << 4) + 8;
+ y1 = (v0 << 4) - 8;
+ }
+
+ e0_r = y0; e1_r = y1;
+ e0_g = y0; e1_g = y1;
+ e0_b = y0; e1_b = y1;
+ e0_a = 0x780; e1_a = 0x780;
+
+ break;
+ }
+ case CEM_HDR_LUM_SMALL_RANGE:
+ {
+ int y0, y1, d;
+
+ if ((v0 & 0x80) != 0)
+ {
+ y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);
+ d = (v1 & 0x1F) << 2;
+ }
+ else
+ {
+ y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);
+ d = (v1 & 0x0F) << 1;
+ }
+
+ y1 = y0 + d;
+ if (y1 > 0xFFF)
+ y1 = 0xFFF;
+
+ e0_r = y0; e1_r = y1;
+ e0_g = y0; e1_g = y1;
+ e0_b = y0; e1_b = y1;
+ e0_a = 0x780; e1_a = 0x780;
+
+ break;
+ }
+ case CEM_HDR_RGB_BASE_SCALE:
+ {
+ int v2 = pE[2], v3 = pE[3];
+
+ int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
+
+ int majcomp, mode;
+ if ((modeval & 0xC) != 0xC)
+ {
+ majcomp = modeval >> 2;
+ mode = modeval & 3;
+ }
+ else if (modeval != 0xF)
+ {
+ majcomp = modeval & 3;
+ mode = 4;
+ }
+ else
+ {
+ majcomp = 0;
+ mode = 5;
+ }
+
+ int red = v0 & 0x3f;
+ int green = v1 & 0x1f;
+ int blue = v2 & 0x1f;
+ int scale = v3 & 0x1f;
+
+ int x0 = (v1 >> 6) & 1;
+ int x1 = (v1 >> 5) & 1;
+ int x2 = (v2 >> 6) & 1;
+ int x3 = (v2 >> 5) & 1;
+ int x4 = (v3 >> 7) & 1;
+ int x5 = (v3 >> 6) & 1;
+ int x6 = (v3 >> 5) & 1;
+
+ int ohm = 1 << mode;
+ if (ohm & 0x30) green |= x0 << 6;
+ if (ohm & 0x3A) green |= x1 << 5;
+ if (ohm & 0x30) blue |= x2 << 6;
+ if (ohm & 0x3A) blue |= x3 << 5;
+ if (ohm & 0x3D) scale |= x6 << 5;
+ if (ohm & 0x2D) scale |= x5 << 6;
+ if (ohm & 0x04) scale |= x4 << 7;
+ if (ohm & 0x3B) red |= x4 << 6;
+ if (ohm & 0x04) red |= x3 << 6;
+ if (ohm & 0x10) red |= x5 << 7;
+ if (ohm & 0x0F) red |= x2 << 7;
+ if (ohm & 0x05) red |= x1 << 8;
+ if (ohm & 0x0A) red |= x0 << 8;
+ if (ohm & 0x05) red |= x0 << 9;
+ if (ohm & 0x02) red |= x6 << 9;
+ if (ohm & 0x01) red |= x3 << 10;
+ if (ohm & 0x02) red |= x5 << 10;
+
+ static const int s_shamts[6] = { 1,1,2,3,4,5 };
+
+ const int shamt = s_shamts[mode];
+ red <<= shamt;
+ green <<= shamt;
+ blue <<= shamt;
+ scale <<= shamt;
+
+ if (mode != 5)
+ {
+ green = red - green;
+ blue = red - blue;
+ }
+
+ if (majcomp == 1)
+ std::swap(red, green);
+
+ if (majcomp == 2)
+ std::swap(red, blue);
+
+ e1_r = clamp(red, 0, 0xFFF);
+ e1_g = clamp(green, 0, 0xFFF);
+ e1_b = clamp(blue, 0, 0xFFF);
+ e1_a = 0x780;
+
+ e0_r = clamp(red - scale, 0, 0xFFF);
+ e0_g = clamp(green - scale, 0, 0xFFF);
+ e0_b = clamp(blue - scale, 0, 0xFFF);
+ e0_a = 0x780;
+
+ break;
+ }
+ case CEM_HDR_RGB_HDR_ALPHA:
+ case CEM_HDR_RGB_LDR_ALPHA:
+ case CEM_HDR_RGB:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
+
+ int majcomp = ((v4 & 0x80) >> 7) | ((v5 & 0x80) >> 6);
+
+ e0_a = 0x780;
+ e1_a = 0x780;
+
+ if (majcomp == 3)
+ {
+ e0_r = v0 << 4;
+ e0_g = v2 << 4;
+ e0_b = (v4 & 0x7f) << 5;
+
+ e1_r = v1 << 4;
+ e1_g = v3 << 4;
+ e1_b = (v5 & 0x7f) << 5;
+ }
+ else
+ {
+ int mode = ((v1 & 0x80) >> 7) | ((v2 & 0x80) >> 6) | ((v3 & 0x80) >> 5);
+ int va = v0 | ((v1 & 0x40) << 2);
+ int vb0 = v2 & 0x3f;
+ int vb1 = v3 & 0x3f;
+ int vc = v1 & 0x3f;
+ int vd0 = v4 & 0x7f;
+ int vd1 = v5 & 0x7f;
+
+ static const int s_dbitstab[8] = { 7,6,7,6,5,6,5,6 };
+ vd0 = sign_extend(vd0, s_dbitstab[mode]);
+ vd1 = sign_extend(vd1, s_dbitstab[mode]);
+
+ int x0 = (v2 >> 6) & 1;
+ int x1 = (v3 >> 6) & 1;
+ int x2 = (v4 >> 6) & 1;
+ int x3 = (v5 >> 6) & 1;
+ int x4 = (v4 >> 5) & 1;
+ int x5 = (v5 >> 5) & 1;
+
+ int ohm = 1 << mode;
+ if (ohm & 0xA4) va |= x0 << 9;
+ if (ohm & 0x08) va |= x2 << 9;
+ if (ohm & 0x50) va |= x4 << 9;
+ if (ohm & 0x50) va |= x5 << 10;
+ if (ohm & 0xA0) va |= x1 << 10;
+ if (ohm & 0xC0) va |= x2 << 11;
+ if (ohm & 0x04) vc |= x1 << 6;
+ if (ohm & 0xE8) vc |= x3 << 6;
+ if (ohm & 0x20) vc |= x2 << 7;
+ if (ohm & 0x5B) vb0 |= x0 << 6;
+ if (ohm & 0x5B) vb1 |= x1 << 6;
+ if (ohm & 0x12) vb0 |= x2 << 7;
+ if (ohm & 0x12) vb1 |= x3 << 7;
+
+ int shamt = (mode >> 1) ^ 3;
+ va = (uint32_t)va << shamt;
+ vb0 = (uint32_t)vb0 << shamt;
+ vb1 = (uint32_t)vb1 << shamt;
+ vc = (uint32_t)vc << shamt;
+ vd0 = (uint32_t)vd0 << shamt;
+ vd1 = (uint32_t)vd1 << shamt;
+
+ e1_r = clamp(va, 0, 0xFFF);
+ e1_g = clamp(va - vb0, 0, 0xFFF);
+ e1_b = clamp(va - vb1, 0, 0xFFF);
+
+ e0_r = clamp(va - vc, 0, 0xFFF);
+ e0_g = clamp(va - vb0 - vc - vd0, 0, 0xFFF);
+ e0_b = clamp(va - vb1 - vc - vd1, 0, 0xFFF);
+
+ if (majcomp == 1)
+ {
+ std::swap(e0_r, e0_g);
+ std::swap(e1_r, e1_g);
+ }
+ else if (majcomp == 2)
+ {
+ std::swap(e0_r, e0_b);
+ std::swap(e1_r, e1_b);
+ }
+ }
+
+ if (cem_index == CEM_HDR_RGB_LDR_ALPHA)
+ {
+ int v6 = pE[6], v7 = pE[7];
+
+ e0_a = v6;
+ e1_a = v7;
+ }
+ else if (cem_index == CEM_HDR_RGB_HDR_ALPHA)
+ {
+ int v6 = pE[6], v7 = pE[7];
+
+ // Extract mode bits
+ int mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
+ v6 &= 0x7F;
+ v7 &= 0x7F;
+
+ if (mode == 3)
+ {
+ e0_a = v6 << 5;
+ e1_a = v7 << 5;
+ }
+ else
+ {
+ v6 |= (v7 << (mode + 1)) & 0x780;
+ v7 &= (0x3F >> mode);
+ v7 ^= (0x20 >> mode);
+ v7 -= (0x20 >> mode);
+ v6 <<= (4 - mode);
+ v7 <<= (4 - mode);
+
+ v7 += v6;
+ v7 = clamp(v7, 0, 0xFFF);
+ e0_a = v6;
+ e1_a = v7;
+ }
+ }
+
+ break;
+ }
+ default:
+ {
+ assert(0);
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ pEndpoints[c][0] = 0;
+ pEndpoints[c][1] = 0;
+ }
+ break;
+ }
+ }
+ }
+
+ static inline bool is_half_inf_or_nan(half_float v)
+ {
+ return get_bits(v, 10, 14) == 31;
+ }
+
+ // This float->half conversion matches how "F32TO16" works on Intel GPU's.
+ half_float float_to_half(float val, bool toward_zero)
+ {
+ union { float f; int32_t i; uint32_t u; } fi = { val };
+ const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1;
+ int s = flt_s, e = 0, m = 0;
+
+ // inf/NaN
+ if (flt_e == 0xff)
+ {
+ e = 31;
+ if (flt_m != 0) // NaN
+ m = 1;
+ }
+ // not zero or denormal
+ else if (flt_e != 0)
+ {
+ int new_exp = flt_e - 127;
+ if (new_exp > 15)
+ e = 31;
+ else if (new_exp < -14)
+ {
+ if (toward_zero)
+ m = (int)truncf((1 << 24) * fabsf(fi.f));
+ else
+ m = lrintf((1 << 24) * fabsf(fi.f));
+ }
+ else
+ {
+ e = new_exp + 15;
+ if (toward_zero)
+ m = (int)truncf((float)flt_m * (1.0f / (float)(1 << 13)));
+ else
+ m = lrintf((float)flt_m * (1.0f / (float)(1 << 13)));
+ }
+ }
+
+ assert((0 <= m) && (m <= 1024));
+ if (m == 1024)
+ {
+ e++;
+ m = 0;
+ }
+
+ assert((s >= 0) && (s <= 1));
+ assert((e >= 0) && (e <= 31));
+ assert((m >= 0) && (m <= 1023));
+
+ half_float result = (half_float)((s << 15) | (e << 10) | m);
+ return result;
+ }
+
+ float half_to_float(half_float hval)
+ {
+ union { float f; uint32_t u; } x = { 0 };
+
+ uint32_t s = ((uint32_t)hval >> 15) & 1;
+ uint32_t e = ((uint32_t)hval >> 10) & 0x1F;
+ uint32_t m = (uint32_t)hval & 0x3FF;
+
+ if (!e)
+ {
+ if (!m)
+ {
+ // +- 0
+ x.u = s << 31;
+ return x.f;
+ }
+ else
+ {
+ // denormalized
+ while (!(m & 0x00000400))
+ {
+ m <<= 1;
+ --e;
+ }
+
+ ++e;
+ m &= ~0x00000400;
+ }
+ }
+ else if (e == 31)
+ {
+ if (m == 0)
+ {
+ // +/- INF
+ x.u = (s << 31) | 0x7f800000;
+ return x.f;
+ }
+ else
+ {
+ // +/- NaN
+ x.u = (s << 31) | 0x7f800000 | (m << 13);
+ return x.f;
+ }
+ }
+
+ e = e + (127 - 15);
+ m = m << 13;
+
+ assert(s <= 1);
+ assert(m <= 0x7FFFFF);
+ assert(e <= 255);
+
+ x.u = m | (e << 23) | (s << 31);
+ return x.f;
+ }
+
+ static inline half_float qlog16_to_half(int k)
+ {
+ assert((k >= 0) && (k <= 0xFFFF));
+
+ int E = (k & 0xF800) >> 11;
+ int M = k & 0x7FF;
+
+ int Mt;
+ if (M < 512)
+ Mt = 3 * M;
+ else if (M >= 1536)
+ Mt = 5 * M - 2048;
+ else
+ Mt = 4 * M - 512;
+
+ return (half_float)((E << 10) + (Mt >> 3));
+ }
+
+ // See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
+ const int RGB9E5_EXPONENT_BITS = 5, RGB9E5_MANTISSA_BITS = 9, RGB9E5_EXP_BIAS = 15, RGB9E5_MAX_VALID_BIASED_EXP = 31;
+ const int MAX_RGB9E5_EXP = (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS);
+ const int RGB9E5_MANTISSA_VALUES = (1 << RGB9E5_MANTISSA_BITS);
+ const int MAX_RGB9E5_MANTISSA = (RGB9E5_MANTISSA_VALUES - 1);
+ //const int MAX_RGB9E5 = (int)(((float)MAX_RGB9E5_MANTISSA) / RGB9E5_MANTISSA_VALUES * (1 << MAX_RGB9E5_EXP));
+ const int EPSILON_RGB9E5 = (int)((1.0f / (float)RGB9E5_MANTISSA_VALUES) / (float)(1 << RGB9E5_EXP_BIAS));
+
+ void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b)
+ {
+ int x = packed & 511;
+ int y = (packed >> 9) & 511;
+ int z = (packed >> 18) & 511;
+ int w = (packed >> 27) & 31;
+
+ const float scale = powf(2.0f, static_cast<float>(w - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
+
+ r = x * scale;
+ g = y * scale;
+ b = z * scale;
+ }
+
+ // floor_log2 is not correct for the denorm and zero values, but we are going to do a max of this value with the minimum rgb9e5 exponent that will hide these problem cases.
+ static inline int floor_log2(float x)
+ {
+ union float754
+ {
+ unsigned int raw;
+ float value;
+ };
+
+ float754 f;
+ f.value = x;
+ // Extract float exponent
+ return ((f.raw >> 23) & 0xFF) - 127;
+ }
+
+ static inline int maximumi(int a, int b) { return (a > b) ? a : b; }
+ static inline float maximumf(float a, float b) { return (a > b) ? a : b; }
+
+ uint32_t pack_rgb9e5(float r, float g, float b)
+ {
+ r = clampf(r, 0.0f, MAX_RGB9E5);
+ g = clampf(g, 0.0f, MAX_RGB9E5);
+ b = clampf(b, 0.0f, MAX_RGB9E5);
+
+ float maxrgb = maximumf(maximumf(r, g), b);
+ int exp_shared = maximumi(-RGB9E5_EXP_BIAS - 1, floor_log2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;
+ assert((exp_shared >= 0) && (exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP));
+
+ float denom = powf(2.0f, (float)(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
+
+ int maxm = (int)floorf((maxrgb / denom) + 0.5f);
+ if (maxm == (MAX_RGB9E5_MANTISSA + 1))
+ {
+ denom *= 2;
+ exp_shared += 1;
+ assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
+ }
+ else
+ {
+ assert(maxm <= MAX_RGB9E5_MANTISSA);
+ }
+
+ int rm = (int)floorf((r / denom) + 0.5f);
+ int gm = (int)floorf((g / denom) + 0.5f);
+ int bm = (int)floorf((b / denom) + 0.5f);
+
+ assert((rm >= 0) && (rm <= MAX_RGB9E5_MANTISSA));
+ assert((gm >= 0) && (gm <= MAX_RGB9E5_MANTISSA));
+ assert((bm >= 0) && (bm <= MAX_RGB9E5_MANTISSA));
+
+ return rm | (gm << 9) | (bm << 18) | (exp_shared << 27);
+ }
+
+ static inline int clz17(uint32_t x)
+ {
+ assert(x <= 0x1FFFF);
+ x &= 0x1FFFF;
+
+ if (!x)
+ return 17;
+
+ uint32_t n = 0;
+ while ((x & 0x10000) == 0)
+ {
+ x <<= 1u;
+ n++;
+ }
+
+ return n;
+ }
+
+ static inline uint32_t pack_rgb9e5_ldr_astc(int Cr, int Cg, int Cb)
+ {
+ int lz = clz17(Cr | Cg | Cb | 1);
+ if (Cr == 65535) { Cr = 65536; lz = 0; }
+ if (Cg == 65535) { Cg = 65536; lz = 0; }
+ if (Cb == 65535) { Cb = 65536; lz = 0; }
+ Cr <<= lz; Cg <<= lz; Cb <<= lz;
+ Cr = (Cr >> 8) & 0x1FF;
+ Cg = (Cg >> 8) & 0x1FF;
+ Cb = (Cb >> 8) & 0x1FF;
+ uint32_t exponent = 16 - lz;
+ uint32_t texel = (exponent << 27) | (Cb << 18) | (Cg << 9) | Cr;
+ return texel;
+ }
+
+ static inline uint32_t pack_rgb9e5_hdr_astc(int Cr, int Cg, int Cb)
+ {
+ if (Cr > 0x7c00) Cr = 0; else if (Cr == 0x7c00) Cr = 0x7bff;
+ if (Cg > 0x7c00) Cg = 0; else if (Cg == 0x7c00) Cg = 0x7bff;
+ if (Cb > 0x7c00) Cb = 0; else if (Cb == 0x7c00) Cb = 0x7bff;
+ int Re = (Cr >> 10) & 0x1F;
+ int Ge = (Cg >> 10) & 0x1F;
+ int Be = (Cb >> 10) & 0x1F;
+ int Rex = (Re == 0) ? 1 : Re;
+ int Gex = (Ge == 0) ? 1 : Ge;
+ int Bex = (Be == 0) ? 1 : Be;
+ int Xm = ((Cr | Cg | Cb) & 0x200) >> 9;
+ int Xe = Re | Ge | Be;
+ uint32_t rshift, gshift, bshift, expo;
+
+ if (Xe == 0)
+ {
+ expo = rshift = gshift = bshift = Xm;
+ }
+ else if (Re >= Ge && Re >= Be)
+ {
+ expo = Rex + 1;
+ rshift = 2;
+ gshift = Rex - Gex + 2;
+ bshift = Rex - Bex + 2;
+ }
+ else if (Ge >= Be)
+ {
+ expo = Gex + 1;
+ rshift = Gex - Rex + 2;
+ gshift = 2;
+ bshift = Gex - Bex + 2;
+ }
+ else
+ {
+ expo = Bex + 1;
+ rshift = Bex - Rex + 2;
+ gshift = Bex - Gex + 2;
+ bshift = 2;
+ }
+
+ int Rm = (Cr & 0x3FF) | (Re == 0 ? 0 : 0x400);
+ int Gm = (Cg & 0x3FF) | (Ge == 0 ? 0 : 0x400);
+ int Bm = (Cb & 0x3FF) | (Be == 0 ? 0 : 0x400);
+ Rm = (Rm >> rshift) & 0x1FF;
+ Gm = (Gm >> gshift) & 0x1FF;
+ Bm = (Bm >> bshift) & 0x1FF;
+
+ uint32_t texel = (expo << 27) | (Bm << 18) | (Gm << 9) | (Rm << 0);
+ return texel;
+ }
+
+ // Important: pPixels is either 32-bit/texel or 64-bit/texel.
+ bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode)
+ {
+ assert(is_valid_block_size(blk_width, blk_height));
+
+ assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size());
+ if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size())
+ return false;
+
+ const uint32_t num_blk_pixels = blk_width * blk_height;
+
+ // Write block error color
+ if (dec_mode == cDecodeModeHDR16)
+ {
+ // NaN's
+ memset(pPixels, 0xFF, num_blk_pixels * sizeof(half_float) * 4);
+ }
+ else if (dec_mode == cDecodeModeRGB9E5)
+ {
+ const uint32_t purple_9e5 = pack_rgb9e5(1.0f, 0.0f, 1.0f);
+
+ for (uint32_t i = 0; i < num_blk_pixels; i++)
+ ((uint32_t*)pPixels)[i] = purple_9e5;
+ }
+ else
+ {
+ for (uint32_t i = 0; i < num_blk_pixels; i++)
+ ((uint32_t*)pPixels)[i] = 0xFFFF00FF;
+ }
+
+ if (log_blk.m_error_flag)
+ {
+ // Should this return false? It's not an invalid logical block config, though.
+ return false;
+ }
+
+ // Handle solid color blocks
+ if (log_blk.m_solid_color_flag_ldr)
+ {
+ // LDR solid block
+ if (dec_mode == cDecodeModeHDR16)
+ {
+ // Convert LDR pixels to half-float
+ half_float h[4];
+ for (uint32_t c = 0; c < 4; c++)
+ h[c] = (log_blk.m_solid_color[c] == 0xFFFF) ? 0x3C00 : float_to_half((float)log_blk.m_solid_color[c] * (1.0f / 65536.0f), true);
+
+ for (uint32_t i = 0; i < num_blk_pixels; i++)
+ memcpy((uint16_t*)pPixels + i * 4, h, sizeof(half_float) * 4);
+ }
+ else if (dec_mode == cDecodeModeRGB9E5)
+ {
+ float r = (log_blk.m_solid_color[0] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[0] * (1.0f / 65536.0f));
+ float g = (log_blk.m_solid_color[1] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[1] * (1.0f / 65536.0f));
+ float b = (log_blk.m_solid_color[2] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[2] * (1.0f / 65536.0f));
+
+ const uint32_t packed = pack_rgb9e5(r, g, b);
+
+ for (uint32_t i = 0; i < num_blk_pixels; i++)
+ ((uint32_t*)pPixels)[i] = packed;
+ }
+ else
+ {
+ // Convert LDR pixels to 8-bits
+ for (uint32_t i = 0; i < num_blk_pixels; i++)
+ for (uint32_t c = 0; c < 4; c++)
+ ((uint8_t*)pPixels)[i * 4 + c] = (log_blk.m_solid_color[c] >> 8);
+ }
+
+ return true;
+ }
+ else if (log_blk.m_solid_color_flag_hdr)
+ {
+ // HDR solid block, decode mode must be half-float or RGB9E5
+ if (dec_mode == cDecodeModeHDR16)
+ {
+ for (uint32_t i = 0; i < num_blk_pixels; i++)
+ memcpy((uint16_t*)pPixels + i * 4, log_blk.m_solid_color, sizeof(half_float) * 4);
+ }
+ else if (dec_mode == cDecodeModeRGB9E5)
+ {
+ float r = half_to_float(log_blk.m_solid_color[0]);
+ float g = half_to_float(log_blk.m_solid_color[1]);
+ float b = half_to_float(log_blk.m_solid_color[2]);
+
+ const uint32_t packed = pack_rgb9e5(r, g, b);
+
+ for (uint32_t i = 0; i < num_blk_pixels; i++)
+ ((uint32_t*)pPixels)[i] = packed;
+ }
+ else
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ // Sanity check block's config
+ if ((log_blk.m_grid_width < 2) || (log_blk.m_grid_height < 2))
+ return false;
+ if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
+ return false;
+
+ if ((log_blk.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_blk.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))
+ return false;
+ if ((log_blk.m_weight_ise_range < FIRST_VALID_WEIGHT_ISE_RANGE) || (log_blk.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE))
+ return false;
+ if ((log_blk.m_num_partitions < 1) || (log_blk.m_num_partitions > MAX_PARTITIONS))
+ return false;
+ if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > MAX_DUAL_PLANE_PARTITIONS))
+ return false;
+ if (log_blk.m_partition_id >= NUM_PARTITION_PATTERNS)
+ return false;
+ if ((log_blk.m_num_partitions == 1) && (log_blk.m_partition_id > 0))
+ return false;
+ if (log_blk.m_color_component_selector > 3)
+ return false;
+
+ const uint32_t total_endpoint_levels = get_ise_levels(log_blk.m_endpoint_ise_range);
+ const uint32_t total_weight_levels = get_ise_levels(log_blk.m_weight_ise_range);
+
+ bool is_ldr_endpoints[MAX_PARTITIONS];
+
+ // Check CEM's
+ uint32_t total_cem_vals = 0;
+ for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
+ {
+ if (log_blk.m_color_endpoint_modes[i] > 15)
+ return false;
+
+ total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[i]);
+
+ is_ldr_endpoints[i] = is_cem_ldr(log_blk.m_color_endpoint_modes[i]);
+ }
+
+ if (total_cem_vals > MAX_ENDPOINTS)
+ return false;
+
+ const dequant_table& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range);
+ const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data();
+
+ // Dequantized endpoints to [0,255]
+ uint8_t dequantized_endpoints[MAX_ENDPOINTS];
+ for (uint32_t i = 0; i < total_cem_vals; i++)
+ {
+ if (log_blk.m_endpoints[i] >= total_endpoint_levels)
+ return false;
+ dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]];
+ }
+
+ // Dequantize weights to [0,64]
+ uint8_t dequantized_weights[2][12 * 12];
+
+ const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range);
+ const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data();
+
+ const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height;
+ for (uint32_t i = 0; i < total_weight_vals; i++)
+ {
+ if (log_blk.m_weights[i] >= total_weight_levels)
+ return false;
+
+ const uint32_t plane_index = log_blk.m_dual_plane ? (i & 1) : 0;
+ const uint32_t grid_index = log_blk.m_dual_plane ? (i >> 1) : i;
+
+ dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]];
+ }
+
+ // Upsample weight grid. [0,64] weights
+ uint8_t upsampled_weights[2][12 * 12];
+
+ upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[0][0], &upsampled_weights[0][0]);
+ if (log_blk.m_dual_plane)
+ upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[1][0], &upsampled_weights[1][0]);
+
+ // Decode CEM's
+ int endpoints[4][4][2]; // [subset][comp][l/h]
+
+ uint32_t endpoint_val_index = 0;
+ for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++)
+ {
+ const uint32_t cem_index = log_blk.m_color_endpoint_modes[subset];
+
+ decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]);
+
+ endpoint_val_index += get_num_cem_values(cem_index);
+ }
+
+ // Decode texels
+ const bool small_block = num_blk_pixels < 31;
+ const bool use_precomputed_texel_partitions = (blk_width == 4) && (blk_height == 4) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);
+ const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX;
+
+ bool success = true;
+
+ if (dec_mode == cDecodeModeRGB9E5)
+ {
+ // returns uint32_t's
+ for (uint32_t y = 0; y < blk_height; y++)
+ {
+ for (uint32_t x = 0; x < blk_width; x++)
+ {
+ const uint32_t pixel_index = x + y * blk_width;
+ const uint32_t subset = (log_blk.m_num_partitions > 1) ?
+ (use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
+ : 0;
+
+ int comp[3];
+
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
+
+ if (is_ldr_endpoints[subset])
+ {
+ assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
+ assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
+
+ int le = endpoints[subset][c][0];
+ int he = endpoints[subset][c][1];
+
+ le = (le << 8) | le;
+ he = (he << 8) | he;
+
+ int k = weight_interpolate(le, he, w);
+ assert((k >= 0) && (k <= 0xFFFF));
+
+ comp[c] = k; // 1.0
+ }
+ else
+ {
+ assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
+ assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
+
+ int le = endpoints[subset][c][0] << 4;
+ int he = endpoints[subset][c][1] << 4;
+
+ int qlog16 = weight_interpolate(le, he, w);
+
+ comp[c] = qlog16_to_half(qlog16);
+
+ if (is_half_inf_or_nan((half_float)comp[c]))
+ comp[c] = 0x7BFF;
+ }
+
+ } // c
+
+ uint32_t packed;
+ if (is_ldr_endpoints[subset])
+ packed = pack_rgb9e5_ldr_astc(comp[0], comp[1], comp[2]);
+ else
+ packed = pack_rgb9e5_hdr_astc(comp[0], comp[1], comp[2]);
+
+ ((uint32_t*)pPixels)[pixel_index] = packed;
+
+ } // x
+ } // y
+ }
+ else if (dec_mode == cDecodeModeHDR16)
+ {
+ // Note: must round towards zero when converting float to half for ASTC (18.19 Weight Application)
+
+ // returns half floats
+ for (uint32_t y = 0; y < blk_height; y++)
+ {
+ for (uint32_t x = 0; x < blk_width; x++)
+ {
+ const uint32_t pixel_index = x + y * blk_width;
+ const uint32_t subset = (log_blk.m_num_partitions > 1) ?
+ (use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
+ : 0;
+
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
+
+ half_float o;
+
+ if ( (is_ldr_endpoints[subset]) ||
+ ((log_blk.m_color_endpoint_modes[subset] == CEM_HDR_RGB_LDR_ALPHA) && (c == 3)) )
+ {
+ assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
+ assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
+
+ int le = endpoints[subset][c][0];
+ int he = endpoints[subset][c][1];
+
+ le = (le << 8) | le;
+ he = (he << 8) | he;
+
+ int k = weight_interpolate(le, he, w);
+ assert((k >= 0) && (k <= 0xFFFF));
+
+ if (k == 0xFFFF)
+ o = 0x3C00; // 1.0
+ else
+ o = float_to_half((float)k * (1.0f / 65536.0f), true);
+ }
+ else
+ {
+ assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
+ assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
+
+ int le = endpoints[subset][c][0] << 4;
+ int he = endpoints[subset][c][1] << 4;
+
+ int qlog16 = weight_interpolate(le, he, w);
+
+ o = qlog16_to_half(qlog16);
+
+ if (is_half_inf_or_nan(o))
+ o = 0x7BFF;
+ }
+
+ ((half_float*)pPixels)[pixel_index * 4 + c] = o;
+ }
+
+ } // x
+ } // y
+ }
+ else
+ {
+ // returns uint8_t's
+ for (uint32_t y = 0; y < blk_height; y++)
+ {
+ for (uint32_t x = 0; x < blk_width; x++)
+ {
+ const uint32_t pixel_index = x + y * blk_width;
+
+ const uint32_t subset = (log_blk.m_num_partitions > 1) ?
+ (use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
+ : 0;
+
+ if (!is_ldr_endpoints[subset])
+ {
+ ((uint32_t*)pPixels)[pixel_index * 4] = 0xFFFF00FF;
+ success = false;
+ }
+ else
+ {
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
+
+ int le = endpoints[subset][c][0];
+ int he = endpoints[subset][c][1];
+
+ // FIXME: the spec is apparently wrong? this matches ARM's and Google's decoder
+ //if ((dec_mode == cDecodeModeSRGB8) && (c <= 2))
+ // See https://github.com/ARM-software/astc-encoder/issues/447
+ if (dec_mode == cDecodeModeSRGB8)
+ {
+ le = (le << 8) | 0x80;
+ he = (he << 8) | 0x80;
+ }
+ else
+ {
+ le = (le << 8) | le;
+ he = (he << 8) | he;
+ }
+
+ uint32_t k = weight_interpolate(le, he, w);
+
+ // FIXME: This is what the spec says to do in LDR mode, but this is not what ARM's decoder does
+ // See decompress_symbolic_block(), decode_texel() and unorm16_to_sf16.
+ // It seems to effectively divide by 65535.0 and convert to FP16, then back to float, mul by 255.0, add .5 and then convert to 8-bit.
+ ((uint8_t*)pPixels)[pixel_index * 4 + c] = (uint8_t)(k >> 8);
+ }
+ }
+
+ } // x
+ } // y
+ }
+
+ return success;
+ }
+
+ //------------------------------------------------
+ // Physical to logical block decoding
+
+ // unsigned 128-bit int, with some signed helpers
+ class uint128
+ {
+ uint64_t m_lo, m_hi;
+
+ public:
+ uint128() = default;
+ inline uint128(uint64_t lo) : m_lo(lo), m_hi(0) { }
+ inline uint128(uint64_t lo, uint64_t hi) : m_lo(lo), m_hi(hi) { }
+ inline uint128(const uint128& other) : m_lo(other.m_lo), m_hi(other.m_hi) { }
+
+ inline uint128& set_signed(int64_t lo) { m_lo = lo; m_hi = (lo < 0) ? UINT64_MAX : 0; return *this; }
+ inline uint128& set(uint64_t lo) { m_lo = lo; m_hi = 0; return *this; }
+
+ inline explicit operator uint8_t () const { return (uint8_t)m_lo; }
+ inline explicit operator uint16_t () const { return (uint16_t)m_lo; }
+ inline explicit operator uint32_t () const { return (uint32_t)m_lo; }
+ inline explicit operator uint64_t () const { return m_lo; }
+
+ inline uint128& operator= (const uint128& rhs) { m_lo = rhs.m_lo; m_hi = rhs.m_hi; return *this; }
+ inline uint128& operator= (const uint64_t val) { m_lo = val; m_hi = 0; return *this; }
+
+ inline uint64_t get_low() const { return m_lo; }
+ inline uint64_t& get_low() { return m_lo; }
+
+ inline uint64_t get_high() const { return m_hi; }
+ inline uint64_t& get_high() { return m_hi; }
+
+ inline bool operator== (const uint128& rhs) const { return (m_lo == rhs.m_lo) && (m_hi == rhs.m_hi); }
+ inline bool operator!= (const uint128& rhs) const { return (m_lo != rhs.m_lo) || (m_hi != rhs.m_hi); }
+
+ inline bool operator< (const uint128& rhs) const
+ {
+ if (m_hi < rhs.m_hi)
+ return true;
+
+ if (m_hi == rhs.m_hi)
+ {
+ if (m_lo < rhs.m_lo)
+ return true;
+ }
+
+ return false;
+ }
+
+ inline bool operator> (const uint128& rhs) const { return (rhs < *this); }
+
+ inline bool operator<= (const uint128& rhs) const { return (*this == rhs) || (*this < rhs); }
+ inline bool operator>= (const uint128& rhs) const { return (*this == rhs) || (*this > rhs); }
+
+ inline bool is_zero() const { return (m_lo == 0) && (m_hi == 0); }
+ inline bool is_all_ones() const { return (m_lo == UINT64_MAX) && (m_hi == UINT64_MAX); }
+ inline bool is_non_zero() const { return (m_lo != 0) || (m_hi != 0); }
+ inline explicit operator bool() const { return is_non_zero(); }
+ inline bool is_signed() const { return ((int64_t)m_hi) < 0; }
+
+ inline bool signed_less(const uint128& rhs) const
+ {
+ const bool l_signed = is_signed(), r_signed = rhs.is_signed();
+
+ if (l_signed == r_signed)
+ return *this < rhs;
+
+ if (l_signed && !r_signed)
+ return true;
+
+ assert(!l_signed && r_signed);
+ return false;
+ }
+
+ inline bool signed_greater(const uint128& rhs) const { return rhs.signed_less(*this); }
+ inline bool signed_less_equal(const uint128& rhs) const { return !rhs.signed_less(*this); }
+ inline bool signed_greater_equal(const uint128& rhs) const { return !signed_less(rhs); }
+
+ double get_double() const
+ {
+ double res = 0;
+
+ if (m_hi)
+ res = (double)m_hi * pow(2.0f, 64.0f);
+
+ res += (double)m_lo;
+
+ return res;
+ }
+
+ double get_signed_double() const
+ {
+ if (is_signed())
+ return -(uint128(*this).abs().get_double());
+ else
+ return get_double();
+ }
+
+ inline uint128 abs() const
+ {
+ uint128 res(*this);
+ if (res.is_signed())
+ res = -res;
+ return res;
+ }
+
+ inline uint128& operator<<= (int shift)
+ {
+ assert(shift >= 0);
+ if (shift < 0)
+ return *this;
+
+ m_hi = (shift >= 64) ? ((shift >= 128) ? 0 : (m_lo << (shift - 64))) : (m_hi << shift);
+
+ if ((shift) && (shift < 64))
+ m_hi |= (m_lo >> (64 - shift));
+
+ m_lo = (shift >= 64) ? 0 : (m_lo << shift);
+
+ return *this;
+ }
+
+ inline uint128 operator<< (int shift) const { uint128 res(*this); res <<= shift; return res; }
+
+ inline uint128& operator>>= (int shift)
+ {
+ assert(shift >= 0);
+ if (shift < 0)
+ return *this;
+
+ m_lo = (shift >= 64) ? ((shift >= 128) ? 0 : (m_hi >> (shift - 64))) : (m_lo >> shift);
+
+ if ((shift) && (shift < 64))
+ m_lo |= (m_hi << (64 - shift));
+
+ m_hi = (shift >= 64) ? 0 : (m_hi >> shift);
+
+ return *this;
+ }
+
+ inline uint128 operator>> (int shift) const { uint128 res(*this); res >>= shift; return res; }
+
+ inline uint128 signed_shift_right(int shift) const
+ {
+ uint128 res(*this);
+ res >>= shift;
+
+ if (is_signed())
+ {
+ uint128 x(0U);
+ x = ~x;
+ x >>= shift;
+ res |= (~x);
+ }
+
+ return res;
+ }
+
+ inline uint128& operator |= (const uint128& rhs) { m_lo |= rhs.m_lo; m_hi |= rhs.m_hi; return *this; }
+ inline uint128 operator | (const uint128& rhs) const { uint128 res(*this); res |= rhs; return res; }
+
+ inline uint128& operator &= (const uint128& rhs) { m_lo &= rhs.m_lo; m_hi &= rhs.m_hi; return *this; }
+ inline uint128 operator & (const uint128& rhs) const { uint128 res(*this); res &= rhs; return res; }
+
+ inline uint128& operator ^= (const uint128& rhs) { m_lo ^= rhs.m_lo; m_hi ^= rhs.m_hi; return *this; }
+ inline uint128 operator ^ (const uint128& rhs) const { uint128 res(*this); res ^= rhs; return res; }
+
+ inline uint128 operator ~() const { return uint128(~m_lo, ~m_hi); }
+
+ inline uint128 operator -() const { uint128 res(~*this); if (++res.m_lo == 0) ++res.m_hi; return res; }
+
+ // prefix
+ inline uint128 operator ++()
+ {
+ if (++m_lo == 0)
+ ++m_hi;
+ return *this;
+ }
+
+ // postfix
+ inline uint128 operator ++(int)
+ {
+ uint128 res(*this);
+ if (++m_lo == 0)
+ ++m_hi;
+ return res;
+ }
+
+ // prefix
+ inline uint128 operator --()
+ {
+ const uint64_t t = m_lo;
+ if (--m_lo > t)
+ --m_hi;
+ return *this;
+ }
+
+ // postfix
+ inline uint128 operator --(int)
+ {
+ const uint64_t t = m_lo;
+ uint128 res(*this);
+ if (--m_lo > t)
+ --m_hi;
+ return res;
+ }
+
+ inline uint128& operator+= (const uint128& rhs)
+ {
+ const uint64_t t = m_lo + rhs.m_lo;
+ m_hi = m_hi + rhs.m_hi + (t < m_lo);
+ m_lo = t;
+ return *this;
+ }
+
+ inline uint128 operator+ (const uint128& rhs) const { uint128 res(*this); res += rhs; return res; }
+
+ inline uint128& operator-= (const uint128& rhs)
+ {
+ const uint64_t t = m_lo - rhs.m_lo;
+ m_hi = m_hi - rhs.m_hi - (t > m_lo);
+ m_lo = t;
+ return *this;
+ }
+
+ inline uint128 operator- (const uint128& rhs) const { uint128 res(*this); res -= rhs; return res; }
+
+ // computes bit by bit, very slow
+ uint128& operator*=(const uint128& rhs)
+ {
+ uint128 temp(*this), result(0U);
+
+ for (uint128 bitmask(rhs); bitmask; bitmask >>= 1, temp <<= 1)
+ if (bitmask.get_low() & 1)
+ result += temp;
+
+ *this = result;
+ return *this;
+ }
+
+ uint128 operator*(const uint128& rhs) const { uint128 res(*this); res *= rhs; return res; }
+
+ // computes bit by bit, very slow
+ friend uint128 divide(const uint128& dividend, const uint128& divisor, uint128& remainder)
+ {
+ remainder = 0;
+
+ if (!divisor)
+ {
+ assert(0);
+ return ~uint128(0U);
+ }
+
+ uint128 quotient(0), one(1);
+
+ for (int i = 127; i >= 0; i--)
+ {
+ remainder = (remainder << 1) | ((dividend >> i) & one);
+ if (remainder >= divisor)
+ {
+ remainder -= divisor;
+ quotient |= (one << i);
+ }
+ }
+
+ return quotient;
+ }
+
+ uint128 operator/(const uint128& rhs) const { uint128 remainder, res; res = divide(*this, rhs, remainder); return res; }
+ uint128 operator/=(const uint128& rhs) { uint128 remainder; *this = divide(*this, rhs, remainder); return *this; }
+
+ uint128 operator%(const uint128& rhs) const { uint128 remainder; divide(*this, rhs, remainder); return remainder; }
+ uint128 operator%=(const uint128& rhs) { uint128 remainder; divide(*this, rhs, remainder); *this = remainder; return *this; }
+
+ void print_hex(FILE* pFile) const
+ {
+ fprintf(pFile, "0x%016llx%016llx", (unsigned long long int)m_hi, (unsigned long long int)m_lo);
+ }
+
+ void format_unsigned(std::string& res) const
+ {
+ basisu::vector<uint8_t> digits;
+ digits.reserve(39 + 1);
+
+ uint128 k(*this), ten(10);
+ do
+ {
+ uint128 r;
+ k = divide(k, ten, r);
+ digits.push_back((uint8_t)r);
+ } while (k);
+
+ for (int i = (int)digits.size() - 1; i >= 0; i--)
+ res += ('0' + digits[i]);
+ }
+
+ void format_signed(std::string& res) const
+ {
+ uint128 val(*this);
+
+ if (val.is_signed())
+ {
+ res.push_back('-');
+ val = -val;
+ }
+
+ val.format_unsigned(res);
+ }
+
+ void print_unsigned(FILE* pFile)
+ {
+ std::string str;
+ format_unsigned(str);
+ fprintf(pFile, "%s", str.c_str());
+ }
+
+ void print_signed(FILE* pFile)
+ {
+ std::string str;
+ format_signed(str);
+ fprintf(pFile, "%s", str.c_str());
+ }
+
+ uint128 get_reversed_bits() const
+ {
+ uint128 res;
+
+ const uint32_t* pSrc = (const uint32_t*)this;
+ uint32_t* pDst = (uint32_t*)&res;
+
+ pDst[0] = rev_dword(pSrc[3]);
+ pDst[1] = rev_dword(pSrc[2]);
+ pDst[2] = rev_dword(pSrc[1]);
+ pDst[3] = rev_dword(pSrc[0]);
+
+ return res;
+ }
+
+ uint128 get_byteswapped() const
+ {
+ uint128 res;
+
+ const uint8_t* pSrc = (const uint8_t*)this;
+ uint8_t* pDst = (uint8_t*)&res;
+
+ for (uint32_t i = 0; i < 16; i++)
+ pDst[i] = pSrc[15 - i];
+
+ return res;
+ }
+
+ inline uint64_t get_bits64(uint32_t bit_ofs, uint32_t bit_len) const
+ {
+ assert(bit_ofs < 128);
+ assert(bit_len && (bit_len <= 64) && ((bit_ofs + bit_len) <= 128));
+
+ uint128 res(*this);
+ res >>= bit_ofs;
+
+ const uint64_t bitmask = (bit_len == 64) ? UINT64_MAX : ((1ull << bit_len) - 1);
+ return res.get_low() & bitmask;
+ }
+
+ inline uint32_t get_bits(uint32_t bit_ofs, uint32_t bit_len) const
+ {
+ assert(bit_len <= 32);
+ return (uint32_t)get_bits64(bit_ofs, bit_len);
+ }
+
+ inline uint32_t next_bits(uint32_t& bit_ofs, uint32_t len) const
+ {
+ assert(len && (len <= 32));
+ uint32_t x = get_bits(bit_ofs, len);
+ bit_ofs += len;
+ return x;
+ }
+
+ inline uint128& set_bits(uint64_t val, uint32_t bit_ofs, uint32_t num_bits)
+ {
+ assert(bit_ofs < 128);
+ assert(num_bits && (num_bits <= 64) && ((bit_ofs + num_bits) <= 128));
+
+ uint128 bitmask(1);
+ bitmask = (bitmask << num_bits) - 1;
+ assert(uint128(val) <= bitmask);
+
+ bitmask <<= bit_ofs;
+ *this &= ~bitmask;
+
+ *this = *this | (uint128(val) << bit_ofs);
+ return *this;
+ }
+ };
+
+ static bool decode_void_extent(const uint128& bits, log_astc_block& log_blk)
+ {
+ if (bits.get_bits(10, 2) != 0b11)
+ return false;
+
+ uint32_t bit_ofs = 12;
+ const uint32_t min_s = bits.next_bits(bit_ofs, 13);
+ const uint32_t max_s = bits.next_bits(bit_ofs, 13);
+ const uint32_t min_t = bits.next_bits(bit_ofs, 13);
+ const uint32_t max_t = bits.next_bits(bit_ofs, 13);
+ assert(bit_ofs == 64);
+
+ const bool all_extents_all_ones = (min_s == 0x1FFF) && (max_s == 0x1FFF) && (min_t == 0x1FFF) && (max_t == 0x1FFF);
+
+ if (!all_extents_all_ones && ((min_s >= max_s) || (min_t >= max_t)))
+ return false;
+
+ const bool hdr_flag = bits.get_bits(9, 1) != 0;
+
+ if (hdr_flag)
+ log_blk.m_solid_color_flag_hdr = true;
+ else
+ log_blk.m_solid_color_flag_ldr = true;
+
+ log_blk.m_solid_color[0] = (uint16_t)bits.get_bits(64, 16);
+ log_blk.m_solid_color[1] = (uint16_t)bits.get_bits(80, 16);
+ log_blk.m_solid_color[2] = (uint16_t)bits.get_bits(96, 16);
+ log_blk.m_solid_color[3] = (uint16_t)bits.get_bits(112, 16);
+
+ if (log_blk.m_solid_color_flag_hdr)
+ {
+ for (uint32_t c = 0; c < 4; c++)
+ if (is_half_inf_or_nan(log_blk.m_solid_color[c]))
+ return false;
+ }
+
+ return true;
+ }
+
+ struct astc_dec_row
+ {
+ int8_t Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs;
+ };
+
+ static const astc_dec_row s_dec_rows[10] =
+ {
+ // Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs;
+ { 10, 9, 7, 2, 5, 2, 4, 2, 4, 0, 1 }, // 4 2
+ { 10, 9, 7, 2, 5, 2, 8, 2, 4, 0, 1 }, // 8 2
+ { 10, 9, 5, 2, 7, 2, 2, 8, 4, 0, 1 }, // 2 8
+ { 10, 9, 5, 2, 7, 1, 2, 6, 4, 0, 1 }, // 2 6
+
+ { 10, 9, 7, 1, 5, 2, 2, 2, 4, 0, 1 }, // 2 2
+ { 10, 9, 0, 0, 5, 2, 12, 2, 4, 2, 3 }, // 12 2
+ { 10, 9, 5, 2, 0, 0, 2, 12, 4, 2, 3 }, // 2 12
+ { 10, 9, 0, 0, 0, 0, 6, 10, 4, 2, 3 }, // 6 10
+
+ { 10, 9, 0, 0, 0, 0, 10, 6, 4, 2, 3 }, // 10 6
+ { -1, -1, 5, 2, 9, 2, 6, 6, 4, 2, 3 }, // 6 6
+ };
+
+ static bool decode_config(const uint128& bits, log_astc_block& log_blk)
+ {
+ // Reserved
+ if (bits.get_bits(0, 4) == 0)
+ return false;
+
+ // Reserved
+ if ((bits.get_bits(0, 2) == 0) && (bits.get_bits(6, 3) == 0b111))
+ {
+ if (bits.get_bits(2, 4) != 0b1111)
+ return false;
+ }
+
+ // Void extent
+ if (bits.get_bits(0, 9) == 0b111111100)
+ return decode_void_extent(bits, log_blk);
+
+ // Check rows
+ const uint32_t x0_2 = bits.get_bits(0, 2), x2_2 = bits.get_bits(2, 2);
+ const uint32_t x5_4 = bits.get_bits(5, 4), x8_1 = bits.get_bits(8, 1);
+ const uint32_t x7_2 = bits.get_bits(7, 2);
+
+ int row_index = -1;
+ if (x0_2 == 0)
+ {
+ if (x7_2 == 0b00)
+ row_index = 5;
+ else if (x7_2 == 0b01)
+ row_index = 6;
+ else if (x5_4 == 0b1100)
+ row_index = 7;
+ else if (x5_4 == 0b1101)
+ row_index = 8;
+ else if (x7_2 == 0b10)
+ row_index = 9;
+ }
+ else
+ {
+ if (x2_2 == 0b00)
+ row_index = 0;
+ else if (x2_2 == 0b01)
+ row_index = 1;
+ else if (x2_2 == 0b10)
+ row_index = 2;
+ else if ((x2_2 == 0b11) && (x8_1 == 0))
+ row_index = 3;
+ else if ((x2_2 == 0b11) && (x8_1 == 1))
+ row_index = 4;
+ }
+ if (row_index < 0)
+ return false;
+
+ const astc_dec_row& r = s_dec_rows[row_index];
+
+ bool P = false, Dp = false;
+ uint32_t W = r.W_bias, H = r.H_bias;
+
+ if (r.P_ofs >= 0)
+ P = bits.get_bits(r.P_ofs, 1) != 0;
+
+ if (r.Dp_ofs >= 0)
+ Dp = bits.get_bits(r.Dp_ofs, 1) != 0;
+
+ if (r.W_size)
+ W += bits.get_bits(r.W_ofs, r.W_size);
+
+ if (r.H_size)
+ H += bits.get_bits(r.H_ofs, r.H_size);
+
+ assert((W >= MIN_GRID_DIM) && (W <= MAX_BLOCK_DIM));
+ assert((H >= MIN_GRID_DIM) && (H <= MAX_BLOCK_DIM));
+
+ int p0 = bits.get_bits(r.p0_ofs, 1);
+ int p1 = bits.get_bits(r.p1_ofs, 1);
+ int p2 = bits.get_bits(r.p2_ofs, 1);
+
+ uint32_t p = p0 | (p1 << 1) | (p2 << 2);
+ if (p < 2)
+ return false;
+
+ log_blk.m_grid_width = W;
+ log_blk.m_grid_height = H;
+
+ log_blk.m_weight_ise_range = (p - 2) + (P * BISE_10_LEVELS);
+ assert(log_blk.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
+
+ log_blk.m_dual_plane = Dp;
+
+ return true;
+ }
+
+ static inline uint32_t read_le_dword(const uint8_t* pBytes)
+ {
+ return (pBytes[0]) | (pBytes[1] << 8U) | (pBytes[2] << 16U) | (pBytes[3] << 24U);
+ }
+
+ // See 18.12.Integer Sequence Encoding - tables computed by executing the decoder functions with all possible 8/7-bit inputs.
+ static const uint8_t s_trit_decode[256][5] =
+ {
+ {0,0,0,0,0},{1,0,0,0,0},{2,0,0,0,0},{0,0,2,0,0},{0,1,0,0,0},{1,1,0,0,0},{2,1,0,0,0},{1,0,2,0,0},
+ {0,2,0,0,0},{1,2,0,0,0},{2,2,0,0,0},{2,0,2,0,0},{0,2,2,0,0},{1,2,2,0,0},{2,2,2,0,0},{2,0,2,0,0},
+ {0,0,1,0,0},{1,0,1,0,0},{2,0,1,0,0},{0,1,2,0,0},{0,1,1,0,0},{1,1,1,0,0},{2,1,1,0,0},{1,1,2,0,0},
+ {0,2,1,0,0},{1,2,1,0,0},{2,2,1,0,0},{2,1,2,0,0},{0,0,0,2,2},{1,0,0,2,2},{2,0,0,2,2},{0,0,2,2,2},
+ {0,0,0,1,0},{1,0,0,1,0},{2,0,0,1,0},{0,0,2,1,0},{0,1,0,1,0},{1,1,0,1,0},{2,1,0,1,0},{1,0,2,1,0},
+ {0,2,0,1,0},{1,2,0,1,0},{2,2,0,1,0},{2,0,2,1,0},{0,2,2,1,0},{1,2,2,1,0},{2,2,2,1,0},{2,0,2,1,0},
+ {0,0,1,1,0},{1,0,1,1,0},{2,0,1,1,0},{0,1,2,1,0},{0,1,1,1,0},{1,1,1,1,0},{2,1,1,1,0},{1,1,2,1,0},
+ {0,2,1,1,0},{1,2,1,1,0},{2,2,1,1,0},{2,1,2,1,0},{0,1,0,2,2},{1,1,0,2,2},{2,1,0,2,2},{1,0,2,2,2},
+ {0,0,0,2,0},{1,0,0,2,0},{2,0,0,2,0},{0,0,2,2,0},{0,1,0,2,0},{1,1,0,2,0},{2,1,0,2,0},{1,0,2,2,0},
+ {0,2,0,2,0},{1,2,0,2,0},{2,2,0,2,0},{2,0,2,2,0},{0,2,2,2,0},{1,2,2,2,0},{2,2,2,2,0},{2,0,2,2,0},
+ {0,0,1,2,0},{1,0,1,2,0},{2,0,1,2,0},{0,1,2,2,0},{0,1,1,2,0},{1,1,1,2,0},{2,1,1,2,0},{1,1,2,2,0},
+ {0,2,1,2,0},{1,2,1,2,0},{2,2,1,2,0},{2,1,2,2,0},{0,2,0,2,2},{1,2,0,2,2},{2,2,0,2,2},{2,0,2,2,2},
+ {0,0,0,0,2},{1,0,0,0,2},{2,0,0,0,2},{0,0,2,0,2},{0,1,0,0,2},{1,1,0,0,2},{2,1,0,0,2},{1,0,2,0,2},
+ {0,2,0,0,2},{1,2,0,0,2},{2,2,0,0,2},{2,0,2,0,2},{0,2,2,0,2},{1,2,2,0,2},{2,2,2,0,2},{2,0,2,0,2},
+ {0,0,1,0,2},{1,0,1,0,2},{2,0,1,0,2},{0,1,2,0,2},{0,1,1,0,2},{1,1,1,0,2},{2,1,1,0,2},{1,1,2,0,2},
+ {0,2,1,0,2},{1,2,1,0,2},{2,2,1,0,2},{2,1,2,0,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,0,2,2,2},
+ {0,0,0,0,1},{1,0,0,0,1},{2,0,0,0,1},{0,0,2,0,1},{0,1,0,0,1},{1,1,0,0,1},{2,1,0,0,1},{1,0,2,0,1},
+ {0,2,0,0,1},{1,2,0,0,1},{2,2,0,0,1},{2,0,2,0,1},{0,2,2,0,1},{1,2,2,0,1},{2,2,2,0,1},{2,0,2,0,1},
+ {0,0,1,0,1},{1,0,1,0,1},{2,0,1,0,1},{0,1,2,0,1},{0,1,1,0,1},{1,1,1,0,1},{2,1,1,0,1},{1,1,2,0,1},
+ {0,2,1,0,1},{1,2,1,0,1},{2,2,1,0,1},{2,1,2,0,1},{0,0,1,2,2},{1,0,1,2,2},{2,0,1,2,2},{0,1,2,2,2},
+ {0,0,0,1,1},{1,0,0,1,1},{2,0,0,1,1},{0,0,2,1,1},{0,1,0,1,1},{1,1,0,1,1},{2,1,0,1,1},{1,0,2,1,1},
+ {0,2,0,1,1},{1,2,0,1,1},{2,2,0,1,1},{2,0,2,1,1},{0,2,2,1,1},{1,2,2,1,1},{2,2,2,1,1},{2,0,2,1,1},
+ {0,0,1,1,1},{1,0,1,1,1},{2,0,1,1,1},{0,1,2,1,1},{0,1,1,1,1},{1,1,1,1,1},{2,1,1,1,1},{1,1,2,1,1},
+ {0,2,1,1,1},{1,2,1,1,1},{2,2,1,1,1},{2,1,2,1,1},{0,1,1,2,2},{1,1,1,2,2},{2,1,1,2,2},{1,1,2,2,2},
+ {0,0,0,2,1},{1,0,0,2,1},{2,0,0,2,1},{0,0,2,2,1},{0,1,0,2,1},{1,1,0,2,1},{2,1,0,2,1},{1,0,2,2,1},
+ {0,2,0,2,1},{1,2,0,2,1},{2,2,0,2,1},{2,0,2,2,1},{0,2,2,2,1},{1,2,2,2,1},{2,2,2,2,1},{2,0,2,2,1},
+ {0,0,1,2,1},{1,0,1,2,1},{2,0,1,2,1},{0,1,2,2,1},{0,1,1,2,1},{1,1,1,2,1},{2,1,1,2,1},{1,1,2,2,1},
+ {0,2,1,2,1},{1,2,1,2,1},{2,2,1,2,1},{2,1,2,2,1},{0,2,1,2,2},{1,2,1,2,2},{2,2,1,2,2},{2,1,2,2,2},
+ {0,0,0,1,2},{1,0,0,1,2},{2,0,0,1,2},{0,0,2,1,2},{0,1,0,1,2},{1,1,0,1,2},{2,1,0,1,2},{1,0,2,1,2},
+ {0,2,0,1,2},{1,2,0,1,2},{2,2,0,1,2},{2,0,2,1,2},{0,2,2,1,2},{1,2,2,1,2},{2,2,2,1,2},{2,0,2,1,2},
+ {0,0,1,1,2},{1,0,1,1,2},{2,0,1,1,2},{0,1,2,1,2},{0,1,1,1,2},{1,1,1,1,2},{2,1,1,1,2},{1,1,2,1,2},
+ {0,2,1,1,2},{1,2,1,1,2},{2,2,1,1,2},{2,1,2,1,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,1,2,2,2}
+ };
+
+ static const uint8_t s_quint_decode[128][3] =
+ {
+ {0,0,0},{1,0,0},{2,0,0},{3,0,0},{4,0,0},{0,4,0},{4,4,0},{4,4,4},
+ {0,1,0},{1,1,0},{2,1,0},{3,1,0},{4,1,0},{1,4,0},{4,4,1},{4,4,4},
+ {0,2,0},{1,2,0},{2,2,0},{3,2,0},{4,2,0},{2,4,0},{4,4,2},{4,4,4},
+ {0,3,0},{1,3,0},{2,3,0},{3,3,0},{4,3,0},{3,4,0},{4,4,3},{4,4,4},
+ {0,0,1},{1,0,1},{2,0,1},{3,0,1},{4,0,1},{0,4,1},{4,0,4},{0,4,4},
+ {0,1,1},{1,1,1},{2,1,1},{3,1,1},{4,1,1},{1,4,1},{4,1,4},{1,4,4},
+ {0,2,1},{1,2,1},{2,2,1},{3,2,1},{4,2,1},{2,4,1},{4,2,4},{2,4,4},
+ {0,3,1},{1,3,1},{2,3,1},{3,3,1},{4,3,1},{3,4,1},{4,3,4},{3,4,4},
+ {0,0,2},{1,0,2},{2,0,2},{3,0,2},{4,0,2},{0,4,2},{2,0,4},{3,0,4},
+ {0,1,2},{1,1,2},{2,1,2},{3,1,2},{4,1,2},{1,4,2},{2,1,4},{3,1,4},
+ {0,2,2},{1,2,2},{2,2,2},{3,2,2},{4,2,2},{2,4,2},{2,2,4},{3,2,4},
+ {0,3,2},{1,3,2},{2,3,2},{3,3,2},{4,3,2},{3,4,2},{2,3,4},{3,3,4},
+ {0,0,3},{1,0,3},{2,0,3},{3,0,3},{4,0,3},{0,4,3},{0,0,4},{1,0,4},
+ {0,1,3},{1,1,3},{2,1,3},{3,1,3},{4,1,3},{1,4,3},{0,1,4},{1,1,4},
+ {0,2,3},{1,2,3},{2,2,3},{3,2,3},{4,2,3},{2,4,3},{0,2,4},{1,2,4},
+ {0,3,3},{1,3,3},{2,3,3},{3,3,3},{4,3,3},{3,4,3},{0,3,4},{1,3,4}
+ };
+
+ static void decode_trit_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
+ {
+ assert((num_vals >= 1) && (num_vals <= 5));
+ uint32_t m[5] = { 0 }, T = 0;
+
+ static const uint8_t s_t_bits[5] = { 2, 2, 1, 2, 1 };
+
+ for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)
+ {
+ if (bits_per_val)
+ m[c] = bits.next_bits(bit_ofs, bits_per_val);
+ T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);
+ T_ofs += s_t_bits[c];
+ }
+
+ const uint8_t (&p_trits)[5] = s_trit_decode[T];
+
+ for (uint32_t i = 0; i < num_vals; i++)
+ pVals[i] = (uint8_t)((p_trits[i] << bits_per_val) | m[i]);
+ }
+
+ static void decode_quint_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
+ {
+ assert((num_vals >= 1) && (num_vals <= 3));
+ uint32_t m[3] = { 0 }, T = 0;
+
+ static const uint8_t s_t_bits[3] = { 3, 2, 2 };
+
+ for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)
+ {
+ if (bits_per_val)
+ m[c] = bits.next_bits(bit_ofs, bits_per_val);
+ T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);
+ T_ofs += s_t_bits[c];
+ }
+
+ const uint8_t (&p_quints)[3] = s_quint_decode[T];
+
+ for (uint32_t i = 0; i < num_vals; i++)
+ pVals[i] = (uint8_t)((p_quints[i] << bits_per_val) | m[i]);
+ }
+
+ static void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t bit_ofs)
+ {
+ assert(num_vals && (ise_range < TOTAL_ISE_RANGES));
+
+ const uint32_t bits_per_val = g_ise_range_table[ise_range][0];
+
+ if (g_ise_range_table[ise_range][1])
+ {
+ // Trits+bits, 5 vals per block, 7 bits extra per block
+ const uint32_t total_blocks = (num_vals + 4) / 5;
+ for (uint32_t b = 0; b < total_blocks; b++)
+ {
+ const uint32_t num_vals_in_block = std::min<int>(num_vals - 5 * b, 5);
+ decode_trit_block(pVals + 5 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
+ }
+ }
+ else if (g_ise_range_table[ise_range][2])
+ {
+ // Quints+bits, 3 vals per block, 8 bits extra per block
+ const uint32_t total_blocks = (num_vals + 2) / 3;
+ for (uint32_t b = 0; b < total_blocks; b++)
+ {
+ const uint32_t num_vals_in_block = std::min<int>(num_vals - 3 * b, 3);
+ decode_quint_block(pVals + 3 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
+ }
+ }
+ else
+ {
+ assert(bits_per_val);
+
+ // Only bits
+ for (uint32_t i = 0; i < num_vals; i++)
+ pVals[i] = (uint8_t)bits.next_bits(bit_ofs, bits_per_val);
+ }
+ }
+
+ void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t* pBits128, uint32_t bit_ofs)
+ {
+ const uint128 bits(
+ (uint64_t)read_le_dword(pBits128) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t))) << 32),
+ (uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 3)) << 32));
+
+ return decode_bise(ise_range, pVals, num_vals, bits, bit_ofs);
+ }
+
+ // Decodes a physical ASTC block to a logical ASTC block.
+ // blk_width/blk_height are only used to validate the weight grid's dimensions.
+ bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height)
+ {
+ assert(is_valid_block_size(blk_width, blk_height));
+
+ const uint8_t* pS = (uint8_t*)pASTC_block;
+
+ log_blk.clear();
+ log_blk.m_error_flag = true;
+
+ const uint128 bits(
+ (uint64_t)read_le_dword(pS) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t))) << 32),
+ (uint64_t)read_le_dword(pS + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t) * 3)) << 32));
+
+ const uint128 rev_bits(bits.get_reversed_bits());
+
+ if (!decode_config(bits, log_blk))
+ return false;
+
+ if (log_blk.m_solid_color_flag_hdr || log_blk.m_solid_color_flag_ldr)
+ {
+ // Void extent
+ log_blk.m_error_flag = false;
+ return true;
+ }
+
+ // Check grid dimensions
+ if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
+ return false;
+
+ // Now we have the grid width/height, dual plane, weight ISE range
+
+ const uint32_t total_grid_weights = (log_blk.m_dual_plane ? 2 : 1) * (log_blk.m_grid_width * log_blk.m_grid_height);
+ const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_blk.m_weight_ise_range);
+
+ // 18.24 Illegal Encodings
+ if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))
+ return false;
+
+ const uint32_t end_of_weight_bit_ofs = 128 - total_weight_bits;
+
+ uint32_t total_extra_bits = 0;
+
+ // Right before the weight bits, there may be extra CEM bits, then the 2 CCS bits if dual plane.
+
+ log_blk.m_num_partitions = bits.get_bits(11, 2) + 1;
+ if (log_blk.m_num_partitions == 1)
+ log_blk.m_color_endpoint_modes[0] = bits.get_bits(13, 4); // read CEM bits
+ else
+ {
+ // 2 or more partitions
+ if (log_blk.m_dual_plane && (log_blk.m_num_partitions == 4))
+ return false;
+
+ log_blk.m_partition_id = bits.get_bits(13, 10);
+
+ uint32_t cem_bits = bits.get_bits(23, 6);
+
+ if ((cem_bits & 3) == 0)
+ {
+ // All CEM's the same
+ for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
+ log_blk.m_color_endpoint_modes[i] = cem_bits >> 2;
+ }
+ else
+ {
+ // CEM's different, but within up to 2 adjacent classes
+ const uint32_t first_cem_index = ((cem_bits & 3) - 1) * 4;
+
+ total_extra_bits = 3 * log_blk.m_num_partitions - 4;
+
+ if ((total_weight_bits + total_extra_bits) > 128)
+ return false;
+
+ uint32_t cem_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
+
+ uint32_t c[4] = { 0 }, m[4] = { 0 };
+
+ cem_bits >>= 2;
+ for (uint32_t i = 0; i < log_blk.m_num_partitions; i++, cem_bits >>= 1)
+ c[i] = cem_bits & 1;
+
+ switch (log_blk.m_num_partitions)
+ {
+ case 2:
+ {
+ m[0] = cem_bits & 3;
+ m[1] = bits.next_bits(cem_bit_pos, 2);
+ break;
+ }
+ case 3:
+ {
+ m[0] = cem_bits & 1;
+ m[0] |= (bits.next_bits(cem_bit_pos, 1) << 1);
+ m[1] = bits.next_bits(cem_bit_pos, 2);
+ m[2] = bits.next_bits(cem_bit_pos, 2);
+ break;
+ }
+ case 4:
+ {
+ for (uint32_t i = 0; i < 4; i++)
+ m[i] = bits.next_bits(cem_bit_pos, 2);
+ break;
+ }
+ default:
+ {
+ assert(0);
+ break;
+ }
+ }
+
+ assert(cem_bit_pos == end_of_weight_bit_ofs);
+
+ for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
+ {
+ log_blk.m_color_endpoint_modes[i] = first_cem_index + (c[i] * 4) + m[i];
+ assert(log_blk.m_color_endpoint_modes[i] <= 15);
+ }
+ }
+ }
+
+ // Now we have all the CEM indices.
+
+ if (log_blk.m_dual_plane)
+ {
+ // Read CCS bits, beneath any CEM bits
+ total_extra_bits += 2;
+
+ if (total_extra_bits > end_of_weight_bit_ofs)
+ return false;
+
+ uint32_t ccs_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
+ log_blk.m_color_component_selector = bits.get_bits(ccs_bit_pos, 2);
+ }
+
+ uint32_t config_bit_pos = 11 + 2; // config+num_parts
+ if (log_blk.m_num_partitions == 1)
+ config_bit_pos += 4; // CEM bits
+ else
+ config_bit_pos += 10 + 6; // part_id+CEM bits
+
+ // config+num_parts+total_extra_bits (CEM extra+CCS)
+ uint32_t total_config_bits = config_bit_pos + total_extra_bits;
+
+ // Compute number of remaining bits in block
+ const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;
+ if (num_remaining_bits < 0)
+ return false;
+
+ // Compute total number of ISE encoded color endpoint mode values
+ uint32_t total_cem_vals = 0;
+ for (uint32_t j = 0; j < log_blk.m_num_partitions; j++)
+ total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[j]);
+
+ if (total_cem_vals > MAX_ENDPOINTS)
+ return false;
+
+ // Infer endpoint ISE range based off the # of values we need to encode, and the # of remaining bits in the block
+ int endpoint_ise_range = -1;
+ for (int k = 20; k > 0; k--)
+ {
+ int b = get_ise_sequence_bits(total_cem_vals, k);
+ if (b <= num_remaining_bits)
+ {
+ endpoint_ise_range = k;
+ break;
+ }
+ }
+
+ // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
+ if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
+ return false;
+
+ log_blk.m_endpoint_ise_range = endpoint_ise_range;
+
+ // Decode endpoints forwards in block
+ decode_bise(log_blk.m_endpoint_ise_range, log_blk.m_endpoints, total_cem_vals, bits, config_bit_pos);
+
+ // Decode grid weights backwards in block
+ decode_bise(log_blk.m_weight_ise_range, log_blk.m_weights, total_grid_weights, rev_bits, 0);
+
+ log_blk.m_error_flag = false;
+
+ return true;
+ }
+
+} // namespace astc_helpers
+
+#endif //BASISU_ASTC_HELPERS_IMPLEMENTATION
diff --git a/thirdparty/basis_universal/transcoder/basisu_containers.h b/thirdparty/basis_universal/transcoder/basisu_containers.h
index d3e14369ba..bfc51bb499 100644
--- a/thirdparty/basis_universal/transcoder/basisu_containers.h
+++ b/thirdparty/basis_universal/transcoder/basisu_containers.h
@@ -188,8 +188,9 @@ namespace basisu
#define BASISU_IS_SCALAR_TYPE(T) (scalar_type<T>::cFlag)
-#if defined(__GNUC__) && __GNUC__<5
- #define BASISU_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
+#if !defined(BASISU_HAVE_STD_TRIVIALLY_COPYABLE) && defined(__GNUC__) && __GNUC__<5
+ //#define BASISU_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
+ #define BASISU_IS_TRIVIALLY_COPYABLE(...) __is_trivially_copyable(__VA_ARGS__)
#else
#define BASISU_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
#endif
@@ -286,8 +287,19 @@ namespace basisu
if (BASISU_IS_BITWISE_COPYABLE(T))
{
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+#endif
if ((m_p) && (other.m_p))
memcpy(m_p, other.m_p, m_size * sizeof(T));
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
}
else
{
@@ -330,8 +342,19 @@ namespace basisu
if (BASISU_IS_BITWISE_COPYABLE(T))
{
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+#endif
if ((m_p) && (other.m_p))
memcpy(m_p, other.m_p, other.m_size * sizeof(T));
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
}
else
{
@@ -501,7 +524,7 @@ namespace basisu
if (new_capacity > m_capacity)
{
- if (!increase_capacity(new_capacity, false))
+ if (!increase_capacity(new_capacity, false, true))
return false;
}
else if (new_capacity < m_capacity)
@@ -509,7 +532,8 @@ namespace basisu
// Must work around the lack of a "decrease_capacity()" method.
// This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize.
vector tmp;
- tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false);
+ if (!tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false, true))
+ return false;
tmp = *this;
swap(tmp);
}
@@ -750,7 +774,21 @@ namespace basisu
}
// Copy "down" the objects to preserve, filling in the empty slots.
+
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+#endif
+
memmove(pDst, pSrc, num_to_move * sizeof(T));
+
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
}
else
{
@@ -1003,7 +1041,21 @@ namespace basisu
inline void set_all(const T& o)
{
if ((sizeof(T) == 1) && (scalar_type<T>::cFlag))
+ {
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+#endif
memset(m_p, *reinterpret_cast<const uint8_t*>(&o), m_size);
+
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
+ }
else
{
T* pDst = m_p;
@@ -1029,7 +1081,7 @@ namespace basisu
// Important: This method is used in Basis Universal. If you change how this container allocates memory, you'll need to change any users of this method.
inline bool grant_ownership(T* p, uint32_t size, uint32_t capacity)
{
- // To to prevent the caller from obviously shooting themselves in the foot.
+ // To prevent the caller from obviously shooting themselves in the foot.
if (((p + capacity) > m_p) && (p < (m_p + m_capacity)))
{
// Can grant ownership of a block inside the container itself!
diff --git a/thirdparty/basis_universal/transcoder/basisu_containers_impl.h b/thirdparty/basis_universal/transcoder/basisu_containers_impl.h
index d5cb61569b..60c0b3d89f 100644
--- a/thirdparty/basis_universal/transcoder/basisu_containers_impl.h
+++ b/thirdparty/basis_universal/transcoder/basisu_containers_impl.h
@@ -19,23 +19,30 @@ namespace basisu
if (m_capacity >= min_new_capacity)
return true;
- size_t new_capacity = min_new_capacity;
- if ((grow_hint) && (!helpers::is_power_of_2((uint64_t)new_capacity)))
- {
- new_capacity = (size_t)helpers::next_pow2((uint64_t)new_capacity);
-
- assert(new_capacity && (new_capacity > m_capacity));
+ uint64_t new_capacity_u64 = min_new_capacity;
+ if ((grow_hint) && (!helpers::is_power_of_2(new_capacity_u64)))
+ new_capacity_u64 = helpers::next_pow2(new_capacity_u64);
- if (new_capacity < min_new_capacity)
- {
- if (nofail)
- return false;
- fprintf(stderr, "vector too large\n");
- abort();
- }
+ size_t new_capacity = (size_t)new_capacity_u64;
+ if (new_capacity != new_capacity_u64)
+ {
+ if (nofail)
+ return false;
+ fprintf(stderr, "elemental_vector::increase_capacity: vector too large\n");
+ abort();
}
- const size_t desired_size = element_size * new_capacity;
+ const uint64_t desired_size_u64 = (uint64_t)element_size * new_capacity;
+
+ const size_t desired_size = (size_t)desired_size_u64;
+ if (desired_size_u64 != desired_size)
+ {
+ if (nofail)
+ return false;
+ fprintf(stderr, "elemental_vector::increase_capacity: vector too large\n");
+ abort();
+ }
+
size_t actual_size = 0;
if (!pMover)
{
@@ -46,11 +53,7 @@ namespace basisu
return false;
char buf[256];
-#ifdef _MSC_VER
- sprintf_s(buf, sizeof(buf), "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size);
-#else
- sprintf(buf, "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size);
-#endif
+ snprintf(buf, sizeof(buf), "elemental_vector::increase_capacity: realloc() failed allocating %zu bytes", desired_size);
fprintf(stderr, "%s", buf);
abort();
}
@@ -75,11 +78,7 @@ namespace basisu
return false;
char buf[256];
-#ifdef _MSC_VER
- sprintf_s(buf, sizeof(buf), "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size);
-#else
- sprintf(buf, "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size);
-#endif
+ snprintf(buf, sizeof(buf), "elemental_vector::increase_capacity: malloc() failed allocating %zu bytes", desired_size);
fprintf(stderr, "%s", buf);
abort();
}
diff --git a/thirdparty/basis_universal/transcoder/basisu_file_headers.h b/thirdparty/basis_universal/transcoder/basisu_file_headers.h
index 4316d738e6..d29e3feb03 100644
--- a/thirdparty/basis_universal/transcoder/basisu_file_headers.h
+++ b/thirdparty/basis_universal/transcoder/basisu_file_headers.h
@@ -1,5 +1,5 @@
// basis_file_headers.h
-// Copyright (C) 2019-2020 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -89,7 +89,8 @@ namespace basist
enum class basis_tex_format
{
cETC1S = 0,
- cUASTC4x4 = 1
+ cUASTC4x4 = 1,
+ cUASTC_HDR_4x4 = 2
};
struct basis_file_header
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
index c698861f3b..32018cd282 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
@@ -1,5 +1,5 @@
// basisu_transcoder.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -17,6 +17,11 @@
#include <limits.h>
#include "basisu_containers_impl.h"
+#define BASISU_ASTC_HELPERS_IMPLEMENTATION
+#include "basisu_astc_helpers.h"
+
+#include "basisu_astc_hdr_core.h"
+
#ifndef BASISD_IS_BIG_ENDIAN
// TODO: This doesn't work on OSX. How can this be so difficult?
//#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN)
@@ -139,6 +144,10 @@
#endif
#endif
+#ifndef BASISD_SUPPORT_UASTC_HDR
+ #define BASISD_SUPPORT_UASTC_HDR 1
+#endif
+
#define BASISD_WRITE_NEW_BC7_MODE5_TABLES 0
#define BASISD_WRITE_NEW_DXT1_TABLES 0
#define BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES 0
@@ -1908,17 +1917,24 @@ namespace basist
void basisu_transcoder_init()
{
if (g_transcoder_initialized)
- {
- BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");
+ {
+ BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");
return;
- }
+ }
- BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");
+ BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");
#if BASISD_SUPPORT_UASTC
uastc_init();
#endif
+#if BASISD_SUPPORT_UASTC_HDR
+ // TODO: Examine this, optimize for startup time/mem utilization.
+ astc_helpers::init_tables(false);
+
+ astc_hdr_core_init();
+#endif
+
#if BASISD_SUPPORT_ASTC
transcoder_init_astc();
#endif
@@ -2027,6 +2043,10 @@ namespace basist
transcoder_init_pvrtc2();
#endif
+#if BASISD_SUPPORT_UASTC_HDR
+ bc6h_enc_init();
+#endif
+
g_transcoder_initialized = true;
}
@@ -6928,7 +6948,7 @@ namespace basist
static inline int sq(int x) { return x * x; }
- // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0.
+ // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0.
// This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha!
// And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it.
static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook)
@@ -7515,6 +7535,8 @@ namespace basist
}
#endif // BASISD_SUPPORT_PVRTC2
+ //------------------------------------------------------------------------------------------------
+
basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder() :
m_pGlobal_codebook(nullptr),
m_selector_history_buf_size(0)
@@ -8620,7 +8642,7 @@ namespace basist
// Now make sure the output buffer is large enough, or we'll overwrite memory.
if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels))
{
- BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
+ BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
return false;
}
}
@@ -8632,7 +8654,7 @@ namespace basist
if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1)
{
- BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
+ BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
return false;
}
}
@@ -8640,7 +8662,7 @@ namespace basist
{
if (output_blocks_buf_size_in_blocks_or_pixels < total_slice_blocks)
{
- BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n");
+ BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n");
return false;
}
}
@@ -9242,13 +9264,17 @@ namespace basist
return status;
}
+
+ //------------------------------------------------------------------------------------------------
basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder()
{
}
- bool basisu_lowlevel_uastc_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
- uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
+ bool basisu_lowlevel_uastc_transcoder::transcode_slice(
+ void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+ uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
+ const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
{
BASISU_NOTE_UNUSED(pState);
@@ -9784,6 +9810,317 @@ namespace basist
return status;
}
+
+ //------------------------------------------------------------------------------------------------
+
+ basisu_lowlevel_uastc_hdr_transcoder::basisu_lowlevel_uastc_hdr_transcoder()
+ {
+ }
+
+ bool basisu_lowlevel_uastc_hdr_transcoder::transcode_slice(
+ void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+ uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
+ const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
+ basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
+ {
+ BASISU_NOTE_UNUSED(pState);
+ BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
+ BASISU_NOTE_UNUSED(has_alpha);
+ BASISU_NOTE_UNUSED(channel0);
+ BASISU_NOTE_UNUSED(channel1);
+ BASISU_NOTE_UNUSED(decode_flags);
+
+ assert(g_transcoder_initialized);
+ if (!g_transcoder_initialized)
+ {
+ BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder not globally initialized.\n");
+ return false;
+ }
+
+#if BASISD_SUPPORT_UASTC_HDR
+ const uint32_t total_blocks = num_blocks_x * num_blocks_y;
+
+ if (!output_row_pitch_in_blocks_or_pixels)
+ {
+ if (basis_block_format_is_uncompressed(fmt))
+ output_row_pitch_in_blocks_or_pixels = orig_width;
+ else
+ output_row_pitch_in_blocks_or_pixels = num_blocks_x;
+ }
+
+ if (basis_block_format_is_uncompressed(fmt))
+ {
+ if (!output_rows_in_pixels)
+ output_rows_in_pixels = orig_height;
+ }
+
+ uint32_t total_expected_block_bytes = sizeof(astc_blk) * total_blocks;
+ if (image_data_size < total_expected_block_bytes)
+ {
+ BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
+ return false;
+ }
+
+ const astc_blk* pSource_block = reinterpret_cast<const astc_blk*>(pImage_data);
+
+ bool status = false;
+
+ // TODO: Optimize pure memcpy() case.
+
+ for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
+ {
+ void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
+
+ for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes)
+ {
+ switch (fmt)
+ {
+ case block_format::cUASTC_HDR_4x4:
+ case block_format::cASTC_HDR_4x4:
+ {
+ // Nothing to do, UASTC HDR is just ASTC.
+ memcpy(pDst_block, pSource_block, sizeof(uastc_block));
+ status = true;
+ break;
+ }
+ case block_format::cBC6H:
+ {
+ status = astc_hdr_transcode_to_bc6h(*pSource_block, *(bc6h_block *)pDst_block);
+ break;
+ }
+ case block_format::cRGB_9E5:
+ {
+ astc_helpers::log_astc_block log_blk;
+ status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
+ if (status)
+ {
+ uint32_t* pDst_pixels = reinterpret_cast<uint32_t*>(
+ static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t)
+ );
+
+ uint32_t blk_texels[4][4];
+
+ status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeRGB9E5);
+
+ if (status)
+ {
+ const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
+ const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
+
+ for (uint32_t y = 0; y < max_y; y++)
+ {
+ memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x);
+
+ pDst_pixels += output_row_pitch_in_blocks_or_pixels;
+ } // y
+ }
+ }
+
+ break;
+ }
+ case block_format::cRGBA_HALF:
+ {
+ astc_helpers::log_astc_block log_blk;
+ status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
+ if (status)
+ {
+ half_float* pDst_pixels = reinterpret_cast<half_float*>(
+ static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4
+ );
+
+ half_float blk_texels[4][4][4];
+ status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
+
+ if (status)
+ {
+ const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
+ const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
+
+ for (uint32_t y = 0; y < max_y; y++)
+ {
+ for (uint32_t x = 0; x < max_x; x++)
+ {
+ pDst_pixels[0 + 4 * x] = blk_texels[y][x][0];
+ pDst_pixels[1 + 4 * x] = blk_texels[y][x][1];
+ pDst_pixels[2 + 4 * x] = blk_texels[y][x][2];
+ pDst_pixels[3 + 4 * x] = blk_texels[y][x][3];
+ } // x
+
+ pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4;
+ } // y
+ }
+ }
+
+ break;
+ }
+ case block_format::cRGB_HALF:
+ {
+ astc_helpers:: log_astc_block log_blk;
+ status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
+ if (status)
+ {
+ half_float* pDst_pixels =
+ reinterpret_cast<half_float*>(static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3);
+
+ half_float blk_texels[4][4][4];
+ status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
+ if (status)
+ {
+ const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
+ const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
+
+ for (uint32_t y = 0; y < max_y; y++)
+ {
+ for (uint32_t x = 0; x < max_x; x++)
+ {
+ pDst_pixels[0 + 3 * x] = blk_texels[y][x][0];
+ pDst_pixels[1 + 3 * x] = blk_texels[y][x][1];
+ pDst_pixels[2 + 3 * x] = blk_texels[y][x][2];
+ } // x
+
+ pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3;
+ } // y
+ }
+ }
+
+ break;
+ }
+ default:
+ assert(0);
+ break;
+
+ }
+
+ if (!status)
+ {
+ BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder failed to unpack a UASTC HDR block - this is a bug, or the data was corrupted\n"); return false;
+ }
+
+ } // block_x
+
+ } // block_y
+
+ return true;
+#else
+ BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: UASTC_HDR is unsupported\n");
+
+ BASISU_NOTE_UNUSED(decode_flags);
+ BASISU_NOTE_UNUSED(channel0);
+ BASISU_NOTE_UNUSED(channel1);
+ BASISU_NOTE_UNUSED(output_rows_in_pixels);
+ BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
+ BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
+ BASISU_NOTE_UNUSED(fmt);
+ BASISU_NOTE_UNUSED(image_data_size);
+ BASISU_NOTE_UNUSED(pImage_data);
+ BASISU_NOTE_UNUSED(num_blocks_x);
+ BASISU_NOTE_UNUSED(num_blocks_y);
+ BASISU_NOTE_UNUSED(pDst_blocks);
+
+ return false;
+#endif
+ }
+
+ bool basisu_lowlevel_uastc_hdr_transcoder::transcode_image(
+ transcoder_texture_format target_format,
+ void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
+ const uint8_t* pCompressed_data, uint32_t compressed_data_length,
+ uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
+ uint32_t slice_offset, uint32_t slice_length,
+ uint32_t decode_flags,
+ bool has_alpha,
+ bool is_video,
+ uint32_t output_row_pitch_in_blocks_or_pixels,
+ basisu_transcoder_state* pState,
+ uint32_t output_rows_in_pixels,
+ int channel0, int channel1)
+ {
+ BASISU_NOTE_UNUSED(is_video);
+ BASISU_NOTE_UNUSED(level_index);
+ BASISU_NOTE_UNUSED(decode_flags);
+
+ if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
+ {
+ BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: source data buffer too small\n");
+ return false;
+ }
+
+ const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
+ const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
+
+ if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks))
+ {
+ BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: output buffer size too small\n");
+ return false;
+ }
+
+ bool status = false;
+
+ switch (target_format)
+ {
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+ {
+ status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_4x4,
+ bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
+
+ if (!status)
+ {
+ BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n");
+ }
+ break;
+ }
+ case transcoder_texture_format::cTFBC6H:
+ {
+ status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H,
+ bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
+ if (!status)
+ {
+ BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to BC6H failed\n");
+ }
+ break;
+ }
+ case transcoder_texture_format::cTFRGB_HALF:
+ {
+ status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF,
+ bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+ if (!status)
+ {
+ BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n");
+ }
+ break;
+ }
+ case transcoder_texture_format::cTFRGBA_HALF:
+ {
+ status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF,
+ bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+ if (!status)
+ {
+ BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
+ }
+ break;
+ }
+ case transcoder_texture_format::cTFRGB_9E5:
+ {
+ status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5,
+ bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+ if (!status)
+ {
+ BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
+ }
+ break;
+ }
+ default:
+ {
+ assert(0);
+ BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: Invalid format\n");
+ break;
+ }
+ }
+
+ return status;
+ }
+
+ //------------------------------------------------------------------------------------------------
basisu_transcoder::basisu_transcoder() :
m_ready_to_transcode(false)
@@ -10390,7 +10727,7 @@ namespace basist
}
else
{
- // Nothing special to do for UASTC.
+ // Nothing special to do for UASTC/UASTC HDR.
if (m_lowlevel_etc1s_decoder.m_local_endpoints.size())
{
m_lowlevel_etc1s_decoder.clear();
@@ -10510,7 +10847,14 @@ namespace basist
return false;
}
- if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
+ if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
+ {
+ return m_lowlevel_uastc_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
+ pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
+ fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
+ output_rows_in_pixels, channel0, channel1, decode_flags);
+ }
+ else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
{
return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
@@ -10742,7 +11086,18 @@ namespace basist
memset(static_cast<uint8_t*>(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel);
}
- if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
+ if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
+ {
+ const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
+
+ // Use the container independent image transcode method.
+ status = m_lowlevel_uastc_hdr_decoder.transcode_image(fmt,
+ pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
+ (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
+ pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
+ decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+ }
+ else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
{
const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
@@ -10808,20 +11163,27 @@ namespace basist
return 8;
case transcoder_texture_format::cTFBC7_RGBA:
case transcoder_texture_format::cTFBC7_ALT:
+ case transcoder_texture_format::cTFBC6H:
case transcoder_texture_format::cTFETC2_RGBA:
case transcoder_texture_format::cTFBC3_RGBA:
case transcoder_texture_format::cTFBC5_RG:
case transcoder_texture_format::cTFASTC_4x4_RGBA:
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
case transcoder_texture_format::cTFATC_RGBA:
case transcoder_texture_format::cTFFXT1_RGB:
case transcoder_texture_format::cTFETC2_EAC_RG11:
return 16;
case transcoder_texture_format::cTFRGBA32:
+ case transcoder_texture_format::cTFRGB_9E5:
return sizeof(uint32_t);
case transcoder_texture_format::cTFRGB565:
case transcoder_texture_format::cTFBGR565:
case transcoder_texture_format::cTFRGBA4444:
return sizeof(uint16_t);
+ case transcoder_texture_format::cTFRGB_HALF:
+ return sizeof(half_float) * 3;
+ case transcoder_texture_format::cTFRGBA_HALF:
+ return sizeof(half_float) * 4;
default:
assert(0);
BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
@@ -10845,17 +11207,22 @@ namespace basist
case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA";
case transcoder_texture_format::cTFBC5_RG: return "BC5_RG";
case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA";
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return "ASTC_HDR_RGBA";
case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB";
case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA";
case transcoder_texture_format::cTFRGBA32: return "RGBA32";
case transcoder_texture_format::cTFRGB565: return "RGB565";
case transcoder_texture_format::cTFBGR565: return "BGR565";
case transcoder_texture_format::cTFRGBA4444: return "RGBA4444";
+ case transcoder_texture_format::cTFRGBA_HALF: return "RGBA_HALF";
+ case transcoder_texture_format::cTFRGB_9E5: return "RGB_9E5";
+ case transcoder_texture_format::cTFRGB_HALF: return "RGB_HALF";
case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB";
case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB";
case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11";
case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11";
+ case transcoder_texture_format::cTFBC6H: return "BC6H";
default:
assert(0);
BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
@@ -10881,7 +11248,13 @@ namespace basist
case block_format::cRGB565: return "RGB565";
case block_format::cBGR565: return "BGR565";
case block_format::cRGBA4444: return "RGBA4444";
+ case block_format::cRGBA_HALF: return "RGBA_HALF";
+ case block_format::cRGB_HALF: return "RGB_HALF";
+ case block_format::cRGB_9E5: return "RGB_9E5";
case block_format::cUASTC_4x4: return "UASTC_4x4";
+ case block_format::cUASTC_HDR_4x4: return "UASTC_HDR_4x4";
+ case block_format::cBC6H: return "BC6H";
+ case block_format::cASTC_HDR_4x4: return "ASTC_HDR_4x4";
case block_format::cFXT1_RGB: return "FXT1_RGB";
case block_format::cPVRTC2_4_RGB: return "PVRTC2_4_RGB";
case block_format::cPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
@@ -10914,11 +11287,13 @@ namespace basist
bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt)
{
+ // TODO: Technically ASTC_HDR does support alpha, but UASTC_HDR doesn't yet support it. Unsure what to do here.
switch (fmt)
{
case transcoder_texture_format::cTFETC2_RGBA:
case transcoder_texture_format::cTFBC3_RGBA:
case transcoder_texture_format::cTFASTC_4x4_RGBA:
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
case transcoder_texture_format::cTFBC7_RGBA:
case transcoder_texture_format::cTFBC7_ALT:
case transcoder_texture_format::cTFPVRTC1_4_RGBA:
@@ -10926,6 +11301,23 @@ namespace basist
case transcoder_texture_format::cTFATC_RGBA:
case transcoder_texture_format::cTFRGBA32:
case transcoder_texture_format::cTFRGBA4444:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ return true;
+ default:
+ break;
+ }
+ return false;
+ }
+
+ bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt)
+ {
+ switch (fmt)
+ {
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+ case transcoder_texture_format::cTFBC6H:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ case transcoder_texture_format::cTFRGB_HALF:
+ case transcoder_texture_format::cTFRGB_9E5:
return true;
default:
break;
@@ -10947,13 +11339,18 @@ namespace basist
case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA;
case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3;
case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5;
- case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC4x4;
+ case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC_LDR_4x4;
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return basisu::texture_format::cASTC_HDR_4x4;
+ case transcoder_texture_format::cTFBC6H: return basisu::texture_format::cBC6HUnsigned;
case transcoder_texture_format::cTFATC_RGB: return basisu::texture_format::cATC_RGB;
case transcoder_texture_format::cTFATC_RGBA: return basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA;
case transcoder_texture_format::cTFRGBA32: return basisu::texture_format::cRGBA32;
case transcoder_texture_format::cTFRGB565: return basisu::texture_format::cRGB565;
case transcoder_texture_format::cTFBGR565: return basisu::texture_format::cBGR565;
case transcoder_texture_format::cTFRGBA4444: return basisu::texture_format::cRGBA4444;
+ case transcoder_texture_format::cTFRGBA_HALF: return basisu::texture_format::cRGBA_HALF;
+ case transcoder_texture_format::cTFRGB_9E5: return basisu::texture_format::cRGB_9E5;
+ case transcoder_texture_format::cTFRGB_HALF: return basisu::texture_format::cRGB_HALF;
case transcoder_texture_format::cTFFXT1_RGB: return basisu::texture_format::cFXT1_RGB;
case transcoder_texture_format::cTFPVRTC2_4_RGB: return basisu::texture_format::cPVRTC2_4_RGBA;
case transcoder_texture_format::cTFPVRTC2_4_RGBA: return basisu::texture_format::cPVRTC2_4_RGBA;
@@ -10975,6 +11372,9 @@ namespace basist
case transcoder_texture_format::cTFRGB565:
case transcoder_texture_format::cTFBGR565:
case transcoder_texture_format::cTFRGBA4444:
+ case transcoder_texture_format::cTFRGB_HALF:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ case transcoder_texture_format::cTFRGB_9E5:
return true;
default:
break;
@@ -10995,6 +11395,9 @@ namespace basist
case block_format::cRGBA4444_COLOR:
case block_format::cRGBA4444_ALPHA:
case block_format::cRGBA4444_COLOR_OPAQUE:
+ case block_format::cRGBA_HALF:
+ case block_format::cRGB_HALF:
+ case block_format::cRGB_9E5:
return true;
default:
break;
@@ -11007,11 +11410,16 @@ namespace basist
switch (fmt)
{
case transcoder_texture_format::cTFRGBA32:
+ case transcoder_texture_format::cTFRGB_9E5:
return sizeof(uint32_t);
case transcoder_texture_format::cTFRGB565:
case transcoder_texture_format::cTFBGR565:
case transcoder_texture_format::cTFRGBA4444:
return sizeof(uint16_t);
+ case transcoder_texture_format::cTFRGB_HALF:
+ return sizeof(half_float) * 3;
+ case transcoder_texture_format::cTFRGBA_HALF:
+ return sizeof(half_float) * 4;
default:
break;
}
@@ -11038,8 +11446,26 @@ namespace basist
bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt)
{
- if (fmt == basis_tex_format::cUASTC4x4)
+ if (fmt == basis_tex_format::cUASTC_HDR_4x4)
+ {
+ // UASTC HDR
+#if BASISD_SUPPORT_UASTC_HDR
+ switch (tex_type)
+ {
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+ case transcoder_texture_format::cTFBC6H:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ case transcoder_texture_format::cTFRGB_HALF:
+ case transcoder_texture_format::cTFRGB_9E5:
+ return true;
+ default:
+ break;
+ }
+#endif
+ }
+ else if (fmt == basis_tex_format::cUASTC4x4)
{
+ // UASTC LDR
#if BASISD_SUPPORT_UASTC
switch (tex_type)
{
@@ -11049,6 +11475,12 @@ namespace basist
case transcoder_texture_format::cTFATC_RGB:
case transcoder_texture_format::cTFATC_RGBA:
case transcoder_texture_format::cTFFXT1_RGB:
+ // UASTC LDR doesn't support transcoding to HDR formats
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+ case transcoder_texture_format::cTFBC6H:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ case transcoder_texture_format::cTFRGB_HALF:
+ case transcoder_texture_format::cTFRGB_9E5:
return false;
default:
return true;
@@ -11057,6 +11489,7 @@ namespace basist
}
else
{
+ // ETC1S
switch (tex_type)
{
// ETC1 and uncompressed are always supported.
@@ -11812,7 +12245,7 @@ namespace basist
// Encodes 3 values to output, usable for any range that uses quints and bits
static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
{
- // First extract the trits and the bits from the 5 input values
+ // First extract the quints and the bits from the 3 input values
int quints = 0, bits[3];
const uint32_t bit_mask = (1 << n) - 1;
for (int i = 0; i < 3; i++)
@@ -12131,11 +12564,13 @@ namespace basist
return bits & ((1U << codesize) - 1U);
}
-
- uint32_t byte_bit_offset = bit_offset & 7U;
- const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]);
- bit_offset += codesize;
- return (w >> byte_bit_offset)& ((1U << codesize) - 1U);
+ else
+ {
+ uint32_t byte_bit_offset = bit_offset & 7U;
+ const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]);
+ bit_offset += codesize;
+ return (w >> byte_bit_offset) & ((1U << codesize) - 1U);
+ }
}
bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints)
@@ -12170,6 +12605,7 @@ namespace basist
return false;
unpacked.m_mode = mode;
+ unpacked.m_common_pattern = 0;
uint32_t bit_ofs = g_uastc_mode_huff_codes[mode][1];
@@ -16663,10 +17099,12 @@ namespace basist
memcpy(&m_header, pData, sizeof(m_header));
- // We only support UASTC and ETC1S
- if (m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED)
+ // We only support UASTC LDR, UASTC HDR and ETC1S.
+ // Note the DFD's contents are what we are guided by for decoding the KTX2 file, not this format field (currently).
+ if ((m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) &&
+ (m_header.m_vk_format != basist::KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK))
{
- BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC format\n");
+ BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC LDR/HDR format\n");
return false;
}
@@ -16890,6 +17328,16 @@ namespace basist
// We're assuming "DATA" means RGBA so it has alpha.
m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
}
+ else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_HDR)
+ {
+ m_format = basist::basis_tex_format::cUASTC_HDR_4x4;
+
+ m_dfd_samples = 1;
+ m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
+
+ // We're assuming "DATA" means RGBA so it has alpha.
+ m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
+ }
else
{
// Unsupported DFD color model.
@@ -17167,7 +17615,8 @@ namespace basist
return false;
}
}
- else if (m_format == basist::basis_tex_format::cUASTC4x4)
+ else if ((m_format == basist::basis_tex_format::cUASTC4x4) ||
+ (m_format == basist::basis_tex_format::cUASTC_HDR_4x4))
{
// Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices.
assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length);
@@ -17188,14 +17637,29 @@ namespace basist
return false;
}
- if (!m_uastc_transcoder.transcode_image(fmt,
- pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
- (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
- 0, (uint32_t)total_2D_image_size,
- decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
+ if (m_format == basist::basis_tex_format::cUASTC_HDR_4x4)
{
- BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
- return false;
+ if (!m_uastc_hdr_transcoder.transcode_image(fmt,
+ pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
+ (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
+ 0, (uint32_t)total_2D_image_size,
+ decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
+ {
+ BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
+ return false;
+ }
+ }
+ else
+ {
+ if (!m_uastc_transcoder.transcode_image(fmt,
+ pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
+ (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
+ 0, (uint32_t)total_2D_image_size,
+ decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
+ {
+ BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
+ return false;
+ }
}
}
else
@@ -17476,4 +17940,1531 @@ namespace basist
#endif
}
+ //-------------------------------
+
+#ifdef BASISD_SUPPORT_UASTC_HDR
+ // This float->half conversion matches how "F32TO16" works on Intel GPU's.
+ basist::half_float float_to_half(float val)
+ {
+ union { float f; int32_t i; uint32_t u; } fi = { val };
+ const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1;
+ int s = flt_s, e = 0, m = 0;
+
+ // inf/NaN
+ if (flt_e == 0xff)
+ {
+ e = 31;
+ if (flt_m != 0) // NaN
+ m = 1;
+ }
+ // not zero or denormal
+ else if (flt_e != 0)
+ {
+ int new_exp = flt_e - 127;
+ if (new_exp > 15)
+ e = 31;
+ else if (new_exp < -14)
+ m = lrintf((1 << 24) * fabsf(fi.f));
+ else
+ {
+ e = new_exp + 15;
+ m = lrintf(flt_m * (1.0f / ((float)(1 << 13))));
+ }
+ }
+
+ assert((0 <= m) && (m <= 1024));
+ if (m == 1024)
+ {
+ e++;
+ m = 0;
+ }
+
+ assert((s >= 0) && (s <= 1));
+ assert((e >= 0) && (e <= 31));
+ assert((m >= 0) && (m <= 1023));
+
+ basist::half_float result = (basist::half_float)((s << 15) | (e << 10) | m);
+ return result;
+ }
+
+ //------------------------------------------------------------------------------------------------
+ // HDR support
+ //
+ // Originally from bc6h_enc.cpp
+ // BC6H decoder fuzzed vs. DirectXTex's for unsigned/signed
+
+ const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4] = // base bits, r, g, b
+ {
+ // 2 subsets
+ { 10, 5, 5, 5, }, // 0, mode 1 in MS/D3D docs
+ { 7, 6, 6, 6, }, // 1
+ { 11, 5, 4, 4, }, // 2
+ { 11, 4, 5, 4, }, // 3
+ { 11, 4, 4, 5, }, // 4
+ { 9, 5, 5, 5, }, // 5
+ { 8, 6, 5, 5, }, // 6
+ { 8, 5, 6, 5, }, // 7
+ { 8, 5, 5, 6, }, // 8
+ { 6, 6, 6, 6, }, // 9, endpoints not delta encoded, mode 10 in MS/D3D docs
+ // 1 subset
+ { 10, 10, 10, 10, }, // 10, endpoints not delta encoded, mode 11 in MS/D3D docs
+ { 11, 9, 9, 9, }, // 11
+ { 12, 8, 8, 8, }, // 12
+ { 16, 4, 4, 4, } // 13, also useful for solid blocks
+ };
+
+ const int8_t g_bc6h_mode_lookup[32] = { 0, 1, 2, 10, 0, 1, 3, 11, 0, 1, 4, 12, 0, 1, 5, 13, 0, 1, 6, -1, 0, 1, 7, -1, 0, 1, 8, -1, 0, 1, 9, -1 };
+
+ const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX] =
+ {
+ // comp_index, subset*2+lh_index, last_bit, first_bit
+ //------------------------ mode 0: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (10.555, 10.555, 10.555), delta
+ { { 1, 2, 4, -1 }, { 2, 2, 4, -1 }, { 2, 3, 4, -1 }, { 0, 0, 9, 0 }, { 1, 0, 9, 0 }, { 2, 0, 9, 0 }, { 0, 1, 4, 0 },
+ { 1, 3, 4, -1 }, { 1, 2, 3, 0 }, { 1, 1, 4, 0 }, { 2, 3, 0, -1 }, { 1, 3, 3, 0 }, { 2, 1, 4, 0 }, { 2, 3, 1, -1 },
+ { 2, 2, 3, 0 }, { 0, 2, 4, 0 }, { 2, 3, 2, -1 }, { 0, 3, 4, 0 }, { 2, 3, 3, -1 }, { 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 1: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (7.666, 7.666, 7.666), delta
+ { { 1, 2, 5, -1 },{ 1, 3, 4, -1 },{ 1, 3, 5, -1 },{ 0, 0, 6, 0 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },
+ { 1, 0, 6, 0 },{ 2, 2, 5, -1 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 6, 0 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },
+ { 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },
+ { 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 2: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.555, 11.444, 11.444), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 4, 0 },{ 0, 0, 10, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },{ 1, 0, 10, -1 },
+ { 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },
+ { 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 3: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.555, 11.444), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },
+ { 1, 0, 10, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 0, -1 },
+ { 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 1, 2, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 4: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.444, 11.555), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 2, 2, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },
+ { 1, 0, 10, -1 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 0, 10, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 1, -1 },
+ { 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 2, 3, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 5: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (9.555, 9.555, 9.555), delta
+ { { 0, 0, 8, 0 },{ 2, 2, 4, -1 },{ 1, 0, 8, 0 },{ 1, 2, 4, -1 },{ 2, 0, 8, 0 },{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },
+ { 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },
+ { 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 6: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.666, 8.555, 8.555), delta
+ { { 0, 0, 7, 0 },{ 1, 3, 4, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 3, -1 },
+ { 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
+ { 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 7: 2 subsets, Weight bits: 46 bits, Endpoints bits: 72 bits (8.555, 8.666, 8.555), delta
+ { { 0, 0, 7, 0 },{ 2, 3, 0, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 1, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 1, 3, 5, -1 },
+ { 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
+ { 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 8: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.555, 8.555, 8.666), delta
+ { { 0, 0, 7, 0 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 5, -1 },
+ { 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },
+ { 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 9: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (6.6.6.6, 6.6.6.6, 6.6.6.6), NO delta
+ { { 0, 0, 5, 0 },{ 1, 3, 4, -1 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 5, 0 },{ 1, 2, 5, -1 },{ 2, 2, 5, -1 },
+ { 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 5, 0 },{ 1, 3, 5, -1 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },
+ { 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 10: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (10.10, 10.10, 10.10), NO delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 9, 0 },{ 1, 1, 9, 0 },{ 2, 1, 9, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 11: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (11.9, 11.9, 11.9), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 8, 0 },{ 0, 0, 10, -1 },{ 1, 1, 8, 0 },{ 1, 0, 10, -1 },{ 2, 1, 8, 0 },{ 2, 0, 10, -1 }, {-1, 0, 0, 0} },
+ //------------------------ mode 12: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (12.8, 12.8, 12.8), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 7, 0 },{ 0, 0, 10, 11 },{ 1, 1, 7, 0 },{ 1, 0, 10, 11 },{ 2, 1, 7, 0 },{ 2, 0, 10, 11 }, {-1, 0, 0, 0} },
+ //------------------------ mode 13: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (16.4, 16.4, 16.4), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, 15 },{ 1, 1, 3, 0 },{ 1, 0, 10, 15 },{ 2, 1, 3, 0 },{ 2, 0, 10, 15 }, {-1, 0, 0, 0} }
+ };
+
+ // The same as the first 32 2-subset patterns in BC7.
+ // Bit 7 is a flag indicating that the weight uses 1 less bit than usual.
+ const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4] = // [pat][y][x]
+ {
+ { {0x80, 0, 1, 1}, { 0, 0, 1, 1 }, { 0, 0, 1, 1 }, { 0, 0, 1, 0x81 }}, { {0x80, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0x81} },
+ { {0x80, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 0x81} }, { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} },
+ { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 0x81} },
+ { {0x80, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 1, 0}, {1, 1, 1, 0x81} }, { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 1, 0x81, 1}, {0, 0, 1, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} },
+ { {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 1, 0, 0}, {1, 1, 1, 0} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} }, { {0x80, 1, 1, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 0, 0, 0x81} },
+ { {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} },
+ { {0x80, 1, 0x81, 0}, {0, 1, 1, 0}, {0, 1, 1, 0}, {0, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {0, 1, 1, 0}, {0, 1, 1, 0}, {1, 1, 0, 0} },
+ { {0x80, 0, 0, 1}, {0, 1, 1, 1}, {0x81, 1, 1, 0}, {1, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {0x81, 1, 1, 1}, {0, 0, 0, 0} },
+ { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {1, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {1, 0, 0, 1}, {1, 0, 0, 1}, {1, 1, 0, 0} }
+ };
+
+ const uint8_t g_bc6h_weight3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 };
+ const uint8_t g_bc6h_weight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
+
+ struct bc6h_logical_block
+ {
+ uint32_t m_mode;
+ uint32_t m_partition_pattern; // must be 0 if 1 subset
+ uint32_t m_endpoints[3][4]; // [comp][subset*2+lh_index] - must be already properly packed
+ uint8_t m_weights[16]; // weights must be of the proper size, taking into account skipped MSB's which must be 0
+
+ void clear()
+ {
+ basisu::clear_obj(*this);
+ }
+ };
+
+ static inline void write_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
+ {
+ assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
+ assert(val < (1ULL << num_bits));
+
+ if (bit_pos < 64)
+ {
+ l |= (val << bit_pos);
+
+ if ((bit_pos + num_bits) > 64)
+ h |= (val >> (64 - bit_pos));
+ }
+ else
+ {
+ h |= (val << (bit_pos - 64));
+ }
+
+ bit_pos += num_bits;
+ assert(bit_pos <= 128);
+ }
+
+ static inline void write_rev_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
+ {
+ assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
+ assert(val < (1ULL << num_bits));
+
+ for (uint32_t i = 0; i < num_bits; i++)
+ write_bits((val >> (num_bits - 1u - i)) & 1, 1, bit_pos, l, h);
+ }
+
+ static void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk)
+ {
+ const uint8_t s_mode_bits[NUM_BC6H_MODES] = { 0b00, 0b01, 0b00010, 0b00110, 0b01010, 0b01110, 0b10010, 0b10110, 0b11010, 0b11110, 0b00011, 0b00111, 0b01011, 0b01111 };
+
+ const uint32_t mode = log_blk.m_mode;
+ assert(mode < NUM_BC6H_MODES);
+
+ uint64_t l = s_mode_bits[mode], h = 0;
+ uint32_t bit_pos = (mode >= 2) ? 5 : 2;
+
+ const uint32_t num_subsets = (mode >= BC6H_FIRST_1SUBSET_MODE_INDEX) ? 1 : 2;
+
+ assert(((num_subsets == 2) && (log_blk.m_partition_pattern < TOTAL_BC6H_PARTITION_PATTERNS)) ||
+ ((num_subsets == 1) && (!log_blk.m_partition_pattern)));
+
+ // Sanity checks
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ assert(log_blk.m_endpoints[c][0] < (1u << g_bc6h_mode_sig_bits[mode][0])); // 1st subset l, base bits
+ assert(log_blk.m_endpoints[c][1] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 1st subset h, these are deltas except for modes 9,10
+ assert(log_blk.m_endpoints[c][2] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset l
+ assert(log_blk.m_endpoints[c][3] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset h
+ }
+
+ const bc6h_bit_layout* pLayout = &g_bc6h_bit_layouts[mode][0];
+
+ while (pLayout->m_comp != -1)
+ {
+ uint32_t v = (pLayout->m_comp == 3) ? log_blk.m_partition_pattern : log_blk.m_endpoints[pLayout->m_comp][pLayout->m_index];
+
+ if (pLayout->m_first_bit == -1)
+ {
+ write_bits((v >> pLayout->m_last_bit) & 1, 1, bit_pos, l, h);
+ }
+ else
+ {
+ const uint32_t total_bits = basisu::iabs(pLayout->m_last_bit - pLayout->m_first_bit) + 1;
+
+ v >>= basisu::minimum(pLayout->m_first_bit, pLayout->m_last_bit);
+ v &= ((1 << total_bits) - 1);
+
+ if (pLayout->m_first_bit > pLayout->m_last_bit)
+ write_rev_bits(v, total_bits, bit_pos, l, h);
+ else
+ write_bits(v, total_bits, bit_pos, l, h);
+ }
+
+ pLayout++;
+ }
+
+ const uint32_t num_mode_sel_bits = (num_subsets == 1) ? 4 : 3;
+ const uint8_t* pPat = &g_bc6h_2subset_patterns[log_blk.m_partition_pattern][0][0];
+
+ for (uint32_t i = 0; i < 16; i++)
+ {
+ const uint32_t sel = log_blk.m_weights[i];
+
+ uint32_t num_bits = num_mode_sel_bits;
+ if (num_subsets == 2)
+ {
+ const uint32_t subset_index = pPat[i];
+ num_bits -= (subset_index >> 7);
+ }
+ else if (!i)
+ {
+ num_bits--;
+ }
+
+ assert(sel < (1u << num_bits));
+
+ write_bits(sel, num_bits, bit_pos, l, h);
+ }
+
+ assert(bit_pos == 128);
+
+ basisu::write_le_dword(&dst_blk.m_bytes[0], (uint32_t)l);
+ basisu::write_le_dword(&dst_blk.m_bytes[4], (uint32_t)(l >> 32u));
+ basisu::write_le_dword(&dst_blk.m_bytes[8], (uint32_t)h);
+ basisu::write_le_dword(&dst_blk.m_bytes[12], (uint32_t)(h >> 32u));
+ }
+
+#if 0
+ static inline uint32_t bc6h_blog_dequantize_to_blog16(uint32_t comp, uint32_t bits_per_comp)
+ {
+ int unq;
+
+ if (bits_per_comp >= 15)
+ unq = comp;
+ else if (comp == 0)
+ unq = 0;
+ else if (comp == ((1u << bits_per_comp) - 1u))
+ unq = 0xFFFFu;
+ else
+ unq = ((comp << 16u) + 0x8000u) >> bits_per_comp;
+
+ return unq;
+ }
+#endif
+
+ // Suboptimal, but very close.
+ static inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits)
+ {
+ assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
+ return (h * 64 + 30) / (31 * (1 << (16 - num_bits)));
+ }
+
+ // 6,7,8,9,10,11,12
+ const uint32_t BC6H_BLOG_TAB_MIN = 6;
+ const uint32_t BC6H_BLOG_TAB_MAX = 12;
+ //const uint32_t BC6H_BLOG_TAB_NUM = BC6H_BLOG_TAB_MAX - BC6H_BLOG_TAB_MIN + 1;
+
+ // Handles 16, or 6-12 bits. Others assert.
+ static inline uint32_t half_to_blog_tab(half_float h, uint32_t num_bits)
+ {
+ BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MIN);
+ BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MAX);
+
+ assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
+
+ if (num_bits == 16)
+ {
+ return bc6h_half_to_blog(h, 16);
+ }
+ else
+ {
+ assert((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX));
+
+ // Note: This used to be done using a table lookup, but it required ~224KB of tables. This isn't quite as accurate, but the error is very slight (+-1 half values as ints).
+ return bc6h_half_to_blog(h, num_bits);
+ }
+ }
+
+ bool g_bc6h_enc_initialized;
+
+ void bc6h_enc_init()
+ {
+ if (g_bc6h_enc_initialized)
+ return;
+
+ g_bc6h_enc_initialized = true;
+ }
+
+ // mode 10, 4-bit weights
+ void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+ {
+ assert(g_bc6h_enc_initialized);
+
+ for (uint32_t i = 0; i < 16; i++)
+ {
+ assert(pWeights[i] <= 15);
+ }
+
+ bc6h_logical_block log_blk;
+ log_blk.clear();
+
+ // Convert half endpoints to blog10 (mode 10 doesn't use delta encoding)
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 10);
+ log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 10);
+ }
+
+ memcpy(log_blk.m_weights, pWeights, 16);
+
+ if (log_blk.m_weights[0] & 8)
+ {
+ for (uint32_t i = 0; i < 16; i++)
+ log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
+
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
+ }
+ }
+
+ log_blk.m_mode = BC6H_FIRST_1SUBSET_MODE_INDEX;
+ pack_bc6h_block(*pPacked_block, log_blk);
+ }
+
+ // Tries modes 11-13 (delta endpoint) encoding, falling back to mode 10 only when necessary, 4-bit weights
+ void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+ {
+ assert(g_bc6h_enc_initialized);
+
+ for (uint32_t i = 0; i < 16; i++)
+ {
+ assert(pWeights[i] <= 15);
+ }
+
+ bc6h_logical_block log_blk;
+ log_blk.clear();
+
+ for (uint32_t mode = BC6H_LAST_MODE_INDEX; mode > BC6H_FIRST_1SUBSET_MODE_INDEX; mode--)
+ {
+ const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0], num_delta_bits = g_bc6h_mode_sig_bits[mode][1];
+ const int base_bitmask = (1 << num_base_bits) - 1;
+ const int delta_bitmask = (1 << num_delta_bits) - 1;
+ BASISU_NOTE_UNUSED(base_bitmask);
+
+ assert(num_delta_bits < num_base_bits);
+ assert((num_delta_bits == g_bc6h_mode_sig_bits[mode][2]) && (num_delta_bits == g_bc6h_mode_sig_bits[mode][3]));
+
+ uint32_t blog_endpoints[3][2];
+
+ // Convert half endpoints to blog 16, 12, or 11
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
+ assert((int)blog_endpoints[c][0] <= base_bitmask);
+
+ blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
+ assert((int)blog_endpoints[c][1] <= base_bitmask);
+ }
+
+ // Copy weights
+ memcpy(log_blk.m_weights, pWeights, 16);
+
+ // Ensure first weight MSB is 0
+ if (log_blk.m_weights[0] & 8)
+ {
+ // Invert weights
+ for (uint32_t i = 0; i < 16; i++)
+ log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
+
+ // Swap blog quantized endpoints
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
+ }
+ }
+
+ const int max_delta = (1 << (num_delta_bits - 1)) - 1;
+ const int min_delta = -(max_delta + 1);
+ assert((max_delta - min_delta) == delta_bitmask);
+
+ bool failed_flag = false;
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
+
+ int delta = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
+ if ((delta < min_delta) || (delta > max_delta))
+ {
+ failed_flag = true;
+ break;
+ }
+
+ log_blk.m_endpoints[c][1] = delta & delta_bitmask;
+ }
+
+ if (failed_flag)
+ continue;
+
+ log_blk.m_mode = mode;
+ pack_bc6h_block(*pPacked_block, log_blk);
+
+ return;
+ }
+
+ // Worst case fall back to mode 10, which can handle any endpoints
+ bc6h_enc_block_mode10(pPacked_block, pEndpoints, pWeights);
+ }
+
+ // Mode 9 (direct endpoint encoding), 3-bit weights, but only 1 subset
+ void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+ {
+ assert(g_bc6h_enc_initialized);
+
+ for (uint32_t i = 0; i < 16; i++)
+ {
+ assert(pWeights[i] <= 7);
+ }
+
+ bc6h_logical_block log_blk;
+ log_blk.clear();
+
+ // Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 6);
+ log_blk.m_endpoints[c][2] = log_blk.m_endpoints[c][0];
+
+ log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 6);
+ log_blk.m_endpoints[c][3] = log_blk.m_endpoints[c][1];
+ }
+
+ memcpy(log_blk.m_weights, pWeights, 16);
+
+ const uint32_t pat_index = 0;
+ const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+ if (log_blk.m_weights[0] & 4)
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
+
+ for (uint32_t i = 0; i < 16; i++)
+ if ((pPat[i] & 0x7F) == 0)
+ log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+ }
+
+ if (log_blk.m_weights[15] & 4)
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]);
+
+ for (uint32_t i = 0; i < 16; i++)
+ if ((pPat[i] & 0x7F) == 1)
+ log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+ }
+
+ log_blk.m_mode = 9;
+ log_blk.m_partition_pattern = pat_index;
+ pack_bc6h_block(*pPacked_block, log_blk);
+ }
+
+ // Tries modes 0-8, falls back to mode 9
+ void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+ {
+ assert(g_bc6h_enc_initialized);
+
+ for (uint32_t i = 0; i < 16; i++)
+ {
+ assert(pWeights[i] <= 7);
+ }
+
+ bc6h_logical_block log_blk;
+ log_blk.clear();
+
+ for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
+ {
+ static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least
+ const uint32_t mode = s_mode_order[mode_iter];
+
+ const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
+ const int base_bitmask = (1 << num_base_bits) - 1;
+ BASISU_NOTE_UNUSED(base_bitmask);
+
+ const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
+ const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
+
+ uint32_t blog_endpoints[3][4];
+
+ // Convert half endpoints to blog 7-11
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
+ blog_endpoints[c][2] = blog_endpoints[c][0];
+ assert((int)blog_endpoints[c][0] <= base_bitmask);
+
+ blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
+ blog_endpoints[c][3] = blog_endpoints[c][1];
+ assert((int)blog_endpoints[c][1] <= base_bitmask);
+ }
+
+ const uint32_t pat_index = 0;
+ const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+ memcpy(log_blk.m_weights, pWeights, 16);
+
+ if (log_blk.m_weights[0] & 4)
+ {
+ // Swap part 0's endpoints/weights
+ for (uint32_t c = 0; c < 3; c++)
+ std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
+
+ for (uint32_t i = 0; i < 16; i++)
+ if ((pPat[i] & 0x7F) == 0)
+ log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+ }
+
+ if (log_blk.m_weights[15] & 4)
+ {
+ // Swap part 1's endpoints/weights
+ for (uint32_t c = 0; c < 3; c++)
+ std::swap(blog_endpoints[c][2], blog_endpoints[c][3]);
+
+ for (uint32_t i = 0; i < 16; i++)
+ if ((pPat[i] & 0x7F) == 1)
+ log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+ }
+
+ bool failed_flag = false;
+
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
+
+ const int min_delta = -(max_delta + 1);
+ assert((max_delta - min_delta) == delta_bitmasks[c]);
+
+ log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
+
+ int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
+ int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
+ int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
+
+ if ((delta0 < min_delta) || (delta0 > max_delta) ||
+ (delta1 < min_delta) || (delta1 > max_delta) ||
+ (delta2 < min_delta) || (delta2 > max_delta))
+ {
+ failed_flag = true;
+ break;
+ }
+
+ log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
+ log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
+ log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
+
+ if (failed_flag)
+ break;
+ }
+ if (failed_flag)
+ continue;
+
+ log_blk.m_mode = mode;
+ log_blk.m_partition_pattern = pat_index;
+ pack_bc6h_block(*pPacked_block, log_blk);
+
+ return;
+
+ } // mode_iter
+
+ bc6h_enc_block_1subset_mode9_3bit_weights(pPacked_block, pEndpoints, pWeights);
+ }
+
+ // pEndpoints[subset][comp][lh_index]
+ void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
+ {
+ assert(g_bc6h_enc_initialized);
+ assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2);
+
+ for (uint32_t i = 0; i < 16; i++)
+ {
+ assert(pWeights[i] <= 7);
+ }
+
+ bc6h_logical_block log_blk;
+ log_blk.clear();
+
+ // Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
+ for (uint32_t s = 0; s < 2; s++)
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ log_blk.m_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], 6);
+ log_blk.m_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], 6);
+ }
+ }
+
+ memcpy(log_blk.m_weights, pWeights, 16);
+
+ //const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc;
+ const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
+
+ const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
+ if (invert_flag)
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][2]);
+ std::swap(log_blk.m_endpoints[c][1], log_blk.m_endpoints[c][3]);
+ }
+ }
+
+ const uint32_t pat_index = bc7_pattern;
+ assert(pat_index < 32);
+ const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+ bool swap_flags[2] = { false, false };
+ for (uint32_t i = 0; i < 16; i++)
+ {
+ if ((pPat[i] & 0x80) == 0)
+ continue;
+
+ if (log_blk.m_weights[i] & 4)
+ {
+ const uint32_t p = pPat[i] & 1;
+ swap_flags[p] = true;
+ }
+ }
+
+ if (swap_flags[0])
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
+
+ for (uint32_t i = 0; i < 16; i++)
+ if ((pPat[i] & 0x7F) == 0)
+ log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+ }
+
+ if (swap_flags[1])
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]);
+
+ for (uint32_t i = 0; i < 16; i++)
+ if ((pPat[i] & 0x7F) == 1)
+ log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+ }
+
+ log_blk.m_mode = 9;
+ log_blk.m_partition_pattern = pat_index;
+ pack_bc6h_block(*pPacked_block, log_blk);
+ }
+
+ void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
+ {
+ assert(g_bc6h_enc_initialized);
+
+ for (uint32_t i = 0; i < 16; i++)
+ {
+ assert(pWeights[i] <= 7);
+ }
+
+ bc6h_logical_block log_blk;
+ log_blk.clear();
+
+ for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
+ {
+ static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least
+ const uint32_t mode = s_mode_order[mode_iter];
+
+ const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
+ const int base_bitmask = (1 << num_base_bits) - 1;
+ BASISU_NOTE_UNUSED(base_bitmask);
+
+ const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
+ const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
+
+ uint32_t blog_endpoints[3][4];
+
+ // Convert half endpoints to blog 7-11
+ for (uint32_t s = 0; s < 2; s++)
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ blog_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], num_base_bits);
+ blog_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], num_base_bits);
+ }
+ }
+
+ memcpy(log_blk.m_weights, pWeights, 16);
+
+ //const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc;
+ const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
+
+ const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
+ if (invert_flag)
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ std::swap(blog_endpoints[c][0], blog_endpoints[c][2]);
+ std::swap(blog_endpoints[c][1], blog_endpoints[c][3]);
+ }
+ }
+
+ const uint32_t pat_index = bc7_pattern;
+ assert(pat_index < 32);
+ const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+ bool swap_flags[2] = { false, false };
+ for (uint32_t i = 0; i < 16; i++)
+ {
+ if ((pPat[i] & 0x80) == 0)
+ continue;
+
+ if (log_blk.m_weights[i] & 4)
+ {
+ const uint32_t p = pPat[i] & 1;
+ swap_flags[p] = true;
+ }
+ }
+
+ if (swap_flags[0])
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
+
+ for (uint32_t i = 0; i < 16; i++)
+ if ((pPat[i] & 0x7F) == 0)
+ log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+ }
+
+ if (swap_flags[1])
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ std::swap(blog_endpoints[c][2], blog_endpoints[c][3]);
+
+ for (uint32_t i = 0; i < 16; i++)
+ if ((pPat[i] & 0x7F) == 1)
+ log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+ }
+
+ // Try packing the endpoints
+ bool failed_flag = false;
+
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
+
+ const int min_delta = -(max_delta + 1);
+ assert((max_delta - min_delta) == delta_bitmasks[c]);
+
+ log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
+
+ int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
+ int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
+ int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
+
+ if ((delta0 < min_delta) || (delta0 > max_delta) ||
+ (delta1 < min_delta) || (delta1 > max_delta) ||
+ (delta2 < min_delta) || (delta2 > max_delta))
+ {
+ failed_flag = true;
+ break;
+ }
+
+ log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
+ log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
+ log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
+
+ if (failed_flag)
+ break;
+ }
+ if (failed_flag)
+ continue;
+
+ log_blk.m_mode = mode;
+ log_blk.m_partition_pattern = pat_index;
+ pack_bc6h_block(*pPacked_block, log_blk);
+
+ //half_float blk[16 * 3];
+ //unpack_bc6h(pPacked_block, blk, false);
+
+ return;
+ }
+
+ bc6h_enc_block_2subset_mode9_3bit_weights(pPacked_block, common_part_index, pEndpoints, pWeights);
+ }
+
+ bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3])
+ {
+ assert(g_bc6h_enc_initialized);
+
+ if ((pColor[0] | pColor[1] | pColor[2]) & 0x8000)
+ return false;
+
+ // ASTC block unpacker won't allow Inf/NaN's to come through.
+ //if (is_half_inf_or_nan(pColor[0]) || is_half_inf_or_nan(pColor[1]) || is_half_inf_or_nan(pColor[2]))
+ // return false;
+
+ uint8_t weights[16];
+ memset(weights, 0, sizeof(weights));
+
+ half_float endpoints[3][2];
+ endpoints[0][0] = pColor[0];
+ endpoints[0][1] = pColor[0];
+
+ endpoints[1][0] = pColor[1];
+ endpoints[1][1] = pColor[1];
+
+ endpoints[2][0] = pColor[2];
+ endpoints[2][1] = pColor[2];
+
+ bc6h_enc_block_1subset_4bit_weights(pPacked_block, endpoints, weights);
+
+ return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+ // basisu_astc_hdr_core.cpp
+
+ static bool g_astc_hdr_core_initialized;
+ static int8_t g_astc_partition_id_to_common_bc7_pat_index[1024];
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ void astc_hdr_core_init()
+ {
+ if (g_astc_hdr_core_initialized)
+ return;
+
+ memset(g_astc_partition_id_to_common_bc7_pat_index, 0xFF, sizeof(g_astc_partition_id_to_common_bc7_pat_index));
+
+ for (uint32_t part_index = 0; part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; ++part_index)
+ {
+ const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc;
+ //const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
+
+ assert(astc_pattern < 1024);
+ g_astc_partition_id_to_common_bc7_pat_index[astc_pattern] = (int8_t)part_index;
+ }
+
+ g_astc_hdr_core_initialized = true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ static inline int astc_hdr_sign_extend(int src, int num_src_bits)
+ {
+ assert(basisu::in_range(num_src_bits, 2, 31));
+
+ const bool negative = (src & (1 << (num_src_bits - 1))) != 0;
+ if (negative)
+ return src | ~((1 << num_src_bits) - 1);
+ else
+ return src & ((1 << num_src_bits) - 1);
+ }
+
+ static inline void astc_hdr_pack_bit(
+ int& dst, int dst_bit,
+ int src_val, int src_bit = 0)
+ {
+ assert(dst_bit >= 0 && dst_bit <= 31);
+ int bit = basisu::get_bit(src_val, src_bit);
+ dst |= (bit << dst_bit);
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ void decode_mode7_to_qlog12_ise20(
+ const uint8_t* pEndpoints,
+ int e[2][3],
+ int* pScale)
+ {
+ assert(g_astc_hdr_core_initialized);
+
+ for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
+ {
+ assert(pEndpoints[i] <= 255);
+ }
+
+ const int v0 = pEndpoints[0], v1 = pEndpoints[1], v2 = pEndpoints[2], v3 = pEndpoints[3];
+
+ // Extract mode bits and unpack to major component and mode.
+ const int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
+
+ int majcomp, mode;
+ if ((modeval & 0xC) != 0xC)
+ {
+ majcomp = modeval >> 2;
+ mode = modeval & 3;
+ }
+ else if (modeval != 0xF)
+ {
+ majcomp = modeval & 3;
+ mode = 4;
+ }
+ else
+ {
+ majcomp = 0;
+ mode = 5;
+ }
+
+ // Extract low-order bits of r, g, b, and s.
+ int red = v0 & 0x3f;
+ int green = v1 & 0x1f;
+ int blue = v2 & 0x1f;
+ int scale = v3 & 0x1f;
+
+ // Extract high-order bits, which may be assigned depending on mode
+ int x0 = (v1 >> 6) & 1;
+ int x1 = (v1 >> 5) & 1;
+ int x2 = (v2 >> 6) & 1;
+ int x3 = (v2 >> 5) & 1;
+ int x4 = (v3 >> 7) & 1;
+ int x5 = (v3 >> 6) & 1;
+ int x6 = (v3 >> 5) & 1;
+
+ // Now move the high-order xs into the right place.
+ const int ohm = 1 << mode;
+ if (ohm & 0x30) green |= x0 << 6;
+ if (ohm & 0x3A) green |= x1 << 5;
+ if (ohm & 0x30) blue |= x2 << 6;
+ if (ohm & 0x3A) blue |= x3 << 5;
+ if (ohm & 0x3D) scale |= x6 << 5;
+ if (ohm & 0x2D) scale |= x5 << 6;
+ if (ohm & 0x04) scale |= x4 << 7;
+ if (ohm & 0x3B) red |= x4 << 6;
+ if (ohm & 0x04) red |= x3 << 6;
+ if (ohm & 0x10) red |= x5 << 7;
+ if (ohm & 0x0F) red |= x2 << 7;
+ if (ohm & 0x05) red |= x1 << 8;
+ if (ohm & 0x0A) red |= x0 << 8;
+ if (ohm & 0x05) red |= x0 << 9;
+ if (ohm & 0x02) red |= x6 << 9;
+ if (ohm & 0x01) red |= x3 << 10;
+ if (ohm & 0x02) red |= x5 << 10;
+
+ // Shift the bits to the top of the 12-bit result.
+ static const int s_shamts[6] = { 1,1,2,3,4,5 };
+
+ const int shamt = s_shamts[mode];
+ red <<= shamt;
+ green <<= shamt;
+ blue <<= shamt;
+ scale <<= shamt;
+
+ // Minor components are stored as differences
+ if (mode != 5)
+ {
+ green = red - green;
+ blue = red - blue;
+ }
+
+ // Swizzle major component into place
+ if (majcomp == 1)
+ std::swap(red, green);
+
+ if (majcomp == 2)
+ std::swap(red, blue);
+
+ // Clamp output values, set alpha to 1.0
+ e[1][0] = basisu::clamp(red, 0, 0xFFF);
+ e[1][1] = basisu::clamp(green, 0, 0xFFF);
+ e[1][2] = basisu::clamp(blue, 0, 0xFFF);
+
+ e[0][0] = basisu::clamp(red - scale, 0, 0xFFF);
+ e[0][1] = basisu::clamp(green - scale, 0, 0xFFF);
+ e[0][2] = basisu::clamp(blue - scale, 0, 0xFFF);
+
+ if (pScale)
+ *pScale = scale;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ bool decode_mode7_to_qlog12(
+ const uint8_t* pEndpoints,
+ int e[2][3],
+ int* pScale,
+ uint32_t ise_endpoint_range)
+ {
+ assert(g_astc_hdr_core_initialized);
+
+ if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
+ {
+ decode_mode7_to_qlog12_ise20(pEndpoints, e, pScale);
+ }
+ else
+ {
+ uint8_t dequantized_endpoints[NUM_MODE7_ENDPOINTS];
+
+ for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
+ dequantized_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]];
+
+ decode_mode7_to_qlog12_ise20(dequantized_endpoints, e, pScale);
+ }
+
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ if (e[i][0] > (int)MAX_QLOG12)
+ return false;
+
+ if (e[i][1] > (int)MAX_QLOG12)
+ return false;
+
+ if (e[i][2] > (int)MAX_QLOG12)
+ return false;
+ }
+
+ return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ void decode_mode11_to_qlog12_ise20(
+ const uint8_t* pEndpoints,
+ int e[2][3])
+ {
+#ifdef _DEBUG
+ for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++)
+ {
+ assert(pEndpoints[i] <= 255);
+ }
+#endif
+
+ const uint32_t maj_comp = basisu::get_bit(pEndpoints[4], 7) | (basisu::get_bit(pEndpoints[5], 7) << 1);
+
+ if (maj_comp == 3)
+ {
+ // Direct, qlog8 and qlog7
+ e[0][0] = pEndpoints[0] << 4;
+ e[1][0] = pEndpoints[1] << 4;
+
+ e[0][1] = pEndpoints[2] << 4;
+ e[1][1] = pEndpoints[3] << 4;
+
+ e[0][2] = (pEndpoints[4] & 127) << 5;
+ e[1][2] = (pEndpoints[5] & 127) << 5;
+ }
+ else
+ {
+ int v0 = pEndpoints[0];
+ int v1 = pEndpoints[1];
+ int v2 = pEndpoints[2];
+ int v3 = pEndpoints[3];
+ int v4 = pEndpoints[4];
+ int v5 = pEndpoints[5];
+
+ int mode = 0;
+ astc_hdr_pack_bit(mode, 0, v1, 7);
+ astc_hdr_pack_bit(mode, 1, v2, 7);
+ astc_hdr_pack_bit(mode, 2, v3, 7);
+
+ int va = v0;
+ astc_hdr_pack_bit(va, 8, v1, 6);
+
+ int vb0 = v2 & 63;
+ int vb1 = v3 & 63;
+ int vc = v1 & 63;
+
+ int vd0 = v4 & 0x7F; // this takes more bits than is sometimes needed
+ int vd1 = v5 & 0x7F; // this takes more bits than is sometimes needed
+ static const int8_t dbitstab[8] = { 7,6,7,6,5,6,5,6 };
+ vd0 = astc_hdr_sign_extend(vd0, dbitstab[mode]);
+ vd1 = astc_hdr_sign_extend(vd1, dbitstab[mode]);
+
+ int x0 = basisu::get_bit(v2, 6);
+ int x1 = basisu::get_bit(v3, 6);
+ int x2 = basisu::get_bit(v4, 6);
+ int x3 = basisu::get_bit(v5, 6);
+ int x4 = basisu::get_bit(v4, 5);
+ int x5 = basisu::get_bit(v5, 5);
+
+ const uint32_t ohm = 1U << mode;
+ if (ohm & 0xA4) va |= (x0 << 9);
+ if (ohm & 0x08) va |= (x2 << 9);
+ if (ohm & 0x50) va |= (x4 << 9);
+ if (ohm & 0x50) va |= (x5 << 10);
+ if (ohm & 0xA0) va |= (x1 << 10);
+ if (ohm & 0xC0) va |= (x2 << 11);
+ if (ohm & 0x04) vc |= (x1 << 6);
+ if (ohm & 0xE8) vc |= (x3 << 6);
+ if (ohm & 0x20) vc |= (x2 << 7);
+ if (ohm & 0x5B) vb0 |= (x0 << 6);
+ if (ohm & 0x5B) vb1 |= (x1 << 6);
+ if (ohm & 0x12) vb0 |= (x2 << 7);
+ if (ohm & 0x12) vb1 |= (x3 << 7);
+
+ const int shamt = (mode >> 1) ^ 3;
+
+ va = (uint32_t)va << shamt;
+ vb0 = (uint32_t)vb0 << shamt;
+ vb1 = (uint32_t)vb1 << shamt;
+ vc = (uint32_t)vc << shamt;
+ vd0 = (uint32_t)vd0 << shamt;
+ vd1 = (uint32_t)vd1 << shamt;
+
+ // qlog12
+ e[1][0] = basisu::clamp<int>(va, 0, 0xFFF);
+ e[1][1] = basisu::clamp<int>(va - vb0, 0, 0xFFF);
+ e[1][2] = basisu::clamp<int>(va - vb1, 0, 0xFFF);
+
+ e[0][0] = basisu::clamp<int>(va - vc, 0, 0xFFF);
+ e[0][1] = basisu::clamp<int>(va - vb0 - vc - vd0, 0, 0xFFF);
+ e[0][2] = basisu::clamp<int>(va - vb1 - vc - vd1, 0, 0xFFF);
+
+ if (maj_comp)
+ {
+ std::swap(e[0][0], e[0][maj_comp]);
+ std::swap(e[1][0], e[1][maj_comp]);
+ }
+ }
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ bool decode_mode11_to_qlog12(
+ const uint8_t* pEndpoints,
+ int e[2][3],
+ uint32_t ise_endpoint_range)
+ {
+ assert(g_astc_hdr_core_initialized);
+ assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
+
+ if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
+ {
+ decode_mode11_to_qlog12_ise20(pEndpoints, e);
+ }
+ else
+ {
+ uint8_t dequantized_endpoints[NUM_MODE11_ENDPOINTS];
+
+ for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++)
+ dequantized_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]];
+
+ decode_mode11_to_qlog12_ise20(dequantized_endpoints, e);
+ }
+
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ if (e[i][0] > (int)MAX_QLOG12)
+ return false;
+
+ if (e[i][1] > (int)MAX_QLOG12)
+ return false;
+
+ if (e[i][2] > (int)MAX_QLOG12)
+ return false;
+ }
+
+ return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
+ {
+ assert(g_astc_hdr_core_initialized);
+ assert((best_blk.m_weight_ise_range >= 1) && (best_blk.m_weight_ise_range <= 8));
+
+ if (best_blk.m_weight_ise_range == 5)
+ {
+ // Use 3-bit BC6H weights which are a perfect match for 3-bit ASTC weights, but encode 1-subset as 2 equal subsets
+ bc6h_enc_block_1subset_3bit_weights(&transcoded_bc6h_blk, h_e, best_blk.m_weights);
+ }
+ else
+ {
+ uint8_t bc6h_weights[16];
+
+ if (best_blk.m_weight_ise_range == 1)
+ {
+ // weight ISE 1: 3 levels
+ static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 8, 15 };
+
+ for (uint32_t i = 0; i < 16; i++)
+ bc6h_weights[i] = s_astc1_to_bc6h_3[best_blk.m_weights[i]];
+ }
+ else if (best_blk.m_weight_ise_range == 2)
+ {
+ // weight ISE 2: 4 levels
+ static const uint8_t s_astc2_to_bc6h_4[4] = { 0, 5, 10, 15 };
+
+ for (uint32_t i = 0; i < 16; i++)
+ bc6h_weights[i] = s_astc2_to_bc6h_4[best_blk.m_weights[i]];
+ }
+ else if (best_blk.m_weight_ise_range == 3)
+ {
+ // weight ISE 3: 5 levels
+ static const uint8_t s_astc3_to_bc6h_4[5] = { 0, 4, 7, 11, 15 };
+
+ for (uint32_t i = 0; i < 16; i++)
+ bc6h_weights[i] = s_astc3_to_bc6h_4[best_blk.m_weights[i]];
+ }
+ else if (best_blk.m_weight_ise_range == 4)
+ {
+ // weight ISE 4: 6 levels
+ static const uint8_t s_astc4_to_bc6h_4[6] = { 0, 15, 3, 12, 6, 9 };
+
+ for (uint32_t i = 0; i < 16; i++)
+ bc6h_weights[i] = s_astc4_to_bc6h_4[best_blk.m_weights[i]];
+ }
+ else if (best_blk.m_weight_ise_range == 6)
+ {
+ // weight ISE 6: 10 levels
+ static const uint8_t s_astc6_to_bc6h_4[10] = { 0, 15, 2, 13, 3, 12, 5, 10, 6, 9 };
+
+ for (uint32_t i = 0; i < 16; i++)
+ bc6h_weights[i] = s_astc6_to_bc6h_4[best_blk.m_weights[i]];
+ }
+ else if (best_blk.m_weight_ise_range == 7)
+ {
+ // weight ISE 7: 12 levels
+ static const uint8_t s_astc7_to_bc6h_4[12] = { 0, 15, 4, 11, 1, 14, 5, 10, 2, 13, 6, 9 };
+
+ for (uint32_t i = 0; i < 16; i++)
+ bc6h_weights[i] = s_astc7_to_bc6h_4[best_blk.m_weights[i]];
+ }
+ else if (best_blk.m_weight_ise_range == 8)
+ {
+ // 16 levels
+ memcpy(bc6h_weights, best_blk.m_weights, 16);
+ }
+ else
+ {
+ assert(0);
+ return false;
+ }
+
+ bc6h_enc_block_1subset_4bit_weights(&transcoded_bc6h_blk, h_e, bc6h_weights);
+ }
+
+ return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
+ {
+ assert(g_astc_hdr_core_initialized);
+ assert(best_blk.m_num_partitions == 2);
+ assert(common_part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+
+ half_float bc6h_endpoints[2][3][2]; // [subset][comp][lh_index]
+
+ // UASTC HDR checks
+ // Both CEM's must be equal in 2-subset UASTC HDR.
+ if (best_blk.m_color_endpoint_modes[0] != best_blk.m_color_endpoint_modes[1])
+ return false;
+ if ((best_blk.m_color_endpoint_modes[0] != 7) && (best_blk.m_color_endpoint_modes[0] != 11))
+ return false;
+
+ if (best_blk.m_color_endpoint_modes[0] == 7)
+ {
+ if (!(((best_blk.m_weight_ise_range == 1) && (best_blk.m_endpoint_ise_range == 20)) ||
+ ((best_blk.m_weight_ise_range == 2) && (best_blk.m_endpoint_ise_range == 20)) ||
+ ((best_blk.m_weight_ise_range == 3) && (best_blk.m_endpoint_ise_range == 19)) ||
+ ((best_blk.m_weight_ise_range == 4) && (best_blk.m_endpoint_ise_range == 17)) ||
+ ((best_blk.m_weight_ise_range == 5) && (best_blk.m_endpoint_ise_range == 15))))
+ {
+ return false;
+ }
+ }
+ else
+ {
+ if (!(((best_blk.m_weight_ise_range == 1) && (best_blk.m_endpoint_ise_range == 14)) ||
+ ((best_blk.m_weight_ise_range == 2) && (best_blk.m_endpoint_ise_range == 12))))
+ {
+ return false;
+ }
+ }
+
+ for (uint32_t s = 0; s < 2; s++)
+ {
+ int e[2][3];
+ if (best_blk.m_color_endpoint_modes[0] == 7)
+ {
+ bool success = decode_mode7_to_qlog12(best_blk.m_endpoints + s * NUM_MODE7_ENDPOINTS, e, nullptr, best_blk.m_endpoint_ise_range);
+ if (!success)
+ return false;
+ }
+ else
+ {
+ bool success = decode_mode11_to_qlog12(best_blk.m_endpoints + s * NUM_MODE11_ENDPOINTS, e, best_blk.m_endpoint_ise_range);
+ if (!success)
+ return false;
+ }
+
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ bc6h_endpoints[s][c][0] = qlog_to_half_slow(e[0][c], 12);
+ if (is_half_inf_or_nan(bc6h_endpoints[s][c][0]))
+ return false;
+
+ bc6h_endpoints[s][c][1] = qlog_to_half_slow(e[1][c], 12);
+ if (is_half_inf_or_nan(bc6h_endpoints[s][c][1]))
+ return false;
+ }
+ }
+
+ uint8_t bc6h_weights[16];
+ if (best_blk.m_weight_ise_range == 1)
+ {
+ static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 4, 7 };
+
+ for (uint32_t i = 0; i < 16; i++)
+ bc6h_weights[i] = s_astc1_to_bc6h_3[best_blk.m_weights[i]];
+ }
+ else if (best_blk.m_weight_ise_range == 2)
+ {
+ static const uint8_t s_astc2_to_bc6h_3[4] = { 0, 2, 5, 7 };
+
+ for (uint32_t i = 0; i < 16; i++)
+ bc6h_weights[i] = s_astc2_to_bc6h_3[best_blk.m_weights[i]];
+ }
+ else if (best_blk.m_weight_ise_range == 3)
+ {
+ static const uint8_t s_astc3_to_bc6h_3[5] = { 0, 2, 4, 5, 7 };
+
+ for (uint32_t i = 0; i < 16; i++)
+ bc6h_weights[i] = s_astc3_to_bc6h_3[best_blk.m_weights[i]];
+ }
+ else if (best_blk.m_weight_ise_range == 4)
+ {
+ static const uint8_t s_astc4_to_bc6h_3[6] = { 0, 7, 1, 6, 3, 4 };
+
+ for (uint32_t i = 0; i < 16; i++)
+ bc6h_weights[i] = s_astc4_to_bc6h_3[best_blk.m_weights[i]];
+ }
+ else if (best_blk.m_weight_ise_range == 5)
+ {
+ memcpy(bc6h_weights, best_blk.m_weights, 16);
+ }
+ else
+ {
+ assert(0);
+ return false;
+ }
+
+ bc6h_enc_block_2subset_3bit_weights(&transcoded_bc6h_blk, common_part_index, bc6h_endpoints, bc6h_weights);
+
+ return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+ // Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails.
+ bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk)
+ {
+ assert(g_astc_hdr_core_initialized);
+ if (!g_astc_hdr_core_initialized)
+ {
+ assert(0);
+ return false;
+ }
+
+ astc_helpers::log_astc_block log_blk;
+
+ if (!astc_helpers::unpack_block(&src_blk, log_blk, 4, 4))
+ {
+ // Failed unpacking ASTC data
+ return false;
+ }
+
+ return astc_hdr_transcode_to_bc6h(log_blk, dst_blk);
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+ // Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails.
+ bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk)
+ {
+ assert(g_astc_hdr_core_initialized);
+ if (!g_astc_hdr_core_initialized)
+ {
+ assert(0);
+ return false;
+ }
+
+ if (log_blk.m_solid_color_flag_ldr)
+ {
+ // Don't support LDR solid colors.
+ return false;
+ }
+
+ if (log_blk.m_solid_color_flag_hdr)
+ {
+ // Solid color HDR block
+ return bc6h_enc_block_solid_color(&dst_blk, log_blk.m_solid_color);
+ }
+
+ // Only support 4x4 grid sizes
+ if ((log_blk.m_grid_width != 4) || (log_blk.m_grid_height != 4))
+ return false;
+
+ // Don't support dual plane encoding
+ if (log_blk.m_dual_plane)
+ return false;
+
+ if (log_blk.m_num_partitions == 1)
+ {
+ // Handle 1 partition (or subset)
+
+ // UASTC HDR checks
+ if ((log_blk.m_weight_ise_range < 1) || (log_blk.m_weight_ise_range > 8))
+ return false;
+
+ int e[2][3];
+ bool success;
+
+ if (log_blk.m_color_endpoint_modes[0] == 7)
+ {
+ if (log_blk.m_endpoint_ise_range != 20)
+ return false;
+
+ success = decode_mode7_to_qlog12(log_blk.m_endpoints, e, nullptr, log_blk.m_endpoint_ise_range);
+ }
+ else if (log_blk.m_color_endpoint_modes[0] == 11)
+ {
+ // UASTC HDR checks
+ if (log_blk.m_weight_ise_range <= 7)
+ {
+ if (log_blk.m_endpoint_ise_range != 20)
+ return false;
+ }
+ else if (log_blk.m_endpoint_ise_range != 19)
+ {
+ return false;
+ }
+
+ success = decode_mode11_to_qlog12(log_blk.m_endpoints, e, log_blk.m_endpoint_ise_range);
+ }
+ else
+ {
+ return false;
+ }
+
+ if (!success)
+ return false;
+
+ // Transform endpoints to half float
+ half_float h_e[3][2] =
+ {
+ { qlog_to_half_slow(e[0][0], 12), qlog_to_half_slow(e[1][0], 12) },
+ { qlog_to_half_slow(e[0][1], 12), qlog_to_half_slow(e[1][1], 12) },
+ { qlog_to_half_slow(e[0][2], 12), qlog_to_half_slow(e[1][2], 12) }
+ };
+
+ // Sanity check for NaN/Inf
+ for (uint32_t i = 0; i < 2; i++)
+ if (is_half_inf_or_nan(h_e[0][i]) || is_half_inf_or_nan(h_e[1][i]) || is_half_inf_or_nan(h_e[2][i]))
+ return false;
+
+ // Transcode to bc6h
+ if (!transcode_bc6h_1subset(h_e, log_blk, dst_blk))
+ return false;
+ }
+ else if (log_blk.m_num_partitions == 2)
+ {
+ // Handle 2 partition (or subset)
+ int common_bc7_pat_index = g_astc_partition_id_to_common_bc7_pat_index[log_blk.m_partition_id];
+ if (common_bc7_pat_index < 0)
+ return false;
+
+ assert(common_bc7_pat_index < (int)basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+
+ if (!transcode_bc6h_2subsets(common_bc7_pat_index, log_blk, dst_blk))
+ return false;
+ }
+ else
+ {
+ // Only supports 1 or 2 partitions (or subsets)
+ return false;
+ }
+
+ return true;
+ }
+#endif // BASISD_SUPPORT_UASTC_HDR
+
} // namespace basist
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder.h b/thirdparty/basis_universal/transcoder/basisu_transcoder.h
index 3327e8ddb7..8324e99698 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder.h
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.h
@@ -1,5 +1,5 @@
// basisu_transcoder.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -29,6 +29,7 @@
// Set BASISU_FORCE_DEVEL_MESSAGES to 1 to enable debug printf()'s whenever an error occurs, for easier debugging during development.
#ifndef BASISU_FORCE_DEVEL_MESSAGES
+ // TODO - disable before checking in
#define BASISU_FORCE_DEVEL_MESSAGES 0
#endif
@@ -55,7 +56,7 @@ namespace basist
cTFETC2_RGBA = 1, // Opaque+alpha, ETC2_EAC_A8 block followed by a ETC1 block, alpha channel will be opaque for opaque .basis files
// BC1-5, BC7 (desktop, some mobile devices)
- cTFBC1_RGB = 2, // Opaque only, no punchthrough alpha support yet, transcodes alpha slice if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified
+ cTFBC1_RGB = 2, // Opaque only, no punchthrough alpha support yet, transcodes alpha slice if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified
cTFBC3_RGBA = 3, // Opaque+alpha, BC4 followed by a BC1 block, alpha channel will be opaque for opaque .basis files
cTFBC4_R = 4, // Red only, alpha slice is transcoded to output if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified
cTFBC5_RG = 5, // XY: Two BC4 blocks, X=R and Y=Alpha, .basis file should have alpha data (if not Y will be all 255's)
@@ -63,10 +64,11 @@ namespace basist
// PVRTC1 4bpp (mobile, PowerVR devices)
cTFPVRTC1_4_RGB = 8, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified, nearly lowest quality of any texture format.
- cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format.
+ cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format.
// ASTC (mobile, Intel devices, hopefully all desktop GPU's one day)
- cTFASTC_4x4_RGBA = 10, // Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions.
+ cTFASTC_4x4_RGBA = 10, // LDR. Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files.
+ // LDR: Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions.
// ATC (mobile, Adreno devices, this is a niche format)
cTFATC_RGB = 11, // Opaque, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. ATI ATC (GL_ATC_RGB_AMD)
@@ -74,8 +76,8 @@ namespace basist
// FXT1 (desktop, Intel devices, this is a super obscure format)
cTFFXT1_RGB = 17, // Opaque only, uses exclusively CC_MIXED blocks. Notable for having a 8x4 block size. GL_3DFX_texture_compression_FXT1 is supported on Intel integrated GPU's (such as HD 630).
- // Punch-through alpha is relatively easy to support, but full alpha is harder. This format is only here for completeness so opaque-only is fine for now.
- // See the BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING macro in basisu_transcoder_internal.h.
+ // Punch-through alpha is relatively easy to support, but full alpha is harder. This format is only here for completeness so opaque-only is fine for now.
+ // See the BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING macro in basisu_transcoder_internal.h.
cTFPVRTC2_4_RGB = 18, // Opaque-only, almost BC1 quality, much faster to transcode and supports arbitrary texture dimensions (unlike PVRTC1 RGB).
cTFPVRTC2_4_RGBA = 19, // Opaque+alpha, slower to encode than cTFPVRTC2_4_RGB. Premultiplied alpha is highly recommended, otherwise the color channel can leak into the alpha channel on transparent blocks.
@@ -83,13 +85,22 @@ namespace basist
cTFETC2_EAC_R11 = 20, // R only (ETC2 EAC R11 unsigned)
cTFETC2_EAC_RG11 = 21, // RG only (ETC2 EAC RG11 unsigned), R=opaque.r, G=alpha - for tangent space normal maps
+ cTFBC6H = 22, // HDR, RGB only, unsigned
+ cTFASTC_HDR_4x4_RGBA = 23, // HDR, RGBA (currently UASTC HDR is only RGB), unsigned
+
// Uncompressed (raw pixel) formats
+ // Note these uncompressed formats (RGBA32, 565, and 4444) can only be transcoded to from LDR input files (ETC1S or UASTC LDR).
cTFRGBA32 = 13, // 32bpp RGBA image stored in raster (not block) order in memory, R is first byte, A is last byte.
cTFRGB565 = 14, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 11
cTFBGR565 = 15, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 0
- cTFRGBA4444 = 16, // 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0
+ cTFRGBA4444 = 16, // 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0
+
+ // Note these uncompressed formats (HALF and 9E5) can only be transcoded to from HDR input files (UASTC HDR).
+ cTFRGB_HALF = 24, // 48bpp RGB half (16-bits/component, 3 components)
+ cTFRGBA_HALF = 25, // 64bpp RGBA half (16-bits/component, 4 components) (A will always currently 1.0, UASTC_HDR doesn't support alpha)
+ cTFRGB_9E5 = 26, // 32bpp RGB 9E5 (shared exponent, positive only, see GL_EXT_texture_shared_exponent)
- cTFTotalTextureFormats = 22,
+ cTFTotalTextureFormats = 27,
// Old enums for compatibility with code compiled against previous versions
cTFETC1 = cTFETC1_RGB,
@@ -124,6 +135,9 @@ namespace basist
// Returns true if the format supports an alpha channel.
bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt);
+ // Returns true if the format is HDR.
+ bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt);
+
// Returns the basisu::texture_format corresponding to the specified transcoder_texture_format.
basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt);
@@ -142,7 +156,7 @@ namespace basist
// Returns the block height for the specified texture format, which is currently always 4.
uint32_t basis_get_block_height(transcoder_texture_format tex_type);
- // Returns true if the specified format was enabled at compile time.
+ // Returns true if the specified format was enabled at compile time, and is supported for the specific basis/ktx2 texture format (ETC1S, UASTC, or UASTC HDR).
bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt = basis_tex_format::cETC1S);
// Validates that the output buffer is large enough to hold the entire transcoded texture.
@@ -317,6 +331,42 @@ namespace basist
int channel0 = -1, int channel1 = -1);
};
+ class basisu_lowlevel_uastc_hdr_transcoder
+ {
+ friend class basisu_transcoder;
+
+ public:
+ basisu_lowlevel_uastc_hdr_transcoder();
+
+ bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+ uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+ basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0);
+
+ bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+ uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+ basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0)
+ {
+ return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt,
+ output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels,
+ pState, output_rows_in_pixels, channel0, channel1, decode_flags);
+ }
+
+ // Container independent transcoding
+ bool transcode_image(
+ transcoder_texture_format target_format,
+ void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
+ const uint8_t* pCompressed_data, uint32_t compressed_data_length,
+ uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
+ uint32_t slice_offset, uint32_t slice_length,
+ uint32_t decode_flags = 0,
+ bool has_alpha = false,
+ bool is_video = false,
+ uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+ basisu_transcoder_state* pState = nullptr,
+ uint32_t output_rows_in_pixels = 0,
+ int channel0 = -1, int channel1 = -1);
+ };
+
struct basisu_slice_info
{
uint32_t m_orig_width;
@@ -530,6 +580,7 @@ namespace basist
private:
mutable basisu_lowlevel_etc1s_transcoder m_lowlevel_etc1s_decoder;
mutable basisu_lowlevel_uastc_transcoder m_lowlevel_uastc_decoder;
+ mutable basisu_lowlevel_uastc_hdr_transcoder m_lowlevel_uastc_hdr_decoder;
bool m_ready_to_transcode;
@@ -612,10 +663,12 @@ namespace basist
#pragma pack(pop)
const uint32_t KTX2_VK_FORMAT_UNDEFINED = 0;
+ const uint32_t KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK = 1000066000; // TODO, is this correct?
const uint32_t KTX2_KDF_DF_MODEL_UASTC = 166;
+ const uint32_t KTX2_KDF_DF_MODEL_UASTC_HDR = 167;
const uint32_t KTX2_KDF_DF_MODEL_ETC1S = 163;
const uint32_t KTX2_IMAGE_IS_P_FRAME = 2;
- const uint32_t KTX2_UASTC_BLOCK_SIZE = 16;
+ const uint32_t KTX2_UASTC_BLOCK_SIZE = 16; // also the block size for UASTC_HDR
const uint32_t KTX2_MAX_SUPPORTED_LEVEL_COUNT = 16; // this is an implementation specific constraint and can be increased
// The KTX2 transfer functions supported by KTX2
@@ -800,13 +853,15 @@ namespace basist
// Returns 0 or the number of layers in the texture array or texture video. Valid after init().
uint32_t get_layers() const { return m_header.m_layer_count; }
- // Returns cETC1S or cUASTC4x4. Valid after init().
+ // Returns cETC1S, cUASTC4x4, or cUASTC_HDR_4x4. Valid after init().
basist::basis_tex_format get_format() const { return m_format; }
-
+
bool is_etc1s() const { return get_format() == basist::basis_tex_format::cETC1S; }
bool is_uastc() const { return get_format() == basist::basis_tex_format::cUASTC4x4; }
+ bool is_hdr() const { return get_format() == basist::basis_tex_format::cUASTC_HDR_4x4; }
+
// Returns true if the ETC1S file has two planes (typically RGBA, or RRRG), or true if the UASTC file has alpha data. Valid after init().
uint32_t get_has_alpha() const { return m_has_alpha; }
@@ -913,6 +968,7 @@ namespace basist
basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder;
basist::basisu_lowlevel_uastc_transcoder m_uastc_transcoder;
+ basist::basisu_lowlevel_uastc_hdr_transcoder m_uastc_hdr_transcoder;
ktx2_transcoder_state m_def_transcoder_state;
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h b/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h
index 0505df6ea6..17c9dc7c8c 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h
@@ -1,5 +1,5 @@
// basisu_transcoder_internal.h - Universal texture format transcoder library.
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
//
@@ -20,8 +20,9 @@
#pragma warning (disable: 4127) // conditional expression is constant
#endif
-#define BASISD_LIB_VERSION 116
-#define BASISD_VERSION_STRING "01.16"
+// v1.50: Added UASTC HDR support
+#define BASISD_LIB_VERSION 150
+#define BASISD_VERSION_STRING "01.50"
#ifdef _DEBUG
#define BASISD_BUILD_DEBUG
@@ -82,9 +83,15 @@ namespace basist
cRGBA4444_ALPHA,
cRGBA4444_COLOR_OPAQUE,
cRGBA4444,
-
- cUASTC_4x4,
-
+ cRGBA_HALF,
+ cRGB_HALF,
+ cRGB_9E5,
+
+ cUASTC_4x4, // LDR, universal
+ cUASTC_HDR_4x4, // HDR, transcodes only to 4x4 HDR ASTC, BC6H, or uncompressed
+ cBC6H,
+ cASTC_HDR_4x4,
+
cTotalBlockFormats
};
@@ -264,8 +271,8 @@ namespace basist
}
const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; }
- const basisu::int_vec get_lookup() const { return m_lookup; }
- const basisu::int16_vec get_tree() const { return m_tree; }
+ const basisu::int_vec &get_lookup() const { return m_lookup; }
+ const basisu::int16_vec &get_tree() const { return m_tree; }
bool is_valid() const { return m_code_sizes.size() > 0; }
@@ -789,7 +796,198 @@ namespace basist
};
bool basis_block_format_is_uncompressed(block_format tex_type);
-
+
+ //------------------------------------
+
+ typedef uint16_t half_float;
+
+ const double MIN_DENORM_HALF_FLOAT = 0.000000059604645; // smallest positive subnormal number
+ const double MIN_HALF_FLOAT = 0.00006103515625; // smallest positive normal number
+ const double MAX_HALF_FLOAT = 65504.0; // largest normal number
+
+ inline uint32_t get_bits(uint32_t val, int low, int high)
+ {
+ const int num_bits = (high - low) + 1;
+ assert((num_bits >= 1) && (num_bits <= 32));
+
+ val >>= low;
+ if (num_bits != 32)
+ val &= ((1u << num_bits) - 1);
+
+ return val;
+ }
+
+ inline bool is_half_inf_or_nan(half_float v)
+ {
+ return get_bits(v, 10, 14) == 31;
+ }
+
+ inline bool is_half_denorm(half_float v)
+ {
+ int e = (v >> 10) & 31;
+ return !e;
+ }
+
+ inline int get_half_exp(half_float v)
+ {
+ int e = ((v >> 10) & 31);
+ return e ? (e - 15) : -14;
+ }
+
+ inline int get_half_mantissa(half_float v)
+ {
+ if (is_half_denorm(v))
+ return v & 0x3FF;
+ return (v & 0x3FF) | 0x400;
+ }
+
+ inline float get_half_mantissaf(half_float v)
+ {
+ return ((float)get_half_mantissa(v)) / 1024.0f;
+ }
+
+ inline int get_half_sign(half_float v)
+ {
+ return v ? ((v & 0x8000) ? -1 : 1) : 0;
+ }
+
+ inline bool half_is_signed(half_float v)
+ {
+ return (v & 0x8000) != 0;
+ }
+
+#if 0
+ int hexp = get_half_exp(Cf);
+ float hman = get_half_mantissaf(Cf);
+ int hsign = get_half_sign(Cf);
+ float k = powf(2.0f, hexp) * hman * hsign;
+ if (is_half_inf_or_nan(Cf))
+ k = std::numeric_limits<float>::quiet_NaN();
+#endif
+
+ half_float float_to_half(float val);
+
+ inline float half_to_float(half_float hval)
+ {
+ union { float f; uint32_t u; } x = { 0 };
+
+ uint32_t s = ((uint32_t)hval >> 15) & 1;
+ uint32_t e = ((uint32_t)hval >> 10) & 0x1F;
+ uint32_t m = (uint32_t)hval & 0x3FF;
+
+ if (!e)
+ {
+ if (!m)
+ {
+ // +- 0
+ x.u = s << 31;
+ return x.f;
+ }
+ else
+ {
+ // denormalized
+ while (!(m & 0x00000400))
+ {
+ m <<= 1;
+ --e;
+ }
+
+ ++e;
+ m &= ~0x00000400;
+ }
+ }
+ else if (e == 31)
+ {
+ if (m == 0)
+ {
+ // +/- INF
+ x.u = (s << 31) | 0x7f800000;
+ return x.f;
+ }
+ else
+ {
+ // +/- NaN
+ x.u = (s << 31) | 0x7f800000 | (m << 13);
+ return x.f;
+ }
+ }
+
+ e = e + (127 - 15);
+ m = m << 13;
+
+ assert(s <= 1);
+ assert(m <= 0x7FFFFF);
+ assert(e <= 255);
+
+ x.u = m | (e << 23) | (s << 31);
+ return x.f;
+ }
+
+ // Originally from bc6h_enc.h
+
+ void bc6h_enc_init();
+
+ const uint32_t MAX_BLOG16_VAL = 0xFFFF;
+
+ // BC6H internals
+ const uint32_t NUM_BC6H_MODES = 14;
+ const uint32_t BC6H_LAST_MODE_INDEX = 13;
+ const uint32_t BC6H_FIRST_1SUBSET_MODE_INDEX = 10; // in the MS docs, this is "mode 11" (where the first mode is 1), 60 bits for endpoints (10.10, 10.10, 10.10), 63 bits for weights
+ const uint32_t TOTAL_BC6H_PARTITION_PATTERNS = 32;
+
+ extern const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4]; // base, r, g, b
+
+ struct bc6h_bit_layout
+ {
+ int8_t m_comp; // R=0,G=1,B=2,D=3 (D=partition index)
+ int8_t m_index; // 0-3, 0-1 Low/High subset 1, 2-3 Low/High subset 2, -1=partition index (d)
+ int8_t m_last_bit;
+ int8_t m_first_bit; // may be -1 if a single bit, may be >m_last_bit if reversed
+ };
+
+ const uint32_t MAX_BC6H_LAYOUT_INDEX = 25;
+ extern const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX];
+
+ extern const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4]; // [y][x]
+
+ extern const uint8_t g_bc6h_weight3[8];
+ extern const uint8_t g_bc6h_weight4[16];
+
+ extern const int8_t g_bc6h_mode_lookup[32];
+
+ // Converts b16 to half float
+ inline half_float bc6h_blog16_to_half(uint32_t comp)
+ {
+ assert(comp <= 0xFFFF);
+
+ // scale the magnitude by 31/64
+ comp = (comp * 31u) >> 6u;
+ return (half_float)comp;
+ }
+
+ const uint32_t MAX_BC6H_HALF_FLOAT_AS_UINT = 0x7BFF;
+
+ // Inverts bc6h_blog16_to_half().
+ // Returns the nearest blog16 given a half value.
+ inline uint32_t bc6h_half_to_blog16(half_float h)
+ {
+ assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
+ return (h * 64 + 30) / 31;
+ }
+
+ struct bc6h_block
+ {
+ uint8_t m_bytes[16];
+ };
+
+ void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
+ void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
+ void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
+ void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
+ void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
+ void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
+ bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]);
+
} // namespace basist
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc
index 8244550959..205758b3d7 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc
@@ -1,4 +1,4 @@
-// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2017-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc
index fad45fe22d..f2d324fcc3 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc
@@ -1,4 +1,4 @@
-// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2017-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h b/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h
index f91314f4ff..457bd51e30 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h
@@ -13,6 +13,7 @@ namespace basist
const uint32_t UASTC_MODE_INDEX_SOLID_COLOR = 8;
const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS2 = 30;
+ const uint32_t TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 = 27; // BC6H only supports only 5-bit pattern indices, BC7 supports 4-bit or 6-bit
const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS3 = 11;
const uint32_t TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS = 19;