summaryrefslogtreecommitdiffstats
path: root/thirdparty
diff options
context:
space:
mode:
authorSpartan322 <Megacake1234@gmail.com>2024-10-15 06:49:10 -0400
committerSpartan322 <Megacake1234@gmail.com>2024-10-15 06:49:10 -0400
commit8043cc1e0d288d7b517d13f14c912c636293cc8b (patch)
tree79fdac1a76ad232c040f25a4fbae974ac245edda /thirdparty
parent7894cd1a5a680020c51e0df96ce8675ad647d91b (diff)
parentaf77100e394dcaca609b15bef815ed17475e51ed (diff)
downloadredot-engine-8043cc1e0d288d7b517d13f14c912c636293cc8b.tar.gz
Merge commit godotengine/godot@af77100e394dcaca609b15bef815ed17475e51ed
Diffstat (limited to 'thirdparty')
-rw-r--r--thirdparty/README.md7
-rw-r--r--thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.cpp2052
-rw-r--r--thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.h45
-rw-r--r--thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.cpp3310
-rw-r--r--thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.h224
-rw-r--r--thirdparty/basis_universal/encoder/basisu_backend.cpp2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_backend.h2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_basis_file.cpp2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_basis_file.h2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_bc7enc.cpp5
-rw-r--r--thirdparty/basis_universal/encoder/basisu_bc7enc.h2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_comp.cpp2028
-rw-r--r--thirdparty/basis_universal/encoder/basisu_comp.h104
-rw-r--r--thirdparty/basis_universal/encoder/basisu_enc.cpp1634
-rw-r--r--thirdparty/basis_universal/encoder/basisu_enc.h653
-rw-r--r--thirdparty/basis_universal/encoder/basisu_etc.cpp2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_etc.h2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_frontend.cpp3
-rw-r--r--thirdparty/basis_universal/encoder/basisu_frontend.h2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp561
-rw-r--r--thirdparty/basis_universal/encoder/basisu_gpu_texture.h50
-rw-r--r--thirdparty/basis_universal/encoder/basisu_kernels_declares.h2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_kernels_imp.h2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp18
-rw-r--r--thirdparty/basis_universal/encoder/basisu_miniz.h10
-rw-r--r--thirdparty/basis_universal/encoder/basisu_opencl.cpp2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_opencl.h2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h13
-rw-r--r--thirdparty/basis_universal/encoder/basisu_resample_filters.cpp2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_resampler.cpp2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_resampler.h2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_resampler_filters.h2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_ssim.cpp2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_ssim.h2
-rw-r--r--thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp21
-rw-r--r--thirdparty/basis_universal/encoder/basisu_uastc_enc.h2
-rw-r--r--thirdparty/basis_universal/encoder/cppspmd_flow.h2
-rw-r--r--thirdparty/basis_universal/encoder/cppspmd_math.h4
-rw-r--r--thirdparty/basis_universal/encoder/cppspmd_math_declares.h2
-rw-r--r--thirdparty/basis_universal/encoder/cppspmd_sse.h28
-rw-r--r--thirdparty/basis_universal/encoder/cppspmd_type_aliases.h2
-rw-r--r--thirdparty/basis_universal/encoder/pvpngreader.cpp18
-rw-r--r--thirdparty/basis_universal/patches/external-jpgd.patch13
-rw-r--r--thirdparty/basis_universal/patches/external-tinyexr.patch23
-rw-r--r--thirdparty/basis_universal/patches/remove-tinydds-qoi.patch446
-rw-r--r--thirdparty/basis_universal/transcoder/basisu.h105
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h102
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_astc_helpers.h3587
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_containers.h62
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_containers_impl.h47
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_file_headers.h5
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_transcoder.cpp2057
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_transcoder.h80
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h216
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc2
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc2
-rw-r--r--thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h1
58 files changed, 16923 insertions, 659 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md
index 58226261f4..2ce82e82df 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -59,12 +59,13 @@ Files extracted from upstream source:
## basis_universal
- Upstream: https://github.com/BinomialLLC/basis_universal
-- Version: 1.16.4 (900e40fb5d2502927360fe2f31762bdbb624455f, 2023)
+- Version: 1.50.0 (051ad6d8a64bb95a79e8601c317055fd1782ad3e, 2024)
- License: Apache 2.0
Files extracted from upstream source:
-- `encoder/` and `transcoder/` folders, minus `jpgd.{cpp,h}`
+- `encoder/` and `transcoder/` folders, with the following files removed from `encoder`:
+ `jpgd.{cpp,h}`, `3rdparty/{qoi.h,tinydds.h,tinyexr.cpp,tinyexr.h}`
- `LICENSE`
Applied upstream PR https://github.com/BinomialLLC/basis_universal/pull/344 to
@@ -78,7 +79,7 @@ fix build with our own copy of zstd (patch in `patches`).
Files extracted from upstream source:
-- `bc6h.glsl`, `bc1.glsl`, `CrossPlatformSettings_piece_all.glsl` and `UavCrossPlatform_piece_all.glsl`.
+- `bc6h.glsl`, `bc1.glsl`, `bc4.glsl`, `CrossPlatformSettings_piece_all.glsl` and `UavCrossPlatform_piece_all.glsl`.
- `LICENSE.md`
diff --git a/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.cpp b/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.cpp
new file mode 100644
index 0000000000..5abfe2faf9
--- /dev/null
+++ b/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.cpp
@@ -0,0 +1,2052 @@
+// File: android_astc_decomp.cpp
+
+/*-------------------------------------------------------------------------
+ * drawElements Quality Program Tester Core
+ * ----------------------------------------
+ *
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * rg: Removed external dependencies, minor fix to decompress() so it converts non-sRGB
+ * output to 8-bits correctly. I've compared this decoder's output
+ * vs. astc-codec with random inputs.
+ *
+ *//*!
+ * \file
+ * \brief ASTC Utilities.
+ *//*--------------------------------------------------------------------*/
+#include "android_astc_decomp.h"
+#include <assert.h>
+#include <algorithm>
+#include <fenv.h>
+#include <math.h>
+
+#define DE_LENGTH_OF_ARRAY(x) (sizeof(x)/sizeof(x[0])) // Number of elements in array x.
+#define DE_UNREF(x) (void)x // Evaluates x as a discarded expression to mark it as intentionally unused.
+
+typedef uint8_t deUint8; // "de"-prefixed aliases for the fixed-width integer types used below.
+typedef int8_t deInt8;
+typedef uint32_t deUint32;
+typedef int32_t deInt32;
+typedef uint16_t deUint16;
+typedef int16_t deInt16;
+typedef int64_t deInt64;
+typedef uint64_t deUint64;
+
+#define DE_ASSERT assert // DE_ASSERT maps to the standard assert(), so it is compiled out when NDEBUG is defined.
+
+#ifdef _MSC_VER
+#pragma warning (disable:4505) // unreferenced local function has been removed
+#elif defined(__GNUC__) // GCC-compatible compilers: silence unused-function warnings for the static helpers below.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+namespace basisu_astc
+{
+ template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; } // Larger of two values; returns b when neither is greater.
+ template <typename S> inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } // Larger of three values, built on the two-argument overload.
+ template <typename S> inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } // Larger of four values.
+
+ static bool inBounds(int v, int l, int h) // True when v lies in the half-open interval [l, h).
+ {
+ return (v >= l) && (v < h);
+ }
+
+ static bool inRange(int v, int l, int h) // True when v lies in the closed interval [l, h] (upper bound inclusive, unlike inBounds).
+ {
+ return (v >= l) && (v <= h);
+ }
+
+ template<typename T> // Returns the larger of a and b; b is returned when a is not greater.
+ static inline T max(T a, T b)
+ {
+ return (a > b) ? a : b;
+ }
+
+ template<typename T> // Returns the smaller of a and b; b is returned when a is not smaller.
+ static inline T min(T a, T b)
+ {
+ return (a < b) ? a : b;
+ }
+
+ template<typename T> // Restricts a to [l, h]: yields l below the range, h above it, otherwise a unchanged.
+ static inline T clamp(T a, T l, T h)
+ {
+ if (a < l)
+ return l;
+ else if (a > h)
+ return h;
+ return a;
+ }
+
+ struct UVec4 // Four-component vector of unsigned 32-bit values.
+ {
+ uint32_t m_c[4]; // Components stored in x, y, z, w order.
+
+ UVec4() // Default construction zeroes all four components.
+ {
+ m_c[0] = 0;
+ m_c[1] = 0;
+ m_c[2] = 0;
+ m_c[3] = 0;
+ }
+
+ UVec4(uint32_t x, uint32_t y, uint32_t z, uint32_t w) // Component-wise construction.
+ {
+ m_c[0] = x;
+ m_c[1] = y;
+ m_c[2] = z;
+ m_c[3] = w;
+ }
+
+ uint32_t x() const { return m_c[0]; } // Read-only accessors (by value).
+ uint32_t y() const { return m_c[1]; }
+ uint32_t z() const { return m_c[2]; }
+ uint32_t w() const { return m_c[3]; }
+
+ uint32_t& x() { return m_c[0]; } // Mutable accessors (by reference).
+ uint32_t& y() { return m_c[1]; }
+ uint32_t& z() { return m_c[2]; }
+ uint32_t& w() { return m_c[3]; }
+
+ uint32_t operator[] (uint32_t idx) const { assert(idx < 4); return m_c[idx]; } // Indexed read; idx is only range-checked by assert.
+ uint32_t& operator[] (uint32_t idx) { assert(idx < 4); return m_c[idx]; } // Indexed write access; same assert-only check.
+ };
+
+ struct IVec4 // Four-component vector of signed 32-bit values.
+ {
+ int32_t m_c[4]; // Components stored in x, y, z, w order.
+
+ IVec4() // Default construction zeroes all four components.
+ {
+ m_c[0] = 0;
+ m_c[1] = 0;
+ m_c[2] = 0;
+ m_c[3] = 0;
+ }
+
+ IVec4(int32_t x, int32_t y, int32_t z, int32_t w) // Component-wise construction.
+ {
+ m_c[0] = x;
+ m_c[1] = y;
+ m_c[2] = z;
+ m_c[3] = w;
+ }
+
+ int32_t x() const { return m_c[0]; } // Read-only accessors (by value).
+ int32_t y() const { return m_c[1]; }
+ int32_t z() const { return m_c[2]; }
+ int32_t w() const { return m_c[3]; }
+
+ int32_t& x() { return m_c[0]; } // Mutable accessors (by reference).
+ int32_t& y() { return m_c[1]; }
+ int32_t& z() { return m_c[2]; }
+ int32_t& w() { return m_c[3]; }
+
+ UVec4 asUint() const // Converts to UVec4, replacing every negative component with 0.
+ {
+ return UVec4(maximum(0, m_c[0]), maximum(0, m_c[1]), maximum(0, m_c[2]), maximum(0, m_c[3]));
+ }
+
+ int32_t operator[] (uint32_t idx) const { assert(idx < 4); return m_c[idx]; } // Indexed read; idx is only range-checked by assert.
+ int32_t& operator[] (uint32_t idx) { assert(idx < 4); return m_c[idx]; } // Indexed write access; same assert-only check.
+ };
+
+ struct IVec3 // Three-component vector of signed 32-bit values.
+ {
+ int32_t m_c[3]; // Components stored in x, y, z order.
+
+ IVec3() // Default construction zeroes all three components.
+ {
+ m_c[0] = 0;
+ m_c[1] = 0;
+ m_c[2] = 0;
+ }
+
+ IVec3(int32_t x, int32_t y, int32_t z) // Component-wise construction.
+ {
+ m_c[0] = x;
+ m_c[1] = y;
+ m_c[2] = z;
+ }
+
+ int32_t x() const { return m_c[0]; } // Read-only accessors (by value).
+ int32_t y() const { return m_c[1]; }
+ int32_t z() const { return m_c[2]; }
+
+ int32_t& x() { return m_c[0]; } // Mutable accessors (by reference).
+ int32_t& y() { return m_c[1]; }
+ int32_t& z() { return m_c[2]; }
+
+ int32_t operator[] (uint32_t idx) const { assert(idx < 3); return m_c[idx]; } // Indexed read; idx is only range-checked by assert.
+ int32_t& operator[] (uint32_t idx) { assert(idx < 3); return m_c[idx]; } // Indexed write access; same assert-only check.
+ };
+
+ static uint32_t deDivRoundUp32(uint32_t a, uint32_t b) // Unsigned division of a by b, rounded up; b must be non-zero.
+ {
+ return (a + b - 1) / b; // Adding b-1 before dividing rounds up; the sum wraps if a + b - 1 exceeds 32 bits.
+ }
+
+ static bool deInBounds32(uint32_t v, uint32_t l, uint32_t h) // Unsigned variant of inBounds: true when v lies in [l, h).
+ {
+ return (v >= l) && (v < h);
+ }
+
+namespace astc
+{
+
+using std::vector;
+
+namespace
+{
+
+// Common utilities
+enum // NOTE(review): presumably the largest supported ASTC block footprint in texels; the uses are outside this chunk.
+{
+ MAX_BLOCK_WIDTH = 12,
+ MAX_BLOCK_HEIGHT = 12
+};
+
+inline deUint32 getBit (deUint32 src, int ndx) // Returns bit ndx of src (0 = least significant) as 0 or 1.
+{
+ DE_ASSERT(basisu_astc::inBounds(ndx, 0, 32));
+ return (src >> ndx) & 1;
+}
+
+inline deUint32 getBits (deUint32 src, int low, int high) // Extracts the inclusive bit range [low, high] of src, right-aligned.
+{
+ const int numBits = (high-low) + 1;
+ DE_ASSERT(basisu_astc::inRange(numBits, 1, 32));
+
+ if (numBits < 32)
+ return (deUint32)((src >> low) & ((1u<<numBits)-1));
+ else // Full 32-bit width is special-cased because 1u<<32 is not a valid shift.
+ return (deUint32)((src >> low) & 0xFFFFFFFFu);
+}
+
+inline bool isBitSet (deUint32 src, int ndx) // Boolean form of getBit(): true when bit ndx of src is 1.
+{
+ return getBit(src, ndx) != 0;
+}
+
+inline deUint32 reverseBits (deUint32 src, int numBits) // Reverses the order of the lowest numBits bits of src; higher bits are discarded.
+{
+ DE_ASSERT(basisu_astc::inRange(numBits, 0, 32));
+
+ deUint32 result = 0;
+ for (int i = 0; i < numBits; i++)
+ result |= ((src >> i) & 1) << (numBits-1-i); // Source bit i lands at mirrored position numBits-1-i.
+
+ return result;
+}
+
+inline deUint32 bitReplicationScale (deUint32 src, int numSrcBits, int numDstBits) // Widens a numSrcBits-wide value to numDstBits by repeating its bit pattern from the top down.
+{
+ DE_ASSERT(numSrcBits <= numDstBits);
+ DE_ASSERT((src & ((1<<numSrcBits)-1)) == src); // src must not have bits set above numSrcBits.
+
+ deUint32 dst = 0;
+ for (int shift = numDstBits-numSrcBits; shift > -numSrcBits; shift -= numSrcBits) // Each pass places one copy numSrcBits lower than the previous one.
+ dst |= (shift >= 0) ? (src << shift) : (src >> -shift); // A negative shift means the last copy only partly fits, so only its top bits are kept.
+
+ return dst;
+}
+
+inline deInt32 signExtend (deInt32 src, int numSrcBits) // Sign-extends a numSrcBits-wide two's-complement value held in the low bits of src.
+{
+ DE_ASSERT(basisu_astc::inRange(numSrcBits, 2, 31)); // NOTE(review): with numSrcBits == 31 the expression (1 << numSrcBits) - 1 below overflows a 32-bit int.
+
+ const bool negative = (src & (1 << (numSrcBits-1))) != 0; // Bit numSrcBits-1 is the sign bit.
+ return src | (negative ? ~((1 << numSrcBits) - 1) : 0); // For negative values, set every bit at and above position numSrcBits.
+}
+
+typedef uint16_t deFloat16; // Raw 16-bit pattern of a half-precision float.
+
+inline bool isFloat16InfOrNan (deFloat16 v) // True when bits 10..14 (the 5-bit exponent field read by deFloat16To32) are all ones.
+{
+ return getBits(v, 10, 14) == 31;
+}
+
+float deFloat16To32(deFloat16 val16) // Converts a 16-bit half-float bit pattern to a 32-bit float by rebuilding the bit fields.
+{
+ deUint32 sign;
+ deUint32 expotent;
+ deUint32 mantissa;
+
+ union // Used to reinterpret the assembled 32-bit pattern as a float.
+ {
+ float f;
+ deUint32 u;
+ } x;
+
+ x.u = 0u;
+
+ sign = ((deUint32)val16 >> 15u) & 0x00000001u; // Bit 15.
+ expotent = ((deUint32)val16 >> 10u) & 0x0000001fu; // Bits 10..14.
+ mantissa = (deUint32)val16 & 0x000003ffu; // Bits 0..9.
+
+ if (expotent == 0u)
+ {
+ if (mantissa == 0u)
+ {
+ /* +/- 0 */
+ x.u = sign << 31u;
+ return x.f;
+ }
+ else
+ {
+ /* Denormalized, normalize it. */
+
+ while (!(mantissa & 0x00000400u)) // Shift until the leading one reaches bit 10.
+ {
+ mantissa <<= 1u;
+ expotent -= 1u; // Unsigned, so this wraps below zero; the additions further down bring it back into range.
+ }
+
+ expotent += 1u;
+ mantissa &= ~0x00000400u; // Drop the now-implicit leading one.
+ }
+ }
+ else if (expotent == 31u)
+ {
+ if (mantissa == 0u)
+ {
+ /* +/- Inf */
+ x.u = (sign << 31u) | 0x7f800000u;
+ return x.f;
+ }
+ else
+ {
+ /* +/- NaN */
+ x.u = (sign << 31u) | 0x7f800000u | (mantissa << 13u); // The mantissa payload is carried over, shifted into the wider field.
+ return x.f;
+ }
+ }
+
+ expotent = expotent + (127u - 15u); // Re-bias the exponent from 15 to 127.
+ mantissa = mantissa << 13u; // Widen the 10-bit mantissa to 23 bits.
+
+ x.u = (sign << 31u) | (expotent << 23u) | mantissa;
+ return x.f;
+}
+
+enum ISEMode // How each value of an ISE sequence is stored (ISE presumably = ASTC "Integer Sequence Encoding").
+{
+ ISEMODE_TRIT = 0, // Plain bits plus a trit (see decodeISETritBlock).
+ ISEMODE_QUINT, // Plain bits plus a quint (see decodeISEQuintBlock).
+ ISEMODE_PLAIN_BIT, // Plain bits only.
+ ISEMODE_LAST // Not a real mode; used as the "unset" value in ASTCBlockMode's constructor.
+};
+
+struct ISEParams // An ISE mode paired with the number of plain bits stored per value.
+{
+ ISEMode mode;
+ int numBits; // Plain bits per value, in addition to any trit/quint.
+ ISEParams (ISEMode mode_, int numBits_) : mode(mode_), numBits(numBits_) {}
+};
+
+inline int computeNumRequiredBits (const ISEParams& iseParams, int numValues) // Total bits needed to store numValues values with the given ISE parameters; -1 for an unknown mode.
+{
+ switch (iseParams.mode)
+ {
+ case ISEMODE_TRIT: return deDivRoundUp32(numValues*8, 5) + numValues*iseParams.numBits; // 8 trit bits per 5 values, rounded up, plus the plain bits.
+ case ISEMODE_QUINT: return deDivRoundUp32(numValues*7, 3) + numValues*iseParams.numBits; // 7 quint bits per 3 values, rounded up, plus the plain bits.
+ case ISEMODE_PLAIN_BIT: return numValues*iseParams.numBits;
+ default:
+ DE_ASSERT(false);
+ return -1;
+ }
+}
+
+ISEParams computeMaximumRangeISEParams (int numAvailableBits, int numValuesInSequence) // Finds the ISE parameters with the largest value range whose encoding of numValuesInSequence values fits in numAvailableBits.
+{
+ int curBitsForTritMode = 6; // Starting (largest) plain-bit counts tried for each mode.
+ int curBitsForQuintMode = 5;
+ int curBitsForPlainBitMode = 8;
+
+ while (true) // Exits only through one of the returns below.
+ {
+ DE_ASSERT(curBitsForTritMode > 0 || curBitsForQuintMode > 0 || curBitsForPlainBitMode > 0);
+ const int tritRange = (curBitsForTritMode > 0) ? (3 << curBitsForTritMode) - 1 : -1; // Largest value each candidate can represent; -1 once its bit count is exhausted.
+ const int quintRange = (curBitsForQuintMode > 0) ? (5 << curBitsForQuintMode) - 1 : -1;
+ const int plainBitRange = (curBitsForPlainBitMode > 0) ? (1 << curBitsForPlainBitMode) - 1 : -1;
+ const int maxRange = basisu_astc::max(basisu_astc::max(tritRange, quintRange), plainBitRange);
+
+ if (maxRange == tritRange) // Test the widest remaining candidate; if it does not fit, shrink that mode by one bit and retry.
+ {
+ const ISEParams params(ISEMODE_TRIT, curBitsForTritMode);
+
+ if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
+ return ISEParams(ISEMODE_TRIT, curBitsForTritMode);
+
+ curBitsForTritMode--;
+ }
+ else if (maxRange == quintRange)
+ {
+ const ISEParams params(ISEMODE_QUINT, curBitsForQuintMode);
+
+ if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
+ return ISEParams(ISEMODE_QUINT, curBitsForQuintMode);
+
+ curBitsForQuintMode--;
+ }
+ else
+ {
+ const ISEParams params(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
+ DE_ASSERT(maxRange == plainBitRange);
+
+ if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
+ return ISEParams(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
+
+ curBitsForPlainBitMode--;
+ }
+ }
+}
+
+inline int computeNumColorEndpointValues (deUint32 endpointMode) // Number of endpoint values for one endpoint mode (0..15).
+{
+ DE_ASSERT(endpointMode < 16);
+ return (endpointMode/4 + 1) * 2; // Modes 0-3 -> 2, 4-7 -> 4, 8-11 -> 6, 12-15 -> 8.
+}
+
+// Decompression utilities
+enum DecompressResult // Outcome reported by the block decoding routines.
+{
+ DECOMPRESS_RESULT_VALID_BLOCK = 0, //!< Decompressed valid block
+ DECOMPRESS_RESULT_ERROR, //!< Encountered error while decompressing, error color written
+ DECOMPRESS_RESULT_LAST
+};
+
+// A helper for getting bits from a 128-bit block. The 16 source bytes are copied into two 64-bit words at construction.
+class Block128
+{
+private:
+ typedef deUint64 Word;
+
+ enum
+ {
+ WORD_BYTES = sizeof(Word),
+ WORD_BITS = 8*WORD_BYTES,
+ NUM_WORDS = 128 / WORD_BITS
+ };
+ //DE_STATIC_ASSERT(128 % WORD_BITS == 0);
+
+public:
+ Block128 (const deUint8* src) // Reads 16 bytes from src; byte 0 supplies bits 0..7, byte 1 bits 8..15, and so on.
+ {
+ for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
+ {
+ m_words[wordNdx] = 0;
+ for (int byteNdx = 0; byteNdx < WORD_BYTES; byteNdx++)
+ m_words[wordNdx] |= (Word)src[wordNdx*WORD_BYTES + byteNdx] << (8*byteNdx);
+ }
+ }
+
+ deUint32 getBit (int ndx) const // Returns bit ndx (0..127) of the block as 0 or 1.
+ {
+ DE_ASSERT(basisu_astc::inBounds(ndx, 0, 128));
+ return (m_words[ndx / WORD_BITS] >> (ndx % WORD_BITS)) & 1;
+ }
+
+ deUint32 getBits (int low, int high) const // Returns the inclusive bit range [low, high] (at most 32 bits), right-aligned; 0 when the range is empty.
+ {
+ DE_ASSERT(basisu_astc::inBounds(low, 0, 128));
+ DE_ASSERT(basisu_astc::inBounds(high, 0, 128));
+ DE_ASSERT(basisu_astc::inRange(high-low+1, 0, 32));
+
+ if (high-low+1 == 0)
+ return 0;
+
+ const int word0Ndx = low / WORD_BITS;
+ const int word1Ndx = high / WORD_BITS;
+ // \note "foo << bar << 1" done instead of "foo << (bar+1)" to avoid overflow, i.e. shift amount being too big.
+ if (word0Ndx == word1Ndx) // Range lies within one word: mask off everything above high, then shift low down to bit 0.
+ return (deUint32)((m_words[word0Ndx] & ((((Word)1 << high%WORD_BITS << 1) - 1))) >> ((Word)low % WORD_BITS));
+ else
+ {
+ DE_ASSERT(word1Ndx == word0Ndx + 1);
+ return (deUint32)(m_words[word0Ndx] >> (low%WORD_BITS)) | // Low part: the top bits of the first word.
+ (deUint32)((m_words[word1Ndx] & (((Word)1 << high%WORD_BITS << 1) - 1)) << (high-low - high%WORD_BITS)); // High part: shifted up by the number of bits the first word supplied.
+ }
+ }
+
+ bool isBitSet (int ndx) const // Boolean form of getBit().
+ {
+ DE_ASSERT(basisu_astc::inBounds(ndx, 0, 128));
+ return getBit(ndx) != 0;
+ }
+
+private:
+ Word m_words[NUM_WORDS]; // m_words[0] holds bits 0..63, m_words[1] holds bits 64..127.
+};
+
+// A helper for sequential access into a Block128. Keeps a reference to the block, so the block must outlive the stream.
+class BitAccessStream
+{
+public:
+ BitAccessStream (const Block128& src, int startNdxInSrc, int length, bool forward) // Stream of `length` bits starting at bit startNdxInSrc; `forward` selects ascending (true) or descending (false) bit positions.
+ : m_src (src)
+ , m_startNdxInSrc (startNdxInSrc)
+ , m_length (length)
+ , m_forward (forward)
+ , m_ndx (0)
+ {
+ }
+
+ // Get the next num bits. Bits at positions greater than or equal to m_length are zeros.
+ deUint32 getNext (int num)
+ {
+ if (num == 0 || m_ndx >= m_length)
+ return 0;
+ const int end = m_ndx + num;
+ const int numBitsFromSrc = basisu_astc::max(0, basisu_astc::min(m_length, end) - m_ndx); // Only this many bits are still inside the stream.
+ const int low = m_ndx;
+ const int high = m_ndx + numBitsFromSrc - 1;
+
+ m_ndx += num; // The position advances by the full request even when fewer bits were available.
+
+ return m_forward ? m_src.getBits(m_startNdxInSrc + low, m_startNdxInSrc + high)
+ : reverseBits(m_src.getBits(m_startNdxInSrc - high, m_startNdxInSrc - low), numBitsFromSrc); // Backward streams read below the start index and reverse, so the first bit consumed becomes the LSB.
+ }
+
+private:
+ const Block128& m_src;
+ const int m_startNdxInSrc;
+ const int m_length;
+ const bool m_forward;
+ int m_ndx; // Number of stream bits consumed so far.
+};
+
+struct ISEDecodedResult // One decoded ISE value, kept both split into parts and combined.
+{
+ deUint32 m; // The plain bits read for this value.
+ deUint32 tq; //!< Trit or quint value, depending on ISE mode.
+ deUint32 v; // Combined value: (tq << numBits) + m in the trit/quint decoders.
+};
+
+// Data from an ASTC block's "block mode" part (i.e. bits [0,10]).
+struct ASTCBlockMode
+{
+ bool isError;
+ // \note Following fields only relevant if !isError.
+ bool isVoidExtent;
+ // \note Following fields only relevant if !isVoidExtent.
+ bool isDualPlane;
+ int weightGridWidth;
+ int weightGridHeight;
+ ISEParams weightISEParams;
+
+ ASTCBlockMode (void) // Starts in the error state with placeholder values (-1 sizes, ISEMODE_LAST) until filled in.
+ : isError (true)
+ , isVoidExtent (true)
+ , isDualPlane (true)
+ , weightGridWidth (-1)
+ , weightGridHeight (-1)
+ , weightISEParams (ISEMODE_LAST, -1)
+ {
+ }
+};
+
+inline int computeNumWeights (const ASTCBlockMode& mode) // Weight count: grid width * height, doubled for dual-plane modes.
+{
+ return mode.weightGridWidth * mode.weightGridHeight * (mode.isDualPlane ? 2 : 1);
+}
+
+struct ColorEndpointPair // Two four-component endpoint colors; how they are used is outside this chunk.
+{
+ UVec4 e0;
+ UVec4 e1;
+};
+
+struct TexelWeightPair // Two weights per texel (presumably one per weight plane — confirm against the code that fills it).
+{
+ deUint32 w[2];
+};
+
+ASTCBlockMode getASTCBlockMode (deUint32 blockModeData) // Decodes the block-mode bits into weight grid size, dual-plane flag and weight ISE parameters; isError stays true for reserved encodings.
+{
+ ASTCBlockMode blockMode;
+ blockMode.isError = true; // \note Set to false later, if not error.
+ blockMode.isVoidExtent = getBits(blockModeData, 0, 8) == 0x1fc; // Void-extent blocks are identified by the low 9 bits alone.
+ if (!blockMode.isVoidExtent)
+ {
+ if ((getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 6, 8) == 7) || getBits(blockModeData, 0, 3) == 0)
+ return blockMode; // Invalid ("reserved").
+
+ deUint32 r = (deUint32)-1; // \note Set in the following branches.
+
+ if (getBits(blockModeData, 0, 1) == 0) // Layout used when bits 0..1 are zero: r comes from bits 4, 2, 3 and the selector from bits 7..8.
+ {
+ const deUint32 r0 = getBit(blockModeData, 4);
+ const deUint32 r1 = getBit(blockModeData, 2);
+ const deUint32 r2 = getBit(blockModeData, 3);
+ const deUint32 i78 = getBits(blockModeData, 7, 8);
+
+ r = (r2 << 2) | (r1 << 1) | (r0 << 0);
+
+ if (i78 == 3)
+ {
+ const bool i5 = isBitSet(blockModeData, 5); // Bit 5 chooses between the 10x6 and 6x10 grids.
+ blockMode.weightGridWidth = i5 ? 10 : 6;
+ blockMode.weightGridHeight = i5 ? 6 : 10;
+ }
+ else
+ {
+ const deUint32 a = getBits(blockModeData, 5, 6);
+
+ switch (i78)
+ {
+ case 0: blockMode.weightGridWidth = 12; blockMode.weightGridHeight = a + 2; break;
+ case 1: blockMode.weightGridWidth = a + 2; blockMode.weightGridHeight = 12; break;
+ case 2: blockMode.weightGridWidth = a + 6; blockMode.weightGridHeight = getBits(blockModeData, 9, 10) + 6; break;
+ default: DE_ASSERT(false);
+ }
+ }
+ }
+ else // Layout used when bits 0..1 are non-zero: r comes from bits 4, 0, 1 and the selector from bits 2..3.
+ {
+ const deUint32 r0 = getBit(blockModeData, 4);
+ const deUint32 r1 = getBit(blockModeData, 0);
+ const deUint32 r2 = getBit(blockModeData, 1);
+ const deUint32 i23 = getBits(blockModeData, 2, 3);
+ const deUint32 a = getBits(blockModeData, 5, 6);
+
+ r = (r2 << 2) | (r1 << 1) | (r0 << 0);
+ if (i23 == 3)
+ {
+ const deUint32 b = getBit(blockModeData, 7); // Here b is a single bit; bit 8 selects which of the two grid formulas applies.
+ const bool i8 = isBitSet(blockModeData, 8);
+ blockMode.weightGridWidth = i8 ? b+2 : a+2;
+ blockMode.weightGridHeight = i8 ? a+2 : b+6;
+ }
+ else
+ {
+ const deUint32 b = getBits(blockModeData, 7, 8);
+ switch (i23)
+ {
+ case 0: blockMode.weightGridWidth = b + 4; blockMode.weightGridHeight = a + 2; break;
+ case 1: blockMode.weightGridWidth = b + 8; blockMode.weightGridHeight = a + 2; break;
+ case 2: blockMode.weightGridWidth = a + 2; blockMode.weightGridHeight = b + 8; break;
+ default: DE_ASSERT(false);
+ }
+ }
+ }
+
+ const bool zeroDH = getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 7, 8) == 2; // In this layout bits 9..10 were already used for the grid height, so h and dual-plane are forced to 0.
+ const bool h = zeroDH ? 0 : isBitSet(blockModeData, 9);
+ blockMode.isDualPlane = zeroDH ? 0 : isBitSet(blockModeData, 10);
+
+ {
+ ISEMode& m = blockMode.weightISEParams.mode; // m and b alias the output fields; the switches below overwrite the defaults set here.
+ int& b = blockMode.weightISEParams.numBits;
+ m = ISEMODE_PLAIN_BIT;
+ b = 0;
+ if (h) // h selects between the two (mode, bits) tables indexed by r; r values 0 and 1 only trip the assert.
+ {
+ switch (r)
+ {
+ case 2: m = ISEMODE_QUINT; b = 1; break;
+ case 3: m = ISEMODE_TRIT; b = 2; break;
+ case 4: b = 4; break;
+ case 5: m = ISEMODE_QUINT; b = 2; break;
+ case 6: m = ISEMODE_TRIT; b = 3; break;
+ case 7: b = 5; break;
+ default: DE_ASSERT(false);
+ }
+ }
+ else
+ {
+ switch (r)
+ {
+ case 2: b = 1; break;
+ case 3: m = ISEMODE_TRIT; break;
+ case 4: b = 2; break;
+ case 5: m = ISEMODE_QUINT; break;
+ case 6: m = ISEMODE_TRIT; b = 1; break;
+ case 7: b = 3; break;
+ default: DE_ASSERT(false);
+ }
+ }
+ }
+ }
+
+ blockMode.isError = false;
+ return blockMode;
+}
+
+inline void setASTCErrorColorBlock (void* dst, int blockWidth, int blockHeight, bool isSRGB) // Fills blockWidth*blockHeight four-component texels at dst with the error color (1, 0, 1, 1).
+{
+ if (isSRGB) // sRGB output is written as 4 bytes per texel.
+ {
+ deUint8* const dstU = (deUint8*)dst;
+ for (int i = 0; i < blockWidth*blockHeight; i++)
+ {
+ dstU[4*i + 0] = 0xff;
+ dstU[4*i + 1] = 0;
+ dstU[4*i + 2] = 0xff;
+ dstU[4*i + 3] = 0xff;
+ }
+ }
+ else // Non-sRGB output is written as 4 floats per texel.
+ {
+ float* const dstF = (float*)dst;
+ for (int i = 0; i < blockWidth*blockHeight; i++)
+ {
+ dstF[4*i + 0] = 1.0f;
+ dstF[4*i + 1] = 0.0f;
+ dstF[4*i + 2] = 1.0f;
+ dstF[4*i + 3] = 1.0f;
+ }
+ }
+}
+
+DecompressResult decodeVoidExtentBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode) // Decodes a void-extent (constant color) block into dst; writes the error color and returns ERROR on invalid data.
+{
+ const deUint32 minSExtent = blockData.getBits(12, 24); // Four 13-bit extent fields.
+ const deUint32 maxSExtent = blockData.getBits(25, 37);
+ const deUint32 minTExtent = blockData.getBits(38, 50);
+ const deUint32 maxTExtent = blockData.getBits(51, 63);
+ const bool allExtentsAllOnes = (minSExtent == 0x1fff) && (maxSExtent == 0x1fff) && (minTExtent == 0x1fff) && (maxTExtent == 0x1fff); // All-ones extents are exempt from the min < max validation below.
+ const bool isHDRBlock = blockData.isBitSet(9);
+
+ if ((isLDRMode && isHDRBlock) || (!allExtentsAllOnes && (minSExtent >= maxSExtent || minTExtent >= maxTExtent))) // Reject HDR blocks in LDR mode and extents where min is not below max.
+ {
+ setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
+ return DECOMPRESS_RESULT_ERROR;
+ }
+
+ const deUint32 rgba[4] = // The constant color: four 16-bit channels from the upper 64 bits of the block.
+ {
+ blockData.getBits(64, 79),
+ blockData.getBits(80, 95),
+ blockData.getBits(96, 111),
+ blockData.getBits(112, 127)
+ };
+
+ if (isSRGB) // 8-bit output keeps only the top byte of each 16-bit channel.
+ {
+ deUint8* const dstU = (deUint8*)dst;
+ for (int i = 0; i < blockWidth * blockHeight; i++)
+ {
+ for (int c = 0; c < 4; c++)
+ dstU[i * 4 + c] = (deUint8)((rgba[c] & 0xff00) >> 8);
+ }
+ }
+ else
+ {
+ float* const dstF = (float*)dst;
+
+ if (isHDRBlock) // HDR: each channel is a half-float bit pattern.
+ {
+ for (int c = 0; c < 4; c++)
+ {
+ if (isFloat16InfOrNan((deFloat16)rgba[c]))
+ {
+ //throw InternalError("Infinity or NaN color component in HDR void extent block in ASTC texture (behavior undefined by ASTC specification)");
+ setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
+ return DECOMPRESS_RESULT_ERROR;
+ }
+ }
+
+ for (int i = 0; i < blockWidth * blockHeight; i++)
+ {
+ for (int c = 0; c < 4; c++)
+ dstF[i * 4 + c] = deFloat16To32((deFloat16)rgba[c]);
+ }
+ }
+ else // LDR: 16-bit unsigned channel scaled to [0, 1].
+ {
+ for (int i = 0; i < blockWidth * blockHeight; i++)
+ {
+ for (int c = 0; c < 4; c++)
+ dstF[i * 4 + c] = (rgba[c] == 65535) ? 1.0f : ((float)rgba[c] / 65536.0f); // 65535 is special-cased so full scale maps to exactly 1.0.
+ }
+ }
+ }
+
+ return DECOMPRESS_RESULT_VALID_BLOCK;
+}
+
+void decodeColorEndpointModes (deUint32* endpointModesDst, const Block128& blockData, int numPartitions, int extraCemBitsStart) // Writes one color endpoint mode per partition into endpointModesDst[0..numPartitions-1].
+{
+ if (numPartitions == 1) // Single partition: the mode is the 4-bit field at bits 13..16.
+ endpointModesDst[0] = blockData.getBits(13, 16);
+ else
+ {
+ const deUint32 highLevelSelector = blockData.getBits(23, 24);
+
+ if (highLevelSelector == 0) // Selector 0: all partitions share the 4-bit mode at bits 25..28.
+ {
+ const deUint32 mode = blockData.getBits(25, 28);
+
+ for (int i = 0; i < numPartitions; i++)
+ endpointModesDst[i] = mode;
+ }
+ else
+ {
+ for (int partNdx = 0; partNdx < numPartitions; partNdx++)
+ {
+ const deUint32 cemClass = highLevelSelector - (blockData.isBitSet(25 + partNdx) ? 0 : 1); // Per-partition class: the selector, or selector-1 when the partition's bit is clear.
+ const deUint32 lowBit0Ndx = numPartitions + 2*partNdx;
+ const deUint32 lowBit1Ndx = numPartitions + 2*partNdx + 1;
+ const deUint32 lowBit0 = blockData.getBit(lowBit0Ndx < 4 ? 25+lowBit0Ndx : extraCemBitsStart+lowBit0Ndx-4); // The first 4 mode bits live at bits 25..28; the rest continue at extraCemBitsStart.
+ const deUint32 lowBit1 = blockData.getBit(lowBit1Ndx < 4 ? 25+lowBit1Ndx : extraCemBitsStart+lowBit1Ndx-4);
+
+ endpointModesDst[partNdx] = (cemClass << 2) | (lowBit1 << 1) | lowBit0;
+ }
+ }
+ }
+}
+
+int computeNumColorEndpointValues (const deUint32* endpointModes, int numPartitions) // Sums the per-mode endpoint value counts over the first numPartitions entries of endpointModes.
+{
+ int result = 0;
+
+ for (int i = 0; i < numPartitions; i++)
+ result += computeNumColorEndpointValues(endpointModes[i]); // Resolves to the single-mode overload.
+
+ return result;
+}
+
+void decodeISETritBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits) // Decodes up to 5 trit-mode ISE values from data into dst[0..numValues-1].
+{
+ DE_ASSERT(basisu_astc::inRange(numValues, 1, 5));
+
+ deUint32 m[5];
+ m[0] = data.getNext(numBits); // The stream interleaves each value's plain bits (m) with pieces of the packed trit field T.
+ deUint32 T01 = data.getNext(2);
+ m[1] = data.getNext(numBits);
+ deUint32 T23 = data.getNext(2);
+ m[2] = data.getNext(numBits);
+ deUint32 T4 = data.getNext(1);
+ m[3] = data.getNext(numBits);
+ deUint32 T56 = data.getNext(2);
+ m[4] = data.getNext(numBits);
+ deUint32 T7 = data.getNext(1);
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wimplicit-fallthrough="
+#endif
+ switch (numValues) // For partial blocks, clear the T pieces that belong to values beyond numValues.
+ {
+ // \note Fall-throughs.
+ case 1: T23 = 0;
+ case 2: T4 = 0;
+ case 3: T56 = 0;
+ case 4: T7 = 0;
+ case 5: break;
+ default:
+ DE_ASSERT(false);
+ }
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+ const deUint32 T = (T7 << 7) | (T56 << 5) | (T4 << 4) | (T23 << 2) | (T01 << 0); // Reassemble the 8-bit packed trit field.
+
+ static const deUint32 tritsFromT[256][5] = // Lookup from packed T to its five trits (each 0..2).
+ {
+ { 0,0,0,0,0 }, { 1,0,0,0,0 }, { 2,0,0,0,0 }, { 0,0,2,0,0 }, { 0,1,0,0,0 }, { 1,1,0,0,0 }, { 2,1,0,0,0 }, { 1,0,2,0,0 }, { 0,2,0,0,0 }, { 1,2,0,0,0 }, { 2,2,0,0,0 }, { 2,0,2,0,0 }, { 0,2,2,0,0 }, { 1,2,2,0,0 }, { 2,2,2,0,0 }, { 2,0,2,0,0 },
+ { 0,0,1,0,0 }, { 1,0,1,0,0 }, { 2,0,1,0,0 }, { 0,1,2,0,0 }, { 0,1,1,0,0 }, { 1,1,1,0,0 }, { 2,1,1,0,0 }, { 1,1,2,0,0 }, { 0,2,1,0,0 }, { 1,2,1,0,0 }, { 2,2,1,0,0 }, { 2,1,2,0,0 }, { 0,0,0,2,2 }, { 1,0,0,2,2 }, { 2,0,0,2,2 }, { 0,0,2,2,2 },
+ { 0,0,0,1,0 }, { 1,0,0,1,0 }, { 2,0,0,1,0 }, { 0,0,2,1,0 }, { 0,1,0,1,0 }, { 1,1,0,1,0 }, { 2,1,0,1,0 }, { 1,0,2,1,0 }, { 0,2,0,1,0 }, { 1,2,0,1,0 }, { 2,2,0,1,0 }, { 2,0,2,1,0 }, { 0,2,2,1,0 }, { 1,2,2,1,0 }, { 2,2,2,1,0 }, { 2,0,2,1,0 },
+ { 0,0,1,1,0 }, { 1,0,1,1,0 }, { 2,0,1,1,0 }, { 0,1,2,1,0 }, { 0,1,1,1,0 }, { 1,1,1,1,0 }, { 2,1,1,1,0 }, { 1,1,2,1,0 }, { 0,2,1,1,0 }, { 1,2,1,1,0 }, { 2,2,1,1,0 }, { 2,1,2,1,0 }, { 0,1,0,2,2 }, { 1,1,0,2,2 }, { 2,1,0,2,2 }, { 1,0,2,2,2 },
+ { 0,0,0,2,0 }, { 1,0,0,2,0 }, { 2,0,0,2,0 }, { 0,0,2,2,0 }, { 0,1,0,2,0 }, { 1,1,0,2,0 }, { 2,1,0,2,0 }, { 1,0,2,2,0 }, { 0,2,0,2,0 }, { 1,2,0,2,0 }, { 2,2,0,2,0 }, { 2,0,2,2,0 }, { 0,2,2,2,0 }, { 1,2,2,2,0 }, { 2,2,2,2,0 }, { 2,0,2,2,0 },
+ { 0,0,1,2,0 }, { 1,0,1,2,0 }, { 2,0,1,2,0 }, { 0,1,2,2,0 }, { 0,1,1,2,0 }, { 1,1,1,2,0 }, { 2,1,1,2,0 }, { 1,1,2,2,0 }, { 0,2,1,2,0 }, { 1,2,1,2,0 }, { 2,2,1,2,0 }, { 2,1,2,2,0 }, { 0,2,0,2,2 }, { 1,2,0,2,2 }, { 2,2,0,2,2 }, { 2,0,2,2,2 },
+ { 0,0,0,0,2 }, { 1,0,0,0,2 }, { 2,0,0,0,2 }, { 0,0,2,0,2 }, { 0,1,0,0,2 }, { 1,1,0,0,2 }, { 2,1,0,0,2 }, { 1,0,2,0,2 }, { 0,2,0,0,2 }, { 1,2,0,0,2 }, { 2,2,0,0,2 }, { 2,0,2,0,2 }, { 0,2,2,0,2 }, { 1,2,2,0,2 }, { 2,2,2,0,2 }, { 2,0,2,0,2 },
+ { 0,0,1,0,2 }, { 1,0,1,0,2 }, { 2,0,1,0,2 }, { 0,1,2,0,2 }, { 0,1,1,0,2 }, { 1,1,1,0,2 }, { 2,1,1,0,2 }, { 1,1,2,0,2 }, { 0,2,1,0,2 }, { 1,2,1,0,2 }, { 2,2,1,0,2 }, { 2,1,2,0,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,0,2,2,2 },
+ { 0,0,0,0,1 }, { 1,0,0,0,1 }, { 2,0,0,0,1 }, { 0,0,2,0,1 }, { 0,1,0,0,1 }, { 1,1,0,0,1 }, { 2,1,0,0,1 }, { 1,0,2,0,1 }, { 0,2,0,0,1 }, { 1,2,0,0,1 }, { 2,2,0,0,1 }, { 2,0,2,0,1 }, { 0,2,2,0,1 }, { 1,2,2,0,1 }, { 2,2,2,0,1 }, { 2,0,2,0,1 },
+ { 0,0,1,0,1 }, { 1,0,1,0,1 }, { 2,0,1,0,1 }, { 0,1,2,0,1 }, { 0,1,1,0,1 }, { 1,1,1,0,1 }, { 2,1,1,0,1 }, { 1,1,2,0,1 }, { 0,2,1,0,1 }, { 1,2,1,0,1 }, { 2,2,1,0,1 }, { 2,1,2,0,1 }, { 0,0,1,2,2 }, { 1,0,1,2,2 }, { 2,0,1,2,2 }, { 0,1,2,2,2 },
+ { 0,0,0,1,1 }, { 1,0,0,1,1 }, { 2,0,0,1,1 }, { 0,0,2,1,1 }, { 0,1,0,1,1 }, { 1,1,0,1,1 }, { 2,1,0,1,1 }, { 1,0,2,1,1 }, { 0,2,0,1,1 }, { 1,2,0,1,1 }, { 2,2,0,1,1 }, { 2,0,2,1,1 }, { 0,2,2,1,1 }, { 1,2,2,1,1 }, { 2,2,2,1,1 }, { 2,0,2,1,1 },
+ { 0,0,1,1,1 }, { 1,0,1,1,1 }, { 2,0,1,1,1 }, { 0,1,2,1,1 }, { 0,1,1,1,1 }, { 1,1,1,1,1 }, { 2,1,1,1,1 }, { 1,1,2,1,1 }, { 0,2,1,1,1 }, { 1,2,1,1,1 }, { 2,2,1,1,1 }, { 2,1,2,1,1 }, { 0,1,1,2,2 }, { 1,1,1,2,2 }, { 2,1,1,2,2 }, { 1,1,2,2,2 },
+ { 0,0,0,2,1 }, { 1,0,0,2,1 }, { 2,0,0,2,1 }, { 0,0,2,2,1 }, { 0,1,0,2,1 }, { 1,1,0,2,1 }, { 2,1,0,2,1 }, { 1,0,2,2,1 }, { 0,2,0,2,1 }, { 1,2,0,2,1 }, { 2,2,0,2,1 }, { 2,0,2,2,1 }, { 0,2,2,2,1 }, { 1,2,2,2,1 }, { 2,2,2,2,1 }, { 2,0,2,2,1 },
+ { 0,0,1,2,1 }, { 1,0,1,2,1 }, { 2,0,1,2,1 }, { 0,1,2,2,1 }, { 0,1,1,2,1 }, { 1,1,1,2,1 }, { 2,1,1,2,1 }, { 1,1,2,2,1 }, { 0,2,1,2,1 }, { 1,2,1,2,1 }, { 2,2,1,2,1 }, { 2,1,2,2,1 }, { 0,2,1,2,2 }, { 1,2,1,2,2 }, { 2,2,1,2,2 }, { 2,1,2,2,2 },
+ { 0,0,0,1,2 }, { 1,0,0,1,2 }, { 2,0,0,1,2 }, { 0,0,2,1,2 }, { 0,1,0,1,2 }, { 1,1,0,1,2 }, { 2,1,0,1,2 }, { 1,0,2,1,2 }, { 0,2,0,1,2 }, { 1,2,0,1,2 }, { 2,2,0,1,2 }, { 2,0,2,1,2 }, { 0,2,2,1,2 }, { 1,2,2,1,2 }, { 2,2,2,1,2 }, { 2,0,2,1,2 },
+ { 0,0,1,1,2 }, { 1,0,1,1,2 }, { 2,0,1,1,2 }, { 0,1,2,1,2 }, { 0,1,1,1,2 }, { 1,1,1,1,2 }, { 2,1,1,1,2 }, { 1,1,2,1,2 }, { 0,2,1,1,2 }, { 1,2,1,1,2 }, { 2,2,1,1,2 }, { 2,1,2,1,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,1,2,2,2 }
+ };
+
+ const deUint32 (& trits)[5] = tritsFromT[T];
+ for (int i = 0; i < numValues; i++)
+ {
+ dst[i].m = m[i];
+ dst[i].tq = trits[i];
+ dst[i].v = (trits[i] << numBits) + m[i]; // The trit occupies the bits above the numBits plain bits.
+ }
+}
+
+// Decodes one quint block of an integer sequence: up to three values whose
+// quint digits are packed together into a single 7-bit field Q, interleaved in
+// the stream with numBits plain bits per value.
+//
+// dst       - receives numValues results (m = plain bits, tq = quint digit,
+//             v = (quint << numBits) + m).
+// numValues - number of values present in this block, expected to be 1..3.
+// data      - bit stream positioned at the start of the block.
+// numBits   - number of plain bits stored per value.
+void decodeISEQuintBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits)
+{
+	DE_ASSERT(basisu_astc::inRange(numValues, 1, 3));
+
+	// All six fields are always consumed, in this order, regardless of numValues.
+	deUint32 m[3];
+	m[0] = data.getNext(numBits);
+	const deUint32 Q012 = data.getNext(3);
+	m[1] = data.getNext(numBits);
+	deUint32 Q34 = data.getNext(2);
+	m[2] = data.getNext(numBits);
+	deUint32 Q56 = data.getNext(2);
+
+	// Packed-quint bits that belong to values beyond numValues are forced to zero.
+	if (numValues < 2)
+		Q34 = 0;
+	if (numValues < 3)
+		Q56 = 0;
+
+	// Q is 7 bits wide (3 + 2 + 2), so it indexes at most 128 table rows.
+	const deUint32 Q = (Q56 << 5) | (Q34 << 3) | (Q012 << 0);
+
+	static const deUint32 quintsFromQ[128][3] =
+	{
+		{ 0,0,0 }, { 1,0,0 }, { 2,0,0 }, { 3,0,0 }, { 4,0,0 }, { 0,4,0 }, { 4,4,0 }, { 4,4,4 }, { 0,1,0 }, { 1,1,0 }, { 2,1,0 }, { 3,1,0 }, { 4,1,0 }, { 1,4,0 }, { 4,4,1 }, { 4,4,4 },
+		{ 0,2,0 }, { 1,2,0 }, { 2,2,0 }, { 3,2,0 }, { 4,2,0 }, { 2,4,0 }, { 4,4,2 }, { 4,4,4 }, { 0,3,0 }, { 1,3,0 }, { 2,3,0 }, { 3,3,0 }, { 4,3,0 }, { 3,4,0 }, { 4,4,3 }, { 4,4,4 },
+		{ 0,0,1 }, { 1,0,1 }, { 2,0,1 }, { 3,0,1 }, { 4,0,1 }, { 0,4,1 }, { 4,0,4 }, { 0,4,4 }, { 0,1,1 }, { 1,1,1 }, { 2,1,1 }, { 3,1,1 }, { 4,1,1 }, { 1,4,1 }, { 4,1,4 }, { 1,4,4 },
+		{ 0,2,1 }, { 1,2,1 }, { 2,2,1 }, { 3,2,1 }, { 4,2,1 }, { 2,4,1 }, { 4,2,4 }, { 2,4,4 }, { 0,3,1 }, { 1,3,1 }, { 2,3,1 }, { 3,3,1 }, { 4,3,1 }, { 3,4,1 }, { 4,3,4 }, { 3,4,4 },
+		{ 0,0,2 }, { 1,0,2 }, { 2,0,2 }, { 3,0,2 }, { 4,0,2 }, { 0,4,2 }, { 2,0,4 }, { 3,0,4 }, { 0,1,2 }, { 1,1,2 }, { 2,1,2 }, { 3,1,2 }, { 4,1,2 }, { 1,4,2 }, { 2,1,4 }, { 3,1,4 },
+		{ 0,2,2 }, { 1,2,2 }, { 2,2,2 }, { 3,2,2 }, { 4,2,2 }, { 2,4,2 }, { 2,2,4 }, { 3,2,4 }, { 0,3,2 }, { 1,3,2 }, { 2,3,2 }, { 3,3,2 }, { 4,3,2 }, { 3,4,2 }, { 2,3,4 }, { 3,3,4 },
+		{ 0,0,3 }, { 1,0,3 }, { 2,0,3 }, { 3,0,3 }, { 4,0,3 }, { 0,4,3 }, { 0,0,4 }, { 1,0,4 }, { 0,1,3 }, { 1,1,3 }, { 2,1,3 }, { 3,1,3 }, { 4,1,3 }, { 1,4,3 }, { 0,1,4 }, { 1,1,4 },
+		{ 0,2,3 }, { 1,2,3 }, { 2,2,3 }, { 3,2,3 }, { 4,2,3 }, { 2,4,3 }, { 0,2,4 }, { 1,2,4 }, { 0,3,3 }, { 1,3,3 }, { 2,3,3 }, { 3,3,3 }, { 4,3,3 }, { 3,4,3 }, { 0,3,4 }, { 1,3,4 }
+	};
+	DE_ASSERT(Q < DE_LENGTH_OF_ARRAY(quintsFromQ));
+
+	const deUint32 (& quints)[3] = quintsFromQ[Q];
+	for (int i = 0; i < numValues; i++)
+	{
+		dst[i].m = m[i];
+		dst[i].tq = quints[i];
+		dst[i].v = (quints[i] << numBits) + m[i];
+	}
+}
+
+// Decodes a single plain-bit ISE value: the next numBits bits of the stream
+// become both the raw bits (m) and the final value (v) of the first element
+// of dst. The tq field is left untouched.
+inline void decodeISEBitBlock (ISEDecodedResult* dst, BitAccessStream& data, int numBits)
+{
+	ISEDecodedResult& out = *dst;
+	out.m = data.getNext(numBits);
+	out.v = out.m;
+}
+
+// Decodes numValues integer-sequence-encoded values from data into dst.
+// Trit mode consumes the stream in blocks of five values, quint mode in blocks
+// of three; the final block may hold fewer values. Any other mode is expected
+// to be plain-bit and is decoded one value at a time.
+void decodeISE (ISEDecodedResult* dst, int numValues, BitAccessStream& data, const ISEParams& params)
+{
+	const bool tritMode = (params.mode == ISEMODE_TRIT);
+	const bool quintMode = (params.mode == ISEMODE_QUINT);
+
+	if (!tritMode && !quintMode)
+	{
+		DE_ASSERT(params.mode == ISEMODE_PLAIN_BIT);
+		for (int valueNdx = 0; valueNdx < numValues; valueNdx++)
+			decodeISEBitBlock(dst + valueNdx, data, params.numBits);
+		return;
+	}
+
+	const int valuesPerBlock = tritMode ? 5 : 3;
+	for (int firstValue = 0; firstValue < numValues; firstValue += valuesPerBlock)
+	{
+		// Only the last block can be short.
+		const int remaining = numValues - firstValue;
+		const int numValuesInBlock = (remaining < valuesPerBlock) ? remaining : valuesPerBlock;
+
+		if (tritMode)
+			decodeISETritBlock(dst + firstValue, numValuesInBlock, data, params.numBits);
+		else
+			decodeISEQuintBlock(dst + firstValue, numValuesInBlock, data, params.numBits);
+	}
+}
+
+// Expands decoded ISE color endpoint values into dst, one entry per value.
+// Plain-bit values are widened with bitReplicationScale(..., 8). Trit and quint
+// values are rebuilt from the digit (tq), a per-range constant C and a bit
+// pattern B derived from the plain bits (m).
+void unquantizeColorEndpoints (deUint32* dst, const ISEDecodedResult* iseResults, int numEndpoints, const ISEParams& iseParams)
+{
+	if ((iseParams.mode != ISEMODE_TRIT) && (iseParams.mode != ISEMODE_QUINT))
+	{
+		DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
+		for (int ndx = 0; ndx < numEndpoints; ndx++)
+			dst[ndx] = bitReplicationScale(iseResults[ndx].v, iseParams.numBits, 8);
+		return;
+	}
+
+	// Trits map to the even range cases (0, 2, ...), quints to the odd ones (1, 3, ...).
+	const int rangeCase = iseParams.numBits*2 - (iseParams.mode == ISEMODE_TRIT ? 2 : 1);
+	DE_ASSERT(basisu_astc::inRange(rangeCase, 0, 10));
+
+	static const deUint32 Ca[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 };
+	const deUint32 C = Ca[rangeCase];
+
+	for (int ndx = 0; ndx < numEndpoints; ndx++)
+	{
+		const ISEDecodedResult& value = iseResults[ndx];
+
+		const deUint32 a = getBit(value.m, 0);
+		const deUint32 b = getBit(value.m, 1);
+		const deUint32 c = getBit(value.m, 2);
+		const deUint32 d = getBit(value.m, 3);
+		const deUint32 e = getBit(value.m, 4);
+		const deUint32 f = getBit(value.m, 5);
+
+		// A is all ones (9 bits) when the lowest plain bit is set, otherwise zero.
+		const deUint32 A = (a == 0) ? 0 : (1<<9)-1;
+
+		deUint32 B = (deUint32)-1;
+		switch (rangeCase)
+		{
+			case 0:
+			case 1:
+				B = 0;
+				break;
+			case 2:
+				B = (b << 8) | (b << 4) | (b << 2) | (b << 1);
+				break;
+			case 3:
+				B = (b << 8) | (b << 3) | (b << 2);
+				break;
+			case 4:
+				B = (c << 8) | (b << 7) | (c << 3) | (b << 2) | (c << 1) | (b << 0);
+				break;
+			case 5:
+				B = (c << 8) | (b << 7) | (c << 2) | (b << 1) | (c << 0);
+				break;
+			case 6:
+				B = (d << 8) | (c << 7) | (b << 6) | (d << 2) | (c << 1) | (b << 0);
+				break;
+			case 7:
+				B = (d << 8) | (c << 7) | (b << 6) | (d << 1) | (c << 0);
+				break;
+			case 8:
+				B = (e << 8) | (d << 7) | (c << 6) | (b << 5) | (e << 1) | (d << 0);
+				break;
+			case 9:
+				B = (e << 8) | (d << 7) | (c << 6) | (b << 5) | (e << 0);
+				break;
+			case 10:
+				B = (f << 8) | (e << 7) | (d << 6) | (c << 5) | (b << 4) | (f << 0);
+				break;
+			default:
+				break;
+		}
+
+		DE_ASSERT(B != (deUint32)-1);
+		dst[ndx] = (((value.tq*C + B) ^ A) >> 2) | (A & 0x80);
+	}
+}
+
+// Moves bit 7 of a into bit 7 of b (after halving b), then turns the remaining
+// bits of a into a signed 6-bit quantity in the range [-32, 31].
+inline void bitTransferSigned (deInt32& a, deInt32& b)
+{
+	b >>= 1;
+	b |= (a & 0x80);
+
+	a = (a >> 1) & 0x3f;
+	if (isBitSet(a, 5))
+		a -= 0x40; // bit 5 is the sign bit of the 6-bit value
+}
+
+// Returns rgba with each component clamped to [0, 0xff].
+inline UVec4 clampedRGBA (const IVec4& rgba)
+{
+	const int lo = 0;
+	const int hi = 0xff;
+
+	return UVec4(basisu_astc::clamp(rgba.x(), lo, hi),
+				 basisu_astc::clamp(rgba.y(), lo, hi),
+				 basisu_astc::clamp(rgba.z(), lo, hi),
+				 basisu_astc::clamp(rgba.w(), lo, hi));
+}
+
+// Replaces red and green by their average with blue (shifted right by one);
+// blue and alpha pass through unchanged.
+inline IVec4 blueContract (int r, int g, int b, int a)
+{
+	const int contractedR = (r + b) >> 1;
+	const int contractedG = (g + b) >> 1;
+	return IVec4(contractedR, contractedG, b, a);
+}
+
+// Returns true for the color endpoint modes treated as HDR: 2, 3, 7, 11, 14 and 15.
+inline bool isColorEndpointModeHDR (deUint32 mode)
+{
+	switch (mode)
+	{
+		case 2:
+		case 3:
+		case 7:
+		case 11:
+		case 14:
+		case 15:
+			return true;
+		default:
+			return false;
+	}
+}
+
+// Decodes the endpoint pair for color endpoint mode 7 from four unquantized
+// values. The values encode a base RGB color and a scale; e1 receives the base
+// color and e0 the base color minus the scale, each channel clamped to
+// [0, 0xfff]. Alpha of both endpoints is set to 0x780.
+void decodeHDREndpointMode7 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3)
+{
+ // Two 2-bit selectors taken from the top bits of v0..v2.
+ const deUint32 m10 = getBit(v1, 7) | (getBit(v2, 7) << 1);
+ const deUint32 m23 = getBits(v0, 6, 7);
+
+ // Channel that is swapped with red at the end (0 = none, 1 = green, 2 = blue).
+ const deUint32 majComp = (m10 != 3) ? m10
+ : (m23 != 3) ? m23
+ : 0;
+
+ // Sub-mode 0..5; selects where the spare bits go and the final shift amount.
+ const deUint32 mode = (m10 != 3) ? m23
+ : (m23 != 3) ? 4
+ : 5;
+
+ // Low-order bits that are stored at fixed positions in every sub-mode.
+ deInt32 red = (deInt32)getBits(v0, 0, 5);
+ deInt32 green = (deInt32)getBits(v1, 0, 4);
+ deInt32 blue = (deInt32)getBits(v2, 0, 4);
+ deInt32 scale = (deInt32)getBits(v3, 0, 4);
+
+ {
+ // SHOR ORs a single bit into a variable at the given position;
+ // ASSIGN_X_BITS does that for x0..x6 with per-sub-mode destinations.
+#define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
+#define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5, V6,S6) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); SHOR(V6,S6,x6); } while (false)
+
+ // The seven spare bits whose meaning depends on the sub-mode.
+ const deUint32 x0 = getBit(v1, 6);
+ const deUint32 x1 = getBit(v1, 5);
+ const deUint32 x2 = getBit(v2, 6);
+ const deUint32 x3 = getBit(v2, 5);
+ const deUint32 x4 = getBit(v3, 7);
+ const deUint32 x5 = getBit(v3, 6);
+ const deUint32 x6 = getBit(v3, 5);
+
+ // Short aliases so the table below stays readable.
+ deInt32& R = red;
+ deInt32& G = green;
+ deInt32& B = blue;
+ deInt32& S = scale;
+
+ switch (mode)
+ {
+ case 0: ASSIGN_X_BITS(R,9, R,8, R,7, R,10, R,6, S,6, S,5); break;
+ case 1: ASSIGN_X_BITS(R,8, G,5, R,7, B,5, R,6, R,10, R,9); break;
+ case 2: ASSIGN_X_BITS(R,9, R,8, R,7, R,6, S,7, S,6, S,5); break;
+ case 3: ASSIGN_X_BITS(R,8, G,5, R,7, B,5, R,6, S,6, S,5); break;
+ case 4: ASSIGN_X_BITS(G,6, G,5, B,6, B,5, R,6, R,7, S,5); break;
+ case 5: ASSIGN_X_BITS(G,6, G,5, B,6, B,5, R,6, S,6, S,5); break;
+ default:
+ DE_ASSERT(false);
+ }
+#undef ASSIGN_X_BITS
+#undef SHOR
+ }
+
+ // Per-sub-mode left shift applied to all four components.
+ static const int shiftAmounts[] = { 1, 1, 2, 3, 4, 5 };
+ DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(shiftAmounts));
+
+ red <<= shiftAmounts[mode];
+ green <<= shiftAmounts[mode];
+ blue <<= shiftAmounts[mode];
+ scale <<= shiftAmounts[mode];
+
+ // In every sub-mode except 5, green and blue are stored as differences from red.
+ if (mode != 5)
+ {
+ green = red - green;
+ blue = red - blue;
+ }
+
+ if (majComp == 1)
+ std::swap(red, green);
+ else if (majComp == 2)
+ std::swap(red, blue);
+
+ e0 = UVec4(basisu_astc::clamp(red - scale, 0, 0xfff),
+ basisu_astc::clamp(green - scale, 0, 0xfff),
+ basisu_astc::clamp(blue - scale, 0, 0xfff),
+ 0x780);
+
+ e1 = UVec4(basisu_astc::clamp(red, 0, 0xfff),
+ basisu_astc::clamp(green, 0, 0xfff),
+ basisu_astc::clamp(blue, 0, 0xfff),
+ 0x780);
+}
+
+// Decodes the endpoint pair for color endpoint mode 11 from six unquantized
+// values. Alpha of both endpoints is set to 0x780. This routine is also used
+// for the RGB part of modes 14 and 15.
+void decodeHDREndpointMode11 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5)
+{
+ // 2-bit selector built from the top bits of v4 and v5.
+ const deUint32 major = (getBit(v5, 7) << 1) | getBit(v4, 7);
+
+ if (major == 3)
+ {
+ // Direct form: the values are the channels themselves, only shifted up.
+ e0 = UVec4(v0<<4, v2<<4, getBits(v4,0,6)<<5, 0x780);
+ e1 = UVec4(v1<<4, v3<<4, getBits(v5,0,6)<<5, 0x780);
+ }
+ else
+ {
+ // 3-bit sub-mode built from the top bits of v1..v3.
+ const deUint32 mode = (getBit(v3, 7) << 2) | (getBit(v2, 7) << 1) | getBit(v1, 7);
+
+ // Fixed-position low bits of the six fields a, c, b0, b1, d0, d1.
+ deInt32 a = (deInt32)((getBit(v1, 6) << 8) | v0);
+ deInt32 c = (deInt32)(getBits(v1, 0, 5));
+ deInt32 b0 = (deInt32)(getBits(v2, 0, 5));
+ deInt32 b1 = (deInt32)(getBits(v3, 0, 5));
+ deInt32 d0 = (deInt32)(getBits(v4, 0, 4));
+ deInt32 d1 = (deInt32)(getBits(v5, 0, 4));
+
+ {
+ // SHOR ORs a single bit into a variable at the given position;
+ // ASSIGN_X_BITS does that for x0..x5 with per-sub-mode destinations.
+#define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
+#define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); } while (false)
+ const deUint32 x0 = getBit(v2, 6);
+ const deUint32 x1 = getBit(v3, 6);
+ const deUint32 x2 = getBit(v4, 6);
+ const deUint32 x3 = getBit(v5, 6);
+ const deUint32 x4 = getBit(v4, 5);
+ const deUint32 x5 = getBit(v5, 5);
+
+ switch (mode)
+ {
+ case 0: ASSIGN_X_BITS(b0,6, b1,6, d0,6, d1,6, d0,5, d1,5); break;
+ case 1: ASSIGN_X_BITS(b0,6, b1,6, b0,7, b1,7, d0,5, d1,5); break;
+ case 2: ASSIGN_X_BITS(a,9, c,6, d0,6, d1,6, d0,5, d1,5); break;
+ case 3: ASSIGN_X_BITS(b0,6, b1,6, a,9, c,6, d0,5, d1,5); break;
+ case 4: ASSIGN_X_BITS(b0,6, b1,6, b0,7, b1,7, a,9, a,10); break;
+ case 5: ASSIGN_X_BITS(a,9, a,10, c,7, c,6, d0,5, d1,5); break;
+ case 6: ASSIGN_X_BITS(b0,6, b1,6, a,11, c,6, a,9, a,10); break;
+ case 7: ASSIGN_X_BITS(a,9, a,10, a,11, c,6, d0,5, d1,5); break;
+ default:
+ DE_ASSERT(false);
+ }
+#undef ASSIGN_X_BITS
+#undef SHOR
+ }
+
+ // d0 and d1 are signed; their width depends on the sub-mode.
+ static const int numDBits[] = { 7, 6, 7, 6, 5, 6, 5, 6 };
+ DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(numDBits));
+ d0 = signExtend(d0, numDBits[mode]);
+ d1 = signExtend(d1, numDBits[mode]);
+
+ // The shifts go through uint32_t, presumably so that the possibly
+ // negative d0/d1 are never left-shifted as signed values.
+ const int shiftAmount = (mode >> 1) ^ 3;
+ a = (uint32_t)a << shiftAmount;
+ c = (uint32_t)c << shiftAmount;
+ b0 = (uint32_t)b0 << shiftAmount;
+ b1 = (uint32_t)b1 << shiftAmount;
+ d0 = (uint32_t)d0 << shiftAmount;
+ d1 = (uint32_t)d1 << shiftAmount;
+
+ e0 = UVec4(basisu_astc::clamp(a-c, 0, 0xfff), basisu_astc::clamp(a-b0-c-d0, 0, 0xfff), basisu_astc::clamp(a-b1-c-d1, 0, 0xfff), 0x780);
+ e1 = UVec4(basisu_astc::clamp(a, 0, 0xfff), basisu_astc::clamp(a-b0, 0, 0xfff), basisu_astc::clamp(a-b1, 0, 0xfff), 0x780);
+
+ // major selects which channel trades places with the first one.
+ if (major == 1)
+ {
+ std::swap(e0.x(), e0.y());
+ std::swap(e1.x(), e1.y());
+ }
+ else if (major == 2)
+ {
+ std::swap(e0.x(), e0.z());
+ std::swap(e1.x(), e1.z());
+ }
+ }
+}
+
+// Decodes the endpoint pair for color endpoint mode 15 from eight unquantized
+// values. RGB is decoded exactly as in mode 11 from v0..v5; the alpha channel
+// of both endpoints is decoded here from v6In and v7In.
+void decodeHDREndpointMode15(UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5, deUint32 v6In, deUint32 v7In)
+{
+	decodeHDREndpointMode11(e0, e1, v0, v1, v2, v3, v4, v5);
+
+	// 2-bit alpha sub-mode from the top bits; the low 7 bits are the payload.
+	const deUint32 mode = (getBit(v7In, 7) << 1) | getBit(v6In, 7);
+	deInt32 v6 = (deInt32)getBits(v6In, 0, 6);
+	deInt32 v7 = (deInt32)getBits(v7In, 0, 6);
+
+	if (mode == 3)
+	{
+		// Direct form: both alphas are stored as-is, only shifted up.
+		e0.w() = v6 << 5;
+		e1.w() = v7 << 5;
+	}
+	else
+	{
+		// Base + signed offset form. The high bits of v7 extend v6; the
+		// remaining low bits of v7 are sign-extended via the xor/subtract pair.
+		v6 |= (v7 << (mode+1)) & 0x780;
+		v7 &= (0x3f >> mode);
+		v7 ^= 0x20 >> mode;
+		v7 -= 0x20 >> mode;
+		v6 <<= 4-mode;
+		// v7 can be negative here; shift through an unsigned type (as
+		// decodeHDREndpointMode11 does) because left-shifting a negative
+		// signed value is undefined before C++20.
+		v7 = (deInt32)((deUint32)v7 << (4-mode));
+		v7 += v6;
+		v7 = basisu_astc::clamp(v7, 0, 0xfff);
+		e0.w() = v6;
+		e1.w() = v7;
+	}
+}
+
+// Converts unquantized endpoint values into one endpoint pair per partition.
+//
+// dst                  - receives numPartitions endpoint pairs (e0, e1).
+// unquantizedEndpoints - all endpoint values, partitions stored back to back.
+// endpointModes        - color endpoint mode (0..15) of each partition.
+// numPartitions        - number of partitions to decode.
+//
+// Modes 2, 3, 7, 11, 14 and 15 are the ones isColorEndpointModeHDR() reports
+// as HDR; they produce channels up to 0xfff and (except for the alpha of
+// modes 14/15) an alpha of 0x780. The other modes produce 8-bit channels.
+void decodeColorEndpoints (ColorEndpointPair* dst, const deUint32* unquantizedEndpoints, const deUint32* endpointModes, int numPartitions)
+{
+ // Index of the first value belonging to the current partition.
+ int unquantizedNdx = 0;
+
+ for (int partitionNdx = 0; partitionNdx < numPartitions; partitionNdx++)
+ {
+ const deUint32 endpointMode = endpointModes[partitionNdx];
+ const deUint32* v = &unquantizedEndpoints[unquantizedNdx];
+
+ UVec4& e0 = dst[partitionNdx].e0;
+ UVec4& e1 = dst[partitionNdx].e1;
+ // Advance past this partition's values; v already points at them.
+ unquantizedNdx += computeNumColorEndpointValues(endpointMode);
+
+ switch (endpointMode)
+ {
+ // Mode 0: two values, each replicated across RGB; alpha fixed at 0xff.
+ case 0:
+ {
+ e0 = UVec4(v[0], v[0], v[0], 0xff);
+ e1 = UVec4(v[1], v[1], v[1], 0xff);
+ break;
+ }
+ // Mode 1: base value plus a 6-bit delta, saturated at 0xff.
+ case 1:
+ {
+ const deUint32 L0 = (v[0] >> 2) | (getBits(v[1], 6, 7) << 6);
+ const deUint32 L1 = basisu_astc::min(0xffu, L0 + getBits(v[1], 0, 5));
+ e0 = UVec4(L0, L0, L0, 0xff);
+ e1 = UVec4(L1, L1, L1, 0xff);
+ break;
+ }
+ // Mode 2: two values shifted up by 4; the order of v[0]/v[1] selects
+ // whether a +8/-8 offset is applied.
+ case 2:
+ {
+ const deUint32 v1Gr = v[1] >= v[0];
+ const deUint32 y0 = v1Gr ? v[0]<<4 : (v[1]<<4) + 8;
+ const deUint32 y1 = v1Gr ? v[1]<<4 : (v[0]<<4) - 8;
+ e0 = UVec4(y0, y0, y0, 0x780);
+ e1 = UVec4(y1, y1, y1, 0x780);
+ break;
+ }
+ // Mode 3: base value plus delta in one of two bit layouts chosen by
+ // bit 7 of v[0]; the sum saturates at 0xfff.
+ case 3:
+ {
+ const bool m = isBitSet(v[0], 7);
+ const deUint32 y0 = m ? (getBits(v[1], 5, 7) << 9) | (getBits(v[0], 0, 6) << 2)
+ : (getBits(v[1], 4, 7) << 8) | (getBits(v[0], 0, 6) << 1);
+ const deUint32 d = m ? getBits(v[1], 0, 4) << 2
+ : getBits(v[1], 0, 3) << 1;
+ const deUint32 y1 = basisu_astc::min(0xfffu, y0+d);
+ e0 = UVec4(y0, y0, y0, 0x780);
+ e1 = UVec4(y1, y1, y1, 0x780);
+ break;
+ }
+ // Mode 4: like mode 0, with alpha taken from v[2] and v[3].
+ case 4:
+ {
+ e0 = UVec4(v[0], v[0], v[0], v[2]);
+ e1 = UVec4(v[1], v[1], v[1], v[3]);
+ break;
+ }
+ // Mode 5: base + signed offset for both the replicated RGB value and alpha.
+ case 5:
+ {
+ deInt32 v0 = (deInt32)v[0];
+ deInt32 v1 = (deInt32)v[1];
+ deInt32 v2 = (deInt32)v[2];
+ deInt32 v3 = (deInt32)v[3];
+ bitTransferSigned(v1, v0);
+ bitTransferSigned(v3, v2);
+ e0 = clampedRGBA(IVec4(v0, v0, v0, v2));
+ e1 = clampedRGBA(IVec4(v0+v1, v0+v1, v0+v1, v2+v3));
+ break;
+ }
+ // Mode 6: e1 is RGB from v[0..2]; e0 is that color scaled by v[3]/256.
+ case 6:
+ e0 = UVec4((v[0]*v[3]) >> 8, (v[1]*v[3]) >> 8, (v[2]*v[3]) >> 8, 0xff);
+ e1 = UVec4(v[0], v[1], v[2], 0xff);
+ break;
+ case 7:
+ decodeHDREndpointMode7(e0, e1, v[0], v[1], v[2], v[3]);
+ break;
+ // Mode 8: two direct RGB colors; if the second sums lower than the
+ // first, the endpoints are swapped and blue-contracted.
+ case 8:
+ {
+ if (v[1]+v[3]+v[5] >= v[0]+v[2]+v[4])
+ {
+ e0 = UVec4(v[0], v[2], v[4], 0xff);
+ e1 = UVec4(v[1], v[3], v[5], 0xff);
+ }
+ else
+ {
+ e0 = blueContract(v[1], v[3], v[5], 0xff).asUint();
+ e1 = blueContract(v[0], v[2], v[4], 0xff).asUint();
+ }
+ break;
+ }
+ // Mode 9: RGB base + signed offsets; a negative offset sum selects
+ // the swapped, blue-contracted form.
+ case 9:
+ {
+ deInt32 v0 = (deInt32)v[0];
+ deInt32 v1 = (deInt32)v[1];
+ deInt32 v2 = (deInt32)v[2];
+ deInt32 v3 = (deInt32)v[3];
+ deInt32 v4 = (deInt32)v[4];
+ deInt32 v5 = (deInt32)v[5];
+ bitTransferSigned(v1, v0);
+ bitTransferSigned(v3, v2);
+ bitTransferSigned(v5, v4);
+ if (v1+v3+v5 >= 0)
+ {
+ e0 = clampedRGBA(IVec4(v0, v2, v4, 0xff));
+ e1 = clampedRGBA(IVec4(v0+v1, v2+v3, v4+v5, 0xff));
+ }
+ else
+ {
+ e0 = clampedRGBA(blueContract(v0+v1, v2+v3, v4+v5, 0xff));
+ e1 = clampedRGBA(blueContract(v0, v2, v4, 0xff));
+ }
+ break;
+ }
+ // Mode 10: like mode 6, with alpha taken from v[4] and v[5].
+ case 10:
+ {
+ e0 = UVec4((v[0]*v[3]) >> 8, (v[1]*v[3]) >> 8, (v[2]*v[3]) >> 8, v[4]);
+ e1 = UVec4(v[0], v[1], v[2], v[5]);
+ break;
+ }
+ case 11:
+ {
+ decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
+ break;
+ }
+ // Mode 12: like mode 8, with alpha taken from v[6] and v[7].
+ case 12:
+ {
+ if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4])
+ {
+ e0 = UVec4(v[0], v[2], v[4], v[6]);
+ e1 = UVec4(v[1], v[3], v[5], v[7]);
+ }
+ else
+ {
+ e0 = clampedRGBA(blueContract(v[1], v[3], v[5], v[7]));
+ e1 = clampedRGBA(blueContract(v[0], v[2], v[4], v[6]));
+ }
+ break;
+ }
+ // Mode 13: like mode 9, with a base + signed offset alpha from v[6]/v[7].
+ // Only the RGB offsets take part in the swap decision.
+ case 13:
+ {
+ deInt32 v0 = (deInt32)v[0];
+ deInt32 v1 = (deInt32)v[1];
+ deInt32 v2 = (deInt32)v[2];
+ deInt32 v3 = (deInt32)v[3];
+ deInt32 v4 = (deInt32)v[4];
+ deInt32 v5 = (deInt32)v[5];
+ deInt32 v6 = (deInt32)v[6];
+ deInt32 v7 = (deInt32)v[7];
+ bitTransferSigned(v1, v0);
+ bitTransferSigned(v3, v2);
+ bitTransferSigned(v5, v4);
+ bitTransferSigned(v7, v6);
+ if (v1+v3+v5 >= 0)
+ {
+ e0 = clampedRGBA(IVec4(v0, v2, v4, v6));
+ e1 = clampedRGBA(IVec4(v0+v1, v2+v3, v4+v5, v6+v7));
+ }
+ else
+ {
+ e0 = clampedRGBA(blueContract(v0+v1, v2+v3, v4+v5, v6+v7));
+ e1 = clampedRGBA(blueContract(v0, v2, v4, v6));
+ }
+ break;
+ }
+ // Mode 14: mode 11 RGB with alpha taken directly from v[6] and v[7].
+ case 14:
+ decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
+ e0.w() = v[6];
+ e1.w() = v[7];
+ break;
+ case 15:
+ {
+ decodeHDREndpointMode15(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
+ break;
+ }
+ default:
+ DE_ASSERT(false);
+ }
+ }
+}
+
+// Reads the color endpoint data of a block and turns it into endpoint pairs:
+// ISE-decode numColorEndpointValues values, unquantize them, then decode them
+// per partition according to endpointModes. The endpoint data starts at bit 17
+// for single-partition blocks and at bit 29 otherwise.
+void computeColorEndpoints (ColorEndpointPair* dst, const Block128& blockData, const deUint32* endpointModes, int numPartitions, int numColorEndpointValues, const ISEParams& iseParams, int numBitsAvailable)
+{
+	ISEDecodedResult decodedValues[18];
+	deUint32 unquantizedValues[18];
+
+	const int firstEndpointBit = (numPartitions == 1) ? 17 : 29;
+	BitAccessStream endpointStream(blockData, firstEndpointBit, numBitsAvailable, true);
+
+	decodeISE(decodedValues, numColorEndpointValues, endpointStream, iseParams);
+	unquantizeColorEndpoints(unquantizedValues, decodedValues, numColorEndpointValues, iseParams);
+	decodeColorEndpoints(dst, unquantizedValues, endpointModes, numPartitions);
+}
+
+// Expands the decoded ISE weights of a block into dst. The first
+// computeNumWeights(blockMode) entries receive unquantized weights; values
+// above 32 are then incremented by one. The remaining entries up to index 63
+// are filled with ~0u.
+void unquantizeWeights (deUint32 dst[64], const ISEDecodedResult* weightGrid, const ASTCBlockMode& blockMode)
+{
+ const int numWeights = computeNumWeights(blockMode);
+ const ISEParams& iseParams = blockMode.weightISEParams;
+
+ if ((iseParams.mode == ISEMODE_TRIT) || (iseParams.mode == ISEMODE_QUINT))
+ {
+ // Even range cases are trits, odd ones quints; the index grows with numBits.
+ const int rangeCase = iseParams.numBits*2 + (iseParams.mode == ISEMODE_QUINT ? 1 : 0);
+
+ if ((rangeCase == 0) || (rangeCase == 1))
+ {
+ // No plain bits: the value is the trit/quint itself, mapped by table.
+ static const deUint32 map0[3] = { 0, 32, 63 };
+ static const deUint32 map1[5] = { 0, 16, 32, 47, 63 };
+ const deUint32* const map = (rangeCase == 0) ? &map0[0] : &map1[0];
+
+ for (int i = 0; i < numWeights; i++)
+ {
+ DE_ASSERT(weightGrid[i].v < (rangeCase == 0 ? 3u : 5u));
+ dst[i] = map[weightGrid[i].v];
+ }
+ }
+ else
+ {
+ DE_ASSERT(rangeCase <= 6);
+ // Per-range multiplier for the trit/quint digit, indexed by rangeCase-2.
+ static const deUint32 Ca[5] = { 50, 28, 23, 13, 11 };
+ const deUint32 C = Ca[rangeCase-2];
+
+ for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
+ {
+ const deUint32 a = getBit(weightGrid[weightNdx].m, 0);
+ const deUint32 b = getBit(weightGrid[weightNdx].m, 1);
+ const deUint32 c = getBit(weightGrid[weightNdx].m, 2);
+
+ // A is all ones (7 bits) when the lowest plain bit is set, otherwise zero.
+ const deUint32 A = (a == 0) ? 0 : (1<<7)-1;
+ // B spreads the remaining plain bits; the -1 fallback is not asserted on here.
+ const deUint32 B = (rangeCase == 2) ? 0
+ : (rangeCase == 3) ? 0
+ : (rangeCase == 4) ? (b << 6) | (b << 2) | (b << 0)
+ : (rangeCase == 5) ? (b << 6) | (b << 1)
+ : (rangeCase == 6) ? (c << 6) | (b << 5) | (c << 1) | (b << 0)
+ : (deUint32)-1;
+
+ dst[weightNdx] = (((weightGrid[weightNdx].tq*C + B) ^ A) >> 2) | (A & 0x20);
+ }
+ }
+ }
+ else
+ {
+ DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
+ for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
+ dst[weightNdx] = bitReplicationScale(weightGrid[weightNdx].v, iseParams.numBits, 6);
+ }
+
+ // Values above 32 are bumped by one, so the largest 6-bit value 63 becomes 64.
+ for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
+ dst[weightNdx] += dst[weightNdx] > 32 ? 1 : 0;
+
+ // Initialize nonexistent weights to poison values
+ for (int weightNdx = numWeights; weightNdx < 64; weightNdx++)
+ dst[weightNdx] = ~0u;
+}
+
+// Computes the per-texel weights of a block by bilinearly interpolating the
+// (possibly smaller) weight grid. For each texel, dst[texelY*blockWidth+texelX]
+// receives one weight, or two when the block mode is dual-plane.
+// NOTE(review): scaleX/scaleY divide by blockWidth-1 / blockHeight-1, so a
+// block dimension of 1 would divide by zero - confirm callers never pass one.
+void interpolateWeights (TexelWeightPair* dst, const deUint32 (&unquantizedWeights) [64], int blockWidth, int blockHeight, const ASTCBlockMode& blockMode)
+{
+ const int numWeightsPerTexel = blockMode.isDualPlane ? 2 : 1;
+ const deUint32 scaleX = (1024 + blockWidth/2) / (blockWidth-1);
+ const deUint32 scaleY = (1024 + blockHeight/2) / (blockHeight-1);
+ DE_ASSERT(blockMode.weightGridWidth*blockMode.weightGridHeight*numWeightsPerTexel <= (int)DE_LENGTH_OF_ARRAY(unquantizedWeights));
+
+ for (int texelY = 0; texelY < blockHeight; texelY++)
+ {
+ for (int texelX = 0; texelX < blockWidth; texelX++)
+ {
+ // Texel position in weight-grid space, with 4 fractional bits.
+ const deUint32 gX = (scaleX*texelX*(blockMode.weightGridWidth-1) + 32) >> 6;
+ const deUint32 gY = (scaleY*texelY*(blockMode.weightGridHeight-1) + 32) >> 6;
+ // Integer grid cell (j) and fraction within the cell (f, 0..15).
+ const deUint32 jX = gX >> 4;
+ const deUint32 jY = gY >> 4;
+ const deUint32 fX = gX & 0xf;
+ const deUint32 fY = gY & 0xf;
+ // Bilinear factors of the four surrounding grid points; they sum to 16.
+ const deUint32 w11 = (fX*fY + 8) >> 4;
+ const deUint32 w10 = fY - w11;
+ const deUint32 w01 = fX - w11;
+ const deUint32 w00 = 16 - fX - fY + w11;
+ // Grid indices of the four surrounding points (row-major).
+ const deUint32 i00 = jY*blockMode.weightGridWidth + jX;
+ const deUint32 i01 = i00 + 1;
+ const deUint32 i10 = i00 + blockMode.weightGridWidth;
+ const deUint32 i11 = i00 + blockMode.weightGridWidth + 1;
+
+ // These addresses can be out of bounds, but respective weights will be 0 then.
+ DE_ASSERT(deInBounds32(i00, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w00 == 0);
+ DE_ASSERT(deInBounds32(i01, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w01 == 0);
+ DE_ASSERT(deInBounds32(i10, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w10 == 0);
+ DE_ASSERT(deInBounds32(i11, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w11 == 0);
+
+ for (int texelWeightNdx = 0; texelWeightNdx < numWeightsPerTexel; texelWeightNdx++)
+ {
+ // & 0x3f clamps address to bounds of unquantizedWeights
+ // (the weights of the planes are interleaved, hence the multiply).
+ const deUint32 p00 = unquantizedWeights[(i00 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
+ const deUint32 p01 = unquantizedWeights[(i01 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
+ const deUint32 p10 = unquantizedWeights[(i10 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
+ const deUint32 p11 = unquantizedWeights[(i11 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
+
+ dst[texelY*blockWidth + texelX].w[texelWeightNdx] = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
+ }
+ }
+ }
+}
+
+// Produces the per-texel weights of a block: ISE-decode the weight grid from a
+// stream that starts at bit 127 of the block, unquantize it, and interpolate it
+// up to blockWidth x blockHeight texels.
+void computeTexelWeights (TexelWeightPair* dst, const Block128& blockData, int blockWidth, int blockHeight, const ASTCBlockMode& blockMode)
+{
+	const int numWeights = computeNumWeights(blockMode);
+	const int numWeightBits = computeNumRequiredBits(blockMode.weightISEParams, numWeights);
+
+	ISEDecodedResult decodedGrid[64];
+	BitAccessStream weightStream(blockData, 127, numWeightBits, false);
+	decodeISE(decodedGrid, numWeights, weightStream, blockMode.weightISEParams);
+
+	deUint32 unquantizedGrid[64];
+	unquantizeWeights(unquantizedGrid, decodedGrid, blockMode);
+	interpolateWeights(dst, unquantizedGrid, blockWidth, blockHeight, blockMode);
+}
+
+// Scrambles a 32-bit value with a fixed sequence of shift/xor/add/subtract
+// steps; the result seeds the partition selection in computeTexelPartition().
+inline deUint32 hash52 (deUint32 v)
+{
+	v ^= v >> 15;
+	v -= v << 17;
+	v += v << 7;
+	v += v << 4;
+	v ^= v >> 5;
+	v += v << 16;
+	v ^= v >> 7;
+	v ^= v >> 3;
+	v ^= v << 6;
+	v ^= v >> 17;
+	return v;
+}
+
+// Returns the partition index (0..3) that a texel belongs to.
+//
+// seedIn        - partition seed read from the block.
+// xIn, yIn, zIn - texel coordinates inside the block; zIn is asserted to be 0.
+// numPartitions - number of partitions in the block.
+// smallBlock    - when true, the coordinates are doubled before use.
+int computeTexelPartition (deUint32 seedIn, deUint32 xIn, deUint32 yIn, deUint32 zIn, int numPartitions, bool smallBlock)
+{
+ DE_ASSERT(zIn == 0);
+
+ const deUint32 x = smallBlock ? xIn << 1 : xIn;
+ const deUint32 y = smallBlock ? yIn << 1 : yIn;
+ const deUint32 z = smallBlock ? zIn << 1 : zIn;
+ // Fold the partition count into the seed so each count gets its own patterns.
+ const deUint32 seed = seedIn + 1024*(numPartitions-1);
+ const deUint32 rnum = hash52(seed);
+
+ // Twelve 4-bit pseudo-random values cut out of the hash (some overlap).
+ deUint8 seed1 = (deUint8)( rnum & 0xf);
+ deUint8 seed2 = (deUint8)((rnum >> 4) & 0xf);
+ deUint8 seed3 = (deUint8)((rnum >> 8) & 0xf);
+ deUint8 seed4 = (deUint8)((rnum >> 12) & 0xf);
+ deUint8 seed5 = (deUint8)((rnum >> 16) & 0xf);
+ deUint8 seed6 = (deUint8)((rnum >> 20) & 0xf);
+ deUint8 seed7 = (deUint8)((rnum >> 24) & 0xf);
+ deUint8 seed8 = (deUint8)((rnum >> 28) & 0xf);
+ deUint8 seed9 = (deUint8)((rnum >> 18) & 0xf);
+ deUint8 seed10 = (deUint8)((rnum >> 22) & 0xf);
+ deUint8 seed11 = (deUint8)((rnum >> 26) & 0xf);
+ deUint8 seed12 = (deUint8)(((rnum >> 30) | (rnum << 2)) & 0xf);
+
+ // Square each value; 15*15 = 225 still fits in 8 bits.
+ seed1 = (deUint8)(seed1 * seed1 );
+ seed2 = (deUint8)(seed2 * seed2 );
+ seed3 = (deUint8)(seed3 * seed3 );
+ seed4 = (deUint8)(seed4 * seed4 );
+ seed5 = (deUint8)(seed5 * seed5 );
+ seed6 = (deUint8)(seed6 * seed6 );
+ seed7 = (deUint8)(seed7 * seed7 );
+ seed8 = (deUint8)(seed8 * seed8 );
+ seed9 = (deUint8)(seed9 * seed9 );
+ seed10 = (deUint8)(seed10 * seed10);
+ seed11 = (deUint8)(seed11 * seed11);
+ seed12 = (deUint8)(seed12 * seed12);
+
+ // Right-shift amounts, chosen from low bits of the seed and the partition count.
+ const int shA = (seed & 2) != 0 ? 4 : 5;
+ const int shB = numPartitions == 3 ? 6 : 5;
+ const int sh1 = (seed & 1) != 0 ? shA : shB;
+ const int sh2 = (seed & 1) != 0 ? shB : shA;
+ const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2;
+
+ seed1 = (deUint8)(seed1 >> sh1);
+ seed2 = (deUint8)(seed2 >> sh2);
+ seed3 = (deUint8)(seed3 >> sh1);
+ seed4 = (deUint8)(seed4 >> sh2);
+ seed5 = (deUint8)(seed5 >> sh1);
+ seed6 = (deUint8)(seed6 >> sh2);
+ seed7 = (deUint8)(seed7 >> sh1);
+ seed8 = (deUint8)(seed8 >> sh2);
+ seed9 = (deUint8)(seed9 >> sh3);
+ seed10 = (deUint8)(seed10 >> sh3);
+ seed11 = (deUint8)(seed11 >> sh3);
+ seed12 = (deUint8)(seed12 >> sh3);
+
+ // One 6-bit score per partition; scores of unused partitions are zero.
+ const int a = 0x3f & (seed1*x + seed2*y + seed11*z + (rnum >> 14));
+ const int b = 0x3f & (seed3*x + seed4*y + seed12*z + (rnum >> 10));
+ const int c = (numPartitions >= 3) ? 0x3f & (seed5*x + seed6*y + seed9*z + (rnum >> 6)) : 0;
+ const int d = (numPartitions >= 4) ? 0x3f & (seed7*x + seed8*y + seed10*z + (rnum >> 2)) : 0;
+
+ // Highest score wins; ties go to the lower partition index.
+ return (a >= b && a >= c && a >= d) ? 0
+ : (b >= c && b >= d) ? 1
+ : (c >= d) ? 2
+ : 3;
+}
+
+// Writes the final color of every texel of a block into dst by interpolating
+// between the endpoints of the texel's partition with the texel's weight.
+//
+// dst is written as 4 x deUint8 per texel when isSRGB is true and as 4 x float
+// per texel otherwise. ccs is the channel that uses the second weight plane
+// (no channel matches when it is -1). A texel whose partition uses an HDR
+// endpoint mode while isLDRMode is set gets the color (1, 0, 1, 1) and makes
+// the function return DECOMPRESS_RESULT_ERROR; otherwise the result is
+// DECOMPRESS_RESULT_VALID_BLOCK.
+DecompressResult setTexelColors (void* dst, ColorEndpointPair* colorEndpoints, TexelWeightPair* texelWeights, int ccs, deUint32 partitionIndexSeed,
+ int numPartitions, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode, const deUint32* colorEndpointModes)
+{
+ const bool smallBlock = blockWidth*blockHeight < 31;
+ DecompressResult result = DECOMPRESS_RESULT_VALID_BLOCK;
+ bool isHDREndpoint[4];
+
+ for (int i = 0; i < numPartitions; i++)
+ {
+ isHDREndpoint[i] = isColorEndpointModeHDR(colorEndpointModes[i]);
+ }
+
+ for (int texelY = 0; texelY < blockHeight; texelY++)
+ {
+ for (int texelX = 0; texelX < blockWidth; texelX++)
+ {
+ const int texelNdx = texelY * blockWidth + texelX;
+ const int colorEndpointNdx = (numPartitions == 1) ? 0 : computeTexelPartition(partitionIndexSeed, texelX, texelY, 0, numPartitions, smallBlock);
+
+ DE_ASSERT(colorEndpointNdx < numPartitions);
+ const UVec4& e0 = colorEndpoints[colorEndpointNdx].e0;
+ const UVec4& e1 = colorEndpoints[colorEndpointNdx].e1;
+ const TexelWeightPair& weight = texelWeights[texelNdx];
+
+ // HDR endpoints are not allowed in LDR mode: emit the error color.
+ if (isLDRMode && isHDREndpoint[colorEndpointNdx])
+ {
+ if (isSRGB)
+ {
+ ((deUint8*)dst)[texelNdx * 4 + 0] = 0xff;
+ ((deUint8*)dst)[texelNdx * 4 + 1] = 0;
+ ((deUint8*)dst)[texelNdx * 4 + 2] = 0xff;
+ ((deUint8*)dst)[texelNdx * 4 + 3] = 0xff;
+ }
+ else
+ {
+ ((float*)dst)[texelNdx * 4 + 0] = 1.0f;
+ ((float*)dst)[texelNdx * 4 + 1] = 0;
+ ((float*)dst)[texelNdx * 4 + 2] = 1.0f;
+ ((float*)dst)[texelNdx * 4 + 3] = 1.0f;
+ }
+ result = DECOMPRESS_RESULT_ERROR;
+ }
+ else
+ {
+ for (int channelNdx = 0; channelNdx < 4; channelNdx++)
+ {
+ if (!isHDREndpoint[colorEndpointNdx] || (channelNdx == 3 && colorEndpointModes[colorEndpointNdx] == 14)) // \note Alpha for mode 14 is treated the same as LDR.
+ {
+ // Expand the 8-bit endpoints to 16 bits, then blend with the 0..64 weight.
+ const deUint32 c0 = (e0[channelNdx] << 8) | (isSRGB ? 0x80 : e0[channelNdx]);
+ const deUint32 c1 = (e1[channelNdx] << 8) | (isSRGB ? 0x80 : e1[channelNdx]);
+ const deUint32 w = weight.w[ccs == channelNdx ? 1 : 0];
+ const deUint32 c = (c0 * (64 - w) + c1 * w + 32) / 64;
+
+ if (isSRGB)
+ ((deUint8*)dst)[texelNdx * 4 + channelNdx] = (deUint8)((c & 0xff00) >> 8);
+ else
+ ((float*)dst)[texelNdx * 4 + channelNdx] = (c == 65535) ? 1.0f : (float)c / 65536.0f;
+ }
+ else
+ {
+ DE_ASSERT(!isSRGB);
+ //DE_STATIC_ASSERT((basisu_astc::meta::TypesSame<deFloat16, deUint16>::Value));
+
+ // Expand the 12-bit endpoints to 16 bits, then blend with the 0..64 weight.
+ const deUint32 c0 = e0[channelNdx] << 4;
+ const deUint32 c1 = e1[channelNdx] << 4;
+ const deUint32 w = weight.w[(ccs == channelNdx) ? 1 : 0];
+ const deUint32 c = (c0 * (64 - w) + c1 * w + 32) / 64;
+ // Split into a 5-bit exponent and an 11-bit mantissa, and remap the
+ // mantissa piecewise before assembling the 16-bit float pattern.
+ const deUint32 e = getBits(c, 11, 15);
+ const deUint32 m = getBits(c, 0, 10);
+ const deUint32 mt = (m < 512) ? (3 * m)
+ : (m >= 1536) ? (5 * m - 2048)
+ : (4 * m - 512);
+
+ const deFloat16 cf = (deFloat16)((e << 10) + (mt >> 3));
+
+ // Inf/NaN patterns are replaced by 0x7bff before conversion to float.
+ ((float*)dst)[texelNdx * 4 + channelNdx] = deFloat16To32(isFloat16InfOrNan(cf) ? 0x7bff : cf);
+ }
+
+ } // channelNdx
+ }
+ } // texelX
+ } // texelY
+
+ return result;
+}
+
+// Decompresses one 128-bit ASTC block into dst.
+//
+// dst                     - output texels; layout is determined by isSRGB in
+//                           the helpers called below.
+// blockData               - the 128-bit compressed block.
+// blockWidth, blockHeight - block footprint in texels.
+// isSRGB                  - only valid together with isLDR (asserted).
+// isLDR                   - decode in LDR mode.
+//
+// Returns DECOMPRESS_RESULT_ERROR (after filling dst with the error color) when
+// the block fails one of the validity checks below; otherwise returns whatever
+// the void-extent decoder or setTexelColors() reports.
+DecompressResult decompressBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDR)
+{
+ DE_ASSERT(isLDR || !isSRGB);
+
+ // Decode block mode.
+ const ASTCBlockMode blockMode = getASTCBlockMode(blockData.getBits(0, 10));
+
+ // Check for block mode errors.
+ if (blockMode.isError)
+ {
+ setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
+ return DECOMPRESS_RESULT_ERROR;
+ }
+
+ // Separate path for void-extent.
+ if (blockMode.isVoidExtent)
+ return decodeVoidExtentBlock(dst, blockData, blockWidth, blockHeight, isSRGB, isLDR);
+
+ // Compute weight grid values.
+ const int numWeights = computeNumWeights(blockMode);
+ const int numWeightDataBits = computeNumRequiredBits(blockMode.weightISEParams, numWeights);
+ const int numPartitions = (int)blockData.getBits(11, 12) + 1;
+
+ // Check for errors in weight grid, partition and dual-plane parameters.
+ if ((numWeights > 64) ||
+ (numWeightDataBits > 96) ||
+ (numWeightDataBits < 24) ||
+ (blockMode.weightGridWidth > blockWidth) ||
+ (blockMode.weightGridHeight > blockHeight) ||
+ ((numPartitions == 4) && blockMode.isDualPlane))
+ {
+ setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
+ return DECOMPRESS_RESULT_ERROR;
+ }
+
+ // Compute number of bits available for color endpoint data.
+ const bool isSingleUniqueCem = (numPartitions == 1) || (blockData.getBits(23, 24) == 0);
+
+ const int numConfigDataBits = ((numPartitions == 1) ? 17 : isSingleUniqueCem ? 29 : 25 + 3*numPartitions) +
+ (blockMode.isDualPlane ? 2 : 0);
+
+ const int numBitsForColorEndpoints = 128 - numWeightDataBits - numConfigDataBits;
+
+ // Bit position just below the weight data, lowered further by the number of
+ // extra endpoint-mode bits when partitions use different modes.
+ const int extraCemBitsStart = 127 - numWeightDataBits - (isSingleUniqueCem ? -1
+ : (numPartitions == 4) ? 7
+ : (numPartitions == 3) ? 4
+ : (numPartitions == 2) ? 1
+ : 0);
+
+ // Decode color endpoint modes.
+ deUint32 colorEndpointModes[4];
+ decodeColorEndpointModes(&colorEndpointModes[0], blockData, numPartitions, extraCemBitsStart);
+ const int numColorEndpointValues = computeNumColorEndpointValues(colorEndpointModes, numPartitions);
+
+ // Check for errors in color endpoint value count.
+ if ((numColorEndpointValues > 18) || (numBitsForColorEndpoints < (int)deDivRoundUp32(13*numColorEndpointValues, 5)))
+ {
+ setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
+ return DECOMPRESS_RESULT_ERROR;
+ }
+
+ // Compute color endpoints.
+ ColorEndpointPair colorEndpoints[4];
+ computeColorEndpoints(&colorEndpoints[0], blockData, &colorEndpointModes[0], numPartitions, numColorEndpointValues,
+ computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues), numBitsForColorEndpoints);
+
+ // Compute texel weights.
+ TexelWeightPair texelWeights[MAX_BLOCK_WIDTH*MAX_BLOCK_HEIGHT];
+ computeTexelWeights(&texelWeights[0], blockData, blockWidth, blockHeight, blockMode);
+
+ // Set texel colors.
+ // ccs is the channel that uses the second weight plane; -1 when the block is not dual-plane.
+ const int ccs = blockMode.isDualPlane ? (int)blockData.getBits(extraCemBitsStart-2, extraCemBitsStart-1) : -1;
+ const deUint32 partitionIndexSeed = (numPartitions > 1) ? blockData.getBits(13, 22) : (deUint32)-1;
+
+ return setTexelColors(dst, &colorEndpoints[0], &texelWeights[0], ccs, partitionIndexSeed, numPartitions, blockWidth, blockHeight, isSRGB, isLDR, &colorEndpointModes[0]);
+}
+
+// Classifies a block without decoding its texels.
+// Returns -1 if the block mode or the weight grid / partition / dual-plane
+// parameters are invalid, 1 if the block is a void-extent block with bit 9 set
+// or any partition uses an HDR color endpoint mode, and 0 otherwise.
+int isHDR(const Block128& blockData, int blockWidth, int blockHeight)
+{
+	const ASTCBlockMode blockMode = getASTCBlockMode(blockData.getBits(0, 10));
+
+	if (blockMode.isError)
+		return -1;
+
+	// Void-extent blocks carry their HDR flag directly in bit 9.
+	if (blockMode.isVoidExtent)
+		return blockData.isBitSet(9) ? 1 : 0;
+
+	const int numWeights = computeNumWeights(blockMode);
+	const int numWeightDataBits = computeNumRequiredBits(blockMode.weightISEParams, numWeights);
+	const int numPartitions = (int)blockData.getBits(11, 12) + 1;
+
+	// Same weight grid, partition and dual-plane checks as decompressBlock().
+	const bool badWeightCount = (numWeights > 64) || (numWeightDataBits > 96) || (numWeightDataBits < 24);
+	const bool badGridSize = (blockMode.weightGridWidth > blockWidth) || (blockMode.weightGridHeight > blockHeight);
+	const bool badDualPlane = (numPartitions == 4) && blockMode.isDualPlane;
+	if (badWeightCount || badGridSize || badDualPlane)
+		return -1;
+
+	const bool isSingleUniqueCem = (numPartitions == 1) || (blockData.getBits(23, 24) == 0);
+
+	// Offset subtracted from the position just below the weight data.
+	int cemBitsOffset = 0;
+	if (isSingleUniqueCem)
+		cemBitsOffset = -1;
+	else if (numPartitions == 4)
+		cemBitsOffset = 7;
+	else if (numPartitions == 3)
+		cemBitsOffset = 4;
+	else if (numPartitions == 2)
+		cemBitsOffset = 1;
+
+	const int extraCemBitsStart = 127 - numWeightDataBits - cemBitsOffset;
+
+	deUint32 colorEndpointModes[4];
+	decodeColorEndpointModes(&colorEndpointModes[0], blockData, numPartitions, extraCemBitsStart);
+
+	bool anyHDR = false;
+	for (int partitionNdx = 0; partitionNdx < numPartitions && !anyHDR; partitionNdx++)
+		anyHDR = isColorEndpointModeHDR(colorEndpointModes[partitionNdx]);
+
+	return anyHDR ? 1 : 0;
+}
+
+typedef uint16_t half_float; // half-float bit pattern: sign in bit 15, 5 exponent bits, 10 mantissa bits (see the packing in float_to_half)
+/* Converts a 32-bit float to a half-float bit pattern.
+ * toward_zero: when true the mantissa is truncated; otherwise lrintf() is used, which follows the current rounding mode.
+ * Inf, and finite values whose unbiased exponent exceeds 15, become exponent 31 with a zero mantissa (infinity).
+ * NaN becomes exponent 31 with mantissa 1. Float zeros and float denormals become a signed zero.
+ */
+half_float float_to_half(float val, bool toward_zero)
+{
+ union { float f; int32_t i; uint32_t u; } fi = { val };
+ const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1;
+ int s = flt_s, e = 0, m = 0;
+
+ // inf/NaN
+ if (flt_e == 0xff)
+ {
+ e = 31;
+ if (flt_m != 0) // NaN
+ m = 1;
+ }
+ // not zero or denormal
+ else if (flt_e != 0)
+ {
+ int new_exp = flt_e - 127;
+ if (new_exp > 15)
+ e = 31;
+ else if (new_exp < -14) // below the smallest normal half: encode as a half denormal (e stays 0), mantissa = |val| * 2^24
+ {
+ if (toward_zero)
+ m = (int)truncf((1 << 24) * fabsf(fi.f));
+ else
+ m = lrintf((1 << 24) * fabsf(fi.f));
+ }
+ else
+ {
+ e = new_exp + 15;
+ if (toward_zero)
+ m = (int)truncf((float)flt_m * (1.0f / (float)(1 << 13))); // drop the low 13 of the 23 mantissa bits
+ else
+ m = lrintf((float)flt_m * (1.0f / (float)(1 << 13)));
+ }
+ }
+ // Rounding may have carried the mantissa up to 1024; fold that carry into the exponent.
+ assert((0 <= m) && (m <= 1024));
+ if (m == 1024)
+ {
+ e++;
+ m = 0;
+ }
+
+ assert((s >= 0) && (s <= 1));
+ assert((e >= 0) && (e <= 31));
+ assert((m >= 0) && (m <= 1023));
+
+ half_float result = (half_float)((s << 15) | (e << 10) | m);
+ return result;
+}
+/* Expands a half-float bit pattern to a 32-bit float.
+ * Zeros keep their sign, half denormals are renormalized, infinities stay infinities and
+ * NaNs keep their mantissa bits (shifted up by 13).
+ */
+float half_to_float(half_float hval)
+{
+ union { float f; uint32_t u; } x = { 0 };
+
+ uint32_t s = ((uint32_t)hval >> 15) & 1;
+ uint32_t e = ((uint32_t)hval >> 10) & 0x1F;
+ uint32_t m = (uint32_t)hval & 0x3FF;
+
+ if (!e)
+ {
+ if (!m)
+ {
+ // +- 0
+ x.u = s << 31;
+ return x.f;
+ }
+ else
+ {
+ // denormalized: shift left until the implicit leading bit (0x400) appears
+ while (!(m & 0x00000400))
+ {
+ m <<= 1;
+ --e; // e is unsigned and wraps below zero here; the re-bias further down brings it back into range
+ }
+
+ ++e;
+ m &= ~0x00000400;
+ }
+ }
+ else if (e == 31)
+ {
+ if (m == 0)
+ {
+ // +/- INF
+ x.u = (s << 31) | 0x7f800000;
+ return x.f;
+ }
+ else
+ {
+ // +/- NaN
+ x.u = (s << 31) | 0x7f800000 | (m << 13);
+ return x.f;
+ }
+ }
+ // Re-bias the exponent from 15 to 127 and widen the mantissa from 10 to 23 bits.
+ e = e + (127 - 15);
+ m = m << 13;
+
+ assert(s <= 1);
+ assert(m <= 0x7FFFFF);
+ assert(e <= 255);
+
+ x.u = m | (e << 23) | (s << 31);
+ return x.f;
+}
+
+} // anonymous
+
+/* Rounds each of the n floats in pVals, in place, to a value exactly representable as a half float,
+ * truncating toward zero (float_to_half(..., true) followed by half_to_float()).
+ * See https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.inline.html#_hdr_endpoint_decoding
+ */
+static void convert_to_half_prec(uint32_t n, float* pVals)
+{
+#if 0 // disabled variant that switched the FPU rounding mode instead; kept for reference only
+ const int prev_dir = fesetround(FE_TOWARDZERO);
+
+ for (uint32_t i = 0; i < n; i++)
+ pVals[i] = half_to_float(float_to_half(pVals[i]));
+
+ fesetround(prev_dir);
+
+ for (uint32_t i = 0; i < n; i++)
+ {
+ assert(pVals[i] == half_to_float(float_to_half(pVals[i], true)));
+ }
+#else
+ // This ensures the values are rounded towards zero as half floats.
+ for (uint32_t i = 0; i < n; i++)
+ {
+ pVals[i] = half_to_float(float_to_half(pVals[i], true));
+ }
+#endif
+}
+
+// Decodes one ASTC block into 8-bit RGBA texels (4 bytes per texel) written to pDst.
+// Returns false when decompressBlock() does not report a valid block.
+bool decompress_ldr(uint8_t *pDst, const uint8_t * data, bool isSRGB, int blockWidth, int blockHeight)
+{
+	const Block128 blockData(data);
+	float linear[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4];
+
+	// If isSRGB is true, decompressBlock() writes uint8_t's straight to pDst. Otherwise it writes floats, which go to the scratch buffer.
+	void* pDecodeTarget = isSRGB ? (void*)pDst : (void*)&linear[0];
+	if (decompressBlock(pDecodeTarget, blockData, blockWidth, blockHeight, isSRGB, true) != DECOMPRESS_RESULT_VALID_BLOCK)
+		return false;
+
+	// sRGB output is already in its final 8-bit form.
+	if (isSRGB)
+		return true;
+
+	// Convert the floats to 8-bits with rounding: scale to 16 bits, clamp, keep the high byte.
+	int texel = 0;
+	for (int y = 0; y < blockHeight; y++)
+	{
+		for (int x = 0; x < blockWidth; x++, texel++)
+		{
+			for (int ch = 0; ch < 4; ch++)
+			{
+				const int v16 = basisu_astc::clamp<int>((int)(linear[texel * 4 + ch] * 65536.0f + .5f), 0, 65535);
+				pDst[4 * texel + ch] = (uint8_t)(v16 >> 8);
+			}
+		}
+	}
+
+	return true;
+}
+
+// Decodes one ASTC block into float RGBA texels at pDstRGBA, then truncates every component to half-float precision.
+// Returns false (without the precision pass) when decompressBlock() does not report a valid block.
+bool decompress_hdr(float* pDstRGBA, const uint8_t* data, int blockWidth, int blockHeight)
+{
+	const Block128 blockData(data);
+
+	const bool decoded_ok = (decompressBlock(pDstRGBA, blockData, blockWidth, blockHeight, false, false) == DECOMPRESS_RESULT_VALID_BLOCK);
+	if (decoded_ok)
+		convert_to_half_prec(blockWidth * blockHeight * 4, pDstRGBA);
+
+	return decoded_ok;
+}
+
+// Public wrapper around isHDR(): is_hdr receives true only for an HDR block.
+// Returns false, with is_hdr set to false, when the block is invalid.
+bool is_hdr(const uint8_t* data, int blockWidth, int blockHeight, bool &is_hdr)
+{
+	const Block128 blockData(data);
+
+	// isHDR() yields -1 on error, 0 for LDR and 1 for HDR.
+	const int status = isHDR(blockData, blockWidth, blockHeight);
+
+	is_hdr = (status == 1);
+	return status >= 0;
+}
+
+} // astc
+
+} // basisu_astc
+
+#if defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
diff --git a/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.h b/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.h
new file mode 100644
index 0000000000..ad13093a6c
--- /dev/null
+++ b/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.h
@@ -0,0 +1,45 @@
+// File: android_astc_decomp.h
+#ifndef _TCUASTCUTIL_HPP
+#define _TCUASTCUTIL_HPP
+/*-------------------------------------------------------------------------
+ * drawElements Quality Program Tester Core
+ * ----------------------------------------
+ *
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief ASTC Utilities.
+ *//*--------------------------------------------------------------------*/
+
+#include <vector>
+#include <stdint.h>
+
+namespace basisu_astc
+{
+namespace astc
+{
+
+// Unpacks a single ASTC block to pDst as 8-bit RGBA texels (4 bytes per texel). Returns false if the block does not decode.
+// If isSRGB is true, the spec requires the decoder to scale the LDR 8-bit endpoints to 16-bit before interpolation slightly differently,
+// which will lead to different outputs. So be sure to set it correctly (ideally it should match whatever the encoder did).
+bool decompress_ldr(uint8_t* pDst, const uint8_t* data, bool isSRGB, int blockWidth, int blockHeight);
+bool decompress_hdr(float* pDstRGBA, const uint8_t* data, int blockWidth, int blockHeight); // Unpacks a single block to float RGBA texels; returns false if the block does not decode.
+bool is_hdr(const uint8_t* data, int blockWidth, int blockHeight, bool& is_hdr); // Sets is_hdr for a valid block and returns true; returns false (is_hdr = false) for an invalid block.
+
+} // astc
+} // basisu_astc
+
+#endif
diff --git a/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.cpp b/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.cpp
new file mode 100644
index 0000000000..d698a7ff87
--- /dev/null
+++ b/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.cpp
@@ -0,0 +1,3310 @@
+// basisu_astc_hdr_enc.cpp
+#include "basisu_astc_hdr_enc.h"
+#include "../transcoder/basisu_transcoder.h"
+
+using namespace basist;
+
+namespace basisu
+{
+
+const float DEF_R_ERROR_SCALE = 2.0f; // default for m_r_err_scale, assigned in astc_hdr_codec_options::init()
+const float DEF_G_ERROR_SCALE = 3.0f; // default for m_g_err_scale, assigned in astc_hdr_codec_options::init()
+
+// Maps a qlog bit width (7-12 or 16) to the matching MAX_QLOG<bits> constant.
+// Any other width asserts in debug builds and yields 0.
+static inline uint32_t get_max_qlog(uint32_t bits)
+{
+	if (bits == 7)
+		return MAX_QLOG7;
+	if (bits == 8)
+		return MAX_QLOG8;
+	if (bits == 9)
+		return MAX_QLOG9;
+	if (bits == 10)
+		return MAX_QLOG10;
+	if (bits == 11)
+		return MAX_QLOG11;
+	if (bits == 12)
+		return MAX_QLOG12;
+	if (bits == 16)
+		return MAX_QLOG16;
+
+	// Unsupported bit width.
+	assert(0);
+	return 0;
+}
+
+#if 0 // compiled out: float-valued counterpart of get_max_qlog(), returning the MAX_QLOG<bits>_VAL constants
+static inline float get_max_qlog_val(uint32_t bits)
+{
+ switch (bits)
+ {
+ case 7: return MAX_QLOG7_VAL;
+ case 8: return MAX_QLOG8_VAL;
+ case 9: return MAX_QLOG9_VAL;
+ case 10: return MAX_QLOG10_VAL;
+ case 11: return MAX_QLOG11_VAL;
+ case 12: return MAX_QLOG12_VAL;
+ case 16: return MAX_QLOG16_VAL;
+ default: assert(0); break;
+ }
+ return 0;
+}
+#endif
+
+// Returns bit number src_bit (0 = least significant, at most 31) of src_val as 0 or 1.
+static inline int get_bit(int src_val, int src_bit)
+{
+	assert(!(src_bit < 0 || src_bit > 31));
+	return (src_val >> src_bit) & 1;
+}
+
+// ORs bit src_bit of src_val into bit position dst_bit of dst. Bits already set in dst are never cleared.
+static inline void pack_bit(
+	int& dst, int dst_bit,
+	int src_val, int src_bit = 0)
+{
+	assert(dst_bit >= 0 && dst_bit <= 31);
+	assert(src_bit >= 0 && src_bit <= 31);
+
+	// Isolate the requested source bit, then move it to its destination slot.
+	const int picked = (src_val >> src_bit) & 1;
+	dst |= (picked << dst_bit);
+}
+
+//--------------------------------------------------------------------------------------------------------------------------
+// Constructs the options already populated with the defaults chosen by init().
+astc_hdr_codec_options::astc_hdr_codec_options()
+{
+ init();
+}
+// Resets the options to their defaults: fixed error weights, uber mode disallowed, then the cDefaultLevel quality preset.
+void astc_hdr_codec_options::init()
+{
+ m_bc6h_err_weight = .85f;
+ m_r_err_scale = DEF_R_ERROR_SCALE;
+ m_g_err_scale = DEF_G_ERROR_SCALE;
+
+ // Disabling by default to avoid transcoding outliers (try kodim26). The quality lost is very low. TODO: Could include the uber result in the output.
+ m_allow_uber_mode = false;
+
+ // Must set best quality level first to set defaults.
+ set_quality_best();
+ // Then narrow the settings down to the default level's preset.
+ set_quality_level(cDefaultLevel);
+}
+// Preset that enables every mode from its *_FIRST_ISE_RANGE to its *_LAST_ISE_RANGE weight range, with weight refinement on and partition estimation off.
+void astc_hdr_codec_options::set_quality_best()
+{
+ m_mode11_direct_only = false;
+
+ // highest achievable quality
+ m_use_solid = true;
+
+ m_use_mode11 = true;
+ m_mode11_uber_mode = true;
+ m_first_mode11_weight_ise_range = MODE11_FIRST_ISE_RANGE;
+ m_last_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE;
+ m_first_mode11_submode = -1; // NOTE(review): -1 presumably selects a non-submode (direct) encoding in addition to submodes 0..7 - confirm against the packer
+ m_last_mode11_submode = 7;
+
+ m_use_mode7_part1 = true;
+ m_first_mode7_part1_weight_ise_range = MODE7_PART1_FIRST_ISE_RANGE;
+ m_last_mode7_part1_weight_ise_range = MODE7_PART1_LAST_ISE_RANGE;
+
+ m_use_mode7_part2 = true;
+ m_mode7_part2_part_masks = UINT32_MAX;
+ m_first_mode7_part2_weight_ise_range = MODE7_PART2_FIRST_ISE_RANGE;
+ m_last_mode7_part2_weight_ise_range = MODE7_PART2_LAST_ISE_RANGE;
+
+ m_use_mode11_part2 = true;
+ m_mode11_part2_part_masks = UINT32_MAX;
+ m_first_mode11_part2_weight_ise_range = MODE11_PART2_FIRST_ISE_RANGE;
+ m_last_mode11_part2_weight_ise_range = MODE11_PART2_LAST_ISE_RANGE;
+
+ m_refine_weights = true;
+
+ m_use_estimated_partitions = false;
+ m_max_estimated_partitions = 0;
+}
+// Preset with narrower weight ranges than set_quality_best(). It does not assign m_mode11_direct_only, the mode 11 submode range or the estimated-partition fields, so those keep whatever an earlier call set.
+void astc_hdr_codec_options::set_quality_normal()
+{
+ m_use_solid = true;
+
+ // We'll allow uber mode in normal if the user allows it.
+ m_use_mode11 = true;
+ m_mode11_uber_mode = true;
+ m_first_mode11_weight_ise_range = 6;
+ m_last_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE;
+
+ m_use_mode7_part1 = true;
+ m_first_mode7_part1_weight_ise_range = MODE7_PART1_LAST_ISE_RANGE; // first == last: only one weight range is tried
+ m_last_mode7_part1_weight_ise_range = MODE7_PART1_LAST_ISE_RANGE;
+
+ m_use_mode7_part2 = true;
+ m_mode7_part2_part_masks = UINT32_MAX;
+ m_first_mode7_part2_weight_ise_range = MODE7_PART2_LAST_ISE_RANGE;
+ m_last_mode7_part2_weight_ise_range = MODE7_PART2_LAST_ISE_RANGE;
+
+ m_use_mode11_part2 = true;
+ m_mode11_part2_part_masks = UINT32_MAX;
+ m_first_mode11_part2_weight_ise_range = MODE11_PART2_LAST_ISE_RANGE;
+ m_last_mode11_part2_weight_ise_range = MODE11_PART2_LAST_ISE_RANGE;
+
+ m_refine_weights = true;
+}
+// Preset that keeps only solid blocks and mode 11 at its last weight range; mode 7, two-partition mode 11, uber mode and weight refinement are switched off. Fields not assigned here keep their previous values.
+void astc_hdr_codec_options::set_quality_fastest()
+{
+ m_use_solid = true;
+
+ m_use_mode11 = true;
+ m_mode11_uber_mode = false;
+ m_first_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE;
+ m_last_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE;
+
+ m_use_mode7_part1 = false;
+ m_use_mode7_part2 = false;
+ m_use_mode11_part2 = false;
+
+ m_refine_weights = false;
+}
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+// Applies the preset for the requested quality level. The level is clamped to
+// [cMinLevel, cMaxLevel] and the clamped value is stored in m_level.
+// Levels 0-2 start from the fastest/normal presets, which do not reset every
+// field, so some settings carry over from whatever was configured before.
+void astc_hdr_codec_options::set_quality_level(int level)
+{
+	level = clamp(level, cMinLevel, cMaxLevel);
+	m_level = level;
+
+	switch (level)
+	{
+	case 0:
+		set_quality_fastest();
+		break;
+
+	case 1:
+		set_quality_normal();
+
+		// Only the top two mode 11 weight ranges, and no mode 7 at all.
+		m_first_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE - 1;
+		m_last_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE;
+
+		m_use_mode7_part1 = false;
+		m_use_mode7_part2 = false;
+
+		m_use_estimated_partitions = true;
+		m_max_estimated_partitions = 1;
+
+		m_mode11_part2_part_masks = 1 | 2;
+		m_mode7_part2_part_masks = 1 | 2;
+		break;
+
+	case 2:
+		set_quality_normal();
+
+		m_use_estimated_partitions = true;
+		m_max_estimated_partitions = 2;
+
+		m_mode11_part2_part_masks = 1 | 2;
+		m_mode7_part2_part_masks = 1 | 2;
+		break;
+
+	case 3:
+		set_quality_best();
+
+		m_use_estimated_partitions = true;
+		m_max_estimated_partitions = 2;
+
+		m_mode11_part2_part_masks = 1 | 2 | 4 | 8;
+		m_mode7_part2_part_masks = 1 | 2 | 4 | 8;
+		break;
+
+	case 4:
+		// The unmodified best preset.
+		set_quality_best();
+		break;
+	}
+}
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+#if 0 // compiled out: 12-bit convenience wrapper around qlog_to_half_slow()
+static inline half_float qlog12_to_half_slow(uint32_t qlog12)
+{
+ return qlog_to_half_slow(qlog12, 12);
+}
+#endif
+
+// max usable qlog8 value is 247, 248=inf, >=249 is nan
+// max usable qlog7 value is 123, 124=inf, >=125 is nan
+
+// To go from a smaller qlog to a larger one, shift left by X bits.
+
+//const uint32_t TOTAL_USABLE_QLOG8 = 248; // 0-247 are usable, 0=0, 247=60416.0, 246=55296.0
+
+// for qlog7's shift left by 1
+//half_float g_qlog8_to_half[256];
+//float g_qlog8_to_float[256];
+
+//half_float g_qlog12_to_half[4096];
+//float g_qlog12_to_float[4096];
+
+static half_float g_qlog16_to_half[65536]; // [qlog16] -> half float; filled by init_qlog_tables() from qlog16_to_half_slow()
+
+// Converts a qlog value of the given bit width (5..16) to a half float by
+// left-aligning it to 16 bits and looking it up in g_qlog16_to_half.
+inline half_float qlog_to_half(uint32_t val, uint32_t bits)
+{
+	assert((bits >= 5) && (bits <= 16));
+	assert(val < (1U << bits));
+
+	const uint32_t q16 = val << (16 - bits);
+	return g_qlog16_to_half[q16];
+}
+
+// nearest values given a positive half float value (only); indexed by the half's 15 non-sign bits
+static uint16_t g_half_to_qlog7[32768], g_half_to_qlog8[32768], g_half_to_qlog9[32768], g_half_to_qlog10[32768], g_half_to_qlog11[32768], g_half_to_qlog12[32768];
+// Bit width served by slot 0 of g_pHalf_to_qlog_tabs; the slot index is (bits - HALF_TO_QLOG_TABS_BASE).
+const uint32_t HALF_TO_QLOG_TABS_BASE = 7;
+static uint16_t* g_pHalf_to_qlog_tabs[8] = // 8 slots declared, 6 initialized; the remaining two are null
+{
+ g_half_to_qlog7,
+ g_half_to_qlog8,
+
+ g_half_to_qlog9,
+ g_half_to_qlog10,
+
+ g_half_to_qlog11,
+ g_half_to_qlog12
+};
+
+// Table lookup of the qlog code of the given width (7..12 bits) for a
+// non-negative half float bit pattern h (sign bit clear, so h < 32768).
+static inline uint32_t half_to_qlog7_12(half_float h, uint32_t bits)
+{
+	assert((bits >= HALF_TO_QLOG_TABS_BASE) && (bits <= 12));
+	assert(h < 32768);
+
+	const uint16_t* pTable = g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_BASE];
+	return pTable[h];
+}
+
+#if 0 // compiled out: reference for the forward mapping that half_float_mantissa_to_qlog11() inverts
+// Input is the low 11 bits of the qlog
+// Returns the 10-bit mantissa of the half float value
+static int qlog11_to_half_float_mantissa(int M)
+{
+ assert(M <= 0x7FF);
+ int Mt;
+ if (M < 512)
+ Mt = 3 * M;
+ else if (M >= 1536)
+ Mt = 5 * M - 2048;
+ else
+ Mt = 4 * M - 512;
+ return (Mt >> 3);
+}
+#endif
+
+// Maps the 10-bit mantissa of a half float to the 11-bit qlog mantissa.
+// The forward (qlog -> half) mapping is piecewise linear with three segments
+// (slopes 3/8, 4/8 and 5/8); each candidate below inverts one of them, with rounding.
+static inline int half_float_mantissa_to_qlog11(int hf)
+{
+	const int scaled = hf * 8;
+
+	// Lowest segment: qlog values below 512.
+	const int low_candidate = (scaled + 2) / 3;
+	if (low_candidate < 512)
+		return low_candidate;
+
+	// Highest segment: qlog values from 1536 upwards.
+	const int high_candidate = (scaled + 2048 + 4) / 5;
+	if (high_candidate >= 1536)
+		return high_candidate;
+
+	// Everything else falls in the middle segment.
+	return (scaled + 512 + 2) / 4;
+}
+
+// Converts a half float bit pattern to the 16-bit qlog value ASTC HDR interpolates
+// with the [0-64] weights. The sign bit is not carried over, and the debug
+// round-trip check compares against hf itself.
+static inline int half_to_qlog16(int hf)
+{
+	// The 5 exponent bits pass through to the qlog16 unchanged.
+	const int exponent_bits = (hf >> 10) & 0x1F;
+
+	// The 10-bit mantissa is inverted to the nearest qlog11 (should be lossless).
+	const int qlog11_mantissa = half_float_mantissa_to_qlog11(hf & 0x3FF);
+	assert(qlog11_mantissa <= 0x7FF);
+
+	const uint32_t qlog16 = (exponent_bits << 11) | qlog11_mantissa;
+
+	// Converting back must reproduce the input exactly.
+	assert(qlog16_to_half_slow(qlog16) == hf);
+
+	return qlog16;
+}
+
+// Quantizes a 16-bit qlog value to desired_bits (7..12) bits, saturating at the largest code.
+static inline uint32_t quant_qlog16(uint32_t q16, uint32_t desired_bits)
+{
+	assert((desired_bits >= 7) && (desired_bits <= 12));
+	assert(q16 <= 65535);
+
+	const uint32_t dropped_bits = 16 - desired_bits;
+	const uint32_t rounding_bias = (1U << (dropped_bits - 1U)) - 1U; // one less than half a quantization step
+	const uint32_t largest_code = (1U << desired_bits) - 1U;
+
+	const uint32_t code = (q16 + rounding_bias) >> dropped_bits;
+	return (code < largest_code) ? code : largest_code;
+}
+/* Fills pTable[h], for every positive finite half float bit pattern h (0..32767), with the `bits`-wide qlog code whose
+ * decoded value (qlog16_to_float at index code << (16 - bits)) is nearest to h. Entries for Inf/NaN inputs are not written.
+ */
+static void compute_half_to_qlog_table(uint32_t bits, uint16_t* pTable, const basisu::vector<float> &qlog16_to_float)
+{
+ assert(bits >= 5 && bits <= 12);
+ const uint32_t max_val = (1 << bits) - 1;
+
+ // For all positive half-floats
+ for (uint32_t h = 0; h < 32768; h++)
+ {
+ // Skip invalid values
+ if (is_half_inf_or_nan((half_float)h))
+ continue;
+ const float desired_val = half_to_float((half_float)h);
+
+ float best_err = 1e+30f;
+ uint32_t best_qlog = 0;
+
+ // For all possible qlog's
+ for (uint32_t i = 0; i <= max_val; i++)
+ {
+ // Skip invalid values
+ float v = qlog16_to_float[i << (16 - bits)];
+ if (std::isnan(v))
+ continue;
+
+ // Compute error
+ float err = fabs(v - desired_val);
+
+ // Find best
+ if (err < best_err) // strict '<': on ties the lowest code wins
+ {
+ best_err = err;
+ best_qlog = i;
+ }
+ }
+
+ pTable[h] = (uint16_t)best_qlog;
+ }
+
+#if 0 // compiled-out debugging aid: prints mismatches between two ways of quantizing a qlog16, then exits
+ uint32_t t = 0;
+
+ const uint32_t nb = 12;
+ int nb_shift = 16 - nb;
+
+ for (uint32_t q16 = 0; q16 < 65536; q16++)
+ {
+ half_float h = qlog16_to_half_slow(q16);
+ if (is_half_inf_or_nan(h))
+ continue;
+
+ int q7 = half_to_qlog7_12(h, nb);
+
+ uint32_t best_err = UINT32_MAX, best_l = 0;
+ for (int l = 0; l < (1 << nb); l++)
+ {
+ int dec_q16 = l << nb_shift;
+ int err = iabs(dec_q16 - q16);
+ if (err < best_err)
+ {
+ best_err = err;
+ best_l = l;
+ }
+ }
+
+ //int e = (q16 + 253) >> 9; // 345
+
+ int e = (q16 + (1 << (nb_shift - 1)) - 1) >> nb_shift; // 285
+ if (best_l != e)
+ //if (q7 != best_l)
+ {
+ printf("q16=%u, h=%u, q7=%u, e=%u, best_l=%u\n", q16, h, q7, e, best_l);
+ t++;
+ }
+ }
+
+ printf("Mismatches: %u\n", t);
+ exit(0);
+#endif
+}
+
+// Builds the qlog16 -> half table and the half -> qlog7..qlog12 nearest-value tables.
+static void init_qlog_tables()
+{
+	basisu::vector<float> qlog16_as_float(65536);
+
+	// Decode every possible qlog16 once, keeping both the half and the float form.
+	for (uint32_t q16 = 0; q16 < 65536; q16++)
+	{
+		const half_float decoded = qlog16_to_half_slow(q16);
+
+		g_qlog16_to_half[q16] = decoded;
+		qlog16_as_float[q16] = half_to_float(decoded);
+	}
+
+	// For all possible half floats, find the nearest qlog7-12 value (one table per bit width).
+	const uint32_t num_tables = 12 - HALF_TO_QLOG_TABS_BASE + 1;
+	for (uint32_t tab = 0; tab < num_tables; tab++)
+	{
+		compute_half_to_qlog_table(HALF_TO_QLOG_TABS_BASE + tab, g_pHalf_to_qlog_tabs[tab], qlog16_as_float);
+	}
+}
+
+// [ise_range][0] = # levels
+// [ise_range][1...] = lerp value [0,64]
+// in ASTC order
+// The table has rows for ISE weight ranges 0 to 10 (11 total), but row 0 is only a placeholder: the usable ranges are 1 to 10.
+const uint32_t MIN_SUPPORTED_ISE_WEIGHT_INDEX = 1; // ISE 1=3 levels
+const uint32_t MAX_SUPPORTED_ISE_WEIGHT_INDEX = 10; // ISE 10=24 levels
+
+static const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][32] =
+{
+ { 0 }, // ise range=0 is invalid for 4x4 block sizes (<24 weight bits in the block)
+ { 3, 0, 32, 64 }, // 1
+ { 4, 0, 21, 43, 64 }, // 2
+ { 5, 0, 16, 32, 48, 64 }, // 3
+ { 6, 0, 64, 12, 52, 25, 39 }, // 4
+ { 8, 0, 9, 18, 27, 37, 46, 55, 64 }, // 5
+ { 10, 0, 64, 7, 57, 14, 50, 21, 43, 28, 36 }, // 6
+ { 12, 0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36 }, // 7
+ { 16, 0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64 }, // 8
+ { 20, 0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38, 13, 51, 29, 35 }, // 9
+ { 24, 0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37, 5, 59, 13, 51, 22, 42, 30, 34 } // 10
+};
+
+//{ 12, 0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36 }, // 7
+//static const uint8_t g_weight_order_7[12] = { 0, 4, 8, 2, 6, 10, 11, 7, 3, 9, 5, 1 };
+
+// Averages the first three components (RGB) of num_pixels pixels; the fourth component is ignored.
+// num_pixels is used as the divisor, so the caller must pass at least one pixel.
+static vec3F calc_mean(uint32_t num_pixels, const vec4F* pPixels)
+{
+	vec3F sum(0.0f);
+
+	for (const vec4F* pCur = pPixels; pCur != pPixels + num_pixels; ++pCur)
+	{
+		for (uint32_t c = 0; c < 3; c++)
+			sum[c] += (*pCur)[c];
+	}
+
+	return sum / static_cast<float>(num_pixels);
+}
+/* Estimates the principal axis of the pixels' RGB values around mean_color. Accumulates the (unnormalized) covariance,
+ * runs three power-iteration steps from a fixed start vector and returns the normalized result; a degenerate result
+ * is replaced by the normalized (1,1,1) direction.
+ */
+static vec3F calc_rgb_pca(uint32_t num_pixels, const vec4F* pPixels, const vec3F& mean_color)
+{
+ float cov[6] = { 0, 0, 0, 0, 0, 0 }; // upper triangle of the symmetric matrix: rr, rg, rb, gg, gb, bb
+
+ for (uint32_t i = 0; i < num_pixels; i++)
+ {
+ const vec4F& v = pPixels[i];
+
+ float r = v[0] - mean_color[0];
+ float g = v[1] - mean_color[1];
+ float b = v[2] - mean_color[2];
+
+ cov[0] += r * r;
+ cov[1] += r * g;
+ cov[2] += r * b;
+ cov[3] += g * g;
+ cov[4] += g * b;
+ cov[5] += b * b;
+ }
+ // Power iteration: multiply the current estimate by the covariance matrix, rescaling by the largest component each step.
+ float xr = .9f, xg = 1.0f, xb = .7f;
+ for (uint32_t iter = 0; iter < 3; iter++)
+ {
+ float r = xr * cov[0] + xg * cov[1] + xb * cov[2];
+ float g = xr * cov[1] + xg * cov[3] + xb * cov[4];
+ float b = xr * cov[2] + xg * cov[4] + xb * cov[5];
+
+ float m = maximumf(maximumf(fabsf(r), fabsf(g)), fabsf(b));
+
+ if (m > 1e-10f)
+ {
+ m = 1.0f / m;
+
+ r *= m;
+ g *= m;
+ b *= m;
+ }
+
+ xr = r;
+ xg = g;
+ xb = b;
+ }
+
+ float len = xr * xr + xg * xg + xb * xb;
+
+ vec3F axis;
+ if (len < 1e-10f)
+ axis.set(0.0f);
+ else
+ {
+ len = 1.0f / sqrtf(len);
+
+ xr *= len;
+ xg *= len;
+ xb *= len;
+
+ axis.set(xr, xg, xb, 0);
+ }
+ // axis is either unit length or zero here; fall back to the grey diagonal when it is zero.
+ if (axis.dot(axis) < .5f)
+ {
+ axis.set(1.0f, 1.0f, 1.0f, 0.0f);
+ axis.normalize_in_place();
+ }
+
+ return axis;
+}
+/* Returns mean + dir * df when that point lies inside colorspace_box (*pInside = true). Otherwise *pInside = false and the
+ * result is where a ray from that point back toward mean hits input_box, or the unclipped point if no hit is found.
+ */
+static vec3F interp_color(const vec3F& mean, const vec3F& dir, float df, const aabb3F& colorspace_box, const aabb3F& input_box, bool* pInside = nullptr)
+{
+#if 0
+ assert(mean[0] >= input_box[0][0]);
+ assert(mean[1] >= input_box[0][1]);
+ assert(mean[2] >= input_box[0][2]);
+ assert(mean[0] <= input_box[1][0]);
+ assert(mean[1] <= input_box[1][1]);
+ assert(mean[2] <= input_box[1][2]);
+#endif
+
+ if (pInside)
+ *pInside = false;
+
+ vec3F k(mean + dir * df);
+ if (colorspace_box.contains(k))
+ {
+ if (pInside)
+ *pInside = true;
+
+ return k;
+ }
+
+ // starts inside
+ vec3F s(mean);
+
+ // ends outside (same point as k above)
+ vec3F e(mean + dir * df);
+
+ // a ray guaranteed to go from the outside to inside
+ ray3F r(e, (s - e).normalize_in_place());
+ vec3F c;
+ float t = 0.0f;
+
+ intersection::result res = intersection::ray_aabb(c, t, r, input_box);
+ if (res != intersection::cSuccess)
+ c = k;
+
+ return c;
+}
+
+/* Least-squares fit of the low/high RGB endpoints (*pXl, *pXh) for N pixels whose selectors are fixed. All in Q16 space, 0-65535.
+ * pSelector_weights[sel] is read as (z00 term, z10 term, z11 term, w), the layout encode_astc_hdr_init() stores.
+ * Returns false, with the endpoints left unwritten, when the 2x2 normal matrix is singular (det == 0).
+ */
+static bool compute_least_squares_endpoints_rgb(
+ uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,
+ vec3F* pXl, vec3F* pXh, const vec4F* pColors, const aabb3F& input_box)
+{
+ // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
+ // https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
+ // I did this in matrix form first, expanded out all the ops, then optimized it a bit.
+ float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
+ float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
+ float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;
+ float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f;
+
+ for (uint32_t i = 0; i < N; i++)
+ {
+ const uint32_t sel = pSelectors[i];
+ z00 += pSelector_weights[sel][0];
+ z10 += pSelector_weights[sel][1];
+ z11 += pSelector_weights[sel][2];
+
+ float w = pSelector_weights[sel][3];
+ q00_r += w * pColors[i][0];
+ t_r += pColors[i][0];
+
+ q00_g += w * pColors[i][1];
+ t_g += pColors[i][1];
+
+ q00_b += w * pColors[i][2];
+ t_b += pColors[i][2];
+ }
+ // q10 is the (1 - w)-weighted color sum, obtained as the plain sum minus the w-weighted sum.
+ q10_r = t_r - q00_r;
+ q10_g = t_g - q00_g;
+ q10_b = t_b - q00_b;
+
+ z01 = z10;
+
+ float det = z00 * z11 - z01 * z10;
+ if (det == 0.0f)
+ return false;
+
+ det = 1.0f / det;
+ // Inverse of the symmetric 2x2 matrix [z00 z01; z10 z11].
+ float iz00, iz01, iz10, iz11;
+ iz00 = z11 * det;
+ iz01 = -z01 * det;
+ iz10 = -z10 * det;
+ iz11 = z00 * det;
+
+ (*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);
+ (*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);
+
+ (*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g);
+ (*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g);
+
+ (*pXl)[2] = (float)(iz00 * q00_b + iz01 * q10_b);
+ (*pXh)[2] = (float)(iz10 * q00_b + iz11 * q10_b);
+ // Channels in which the input box is (nearly) flat take the box bounds instead of the fitted values.
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ float l = (*pXl)[c], h = (*pXh)[c];
+
+ if (input_box.get_dim(c) < .0000125f)
+ {
+ l = input_box[0][c];
+ h = input_box[1][c];
+ }
+
+ (*pXl)[c] = l;
+ (*pXh)[c] = h;
+ }
+ // An endpoint outside the valid [0, MAX_QLOG16_VAL] cube is replaced by the point interp_color() returns along the endpoint axis.
+ vec3F mean((*pXl + *pXh) * .5f);
+ vec3F dir(*pXh - *pXl);
+
+ float ln = dir.length();
+ if (ln)
+ {
+ dir /= ln;
+
+ float ld = (*pXl - mean).dot(dir);
+ float hd = (*pXh - mean).dot(dir);
+
+ aabb3F colorspace_box(vec3F(0.0f), vec3F(MAX_QLOG16_VAL));
+
+ bool was_inside1 = false;
+
+ vec3F l = interp_color(mean, dir, ld, colorspace_box, input_box, &was_inside1);
+ if (!was_inside1)
+ *pXl = l;
+
+ bool was_inside2 = false;
+ vec3F h = interp_color(mean, dir, hd, colorspace_box, input_box, &was_inside2);
+ if (!was_inside2)
+ *pXh = h;
+ }
+ // Final safety clamp of both endpoints.
+ pXl->clamp(0.0f, MAX_QLOG16_VAL);
+ pXh->clamp(0.0f, MAX_QLOG16_VAL);
+
+ return true;
+}
+
+static vec4F g_astc_ls_weights_ise[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][24]; // [ise_range][astc_index] -> (w*w, (1-w)*w, (1-w)*(1-w), w); filled by encode_astc_hdr_init()
+// Both maps below are filled by encode_astc_hdr_init(); "linear" order means ascending lerp value.
+static uint8_t g_map_astc_to_linear_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][24]; // [ise_range][astc_index] -> linear index
+static uint8_t g_map_linear_to_astc_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][24]; // [ise_range][linear_index] -> astc_index
+
+// Fills the per-ISE-range lookup tables: the least-squares weight tuples and the
+// two maps that translate between ASTC weight order and ascending (linear) order.
+static void encode_astc_hdr_init()
+{
+	for (uint32_t range = MIN_SUPPORTED_ISE_WEIGHT_INDEX; range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX; range++)
+	{
+		const uint8_t* pLerps = g_ise_weight_lerps[range];
+
+		const uint32_t num_levels = pLerps[0];
+		assert((num_levels >= 3) && (num_levels <= 24));
+
+		// Precomputed weight constants used during least fit determination.
+		// For each entry: w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w
+		for (uint32_t i = 0; i < num_levels; i++)
+		{
+			const float w = pLerps[1 + i] * (1.0f / 64.0f);
+			const float inv_w = 1.0f - w;
+
+			g_astc_ls_weights_ise[range][i].set(w * w, inv_w * w, inv_w * inv_w, w);
+		}
+
+		// Sort keys carry the lerp value in the high bits and the ASTC index in the low 8 bits,
+		// so sorting orders by lerp value while remembering where each level came from.
+		uint32_t keys[32];
+		for (uint32_t i = 0; i < num_levels; i++)
+			keys[i] = (pLerps[1 + i] << 8) + i;
+
+		std::sort(keys, keys + num_levels);
+
+		for (uint32_t i = 0; i < num_levels; i++)
+		{
+			const uint8_t astc_index = (uint8_t)(keys[i] & 0xFF);
+
+			g_map_linear_to_astc_order[range][i] = astc_index;
+			g_map_astc_to_linear_order[range][astc_index] = (uint8_t)i;
+		}
+	}
+}
+
+// Interpolates between two 12-bit qlog RGB endpoints (e[0] = low, e[1] = high) for the
+// first n weight levels of the given ISE weight range, in ASTC weight order.
+// Results go to pDecoded_half (3 halves per level) and/or pDecoded_float; either may be null.
+void interpolate_qlog12_colors(
+	const int e[2][3],
+	half_float* pDecoded_half,
+	vec3F* pDecoded_float,
+	uint32_t n, uint32_t ise_weight_range)
+{
+	assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
+
+	for (uint32_t idx = 0; idx < 6; idx++)
+	{
+		assert(in_range(e[idx / 3][idx % 3], 0, 0xFFF));
+	}
+
+	for (uint32_t level = 0; level < n; level++)
+	{
+		const int c = g_ise_weight_lerps[ise_weight_range][1 + level];
+		assert(c == (int)astc_helpers::dequant_bise_weight(level, ise_weight_range));
+
+		half_float channel_half[3];
+
+		for (uint32_t ch = 0; ch < 3; ch++)
+		{
+			// Expand both 12-bit endpoints to 16 bits, then blend with the [0,64] weight.
+			const uint32_t lo16 = e[0][ch] << 4;
+			const uint32_t hi16 = e[1][ch] << 4;
+			const int blended = (lo16 * (64 - c) + hi16 * c + 32) / 64;
+
+			channel_half[ch] = qlog16_to_half_slow(blended);
+		}
+
+		if (pDecoded_half)
+		{
+			for (uint32_t ch = 0; ch < 3; ch++)
+				pDecoded_half[level * 3 + ch] = channel_half[ch];
+		}
+
+		if (pDecoded_float)
+		{
+			for (uint32_t ch = 0; ch < 3; ch++)
+				pDecoded_float[level][ch] = half_to_float(channel_half[ch]);
+		}
+	}
+}
+
+// Decodes mode 11 endpoints and produces the n interpolated block colors.
+// Colors come out in ASTC order, not linear order.
+// Returns false (writing nothing) if the ISE endpoint quantization leads to non-valid endpoints being decoded.
+bool get_astc_hdr_mode_11_block_colors(
+	const uint8_t* pEndpoints,
+	half_float* pDecoded_half,
+	vec3F* pDecoded_float,
+	uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range)
+{
+	assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
+
+	int e[2][3];
+	const bool endpoints_valid = decode_mode11_to_qlog12(pEndpoints, e, ise_endpoint_range);
+
+	if (endpoints_valid)
+		interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range);
+
+	return endpoints_valid;
+}
+
+// Decodes mode 7 endpoints and produces the n interpolated block colors.
+// Colors come out in ASTC order, not linear order.
+// Returns false (writing nothing) if the ISE endpoint quantization leads to non-valid endpoints being decoded.
+bool get_astc_hdr_mode_7_block_colors(
+	const uint8_t* pEndpoints,
+	half_float* pDecoded_half,
+	vec3F* pDecoded_float,
+	uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range)
+{
+	assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
+
+	int e[2][3];
+	const bool endpoints_valid = decode_mode7_to_qlog12(pEndpoints, e, nullptr, ise_endpoint_range);
+
+	if (endpoints_valid)
+		interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range);
+
+	return endpoints_valid;
+}
+
+// Fast high precision piecewise linear approximation of log2(bias+x), where the bias is the .125f added below.
+// Half may be zero, positive or denormal. No NaN/Inf/negative. The result is in raw float-bit units, so only differences between two q() values are meaningful.
+static inline double q(half_float x)
+{
+ union { float f; int32_t i; uint32_t u; } fi;
+
+ fi.f = fast_half_to_float_pos_not_inf_or_nan(x);
+
+ assert(fi.f >= 0.0f);
+ // The bias keeps the input away from zero before its bit pattern is taken.
+ fi.f += .125f;
+ // The bit pattern of a positive float grows monotonically with its value, which is what makes it usable as a log2 stand-in.
+ return (double)fi.u; // approx log2f(fi.f), need to return double for the precision
+}
+
+// Chooses, for each pixel, the usable weight level whose decoded color is closest to the
+// pixel under an approximate MSLE metric, and returns the summed error of those choices.
+//   num_pixels              number of pixels (1..16)
+//   pWeights                out: chosen weight level index per pixel
+//   pBlock_pixels_half      in: 3 halves (RGB) per pixel
+//   num_weight_levels       number of candidate levels in pDecoded_half
+//   pDecoded_half           in: 3 halves (RGB) per weight level; must not contain Inf/NaN
+//   coptions                supplies the red/green error weights (blue has weight 1)
+//   usable_selector_bitmask bit i set = level i may be chosen; must be non-zero
+// A pixel for which no level below num_weight_levels is usable keeps its pWeights entry
+// untouched and contributes the 1e+30f sentinel to the total.
+double eval_selectors(
+	uint32_t num_pixels,
+	uint8_t* pWeights,
+	const half_float* pBlock_pixels_half,
+	uint32_t num_weight_levels,
+	const half_float* pDecoded_half,
+	const astc_hdr_codec_options& coptions,
+	uint32_t usable_selector_bitmask)
+{
+	assert((num_pixels >= 1) && (num_pixels <= 16));
+	assert(usable_selector_bitmask);
+
+	const float R_WEIGHT = coptions.m_r_err_scale;
+	const float G_WEIGHT = coptions.m_g_err_scale;
+
+	double total_error = 0;
+
+#ifdef _DEBUG
+	for (uint32_t i = 0; i < num_weight_levels; i++)
+	{
+		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0]));
+		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1]));
+		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2]));
+	}
+#endif
+
+	for (uint32_t p = 0; p < num_pixels; p++)
+	{
+		const half_float* pDesired_half = &pBlock_pixels_half[p * 3];
+
+		// The target pixel does not change across weight levels, so convert it once per pixel
+		// instead of once per level.
+		const double desired_r = q(pDesired_half[0]);
+		const double desired_g = q(pDesired_half[1]);
+		const double desired_b = q(pDesired_half[2]);
+
+		double lowest_e = 1e+30f;
+
+		// this is an approximation of MSLE
+		for (uint32_t i = 0; i < num_weight_levels; i++)
+		{
+			if (((1 << i) & usable_selector_bitmask) == 0)
+				continue;
+
+			// compute piecewise linear approximation of log2(a+eps)-log2(b+eps), for each component, then MSLE
+			double rd = q(pDecoded_half[i * 3 + 0]) - desired_r;
+			double gd = q(pDecoded_half[i * 3 + 1]) - desired_g;
+			double bd = q(pDecoded_half[i * 3 + 2]) - desired_b;
+
+			double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
+
+			// Strict '<': on ties the lowest level index wins.
+			if (e < lowest_e)
+			{
+				lowest_e = e;
+				pWeights[p] = (uint8_t)i;
+			}
+		}
+
+		total_error += lowest_e;
+
+	} // p
+
+	return total_error;
+}
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+// Sums the weighted squared differences of q() between an original and a packed block.
+// Both blocks hold 16 pixels of 3 halves (RGB) each; red and green are scaled by the
+// weights in coptions, blue has weight 1.
+double compute_block_error(const half_float* pOrig_block, const half_float* pPacked_block, const astc_hdr_codec_options& coptions)
+{
+	const float R_WEIGHT = coptions.m_r_err_scale;
+	const float G_WEIGHT = coptions.m_g_err_scale;
+
+	double total_error = 0;
+
+	for (uint32_t p = 0; p < 16; p++)
+	{
+		const half_float* pOrig = pOrig_block + p * 3;
+		const half_float* pPacked = pPacked_block + p * 3;
+
+		const double rd = q(pOrig[0]) - q(pPacked[0]);
+		const double gd = q(pOrig[1]) - q(pPacked[1]);
+		const double bd = q(pOrig[2]) - q(pPacked[2]);
+
+		total_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
+	}
+
+	return total_error;
+}
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+// Clamps v to [l, h]. When clamping happens, did_clamp is set to true and max_clamp_mag is
+// raised to the overshoot if that is larger; neither output is ever reset by this function.
+static inline int compute_clamped_val(int v, int l, int h, bool& did_clamp, int& max_clamp_mag)
+{
+	assert(l < h);
+
+	// In range: nothing to record.
+	if ((v >= l) && (v <= h))
+		return v;
+
+	const bool below = (v < l);
+	const int overshoot = below ? (l - v) : (v - h);
+
+	if (overshoot > max_clamp_mag)
+		max_clamp_mag = overshoot;
+
+	did_clamp = true;
+	return below ? l : h;
+}
+
+ // Packs a low/high RGB endpoint pair (given in qlog16 space) into the 6 endpoint bytes of one
+ // mode 11 submode (0-7).
+ //
+ // The submode fixes the bit widths: the base value "a" gets 9 + (submode >> 1) bits, and the
+ // b/c/d fields get the widths in s_b_bits/s_c_bits/s_d_bits. Both endpoints are quantized to
+ // a_bits of precision, the largest of the six values is moved to val_q[1][0] (swapping endpoints
+ // and/or components as needed, the component index is stored as the "highest component"), and the
+ // remaining five values are expressed as offsets from it. Offsets that do not fit their field are clamped.
+ //
+ // Returns true if any field had to be clamped. max_clamp_mag receives the largest clamp distance
+ // of the encoding that was actually packed (0 when nothing clamped).
+ static bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag)
+ {
+ assert(submode <= 7);
+
+ const uint8_t s_b_bits[8] = { 7, 8, 6, 7, 8, 6, 7, 6 };
+ const uint8_t s_c_bits[8] = { 6, 6, 7, 7, 6, 7, 7, 7 };
+ const uint8_t s_d_bits[8] = { 7, 6, 7, 6, 5, 6, 5, 6 };
+
+ const uint32_t a_bits = 9 + (submode >> 1);
+ const uint32_t b_bits = s_b_bits[submode];
+ const uint32_t c_bits = s_c_bits[submode];
+ const uint32_t d_bits = s_d_bits[submode];
+
+ const int max_a_val = (1 << a_bits) - 1;
+ const int max_b_val = (1 << b_bits) - 1;
+ const int max_c_val = (1 << c_bits) - 1;
+
+ // The maximum usable value before it turns to NaN/Inf
+ const int max_a_qlog = get_max_qlog(a_bits);
+
+ // d is a signed field: [-2^(d_bits-1), 2^(d_bits-1) - 1]
+ const int min_d_val = -(1 << (d_bits - 1));
+ const int max_d_val = -min_d_val - 1;
+ assert((max_d_val - min_d_val + 1) == (1 << d_bits));
+
+ // val_q[0] = low endpoint, val_q[1] = high endpoint, each quantized to a_bits.
+ int val_q[2][3];
+
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ #if 1
+ // this is better
+ const half_float l = qlog16_to_half_slow((uint32_t)std::round(low_q16[c]));
+ val_q[0][c] = half_to_qlog7_12(l, a_bits);
+
+ const half_float h = qlog16_to_half_slow((uint32_t)std::round(high_q16[c]));
+ val_q[1][c] = half_to_qlog7_12(h, a_bits);
+ #else
+ val_q[0][c] = quant_qlog16((uint32_t)std::round(low_q16[c]), a_bits);
+ val_q[1][c] = quant_qlog16((uint32_t)std::round(high_q16[c]), a_bits);
+ #endif
+
+ #if 1
+ // If both endpoints quantized to the same value, push them one step apart (in the direction
+ // of the unquantized inputs) so the two endpoints of this component stay distinct.
+ if (val_q[0][c] == val_q[1][c])
+ {
+ #if 0
+ if (l <= h)
+ #else
+ if (low_q16[c] < high_q16[c])
+ #endif
+ {
+ if (val_q[0][c])
+ val_q[0][c]--;
+
+ if (val_q[1][c] != max_a_val)
+ val_q[1][c]++;
+ }
+ else
+ {
+ if (val_q[0][c] != max_a_val)
+ val_q[0][c]++;
+
+ if (val_q[1][c])
+ val_q[1][c]--;
+ }
+ }
+ #endif
+
+ val_q[0][c] = minimum<uint32_t>(val_q[0][c], max_a_qlog);
+ val_q[1][c] = minimum<uint32_t>(val_q[1][c], max_a_qlog);
+ }
+
+ // Locate the largest of the six quantized values (first one wins on equal values).
+ int highest_q = -1, highest_val = 0, highest_comp = 0;
+
+ for (uint32_t v = 0; v < 2; v++)
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ assert(val_q[v][c] >= 0 && val_q[v][c] <= max_a_val);
+
+ if (val_q[v][c] > highest_q)
+ {
+ highest_q = val_q[v][c];
+ highest_val = v;
+ highest_comp = c;
+ }
+ }
+ }
+
+ // A tie means the other endpoint has the same value in the highest component, so either
+ // endpoint ordering is a legal candidate.
+ const bool had_tie = (val_q[highest_val ^ 1][highest_comp] == highest_q);
+
+ // Make endpoint 1 the one holding the highest value...
+ if (highest_val != 1)
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ std::swap(val_q[0][c], val_q[1][c]);
+ }
+ }
+
+ // ...and component 0 the highest component.
+ if (highest_comp)
+ {
+ std::swap(val_q[0][0], val_q[0][highest_comp]);
+ std::swap(val_q[1][0], val_q[1][highest_comp]);
+ }
+
+ int orig_q[2][3];
+ memcpy(orig_q, val_q, sizeof(val_q));
+
+ // val[1][0] is now guaranteed to be highest
+ int best_va = 0, best_vb0 = 0, best_vb1 = 0, best_vc = 0, best_vd0 = 0, best_vd1 = 0;
+ int best_max_clamp_mag = 0;
+ bool best_did_clamp = false;
+ int best_q[2][3] = { { 0, 0, 0}, { 0, 0, 0 } };
+ BASISU_NOTE_UNUSED(best_q);
+ uint32_t best_dist = UINT_MAX;
+
+ // Pass 0 uses the ordering chosen above. Pass 1 is only reached when there was a tie and
+ // pass 0 clamped; it retries with the two endpoints exchanged.
+ for (uint32_t pass = 0; pass < 2; pass++)
+ {
+ int trial_va = val_q[1][0];
+
+ assert(trial_va <= max_a_val);
+ assert(trial_va >= val_q[1][1]);
+ assert(trial_va >= val_q[1][2]);
+
+ assert(trial_va >= val_q[0][0]);
+ assert(trial_va >= val_q[0][1]);
+ assert(trial_va >= val_q[0][2]);
+
+ bool did_clamp = false;
+ int trial_max_clamp_mag = 0;
+
+ int trial_vb0 = compute_clamped_val(trial_va - val_q[1][1], 0, max_b_val, did_clamp, trial_max_clamp_mag);
+ int trial_vb1 = compute_clamped_val(trial_va - val_q[1][2], 0, max_b_val, did_clamp, trial_max_clamp_mag);
+ int trial_vc = compute_clamped_val(trial_va - val_q[0][0], 0, max_c_val, did_clamp, trial_max_clamp_mag);
+ int trial_vd0 = compute_clamped_val((trial_va - trial_vb0 - trial_vc) - val_q[0][1], min_d_val, max_d_val, did_clamp, trial_max_clamp_mag);
+ int trial_vd1 = compute_clamped_val((trial_va - trial_vb1 - trial_vc) - val_q[0][2], min_d_val, max_d_val, did_clamp, trial_max_clamp_mag);
+
+ if (!did_clamp)
+ {
+ // Make sure decoder gets the expected values
+ assert(trial_va == val_q[1][0]);
+ assert(trial_va - trial_vb0 == val_q[1][1]);
+ assert(trial_va - trial_vb1 == val_q[1][2]);
+
+ assert((trial_va - trial_vc) == val_q[0][0]);
+ assert((trial_va - trial_vb0 - trial_vc - trial_vd0) == val_q[0][1]);
+ assert((trial_va - trial_vb1 - trial_vc - trial_vd1) == val_q[0][2]);
+ }
+
+ // Values reconstructed from the (possibly clamped) fields.
+ const int r_e0 = clamp<int>(trial_va, 0, max_a_val);
+ const int r_e1 = clamp<int>(trial_va - trial_vb0, 0, max_a_val);
+ const int r_e2 = clamp<int>(trial_va - trial_vb1, 0, max_a_val);
+
+ const int r_f0 = clamp<int>(trial_va - trial_vc, 0, max_a_val);
+ const int r_f1 = clamp<int>(trial_va - trial_vb0 - trial_vc - trial_vd0, 0, max_a_val);
+ const int r_f2 = clamp<int>(trial_va - trial_vb1 - trial_vc - trial_vd1, 0, max_a_val);
+
+ assert(r_e0 <= max_a_qlog);
+ assert(r_e1 <= max_a_qlog);
+ assert(r_e2 <= max_a_qlog);
+
+ assert(r_f0 <= max_a_qlog);
+ assert(r_f1 <= max_a_qlog);
+ assert(r_f2 <= max_a_qlog);
+
+ // Exact encode, or no alternative ordering to try: accept this trial and stop.
+ if ((!did_clamp) || (!had_tie))
+ {
+ best_va = trial_va;
+ best_vb0 = trial_vb0;
+ best_vb1 = trial_vb1;
+ best_vc = trial_vc;
+ best_vd0 = trial_vd0;
+ best_vd1 = trial_vd1;
+ best_max_clamp_mag = trial_max_clamp_mag;
+ best_did_clamp = did_clamp;
+
+ best_q[1][0] = r_e0;
+ best_q[1][1] = r_e1;
+ best_q[1][2] = r_e2;
+ best_q[0][0] = r_f0;
+ best_q[0][1] = r_f1;
+ best_q[0][2] = r_f2;
+ break;
+ }
+
+ // we had a tie and it did clamp, try swapping L/H for a potential slight gain
+
+ const uint32_t r_dist1 = basisu::square<int>(r_e0 - val_q[1][0]) + basisu::square<int>(r_e1 - val_q[1][1]) + basisu::square<int>(r_e2 - val_q[1][2]);
+ const uint32_t r_dist0 = basisu::square<int>(r_f0 - val_q[0][0]) + basisu::square<int>(r_f1 - val_q[0][1]) + basisu::square<int>(r_f2 - val_q[0][2]);
+
+ const uint32_t total_dist = r_dist1 + r_dist0;
+
+ if (total_dist < best_dist)
+ {
+ best_dist = total_dist;
+
+ best_va = trial_va;
+ best_vb0 = trial_vb0;
+ best_vb1 = trial_vb1;
+ best_vc = trial_vc;
+ best_vd0 = trial_vd0;
+ best_vd1 = trial_vd1;
+ // Keep the reported clamp magnitude in step with the trial that was actually chosen;
+ // otherwise a clamped result would be returned with a magnitude of 0.
+ best_max_clamp_mag = trial_max_clamp_mag;
+ best_did_clamp = did_clamp;
+
+ best_q[1][0] = r_e0;
+ best_q[1][1] = r_e1;
+ best_q[1][2] = r_e2;
+ best_q[0][0] = r_f0;
+ best_q[0][1] = r_f1;
+ best_q[0][2] = r_f2;
+ }
+
+ for (uint32_t c = 0; c < 3; c++)
+ std::swap(val_q[0][c], val_q[1][c]);
+ }
+
+ // pack bits now
+ int v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0;
+
+ // x0..x5 are the six "extra" bit slots; which field bit lands in each slot depends on the submode.
+ int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0;
+ switch (submode)
+ {
+ case 0:
+ x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
+ break;
+ case 1:
+ x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
+ break;
+ case 2:
+ x0 = get_bit(best_va, 9); x1 = get_bit(best_vc, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
+ break;
+ case 3:
+ x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_va, 9); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
+ break;
+ case 4:
+ x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10);
+ break;
+ case 5:
+ x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_vc, 7); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
+ break;
+ case 6:
+ x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10);
+ break;
+ case 7:
+ x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
+ break;
+ default:
+ break;
+ }
+
+ // write mode
+ pack_bit(v1, 7, submode, 0);
+ pack_bit(v2, 7, submode, 1);
+ pack_bit(v3, 7, submode, 2);
+
+ // highest component
+ pack_bit(v4, 7, highest_comp, 0);
+ pack_bit(v5, 7, highest_comp, 1);
+
+ // write bit 8 of va
+ pack_bit(v1, 6, best_va, 8);
+
+ // extra bits
+ pack_bit(v2, 6, x0);
+ pack_bit(v3, 6, x1);
+ pack_bit(v4, 6, x2);
+ pack_bit(v5, 6, x3);
+ pack_bit(v4, 5, x4);
+ pack_bit(v5, 5, x5);
+
+ // Low bits of each field go into the bottom of the corresponding byte.
+ v0 = best_va & 0xFF;
+ v1 |= (best_vc & 63);
+ v2 |= (best_vb0 & 63);
+ v3 |= (best_vb1 & 63);
+ v4 |= (best_vd0 & 31);
+ v5 |= (best_vd1 & 31);
+
+ assert(in_range(v0, 0, 255) && in_range(v1, 0, 255) && in_range(v2, 0, 255) && in_range(v3, 0, 255) && in_range(v4, 0, 255) && in_range(v5, 0, 255));
+
+ pEndpoints[0] = (uint8_t)v0;
+ pEndpoints[1] = (uint8_t)v1;
+ pEndpoints[2] = (uint8_t)v2;
+ pEndpoints[3] = (uint8_t)v3;
+ pEndpoints[4] = (uint8_t)v4;
+ pEndpoints[5] = (uint8_t)v5;
+
+ #ifdef _DEBUG
+ // Test for valid pack by unpacking
+ {
+ if (highest_comp)
+ {
+ std::swap(best_q[0][0], best_q[0][highest_comp]);
+ std::swap(best_q[1][0], best_q[1][highest_comp]);
+
+ std::swap(orig_q[0][0], orig_q[0][highest_comp]);
+ std::swap(orig_q[1][0], orig_q[1][highest_comp]);
+ }
+
+ int test_e[2][3];
+ decode_mode11_to_qlog12(pEndpoints, test_e, astc_helpers::BISE_256_LEVELS);
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ for (uint32_t j = 0; j < 3; j++)
+ {
+ assert(best_q[i][j] == test_e[i][j] >> (12 - a_bits));
+
+ if (!best_did_clamp)
+ {
+ assert((orig_q[i][j] == test_e[i][j] >> (12 - a_bits)) ||
+ (orig_q[1 - i][j] == test_e[i][j] >> (12 - a_bits)));
+ }
+ }
+ }
+ }
+ #endif
+
+ max_clamp_mag = best_max_clamp_mag;
+
+ return best_did_clamp;
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ // Packs a low/high RGB endpoint pair (qlog16 space) into the 6 "direct" mode 11 endpoint bytes.
+ // Output layout is { l0, h0, l1, h1, l2, h2 }. Components 0 and 1 go through the g_half_to_qlog8
+ // table; component 2 goes through g_half_to_qlog7 and has bit 7 (128) set on both of its bytes.
+ static void pack_astc_mode11_direct(uint8_t* pEndpoints, const vec3F& l_q16, const vec3F& h_q16)
+ {
+ for (uint32_t comp = 0; comp < 3; comp++)
+ {
+ const bool is_last_comp = (comp == 2);
+
+ // Largest code allowed for this component (7-bit for the last component, 8-bit otherwise).
+ const int max_code = is_last_comp ? MAX_QLOG7 : MAX_QLOG8;
+
+ // TODO: This goes from QLOG16->HALF->QLOG8/7
+ const half_float low_half = qlog16_to_half_slow(clamp((int)std::round(l_q16[comp]), 0, 65535));
+ const half_float high_half = qlog16_to_half_slow(clamp((int)std::round(h_q16[comp]), 0, 65535));
+
+ const uint32_t low_index = bounds_check((uint32_t)low_half, 0U, 32768U);
+ const uint32_t high_index = bounds_check((uint32_t)high_half, 0U, 32768U);
+
+ int low_code = is_last_comp ? g_half_to_qlog7[low_index] : g_half_to_qlog8[low_index];
+ int high_code = is_last_comp ? g_half_to_qlog7[high_index] : g_half_to_qlog8[high_index];
+
+ low_code = minimum<uint32_t>(low_code, max_code);
+ high_code = minimum<uint32_t>(high_code, max_code);
+
+ if (low_code == high_code)
+ {
+ // Both ends landed on the same code: move them one step apart, following the ordering
+ // of the unquantized inputs, without leaving [0, max_code].
+ const bool low_is_smaller = (l_q16[comp] <= h_q16[comp]);
+
+ int& smaller_code = low_is_smaller ? low_code : high_code;
+ int& larger_code = low_is_smaller ? high_code : low_code;
+
+ if (smaller_code)
+ smaller_code--;
+
+ if (larger_code != max_code)
+ larger_code++;
+ }
+
+ assert(low_code <= max_code && high_code <= max_code);
+
+ if (is_last_comp)
+ {
+ low_code |= 128;
+ high_code |= 128;
+ }
+
+ pEndpoints[2 * comp + 0] = (uint8_t)low_code;
+ pEndpoints[2 * comp + 1] = (uint8_t)high_code;
+ }
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ static bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& rgb_q16, float s_q16, int& max_clamp_mag, uint32_t ise_weight_range)
+ { /* Packs one candidate encoding of an RGB + scale ("mode 7") endpoint into pEndpoints[0..3].
+    *
+    * submode (0-5) selects the bit budget: s_r_bits / s_g_b_bits / s_s_bits give the packed width of
+    * slot 0, of the other two color slots, and of the scale. All four inputs (rgb_q16[0..2], s_q16)
+    * are first quantized with quant_qlog16() to s_r_bits[submode] bits.
+    *
+    * For submodes 0-4 the largest color component is swapped into slot 0 (its index goes into the
+    * mode bits) and the other two are stored as differences from it; submode 5 stores the three
+    * components as-is. Differences/scale that do not fit their field are clamped (submodes 0-4 only).
+    *
+    * max_clamp_mag receives the largest amount by which a value exceeded its field (0 if none did).
+    * Returns true if any value was clamped.
+    */
+ assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
+
+ assert(submode <= 5);
+ max_clamp_mag = 0;
+
+ static const uint8_t s_r_bits[6] = { 11, 11, 10, 9, 8, 7 };
+ static const uint8_t s_g_b_bits[6] = { 5, 6, 5, 6, 7, 7 };
+ static const uint8_t s_s_bits[6] = { 7, 5, 8, 7, 6, 7 };
+
+ // The precision of the components
+ const uint32_t prec_bits = s_r_bits[submode];
+
+ int qlog[4], pack_bits[4]; // index 0..2 = color components, index 3 = scale
+
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ const float f = (i == 3) ? s_q16 : rgb_q16[i];
+
+ // The # of bits the component is packed into
+ if (i == 0)
+ pack_bits[i] = s_r_bits[submode];
+ else if (i == 3)
+ pack_bits[i] = s_s_bits[submode];
+ else
+ pack_bits[i] = s_g_b_bits[submode];
+
+ #if 0
+ // this is slightly worse
+ // TODO: going from qlog16 to half loses some precision. Then going from half to qlog 7-12 will have extra error.
+ half_float h = qlog_to_half(clamp((int)std::round(f), 0, MAX_QLOG16), 16);
+ qlog[i] = half_to_qlog7_12((half_float)bounds_check((uint32_t)h, 0U, 32768U), prec_bits);
+ #else
+ qlog[i] = quant_qlog16(clamp<int>((int)std::round(f), 0, MAX_QLOG16), prec_bits);
+
+ // Only bias if there are enough texel weights, 4=6 weights
+ if (ise_weight_range >= 4)
+ {
+ // Explicitly bias the high color, and the scale up, to better exploit the weights.
+ // The quantized range also then encompasses the complete input range.
+ const uint32_t max_val = (1 << prec_bits) - 1;
+ const uint32_t K = 3;
+ if (i == 3)
+ {
+ qlog[i] = minimum<uint32_t>(qlog[i] + K * 2, max_val);
+ }
+ else
+ {
+ qlog[i] = minimum<uint32_t>(qlog[i] + K, max_val);
+ }
+ }
+ #endif
+
+ if (i != 3)
+ qlog[i] = minimum<uint32_t>(qlog[i], get_max_qlog(prec_bits));
+
+ // If S=0, we lose freedom for the texel weights to add any value.
+ if ((i == 3) && (qlog[i] == 0))
+ qlog[i] = 1;
+ }
+
+ uint32_t maj_index = 0; // index of the largest color component; stays 0 for submode 5
+
+ bool did_clamp = false;
+
+ if (submode != 5)
+ {
+ int largest_qlog = 0;
+ for (uint32_t i = 0; i < 3; i++)
+ {
+ if (qlog[i] > largest_qlog)
+ {
+ largest_qlog = qlog[i];
+ maj_index = i;
+ }
+ }
+
+ if (maj_index)
+ {
+ std::swap(qlog[0], qlog[maj_index]);
+ }
+
+ assert(qlog[0] >= qlog[1]);
+ assert(qlog[0] >= qlog[2]);
+
+ qlog[1] = qlog[0] - qlog[1]; // slots 1 and 2 now hold differences from slot 0
+ qlog[2] = qlog[0] - qlog[2];
+
+ for (uint32_t i = 1; i < 4; i++)
+ {
+ const int max_val = (1 << pack_bits[i]) - 1;
+
+ if (qlog[i] > max_val)
+ {
+ max_clamp_mag = maximum<int>(max_clamp_mag, qlog[i] - max_val);
+ qlog[i] = max_val;
+ did_clamp = true;
+ }
+ }
+ }
+
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ const int max_val = (1 << pack_bits[i]) - 1; (void)max_val;
+
+ assert(qlog[i] <= max_val);
+ }
+
+ int mode = 0;
+
+ int r = qlog[0] & 63; // 6-bits
+ int g = qlog[1] & 31; // 5-bits
+ int b = qlog[2] & 31; // 5-bits
+ int s = qlog[3] & 31; // 5-bits
+
+ int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0, x6 = 0; // per-submode placement of the remaining high bits
+
+ switch (submode)
+ {
+ case 0:
+ {
+ mode = (maj_index << 2) | 0;
+ assert((mode & 0xC) != 0xC);
+
+ x0 = get_bit(qlog[0], 9); // R9
+ x1 = get_bit(qlog[0], 8); // R8
+ x2 = get_bit(qlog[0], 7); // R7
+ x3 = get_bit(qlog[0], 10); // R10
+ x4 = get_bit(qlog[0], 6); // R6
+ x5 = get_bit(qlog[3], 6); // S6
+ x6 = get_bit(qlog[3], 5); // S5
+ break;
+ }
+ case 1:
+ {
+ mode = (maj_index << 2) | 1;
+ assert((mode & 0xC) != 0xC);
+
+ x0 = get_bit(qlog[0], 8); // R8
+ x1 = get_bit(qlog[1], 5); // G5
+ x2 = get_bit(qlog[0], 7); // R7
+ x3 = get_bit(qlog[2], 5); // B5
+ x4 = get_bit(qlog[0], 6); // R6
+ x5 = get_bit(qlog[0], 10); // R10
+ x6 = get_bit(qlog[0], 9); // R9
+ break;
+ }
+ case 2:
+ {
+ mode = (maj_index << 2) | 2;
+ assert((mode & 0xC) != 0xC);
+
+ x0 = get_bit(qlog[0], 9); // R9
+ x1 = get_bit(qlog[0], 8); // R8
+ x2 = get_bit(qlog[0], 7); // R7
+ x3 = get_bit(qlog[0], 6); // R6
+ x4 = get_bit(qlog[3], 7); // S7
+ x5 = get_bit(qlog[3], 6); // S6
+ x6 = get_bit(qlog[3], 5); // S5
+ break;
+ }
+ case 3:
+ {
+ mode = (maj_index << 2) | 3;
+ assert((mode & 0xC) != 0xC);
+
+ x0 = get_bit(qlog[0], 8); // R8
+ x1 = get_bit(qlog[1], 5); // G5
+ x2 = get_bit(qlog[0], 7); // R7
+ x3 = get_bit(qlog[2], 5); // B5
+ x4 = get_bit(qlog[0], 6); // R6
+ x5 = get_bit(qlog[3], 6); // S6
+ x6 = get_bit(qlog[3], 5); // S5
+ break;
+ }
+ case 4:
+ {
+ mode = maj_index | 0xC; // 0b1100
+ assert((mode & 0xC) == 0xC);
+ assert(mode != 0xF);
+
+ x0 = get_bit(qlog[1], 6); // G6
+ x1 = get_bit(qlog[1], 5); // G5
+ x2 = get_bit(qlog[2], 6); // B6
+ x3 = get_bit(qlog[2], 5); // B5
+ x4 = get_bit(qlog[0], 6); // R6
+ x5 = get_bit(qlog[0], 7); // R7
+ x6 = get_bit(qlog[3], 5); // S5
+ break;
+ }
+ case 5:
+ {
+ mode = 0xF;
+
+ x0 = get_bit(qlog[1], 6); // G6
+ x1 = get_bit(qlog[1], 5); // G5
+ x2 = get_bit(qlog[2], 6); // B6
+ x3 = get_bit(qlog[2], 5); // B5
+ x4 = get_bit(qlog[0], 6); // R6
+ x5 = get_bit(qlog[3], 6); // S6
+ x6 = get_bit(qlog[3], 5); // S5
+ break;
+ }
+ default:
+ {
+ assert(0);
+ break;
+ }
+ }
+
+ pEndpoints[0] = (uint8_t)((get_bit(mode, 1) << 7) | (get_bit(mode, 0) << 6) | r); // mode bits 1,0 + low 6 bits of slot 0
+ pEndpoints[1] = (uint8_t)((get_bit(mode, 2) << 7) | (x0 << 6) | (x1 << 5) | g); // mode bit 2 + x0,x1 + low 5 bits of slot 1
+ pEndpoints[2] = (uint8_t)((get_bit(mode, 3) << 7) | (x2 << 6) | (x3 << 5) | b); // mode bit 3 + x2,x3 + low 5 bits of slot 2
+ pEndpoints[3] = (uint8_t)((x4 << 7) | (x5 << 6) | (x6 << 5) | s); // x4,x5,x6 + low 5 bits of scale
+
+ #ifdef _DEBUG
+ // Test for valid pack by unpacking
+ {
+ const int inv_shift = 12 - prec_bits;
+
+ int unpacked_e[2][3];
+ if (submode != 5)
+ {
+ unpacked_e[1][0] = left_shift32(qlog[0], inv_shift);
+ unpacked_e[1][1] = clamp(left_shift32((qlog[0] - qlog[1]), inv_shift), 0, 0xFFF);
+ unpacked_e[1][2] = clamp(left_shift32((qlog[0] - qlog[2]), inv_shift), 0, 0xFFF);
+
+ unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF);
+ unpacked_e[0][1] = clamp(left_shift32(((qlog[0] - qlog[1]) - qlog[3]), inv_shift), 0, 0xFFF);
+ unpacked_e[0][2] = clamp(left_shift32(((qlog[0] - qlog[2]) - qlog[3]), inv_shift), 0, 0xFFF);
+ }
+ else
+ {
+ unpacked_e[1][0] = left_shift32(qlog[0], inv_shift);
+ unpacked_e[1][1] = left_shift32(qlog[1], inv_shift);
+ unpacked_e[1][2] = left_shift32(qlog[2], inv_shift);
+
+ unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF);
+ unpacked_e[0][1] = clamp(left_shift32((qlog[1] - qlog[3]), inv_shift), 0, 0xFFF);
+ unpacked_e[0][2] = clamp(left_shift32((qlog[2] - qlog[3]), inv_shift), 0, 0xFFF);
+ }
+
+ if (maj_index)
+ {
+ std::swap(unpacked_e[0][0], unpacked_e[0][maj_index]);
+ std::swap(unpacked_e[1][0], unpacked_e[1][maj_index]);
+ }
+
+ int e[2][3];
+ decode_mode7_to_qlog12_ise20(pEndpoints, e, nullptr);
+
+ for (uint32_t i = 0; i < 3; i++)
+ {
+ assert(unpacked_e[0][i] == e[0][i]);
+ assert(unpacked_e[1][i] == e[1][i]);
+ }
+ }
+ #endif
+
+ return did_clamp;
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ // Converts n endpoint bytes (each 0-255) to the given ISE endpoint range.
+ // For BISE_256_LEVELS the bytes are copied unchanged; otherwise each byte is looked up in the
+ // m_val_to_ise table of that range's endpoint table.
+ static void quantize_ise_endpoints(uint32_t ise_endpoint_range, const uint8_t* pSrc_endpoints, uint8_t *pDst_endpoints, uint32_t n)
+ {
+ assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
+
+ // Full 8-bit range: nothing to remap.
+ if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
+ {
+ memcpy(pDst_endpoints, pSrc_endpoints, n);
+ return;
+ }
+
+ const uint8_t* pIn = pSrc_endpoints;
+ const uint8_t* const pIn_end = pSrc_endpoints + n;
+ uint8_t* pOut = pDst_endpoints;
+
+ while (pIn != pIn_end)
+ {
+ const uint32_t byte_val = *pIn++;
+ assert(byte_val <= 255);
+
+ *pOut++ = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_val_to_ise[byte_val];
+ }
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ // Note this could fail to find any valid solution if ise_endpoint_range != 20 (i.e. fewer than 256 endpoint levels).
+// Returns true if improved.
+ static bool try_mode11(uint32_t num_pixels,
+ uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
+ vec3F& low_color_q16, const vec3F& high_color_q16,
+ half_float block_pixels_half[16][3],
+ uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_options& coptions, bool direct_only, uint32_t ise_endpoint_range,
+ bool constrain_ise_weight8_selectors,
+ int32_t first_submode, int32_t last_submode) // -1, 7
+ {
+ // Tries each mode 11 packing of the given low/high endpoints, from last_submode down to
+ // first_submode (-1 = direct packing). For every candidate that decodes successfully, the best
+ // weights are chosen with eval_selectors(). Whenever a candidate's error beats cur_block_error,
+ // pEndpoints / pWeights / cur_block_error / submode_used are overwritten with it
+ // (submode_used = submode + 1, so 0 denotes direct).
+ assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
+ assert((num_weight_levels >= 3) && (num_weight_levels <= 32));
+ assert((num_pixels >= 1) && (num_pixels <= 16));
+
+ if (direct_only)
+ first_submode = last_submode = -1;
+
+ assert(first_submode <= last_submode);
+ assert((first_submode >= -1) && (first_submode <= 7));
+ assert((last_submode >= -1) && (last_submode <= 7));
+
+ // If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts.
+ const int MAX_CLAMP_MAG_ACCEPT_THRESH = 4;
+
+ // Selector restriction only applies to 16-level weights, and does not vary per candidate.
+ const bool restrict_selectors = constrain_ise_weight8_selectors && (ise_weight_range == astc_helpers::BISE_16_LEVELS);
+ const uint32_t usable_selector_bitmask = restrict_selectors ?
+ (uint32_t)((1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 10) | (1 << 11) | (1 << 14) | (1 << 15)) :
+ UINT32_MAX;
+
+ half_float decoded_half[32][3];
+ vec3F decoded_float[32];
+ uint8_t unquantized_endpoints[NUM_MODE11_ENDPOINTS], candidate_endpoints[NUM_MODE11_ENDPOINTS], candidate_weights[16];
+
+ bool found_better = false;
+
+ // TODO: First determine if a submode doesn't clamp first. If one is found, encode to that and we're done.
+ for (int submode = last_submode; submode >= first_submode; submode--)
+ {
+ bool clamped = false;
+
+ if (submode == -1)
+ {
+ // If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision.
+ pack_astc_mode11_direct(unquantized_endpoints, low_color_q16, high_color_q16);
+ }
+ else
+ {
+ int clamp_mag = 0;
+ clamped = pack_astc_mode11_submode(submode, unquantized_endpoints, low_color_q16, high_color_q16, clamp_mag);
+
+ if (clamped && (clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
+ continue;
+ }
+
+ // This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
+ // It could massively distort the endpoints, but still result in a valid encoding.
+ quantize_ise_endpoints(ise_endpoint_range, unquantized_endpoints, candidate_endpoints, NUM_MODE11_ENDPOINTS);
+
+ const bool decoded_ok = get_astc_hdr_mode_11_block_colors(candidate_endpoints, &decoded_half[0][0], decoded_float, num_weight_levels, ise_weight_range, ise_endpoint_range);
+ if (!decoded_ok)
+ continue;
+
+ const double candidate_error = eval_selectors(num_pixels, candidate_weights, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions, usable_selector_bitmask);
+
+ if (candidate_error < cur_block_error)
+ {
+ found_better = true;
+ submode_used = submode + 1;
+ cur_block_error = candidate_error;
+ memcpy(pEndpoints, candidate_endpoints, NUM_MODE11_ENDPOINTS);
+ memcpy(pWeights, candidate_weights, num_pixels);
+ }
+
+ // If it didn't clamp it was a lossless encode at this precision, so we can stop early as there's probably no use trying lower precision submodes.
+ // (Although it may be, because a lower precision pack could try nearby voxel coords.)
+ // However, at lower levels quantization may cause the decoded endpoints to be very distorted, so we need to evaluate up to direct.
+ if ((ise_endpoint_range == astc_helpers::BISE_256_LEVELS) && !clamped)
+ break;
+ }
+
+ return found_better;
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ static bool try_mode7(
+ uint32_t num_pixels,
+ uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
+ vec3F& high_color_q16, const float s_q16,
+ half_float block_pixels_half[16][3],
+ uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_options& coptions,
+ uint32_t ise_endpoint_range)
+ {
+ // Tries mode 7 submodes 0..5 in ascending order for the given high color and scale. Each
+ // candidate that decodes successfully is scored with eval_selectors(); whenever one beats
+ // cur_block_error, pEndpoints / pWeights / cur_block_error / submode_used are overwritten.
+ // Returns true if at least one candidate improved on the incoming cur_block_error.
+ assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
+ assert((num_pixels >= 1) && (num_pixels <= 16));
+
+ // Clamp magnitude beyond which submodes 0-4 are rejected outright.
+ const int MAX_CLAMP_MAG_ACCEPT_THRESH = 4;
+ const int LAST_SUBMODE = 5;
+
+ half_float decoded_half[24][3];
+ vec3F decoded_float[24];
+
+ uint8_t unquantized_endpoints[NUM_MODE7_ENDPOINTS], candidate_endpoints[NUM_MODE7_ENDPOINTS], candidate_weights[16];
+
+ bool found_better = false;
+
+ // TODO: First determine if a submode doesn't clamp first. If one is found, encode to that and we're done.
+ for (int submode = 0; submode <= LAST_SUBMODE; submode++)
+ {
+ int clamp_mag = 0;
+ const bool clamped = pack_astc_mode7_submode(submode, unquantized_endpoints, high_color_q16, s_q16, clamp_mag, ise_weight_range);
+
+ // The final submode is always evaluated, whatever it reports.
+ if ((submode < LAST_SUBMODE) && clamped && (clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
+ continue;
+
+ // This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
+ // It could massively distort the endpoints, but still result in a valid encoding.
+ quantize_ise_endpoints(ise_endpoint_range, unquantized_endpoints, candidate_endpoints, NUM_MODE7_ENDPOINTS);
+
+ const bool decoded_ok = get_astc_hdr_mode_7_block_colors(candidate_endpoints, &decoded_half[0][0], decoded_float, num_weight_levels, ise_weight_range, ise_endpoint_range);
+ if (!decoded_ok)
+ continue;
+
+ const double candidate_error = eval_selectors(num_pixels, candidate_weights, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions);
+
+ if (candidate_error < cur_block_error)
+ {
+ found_better = true;
+ submode_used = submode;
+ cur_block_error = candidate_error;
+ memcpy(pEndpoints, candidate_endpoints, NUM_MODE7_ENDPOINTS);
+ memcpy(pWeights, candidate_weights, num_pixels);
+ }
+
+ // With 256 endpoint levels, an unclamped pack ends the search.
+ if ((ise_endpoint_range == astc_helpers::BISE_256_LEVELS) && !clamped)
+ break;
+ }
+
+ return found_better;
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ // Encodes up to 16 RGB texels as a mode 11 (two RGB endpoints) candidate.
+ //
+ // Steps: convert the texels to half and qlog16; pick initial endpoints as the two texels that
+ // project furthest apart along the block's principal axis (pulled in by 1/64 at each end); pack
+ // them with try_mode11(); refine with up to NUM_LS_PASSES least-squares passes; and, if uber_mode
+ // is set, retry with the extreme weight indices nudged inwards.
+ //
+ // blk_endpoints / blk_weights / best_submode are only overwritten when a candidate beats the
+ // incoming cur_block_error (best_submode is reset to 0 on entry regardless).
+ // Returns the resulting block error, which is cur_block_error unchanged if nothing improved on it.
+ static double encode_astc_hdr_block_mode_11(
+ uint32_t num_pixels,
+ const vec4F* pBlock_pixels,
+ uint32_t ise_weight_range,
+ uint32_t& best_submode,
+ double cur_block_error,
+ uint8_t* blk_endpoints, uint8_t* blk_weights,
+ const astc_hdr_codec_options& coptions,
+ bool direct_only,
+ uint32_t ise_endpoint_range,
+ bool uber_mode,
+ bool constrain_ise_weight8_selectors,
+ int32_t first_submode, int32_t last_submode)
+ {
+ assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
+ assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
+ assert((num_pixels >= 1) && (num_pixels <= 16));
+
+ best_submode = 0;
+
+ half_float block_pixels_half[16][3];
+ vec4F block_pixels_q16[16];
+
+ // TODO: This is done redundantly.
+ for (uint32_t i = 0; i < num_pixels; i++)
+ {
+ block_pixels_half[i][0] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][0]);
+ block_pixels_q16[i][0] = (float)half_to_qlog16(block_pixels_half[i][0]);
+
+ block_pixels_half[i][1] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][1]);
+ block_pixels_q16[i][1] = (float)half_to_qlog16(block_pixels_half[i][1]);
+
+ block_pixels_half[i][2] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][2]);
+ block_pixels_q16[i][2] = (float)half_to_qlog16(block_pixels_half[i][2]);
+
+ block_pixels_q16[i][3] = 0.0f;
+ }
+
+ const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range);
+
+ // TODO: should match MAX_SUPPORTED_ISE_WEIGHT_INDEX
+ const uint32_t MAX_WEIGHT_LEVELS = 32;
+ (void)MAX_WEIGHT_LEVELS;
+ assert(num_weight_levels <= MAX_WEIGHT_LEVELS);
+
+ vec3F block_mean_color_q16(calc_mean(num_pixels, block_pixels_q16));
+ vec3F block_axis_q16(calc_rgb_pca(num_pixels, block_pixels_q16, block_mean_color_q16));
+
+ aabb3F color_box_q16(cInitExpand);
+
+ // Find the texels with the smallest and largest projection onto the principal axis.
+ float l = 1e+30f, h = -1e+30f;
+ vec3F low_color_q16, high_color_q16;
+
+ for (uint32_t i = 0; i < num_pixels; i++)
+ {
+ color_box_q16.expand(block_pixels_q16[i]);
+
+ vec3F k(vec3F(block_pixels_q16[i]) - block_mean_color_q16);
+ float kd = k.dot(block_axis_q16);
+
+ if (kd < l)
+ {
+ l = kd;
+ low_color_q16 = block_pixels_q16[i];
+ }
+
+ if (kd > h)
+ {
+ h = kd;
+ high_color_q16 = block_pixels_q16[i];
+ }
+ }
+
+ // Pull both endpoints 1/64 of the way towards each other.
+ vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16);
+ for (uint32_t i = 0; i < 3; i++)
+ {
+ low_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f);
+ high_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f);
+ }
+
+ uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS];
+ uint8_t trial_blk_weights[16];
+ uint32_t trial_best_submode = 0;
+
+ clear_obj(trial_blk_endpoints);
+ clear_obj(trial_blk_weights);
+
+ // The first attempt is scored against a huge error so that any valid encoding is recorded,
+ // even one that is worse than the caller's current best.
+ double trial_blk_error = 1e+30f;
+
+ bool did_improve = try_mode11(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode,
+ low_color_q16, high_color_q16,
+ block_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight8_selectors,
+ first_submode, last_submode);
+
+ // If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do.
+ if (!did_improve)
+ return cur_block_error;
+
+ // Did the solution improve?
+ if (trial_blk_error < cur_block_error)
+ {
+ cur_block_error = trial_blk_error;
+ memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);
+ memcpy(blk_weights, trial_blk_weights, num_pixels);
+ best_submode = trial_best_submode;
+ }
+
+ #define USE_LEAST_SQUARES (1)
+ #if USE_LEAST_SQUARES
+ // least squares on the most promising trial weight indices found
+ const uint32_t NUM_LS_PASSES = 3;
+
+ for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++)
+ {
+ vec3F l_q16, h_q16;
+ if (!compute_least_squares_endpoints_rgb(num_pixels, trial_blk_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, block_pixels_q16, color_box_q16))
+ break;
+
+ bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
+ l_q16, h_q16,
+ block_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight8_selectors,
+ first_submode, last_submode);
+
+ if (!was_improved)
+ break;
+
+ // It's improved, so let's take the new weight indices.
+ memcpy(trial_blk_weights, blk_weights, num_pixels);
+
+ } // pass
+ #endif
+
+ if (uber_mode)
+ {
+ // Try varying the current best weight indices. This can be expanded/improved, but at potentially great cost.
+
+ uint8_t temp_astc_weights[16];
+ memcpy(temp_astc_weights, trial_blk_weights, num_pixels);
+
+ // Range of weight indices in use, in linear (not ASTC) order.
+ uint32_t min_lin_sel = 256, max_lin_sel = 0;
+ for (uint32_t i = 0; i < num_pixels; i++)
+ {
+ const uint32_t astc_sel = temp_astc_weights[i];
+
+ const uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
+ assert(lin_sel < num_weight_levels);
+
+ min_lin_sel = minimumu(min_lin_sel, lin_sel);
+ max_lin_sel = maximumu(max_lin_sel, lin_sel);
+ }
+
+ // Builds a perturbed copy of temp_astc_weights (texels at the highest used index moved down
+ // one step and/or texels at the lowest used index moved up one step), refits the endpoints
+ // by least squares and offers the result to try_mode11(). Lowering takes priority when a
+ // texel qualifies for both. Does nothing if no weight changed or the fit fails.
+ auto try_perturbed_weights = [&](bool raise_lowest, bool lower_highest)
+ {
+ bool weights_changed = false;
+ uint8_t trial_weights[16];
+ for (uint32_t i = 0; i < num_pixels; i++)
+ {
+ uint32_t astc_sel = temp_astc_weights[i];
+ uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
+
+ if (lower_highest && (lin_sel == max_lin_sel) && (lin_sel > 0))
+ {
+ lin_sel--;
+ weights_changed = true;
+ }
+ else if (raise_lowest && (lin_sel == min_lin_sel) && (lin_sel < (num_weight_levels - 1)))
+ {
+ lin_sel++;
+ weights_changed = true;
+ }
+
+ trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel];
+ }
+
+ if (!weights_changed)
+ return;
+
+ vec3F l_q16, h_q16;
+ if (!compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, block_pixels_q16, color_box_q16))
+ return;
+
+ // Any improvement is recorded directly in blk_endpoints/blk_weights/cur_block_error/best_submode.
+ try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
+ l_q16, h_q16,
+ block_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight8_selectors,
+ first_submode, last_submode);
+ };
+
+ // Same three variations, in the same order, as before: raise lowest, lower highest, both.
+ try_perturbed_weights(true, false);
+ try_perturbed_weights(false, true);
+ try_perturbed_weights(true, true);
+ } // uber_mode
+
+ return cur_block_error;
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ // Tries to encode a group of 1-16 pixels as one ASTC HDR mode 7 subset.
+ //
+ // The pixels are converted to half float and to qlog16, projected onto a fixed
+ // (1,1,1)/sqrt(3) axis through their mean to get an initial high color and scale,
+ // and handed to try_mode7(). Up to two refinement passes then alternate between
+ // re-estimating the high color (scale held fixed) and re-estimating the scale
+ // (high color held fixed), using the weights of the best solution so far.
+ //
+ // num_pixels/pBlock_pixels: source pixels (only RGB is read).
+ // ise_weight_range/ise_endpoint_range: ISE ranges for weights and endpoints.
+ // best_submode: reset to 0 on entry; receives the submode of the accepted solution.
+ // cur_block_error: error to beat. The (possibly lowered) value is returned.
+ // blk_endpoints/blk_weights: receive the accepted solution; they are only written
+ //   here when a trial beats cur_block_error (try_mode7() is assumed to behave the
+ //   same way for the buffers passed to it - confirm against its definition).
+ static double encode_astc_hdr_block_mode_7(
+     uint32_t num_pixels, const vec4F* pBlock_pixels,
+     uint32_t ise_weight_range,
+     uint32_t& best_submode,
+     double cur_block_error,
+     uint8_t* blk_endpoints, //[4]
+     uint8_t* blk_weights, // [num_pixels]
+     const astc_hdr_codec_options& coptions,
+     uint32_t ise_endpoint_range)
+ {
+     assert((num_pixels >= 1) && (num_pixels <= 16));
+     assert((ise_weight_range >= 1) && (ise_weight_range <= 10));
+     assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
+
+     const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range);
+
+     const uint32_t MAX_WEIGHT_LEVELS = 24;
+     assert(num_weight_levels <= MAX_WEIGHT_LEVELS);
+     BASISU_NOTE_UNUSED(MAX_WEIGHT_LEVELS);
+
+     best_submode = 0;
+
+     // RGB of every pixel as half float, and the same values mapped to qlog16 (4th lane zeroed).
+     half_float pixels_half[16][3];
+     vec4F pixels_q16[16];
+     for (uint32_t px = 0; px < num_pixels; px++)
+     {
+         for (uint32_t c = 0; c < 3; c++)
+         {
+             pixels_half[px][c] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[px][c]);
+             pixels_q16[px][c] = (float)half_to_qlog16(pixels_half[px][c]);
+         }
+
+         pixels_q16[px][3] = 0.0f;
+     }
+
+     vec3F mean_q16(calc_mean(num_pixels, pixels_q16));
+
+     // Fixed axis with all three components equal to 1/sqrt(3).
+     vec3F gray_axis_q16(0.577350259f);
+
+     aabb3F bounds_q16(cInitExpand);
+
+     // Extent of the mean-centered pixels along the axis, plus their bounding box.
+     float min_proj = 1e+30f, max_proj = -1e+30f;
+     for (uint32_t px = 0; px < num_pixels; px++)
+     {
+         bounds_q16.expand(pixels_q16[px]);
+
+         vec3F centered(vec3F(pixels_q16[px]) - mean_q16);
+         const float proj = centered.dot(gray_axis_q16);
+
+         min_proj = basisu::minimum<float>(min_proj, proj);
+         max_proj = basisu::maximum<float>(max_proj, proj);
+     }
+
+     vec3F low_color_q16(interp_color(mean_q16, gray_axis_q16, min_proj, bounds_q16, bounds_q16));
+     vec3F high_color_q16(interp_color(mean_q16, gray_axis_q16, max_proj, bounds_q16, bounds_q16));
+
+     low_color_q16.clamp(0.0f, MAX_QLOG16_VAL);
+     high_color_q16.clamp(0.0f, MAX_QLOG16_VAL);
+
+     // Initial scale estimate from the low->high span along the axis.
+     vec3F span_q16(high_color_q16 - low_color_q16);
+     float s_q16 = span_q16.dot(gray_axis_q16) * gray_axis_q16[0];
+
+     // Scratch copy of the best solution; the refinement passes below read from it.
+     uint8_t trial_blk_endpoints[NUM_MODE7_ENDPOINTS];
+     uint8_t trial_blk_weights[16];
+     uint32_t trial_best_submode = 0;
+
+     clear_obj(trial_blk_endpoints);
+     clear_obj(trial_blk_weights);
+
+     double trial_blk_error = 1e+30f;
+
+     const bool found_initial = try_mode7(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode,
+         high_color_q16, ceilf(s_q16),
+         pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range);
+
+     // No usable solution at all (endpoint quantization): nothing more can be done.
+     if (!found_initial)
+         return cur_block_error;
+
+     // Publish the initial solution if it beats the caller's current error.
+     if (trial_blk_error < cur_block_error)
+     {
+         cur_block_error = trial_blk_error;
+         memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE7_ENDPOINTS);
+         memcpy(blk_weights, trial_blk_weights, num_pixels);
+         best_submode = trial_best_submode;
+     }
+
+     const float one_over_num_pixels = 1.0f / (float)num_pixels;
+
+     const uint32_t NUM_TRIALS = 2;
+     for (uint32_t trial = 0; trial < NUM_TRIALS; trial++)
+     {
+         // Step 1: with the weights and S fixed, estimate a better high color.
+         vec3F new_high_color_q16(mean_q16);
+
+         int e[2][3];
+         int cur_s = 0;
+         if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, &cur_s, ise_endpoint_range))
+             break;
+
+         // Shift the decoded scale up by 4 bits (same shift the endpoints get below).
+         cur_s <<= 4;
+
+         for (uint32_t px = 0; px < num_pixels; px++)
+         {
+             const uint32_t astc_sel = trial_blk_weights[px];
+             const float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f);
+
+             const float k = (float)cur_s * (1.0f - lerp) * one_over_num_pixels;
+             for (uint32_t c = 0; c < 3; c++)
+                 new_high_color_q16[c] += k;
+         }
+
+         bool improved = try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
+             new_high_color_q16, (float)cur_s,
+             pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range);
+
+         if (improved)
+         {
+             memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS);
+             memcpy(trial_blk_weights, blk_weights, num_pixels);
+         }
+
+         // Step 2: with the weights and high color fixed, estimate a better S.
+         float t = 0.0f;
+         for (uint32_t px = 0; px < num_pixels; px++)
+         {
+             const uint32_t astc_sel = trial_blk_weights[px];
+             const float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f);
+
+             t += (1.0f) - lerp;
+         }
+         t *= one_over_num_pixels;
+
+         if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, nullptr, ise_endpoint_range))
+             break;
+
+         vec3F cur_h_q16((float)(e[1][0] << 4), (float)(e[1][1] << 4), (float)(e[1][2] << 4));
+
+         if (fabs(t) > .0000125f)
+         {
+             const float s_r = (cur_h_q16[0] - mean_q16[0]) / t;
+             const float s_g = (cur_h_q16[1] - mean_q16[1]) / t;
+             const float s_b = (cur_h_q16[2] - mean_q16[2]) / t;
+
+             // TODO: gather statistics on these
+             // Candidate scales: one per channel, then their average. Tried in this order.
+             const float scale_candidates[4] = { ceilf(s_r), ceilf(s_g), ceilf(s_b), ceilf((s_r + s_g + s_b) / 3.0f) };
+
+             for (uint32_t k = 0; k < 4; k++)
+             {
+                 if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
+                     cur_h_q16, scale_candidates[k],
+                     pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range))
+                 {
+                     improved = true;
+                 }
+             }
+         }
+
+         // Stop as soon as a whole pass fails to improve anything.
+         if (!improved)
+             break;
+
+         memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS);
+         memcpy(trial_blk_weights, blk_weights, num_pixels);
+
+     } // trial
+
+     return cur_block_error;
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ // Appends a solid-color (void-extent style) result for a 4x4 block to all_results.
+ //
+ // If all 16 pixels have identical RGB, that color is used and the recorded error is 0.
+ // Otherwise the color is a per-channel log-space average (log2 with a small bias),
+ // clamped to [0, MAX_HALF_FLOAT], and the error is accumulated per pixel using
+ // coptions.m_r_err_scale / m_g_err_scale.
+ //
+ // Returns true only when the block really was a single solid color.
+ static bool pack_solid(const vec4F* pBlock_linear_colors, basisu::vector<astc_hdr_pack_results>& all_results, const astc_hdr_codec_options& coptions)
+ {
+     const float LOG_BIAS = .125f;
+
+     // Per-channel sum of log2(value + bias); also detect whether every pixel equals pixel 0.
+     float rgb[3] = { 0.0f, 0.0f, 0.0f };
+     bool solid_block = true;
+
+     for (uint32_t i = 0; i < 16; i++)
+     {
+         for (uint32_t c = 0; c < 3; c++)
+         {
+             if (pBlock_linear_colors[0][c] != pBlock_linear_colors[i][c])
+                 solid_block = false;
+
+             rgb[c] += log2f(pBlock_linear_colors[i][c] + LOG_BIAS);
+         }
+     }
+
+     for (uint32_t c = 0; c < 3; c++)
+     {
+         if (solid_block)
+         {
+             rgb[c] = pBlock_linear_colors[0][c];
+         }
+         else
+         {
+             // Undo the log/bias on the averaged value.
+             rgb[c] = maximum<float>(0.0f, powf(2.0f, rgb[c] * (1.0f / 16.0f)) - LOG_BIAS);
+
+             // for safety
+             rgb[c] = minimum<float>(rgb[c], MAX_HALF_FLOAT);
+         }
+     }
+
+     const half_float hc[3] =
+     {
+         float_to_half_non_neg_no_nan_inf(rgb[0]),
+         float_to_half_non_neg_no_nan_inf(rgb[1]),
+         float_to_half_non_neg_no_nan_inf(rgb[2])
+     };
+     const half_float rgba_half[4] = { hc[0], hc[1], hc[2], float_to_half_non_neg_no_nan_inf(1.0f) };
+
+     astc_hdr_pack_results results;
+     results.clear();
+     results.m_is_solid = true;
+
+     uint8_t* packed_blk = (uint8_t*)&results.m_solid_blk;
+
+     // First 8 bytes: 0xFC followed by seven 0xFF bytes.
+     packed_blk[0] = 0b11111100;
+     for (uint32_t i = 1; i < 8; i++)
+         packed_blk[i] = 255;
+
+     // Last 8 bytes: R, G, B, A as half floats, low byte first.
+     for (uint32_t c = 0; c < 4; c++)
+     {
+         packed_blk[8 + c * 2] = (uint8_t)rgba_half[c];
+         packed_blk[9 + c * 2] = (uint8_t)(rgba_half[c] >> 8);
+     }
+
+     results.m_best_block_error = 0;
+
+     if (!solid_block)
+     {
+         const float R_WEIGHT = coptions.m_r_err_scale;
+         const float G_WEIGHT = coptions.m_g_err_scale;
+
+         // This MUST match how errors are computed in eval_selectors().
+         for (uint32_t i = 0; i < 16; i++)
+         {
+             const half_float src_r = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][0]);
+             const half_float src_g = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][1]);
+             const half_float src_b = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][2]);
+
+             double rd = q(hc[0]) - q(src_r);
+             double gd = q(hc[1]) - q(src_g);
+             double bd = q(hc[2]) - q(src_b);
+
+             double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
+
+             results.m_best_block_error += e;
+         }
+     }
+
+     bc6h_enc_block_solid_color(&results.m_bc6h_block, hc);
+
+     all_results.push_back(results);
+
+     return solid_block;
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ // Tries single-subset ASTC HDR mode 11 on a 4x4 block for every weight ISE range in
+ // [first_weight_ise_range, last_weight_ise_range], appending one result (with its BC6H
+ // transcode) to all_results for each range that produced a usable encoding.
+ //
+ // The endpoint ISE range is BISE_256_LEVELS, except for BISE_16_LEVELS weights where it is
+ // BISE_192_LEVELS. constrain_ise_weight8_selectors is forwarded to the encoder and recorded
+ // in the result.
+ //
+ // A result whose endpoints cannot be decoded back to qlog12 is skipped rather than pushed
+ // with an undefined BC6H block (this mirrors pack_mode7_single_part()).
+ static void pack_mode11(
+     const vec4F* pBlock_linear_colors,
+     basisu::vector<astc_hdr_pack_results>& all_results,
+     const astc_hdr_codec_options& coptions,
+     uint32_t first_weight_ise_range, uint32_t last_weight_ise_range, bool constrain_ise_weight8_selectors)
+ {
+     uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS], trial_weights[16];
+     uint32_t trial_submode11 = 0;
+
+     clear_obj(trial_endpoints);
+     clear_obj(trial_weights);
+
+     const bool direct_only = coptions.m_mode11_direct_only;
+
+     for (uint32_t weight_ise_range = first_weight_ise_range; weight_ise_range <= last_weight_ise_range; weight_ise_range++)
+     {
+         uint32_t endpoint_ise_range = astc_helpers::BISE_256_LEVELS;
+         if (weight_ise_range == astc_helpers::BISE_16_LEVELS)
+             endpoint_ise_range = astc_helpers::BISE_192_LEVELS;
+         else
+         {
+             assert(weight_ise_range < astc_helpers::BISE_16_LEVELS);
+         }
+
+         const bool uber_mode = coptions.m_mode11_uber_mode && (weight_ise_range >= astc_helpers::BISE_4_LEVELS) && coptions.m_allow_uber_mode;
+
+         double trial_error = encode_astc_hdr_block_mode_11(16, pBlock_linear_colors, weight_ise_range, trial_submode11, 1e+30f, trial_endpoints, trial_weights, coptions, direct_only,
+             endpoint_ise_range, uber_mode, constrain_ise_weight8_selectors, coptions.m_first_mode11_submode, coptions.m_last_mode11_submode);
+
+         // The encoder returns the error it was given (1e+30f) when nothing usable was found.
+         if (!(trial_error < 1e+30f))
+             continue;
+
+         astc_hdr_pack_results results;
+         results.clear();
+
+         results.m_best_block_error = trial_error;
+
+         results.m_best_submodes[0] = trial_submode11;
+         results.m_constrained_weights = constrain_ise_weight8_selectors;
+
+         results.m_best_blk.m_num_partitions = 1;
+         results.m_best_blk.m_color_endpoint_modes[0] = 11;
+         results.m_best_blk.m_weight_ise_range = weight_ise_range;
+         results.m_best_blk.m_endpoint_ise_range = endpoint_ise_range;
+
+         memcpy(results.m_best_blk.m_endpoints, trial_endpoints, NUM_MODE11_ENDPOINTS);
+         memcpy(results.m_best_blk.m_weights, trial_weights, 16);
+
+ #ifdef _DEBUG
+         {
+             // Debug-only check: decode the block and confirm the reported error matches.
+             half_float block_pixels_half[16][3];
+
+             for (uint32_t i = 0; i < 16; i++)
+             {
+                 block_pixels_half[i][0] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][0]);
+                 block_pixels_half[i][1] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][1]);
+                 block_pixels_half[i][2] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][2]);
+             }
+
+             half_float unpacked_astc_blk_rgba[4][4][4];
+             bool res = astc_helpers::decode_block(results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16);
+             assert(res);
+
+             half_float unpacked_astc_blk_rgb[4][4][3];
+             for (uint32_t y = 0; y < 4; y++)
+                 for (uint32_t x = 0; x < 4; x++)
+                     for (uint32_t c = 0; c < 3; c++)
+                         unpacked_astc_blk_rgb[y][x][c] = unpacked_astc_blk_rgba[y][x][c];
+
+             double cmp_err = compute_block_error(&block_pixels_half[0][0], &unpacked_astc_blk_rgb[0][0][0], coptions);
+             assert(results.m_best_block_error == cmp_err);
+         }
+ #endif
+
+         // transcode to BC6H
+         assert(results.m_best_blk.m_color_endpoint_modes[0] == 11);
+
+         // Get qlog12 endpoints
+         int e[2][3];
+         bool success = decode_mode11_to_qlog12(results.m_best_blk.m_endpoints, e, results.m_best_blk.m_endpoint_ise_range);
+         assert(success);
+         if (!success)
+         {
+             // Without decoded endpoints e[][] is uninitialized; do not emit a result built from it.
+             continue;
+         }
+
+         // Transform endpoints to half float
+         half_float h_e[3][2] =
+         {
+             { qlog_to_half(e[0][0], 12), qlog_to_half(e[1][0], 12) },
+             { qlog_to_half(e[0][1], 12), qlog_to_half(e[1][1], 12) },
+             { qlog_to_half(e[0][2], 12), qlog_to_half(e[1][2], 12) }
+         };
+
+         // Transcode to bc6h
+         success = transcode_bc6h_1subset(h_e, results.m_best_blk, results.m_bc6h_block);
+         assert(success);
+         BASISU_NOTE_UNUSED(success);
+
+         all_results.push_back(results);
+     }
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ // Tries single-subset ASTC HDR mode 7 on a 4x4 block for every weight ISE range in
+ // [coptions.m_first_mode7_part1_weight_ise_range, coptions.m_last_mode7_part1_weight_ise_range],
+ // always with BISE_256_LEVELS endpoints. Each usable encoding is transcoded to BC6H and
+ // appended to all_results; encodings whose endpoints fail to decode are skipped.
+ static void pack_mode7_single_part(const vec4F* pBlock_linear_colors, basisu::vector<astc_hdr_pack_results>& all_results, const astc_hdr_codec_options& coptions)
+ {
+     const uint32_t ise_endpoint_range = astc_helpers::BISE_256_LEVELS;
+
+     uint8_t endpoints_buf[NUM_MODE7_ENDPOINTS], weights_buf[16];
+     uint32_t submode7 = 0;
+
+     clear_obj(endpoints_buf);
+     clear_obj(weights_buf);
+
+     for (uint32_t weight_ise_range = coptions.m_first_mode7_part1_weight_ise_range; weight_ise_range <= coptions.m_last_mode7_part1_weight_ise_range; weight_ise_range++)
+     {
+         double trial_error = encode_astc_hdr_block_mode_7(16, pBlock_linear_colors, weight_ise_range, submode7, 1e+30f, endpoints_buf, weights_buf, coptions, ise_endpoint_range);
+
+         // An unchanged 1e+30f means the encoder found nothing usable for this range.
+         if (!(trial_error < 1e+30f))
+             continue;
+
+         astc_hdr_pack_results results;
+         results.clear();
+
+         results.m_best_block_error = trial_error;
+         results.m_best_submodes[0] = submode7;
+
+         results.m_best_blk.m_num_partitions = 1;
+         results.m_best_blk.m_color_endpoint_modes[0] = 7;
+         results.m_best_blk.m_weight_ise_range = weight_ise_range;
+         results.m_best_blk.m_endpoint_ise_range = ise_endpoint_range;
+
+         memcpy(results.m_best_blk.m_endpoints, endpoints_buf, NUM_MODE7_ENDPOINTS);
+         memcpy(results.m_best_blk.m_weights, weights_buf, 16);
+
+         // transcode to BC6H
+         assert(results.m_best_blk.m_color_endpoint_modes[0] == 7);
+
+         // Recover the qlog12 endpoints; give up on this range if that fails.
+         int e[2][3];
+         if (!decode_mode7_to_qlog12(results.m_best_blk.m_endpoints, e, nullptr, results.m_best_blk.m_endpoint_ise_range))
+             continue;
+
+         // Per channel: { low, high } as half floats.
+         half_float h_e[3][2] =
+         {
+             { qlog_to_half(e[0][0], 12), qlog_to_half(e[1][0], 12) },
+             { qlog_to_half(e[0][1], 12), qlog_to_half(e[1][1], 12) },
+             { qlog_to_half(e[0][2], 12), qlog_to_half(e[1][2], 12) }
+         };
+
+         bool status = transcode_bc6h_1subset(h_e, results.m_best_blk, results.m_bc6h_block);
+         assert(status);
+         (void)status;
+
+         all_results.push_back(results);
+     }
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ // Estimates which common ASTC/BC6H 2-subset partition patterns best fit a 4x4 block.
+ //
+ // The 16 pixels (RGB converted to half float, with the half values cast to float) are
+ // split into two clusters with 4 rounds of 2-means, seeded at mean -/+ 0.1. Every common
+ // pattern is then scored by how many texels agree with the clustering (taking the better
+ // of the pattern and its inverse), and the num_best_parts highest-scoring pattern indices
+ // are written to pBest_parts, best first.
+ //
+ // Returns false (pBest_parts untouched) if either cluster ever ends up empty.
+ // num_best_parts must not exceed basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2.
+ static bool estimate_partition2(const vec4F* pBlock_pixels, int* pBest_parts, uint32_t num_best_parts)
+ {
+     assert(num_best_parts <= basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+
+     vec3F training_vecs[16], mean(0.0f);
+
+     for (uint32_t i = 0; i < 16; i++)
+     {
+         vec3F& v = training_vecs[i];
+
+         for (uint32_t c = 0; c < 3; c++)
+             v[c] = (float)float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][c]);
+
+         mean += v;
+     }
+     mean *= (1.0f / 16.0f);
+
+     vec3F cluster_centroids[2] = { mean - vec3F(.1f), mean + vec3F(.1f) };
+
+     // Cluster id of each texel from the most recent assignment pass, [y][x].
+     int desired_parts[4][4];
+
+     uint32_t cluster_sizes[2];
+     vec3F cluster_sums[2];
+
+     const uint32_t NUM_KMEANS_ITERS = 4;
+     for (uint32_t iter = 0; iter < NUM_KMEANS_ITERS; iter++)
+     {
+         for (uint32_t c = 0; c < 2; c++)
+         {
+             cluster_sizes[c] = 0;
+             cluster_sums[c].clear();
+         }
+
+         for (uint32_t i = 0; i < 16; i++)
+         {
+             float d0 = training_vecs[i].squared_distance(cluster_centroids[0]);
+             float d1 = training_vecs[i].squared_distance(cluster_centroids[1]);
+
+             // Ties go to cluster 1.
+             const uint32_t nearest = (d0 < d1) ? 0 : 1;
+
+             desired_parts[i >> 2][i & 3] = (int)nearest;
+             cluster_sums[nearest] += training_vecs[i];
+             cluster_sizes[nearest]++;
+         }
+
+         if (!cluster_sizes[0] || !cluster_sizes[1])
+             return false;
+
+         for (uint32_t c = 0; c < 2; c++)
+             cluster_centroids[c] = cluster_sums[c] / (float)cluster_sizes[c];
+     }
+
+     // Sort key per pattern: (matching texel count << 8) | pattern index.
+     uint32_t part_similarity[basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2];
+
+     for (uint32_t part_index = 0; part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; part_index++)
+     {
+         const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
+
+         int matches_as_is = 0;
+         int matches_inverted = 0;
+
+         for (uint32_t t = 0; t < 16; t++)
+         {
+             const uint32_t x = t & 3, y = t >> 2;
+
+             int part = basist::g_bc7_partition2[16 * bc7_pattern + x + y * 4];
+
+             if (part == desired_parts[y][x])
+                 matches_as_is++;
+
+             if ((part ^ 1) == desired_parts[y][x])
+                 matches_inverted++;
+         }
+
+         int best_matches = maximum(matches_as_is, matches_inverted);
+
+         part_similarity[part_index] = (best_matches << 8) | part_index;
+
+     } // part_index;
+
+     std::sort(part_similarity, part_similarity + basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+
+     // Highest keys are at the end of the sorted array; emit them best-first.
+     for (uint32_t i = 0; i < num_best_parts; i++)
+     {
+         const uint32_t key = part_similarity[(basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 - 1) - i];
+         pBest_parts[i] = key & 0xFF;
+     }
+
+     return true;
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ // Tries 2-subset ASTC HDR mode 7 encodings of a 4x4 block and appends every usable one
+ // (with its BC6H transcode) to all_results.
+ //
+ // Candidate partitions: if num_estimated_partitions is non-zero, the first
+ // num_estimated_partitions entries of pEstimated_partitions; otherwise every common
+ // partition whose bit is set in coptions.m_mode7_part2_part_masks.
+ // For each candidate, every weight ISE range in [first_weight_ise_range, last_weight_ise_range]
+ // is tried; the endpoint ISE range is chosen from the weight range.
+ static void pack_mode7_2part(const vec4F* pBlock_linear_colors, basisu::vector<astc_hdr_pack_results>& all_results, const astc_hdr_codec_options& coptions,
+     int num_estimated_partitions, const int *pEstimated_partitions,
+     uint32_t first_weight_ise_range, uint32_t last_weight_ise_range)
+ {
+     assert(coptions.m_mode7_part2_part_masks);
+
+     astc_helpers::log_astc_block trial_blk;
+     clear_obj(trial_blk);
+     trial_blk.m_grid_width = 4;
+     trial_blk.m_grid_height = 4;
+
+     trial_blk.m_num_partitions = 2;
+     trial_blk.m_color_endpoint_modes[0] = 7;
+     trial_blk.m_color_endpoint_modes[1] = 7;
+
+     const bool use_estimates = (num_estimated_partitions != 0);
+     const uint32_t num_candidates = use_estimates ? (uint32_t)num_estimated_partitions : (uint32_t)basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2;
+
+     for (uint32_t cand = 0; cand < num_candidates; ++cand)
+     {
+         uint32_t part_index;
+         if (use_estimates)
+         {
+             part_index = pEstimated_partitions[cand];
+             assert(part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+         }
+         else
+         {
+             part_index = cand;
+
+             // Exhaustive search honors the caller's partition mask.
+             if (((1U << part_index) & coptions.m_mode7_part2_part_masks) == 0)
+                 continue;
+         }
+
+         const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc;
+         const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
+         const bool invert_flag = basist::g_astc_bc7_common_partitions2[part_index].m_invert;
+
+         vec4F subset_pixels[2][16];
+         uint32_t texel_subset[16]; // indexed by x + y * 4
+         uint32_t subset_sizes[2] = { 0, 0 };
+
+         // Split the block's texels into the two subsets of this pattern, in raster order.
+         for (uint32_t t = 0; t < 16; t++)
+         {
+             uint32_t part = basist::g_bc7_partition2[16 * bc7_pattern + t];
+             if (invert_flag)
+                 part = 1 - part;
+
+             texel_subset[t] = part;
+             subset_pixels[part][subset_sizes[part]] = pBlock_linear_colors[t];
+             subset_sizes[part]++;
+         }
+
+         trial_blk.m_partition_id = astc_pattern;
+
+         for (uint32_t weight_ise_range = first_weight_ise_range; weight_ise_range <= last_weight_ise_range; weight_ise_range++)
+         {
+             assert(weight_ise_range <= astc_helpers::BISE_8_LEVELS);
+
+             // More weight levels leave fewer bits for endpoints.
+             uint32_t ise_endpoint_range = astc_helpers::BISE_256_LEVELS;
+             if (weight_ise_range == astc_helpers::BISE_5_LEVELS)
+                 ise_endpoint_range = astc_helpers::BISE_192_LEVELS;
+             else if (weight_ise_range == astc_helpers::BISE_6_LEVELS)
+                 ise_endpoint_range = astc_helpers::BISE_128_LEVELS;
+             else if (weight_ise_range == astc_helpers::BISE_8_LEVELS)
+                 ise_endpoint_range = astc_helpers::BISE_80_LEVELS;
+
+             uint8_t subset_endpoints[2][NUM_MODE7_ENDPOINTS], subset_weights[2][16];
+             uint32_t subset_submode7[2];
+
+             clear_obj(subset_endpoints);
+             clear_obj(subset_weights);
+             clear_obj(subset_submode7);
+
+             // Encode each subset independently; a failed subset contributes 1e+30f.
+             double total_trial_err = 0;
+             for (uint32_t s = 0; s < 2; s++)
+             {
+                 total_trial_err += encode_astc_hdr_block_mode_7(
+                     subset_sizes[s], &subset_pixels[s][0],
+                     weight_ise_range, subset_submode7[s], 1e+30f,
+                     &subset_endpoints[s][0], &subset_weights[s][0], coptions, ise_endpoint_range);
+             }
+
+             if (!(total_trial_err < 1e+30f))
+                 continue;
+
+             trial_blk.m_weight_ise_range = weight_ise_range;
+             trial_blk.m_endpoint_ise_range = ise_endpoint_range;
+
+             for (uint32_t s = 0; s < 2; s++)
+                 memcpy(&trial_blk.m_endpoints[s * NUM_MODE7_ENDPOINTS], &subset_endpoints[s][0], NUM_MODE7_ENDPOINTS);
+
+             // Scatter each subset's weights back to raster order.
+             uint32_t next_weight[2] = { 0, 0 };
+             for (uint32_t t = 0; t < 16; t++)
+             {
+                 const uint32_t p = texel_subset[t];
+                 trial_blk.m_weights[t] = subset_weights[p][next_weight[p]++];
+             }
+
+             astc_hdr_pack_results results;
+             results.clear();
+
+             results.m_best_block_error = total_trial_err;
+             results.m_best_submodes[0] = subset_submode7[0];
+             results.m_best_submodes[1] = subset_submode7[1];
+             results.m_best_pat_index = part_index;
+
+             results.m_best_blk = trial_blk;
+
+             bool status = transcode_bc6h_2subsets(part_index, results.m_best_blk, results.m_bc6h_block);
+             assert(status);
+             BASISU_NOTE_UNUSED(status);
+
+             all_results.push_back(results);
+
+         } // weight_ise_range
+
+     } // cand
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ // Tries 2-subset ASTC HDR mode 11 encodings of a 4x4 block and appends every usable one
+ // (with its BC6H transcode) to all_results.
+ //
+ // Candidate partitions: if num_estimated_partitions is non-zero, the first
+ // num_estimated_partitions entries of pEstimated_partitions; otherwise every common
+ // partition whose bit is set in coptions.m_mode11_part2_part_masks.
+ // Weight ISE ranges come from coptions.m_first/m_last_mode11_part2_weight_ise_range.
+ // Endpoints use BISE_64_LEVELS, or BISE_40_LEVELS when the weights use BISE_4_LEVELS.
+ static void pack_mode11_2part(const vec4F* pBlock_linear_colors, basisu::vector<astc_hdr_pack_results>& all_results, const astc_hdr_codec_options& coptions,
+     int num_estimated_partitions, const int* pEstimated_partitions)
+ {
+     assert(coptions.m_mode11_part2_part_masks);
+
+     astc_helpers::log_astc_block trial_blk;
+     clear_obj(trial_blk);
+     trial_blk.m_grid_width = 4;
+     trial_blk.m_grid_height = 4;
+
+     trial_blk.m_num_partitions = 2;
+     trial_blk.m_color_endpoint_modes[0] = 11;
+     trial_blk.m_color_endpoint_modes[1] = 11;
+
+     const bool use_estimates = (num_estimated_partitions != 0);
+     const uint32_t num_candidates = use_estimates ? (uint32_t)num_estimated_partitions : (uint32_t)basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2;
+
+     for (uint32_t cand = 0; cand < num_candidates; ++cand)
+     {
+         uint32_t part_index;
+         if (use_estimates)
+         {
+             part_index = pEstimated_partitions[cand];
+             assert(part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+         }
+         else
+         {
+             part_index = cand;
+
+             // Exhaustive search honors the caller's partition mask.
+             if (((1U << part_index) & coptions.m_mode11_part2_part_masks) == 0)
+                 continue;
+         }
+
+         const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc;
+         const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
+         const bool invert_flag = basist::g_astc_bc7_common_partitions2[part_index].m_invert;
+
+         vec4F subset_pixels[2][16];
+         uint32_t texel_subset[16]; // indexed by x + y * 4
+         uint32_t subset_sizes[2] = { 0, 0 };
+
+         // Split the block's texels into the two subsets of this pattern, in raster order.
+         for (uint32_t t = 0; t < 16; t++)
+         {
+             uint32_t part = basist::g_bc7_partition2[16 * bc7_pattern + t];
+             if (invert_flag)
+                 part = 1 - part;
+
+             texel_subset[t] = part;
+             subset_pixels[part][subset_sizes[part]] = pBlock_linear_colors[t];
+             subset_sizes[part]++;
+         }
+
+         trial_blk.m_partition_id = astc_pattern;
+
+         for (uint32_t weight_ise_range = coptions.m_first_mode11_part2_weight_ise_range; weight_ise_range <= coptions.m_last_mode11_part2_weight_ise_range; weight_ise_range++)
+         {
+             const bool direct_only = false;
+
+             uint32_t ise_endpoint_range = astc_helpers::BISE_64_LEVELS;
+             if (weight_ise_range == astc_helpers::BISE_4_LEVELS)
+                 ise_endpoint_range = astc_helpers::BISE_40_LEVELS;
+
+             const bool uber_mode = coptions.m_mode11_uber_mode && (weight_ise_range >= astc_helpers::BISE_4_LEVELS) && coptions.m_allow_uber_mode;
+
+             uint8_t subset_endpoints[2][NUM_MODE11_ENDPOINTS], subset_weights[2][16];
+             uint32_t subset_submode11[2];
+
+             clear_obj(subset_endpoints);
+             clear_obj(subset_weights);
+             clear_obj(subset_submode11);
+
+             // Encode each subset independently; a failed subset contributes 1e+30f.
+             double total_trial_err = 0;
+             for (uint32_t s = 0; s < 2; s++)
+             {
+                 total_trial_err += encode_astc_hdr_block_mode_11(
+                     subset_sizes[s], &subset_pixels[s][0],
+                     weight_ise_range, subset_submode11[s], 1e+30f,
+                     &subset_endpoints[s][0], &subset_weights[s][0], coptions,
+                     direct_only, ise_endpoint_range, uber_mode, false,
+                     coptions.m_first_mode11_submode, coptions.m_last_mode11_submode);
+             }
+
+             if (!(total_trial_err < 1e+30f))
+                 continue;
+
+             trial_blk.m_weight_ise_range = weight_ise_range;
+             trial_blk.m_endpoint_ise_range = ise_endpoint_range;
+
+             for (uint32_t s = 0; s < 2; s++)
+                 memcpy(&trial_blk.m_endpoints[s * NUM_MODE11_ENDPOINTS], &subset_endpoints[s][0], NUM_MODE11_ENDPOINTS);
+
+             // Scatter each subset's weights back to raster order.
+             uint32_t next_weight[2] = { 0, 0 };
+             for (uint32_t t = 0; t < 16; t++)
+             {
+                 const uint32_t p = texel_subset[t];
+                 trial_blk.m_weights[t] = subset_weights[p][next_weight[p]++];
+             }
+
+             astc_hdr_pack_results results;
+             results.clear();
+
+             results.m_best_block_error = total_trial_err;
+             results.m_best_submodes[0] = subset_submode11[0];
+             results.m_best_submodes[1] = subset_submode11[1];
+             results.m_best_pat_index = part_index;
+
+             results.m_best_blk = trial_blk;
+
+             bool status = transcode_bc6h_2subsets(part_index, results.m_best_blk, results.m_bc6h_block);
+             assert(status);
+             BASISU_NOTE_UNUSED(status);
+
+             all_results.push_back(results);
+
+         } // weight_ise_range
+
+     } // cand
+ }
+
+//--------------------------------------------------------------------------------------------------------------------------
+
+ // Set once astc_hdr_enc_init() has completed.
+ bool g_astc_hdr_enc_initialized;
+
+ // One-time setup of the tables the ASTC HDR encoder depends on.
+ // Calls after the first successful one do nothing.
+ void astc_hdr_enc_init()
+ {
+     if (!g_astc_hdr_enc_initialized)
+     {
+         astc_hdr_core_init();
+         astc_helpers::init_tables(true);
+         init_qlog_tables();
+         encode_astc_hdr_init();
+
+         g_astc_hdr_enc_initialized = true;
+     }
+ }
+
+ // Encodes one 4x4 block of linear RGB floats to a list of candidate ASTC HDR encodings.
+ //
+ // pRGBPixels: 16 pixels, 3 floats each (R, G, B), indexed as [pixel * 3 + channel]. Every value
+ //   must be finite, non-negative and no larger than MAX_HALF_FLOAT.
+ // coptions: selects which modes are tried (solid, mode 11, mode 7 1-subset, mode 7/11 2-subset),
+ //   their weight ISE ranges, partition estimation and optional weight refinement.
+ // all_results: cleared, then filled with one entry per usable candidate encoding.
+ //
+ // Returns false if astc_hdr_enc_init() has not been called or an input value is invalid;
+ // true otherwise. If the block is a single solid color (and solid packing is enabled), only
+ // the solid result is produced.
+ bool astc_hdr_enc_block(
+     const float* pRGBPixels,
+     const astc_hdr_codec_options& coptions,
+     basisu::vector<astc_hdr_pack_results>& all_results)
+ {
+     assert(g_astc_hdr_enc_initialized);
+     if (!g_astc_hdr_enc_initialized)
+     {
+         // astc_hdr_enc_init() MUST be called first.
+         assert(0);
+         return false;
+     }
+
+     all_results.resize(0);
+
+     vec4F block_linear_colors[16];
+
+     // Sanity check the input block.
+     for (uint32_t i = 0; i < 16; i++)
+     {
+         for (uint32_t j = 0; j < 3; j++)
+         {
+             float v = pRGBPixels[i * 3 + j];
+
+             if (std::isinf(v) || std::isnan(v))
+             {
+                 // Input pixels cannot be NaN or +-Inf.
+                 assert(0);
+                 return false;
+             }
+
+             if (v < 0.0f)
+             {
+                 // Input pixels cannot be signed.
+                 assert(0);
+                 return false;
+             }
+
+             if (v > MAX_HALF_FLOAT)
+             {
+                 // Too large for half float.
+                 assert(0);
+                 return false;
+             }
+
+             block_linear_colors[i][j] = v;
+         }
+
+         block_linear_colors[i][3] = 1.0f;
+     }
+
+     assert(coptions.m_use_solid || coptions.m_use_mode11 || coptions.m_use_mode7_part2 || coptions.m_use_mode7_part1 || coptions.m_use_mode11_part2);
+
+     bool is_solid = false;
+     if (coptions.m_use_solid)
+         is_solid = pack_solid(block_linear_colors, all_results, coptions);
+
+     if (!is_solid)
+     {
+         if (coptions.m_use_mode11)
+         {
+             const size_t cur_num_results = all_results.size();
+
+             pack_mode11(block_linear_colors, all_results, coptions, coptions.m_first_mode11_weight_ise_range, coptions.m_last_mode11_weight_ise_range, false);
+
+             // When 16-level weights are in range, also try them with constrained selectors.
+             if (coptions.m_last_mode11_weight_ise_range == astc_helpers::BISE_16_LEVELS)
+             {
+                 pack_mode11(block_linear_colors, all_results, coptions, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_16_LEVELS, true);
+             }
+
+             // If we couldn't get any mode 11 results at all, and we were restricted to just trying weight ISE range 8 (which required endpoint quantization) then
+             // fall back to weight ISE range 7 (which doesn't need any endpoint quantization).
+             // This is to guarantee we always get at least 1 non-solid result.
+             if (all_results.size() == cur_num_results)
+             {
+                 if (coptions.m_first_mode11_weight_ise_range == astc_helpers::BISE_16_LEVELS)
+                 {
+                     pack_mode11(block_linear_colors, all_results, coptions, astc_helpers::BISE_12_LEVELS, astc_helpers::BISE_12_LEVELS, false);
+                 }
+             }
+         }
+
+         if (coptions.m_use_mode7_part1)
+         {
+             // Mode 7 1-subset never requires endpoint quantization, so it cannot fail to find at least one usable solution.
+             pack_mode7_single_part(block_linear_colors, all_results, coptions);
+         }
+
+         // Number of valid entries in best_parts; 0 means "no estimate, search all masked partitions".
+         uint32_t num_est_parts = 0;
+         int best_parts[basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2];
+
+         if ((coptions.m_use_mode7_part2) || (coptions.m_use_mode11_part2))
+         {
+             if (coptions.m_use_estimated_partitions)
+             {
+                 // best_parts only has room for TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 entries, so never ask
+                 // for more than that, even if the option is out of range in a release build.
+                 assert(coptions.m_max_estimated_partitions <= basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+                 const uint32_t max_est_parts = basisu::minimum<uint32_t>(coptions.m_max_estimated_partitions, basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+
+                 if (estimate_partition2(block_linear_colors, best_parts, max_est_parts))
+                     num_est_parts = max_est_parts;
+             }
+         }
+
+         if (coptions.m_use_mode7_part2)
+         {
+             const size_t cur_num_results = all_results.size();
+
+             pack_mode7_2part(block_linear_colors, all_results, coptions, (int)num_est_parts, best_parts,
+                 coptions.m_first_mode7_part2_weight_ise_range, coptions.m_last_mode7_part2_weight_ise_range);
+
+             // If we couldn't find any packable 2-subset mode 7 results at weight levels >= 5 levels (which always requires endpoint quant), then try falling back to
+             // 4 levels which doesn't require endpoint quantization.
+             if (all_results.size() == cur_num_results)
+             {
+                 if (coptions.m_first_mode7_part2_weight_ise_range >= astc_helpers::BISE_5_LEVELS)
+                 {
+                     pack_mode7_2part(block_linear_colors, all_results, coptions, (int)num_est_parts, best_parts,
+                         astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_4_LEVELS);
+                 }
+             }
+         }
+
+         if (coptions.m_use_mode11_part2)
+         {
+             // This always requires endpoint quant, so it could fail to find any usable solutions.
+             pack_mode11_2part(block_linear_colors, all_results, coptions, (int)num_est_parts, best_parts);
+         }
+     }
+
+     if (coptions.m_refine_weights)
+     {
+         // TODO: Move this above, do it once only.
+         basist::half_float rgb_pixels_half[16 * 3];
+         for (uint32_t i = 0; i < 16 * 3; i++)
+             rgb_pixels_half[i] = float_to_half_non_neg_no_nan_inf(pRGBPixels[i]);
+
+         // Refine every candidate's weights; each result records whether it was improved.
+         for (uint32_t i = 0; i < all_results.size(); i++)
+         {
+             bool status = astc_hdr_refine_weights(rgb_pixels_half, all_results[i], coptions, coptions.m_bc6h_err_weight, &all_results[i].m_improved_via_refinement_flag);
+             assert(status);
+             BASISU_NOTE_UNUSED(status);
+         }
+     }
+
+     return true;
+ }
+
+bool astc_hdr_pack_results_to_block(astc_blk& dst_blk, const astc_hdr_pack_results& results) // Writes the block described by `results` into dst_blk. Returns false if the encoder was never initialized or if packing the logical block fails.
+{
+ assert(g_astc_hdr_enc_initialized);
+ if (!g_astc_hdr_enc_initialized)
+ return false;
+
+ if (results.m_is_solid)
+ {
+ memcpy(&dst_blk, &results.m_solid_blk, sizeof(results.m_solid_blk)); // Solid results already hold a block of the destination type, so it is copied verbatim.
+ }
+ else
+ {
+ bool status = astc_helpers::pack_astc_block((astc_helpers::astc_block&)dst_blk, results.m_best_blk); // Non-solid results are packed from the logical block (m_best_blk).
+ if (!status)
+ {
+ assert(0);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Refines a block's chosen weight indices, balancing BC6H and ASTC HDR error. For every texel, each candidate weight index is tried and the one with the lowest blended error is kept. When any weight changes, cur_results' weights, BC6H block and block error are updated and *pImproved_flag (if non-null) is set. Solid blocks are left untouched. Always returns true.
+bool astc_hdr_refine_weights(const half_float *pSource_block, astc_hdr_pack_results& cur_results, const astc_hdr_codec_options& coptions, float bc6h_weight, bool *pImproved_flag)
+{
+ if (pImproved_flag)
+ *pImproved_flag = false;
+
+ if (cur_results.m_is_solid)
+ return true;
+
+ const uint32_t total_weights = astc_helpers::get_ise_levels(cur_results.m_best_blk.m_weight_ise_range); // Number of candidate weight indices tried by the loop below.
+
+ assert((total_weights >= 3) && (total_weights <= 16));
+
+ double best_err[4][4]; // Lowest blended error seen so far for each texel, indexed [y][x].
+ uint8_t best_weight[4][4]; // Weight index that produced best_err for each texel.
+ for (uint32_t y = 0; y < 4; y++)
+ {
+ for (uint32_t x = 0; x < 4; x++)
+ {
+ best_err[y][x] = 1e+30f;
+ best_weight[y][x] = 0;
+ }
+ }
+
+ astc_hdr_pack_results temp_results;
+
+ const float c_weights[3] = { coptions.m_r_err_scale, coptions.m_g_err_scale, 1.0f }; // Per-channel error scales; the third channel is fixed at 1.
+
+ for (uint32_t weight_index = 0; weight_index < total_weights; weight_index++)
+ {
+ temp_results = cur_results; // Trial block: same encoding, but every texel is forced to the same weight index.
+ for (uint32_t i = 0; i < 16; i++)
+ temp_results.m_best_blk.m_weights[i] = (uint8_t)weight_index;
+
+ half_float unpacked_astc_blk_rgba[4][4][4];
+ bool res = astc_helpers::decode_block(temp_results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16);
+ assert(res);
+
+ basist::bc6h_block trial_bc6h_blk;
+ res = basist::astc_hdr_transcode_to_bc6h(temp_results.m_best_blk, trial_bc6h_blk);
+ assert(res);
+
+ half_float unpacked_bc6h_blk[4][4][3];
+ res = unpack_bc6h(&trial_bc6h_blk, unpacked_bc6h_blk, false);
+ assert(res);
+ BASISU_NOTE_UNUSED(res);
+
+ for (uint32_t y = 0; y < 4; y++)
+ {
+ for (uint32_t x = 0; x < 4; x++)
+ {
+ double total_err = 0.0f;
+
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ const half_float orig_c = pSource_block[(x + y * 4) * 3 + c]; // Source block is read as 16 RGB triplets in row-major order.
+ const double orig_c_q = q(orig_c);
+
+ const half_float astc_c = unpacked_astc_blk_rgba[y][x][c];
+ const double astc_c_q = q(astc_c);
+ const double astc_e = square(astc_c_q - orig_c_q) * c_weights[c];
+
+ const half_float bc6h_c = unpacked_bc6h_blk[y][x][c];
+ const double bc6h_c_q = q(bc6h_c);
+ const double bc6h_e = square(bc6h_c_q - orig_c_q) * c_weights[c];
+
+ const double overall_err = astc_e * (1.0f - bc6h_weight) + bc6h_e * bc6h_weight; // Blend: bc6h_weight of 0 scores the ASTC error only, 1 scores the BC6H error only.
+
+ total_err += overall_err;
+
+ } // c
+
+ if (total_err < best_err[y][x])
+ {
+ best_err[y][x] = total_err;
+ best_weight[y][x] = (uint8_t)weight_index;
+ }
+
+ } // x
+ } // y
+
+ } // weight_index
+
+ bool any_changed = false;
+ for (uint32_t i = 0; i < 16; i++)
+ {
+ if (cur_results.m_best_blk.m_weights[i] != best_weight[i >> 2][i & 3]) // Texel i lives at row i / 4, column i % 4.
+ {
+ any_changed = true;
+ break;
+ }
+ }
+
+ if (any_changed)
+ {
+ memcpy(cur_results.m_best_blk.m_weights, best_weight, 16); // best_weight is a 4x4 uint8_t array, i.e. 16 bytes in row-major texel order.
+
+ {
+ bool res = basist::astc_hdr_transcode_to_bc6h(cur_results.m_best_blk, cur_results.m_bc6h_block); // Keep the stored BC6H block in sync with the new weights.
+ assert(res);
+ BASISU_NOTE_UNUSED(res);
+
+ half_float unpacked_astc_blk_rgba[4][4][4];
+ res = astc_helpers::decode_block(cur_results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16);
+ assert(res);
+
+ half_float unpacked_astc_blk_rgb[4][4][3];
+ for (uint32_t y = 0; y < 4; y++)
+ for (uint32_t x = 0; x < 4; x++)
+ for (uint32_t c = 0; c < 3; c++)
+ unpacked_astc_blk_rgb[y][x][c] = unpacked_astc_blk_rgba[y][x][c];
+
+ cur_results.m_best_block_error = compute_block_error(pSource_block, &unpacked_astc_blk_rgb[0][0][0], coptions); // Recompute the stored block error from the re-decoded ASTC texels.
+ }
+
+ if (pImproved_flag)
+ *pImproved_flag = true; // Set whenever any weight changed; the new block error is not compared against the previous one here.
+ }
+
+ return true;
+}
+
+void astc_hdr_block_stats::update(const astc_hdr_pack_results& log_blk) // Folds one block's pack results into the counters and histograms, holding m_mutex for the whole call.
+{
+ std::lock_guard<std::mutex> lck(m_mutex);
+
+ m_total_blocks++;
+
+ if (log_blk.m_improved_via_refinement_flag)
+ m_total_refined++;
+
+ if (log_blk.m_is_solid)
+ {
+ m_total_solid++;
+ }
+ else
+ {
+ int best_weight_range = log_blk.m_best_blk.m_weight_ise_range;
+
+ if (log_blk.m_best_blk.m_color_endpoint_modes[0] == 7) // Only the first endpoint mode is inspected; anything other than 7 is counted as mode 11 below.
+ {
+ m_mode7_submode_hist[bounds_check(log_blk.m_best_submodes[0], 0U, 6U)]++;
+
+ if (log_blk.m_best_blk.m_num_partitions == 2)
+ {
+ m_total_mode7_2part++;
+
+ m_mode7_submode_hist[bounds_check(log_blk.m_best_submodes[1], 0U, 6U)]++; // 2-partition blocks contribute both submodes to the same histogram.
+ m_total_2part++;
+
+ m_weight_range_hist_7_2part[bounds_check(best_weight_range, 0, 11)]++;
+
+ m_part_hist[bounds_check(log_blk.m_best_pat_index, 0U, 32U)]++;
+ }
+ else
+ {
+ m_total_mode7_1part++;
+
+ m_weight_range_hist_7[bounds_check(best_weight_range, 0, 11)]++;
+ }
+ }
+ else
+ {
+ m_mode11_submode_hist[bounds_check(log_blk.m_best_submodes[0], 0U, 9U)]++;
+ if (log_blk.m_constrained_weights) // NOTE(review): counted before the partition check, so this "1part" counter also includes any 2-partition block with the flag set.
+ m_total_mode11_1part_constrained_weights++;
+
+ if (log_blk.m_best_blk.m_num_partitions == 2)
+ {
+ m_total_mode11_2part++;
+
+ m_mode11_submode_hist[bounds_check(log_blk.m_best_submodes[1], 0U, 9U)]++;
+ m_total_2part++;
+
+ m_weight_range_hist_11_2part[bounds_check(best_weight_range, 0, 11)]++;
+
+ m_part_hist[bounds_check(log_blk.m_best_pat_index, 0U, 32U)]++;
+ }
+ else
+ {
+ m_total_mode11_1part++;
+
+ m_weight_range_hist_11[bounds_check(best_weight_range, 0, 11)]++;
+ }
+ }
+ }
+}
+
+void astc_hdr_block_stats::print() // Prints the accumulated totals and histograms to stdout while holding m_mutex. Prints nothing if no blocks were recorded.
+{
+ std::lock_guard<std::mutex> lck(m_mutex);
+
+ assert(m_total_blocks);
+ if (!m_total_blocks) // Also guards the percentage divisions below against a zero block count.
+ return;
+
+ printf("\nLow-level ASTC Encoder Statistics:\n");
+ printf("Total blocks: %u\n", m_total_blocks);
+ printf("Total solid: %u %3.2f%%\n", m_total_solid, (m_total_solid * 100.0f) / m_total_blocks);
+ printf("Total refined: %u %3.2f%%\n", m_total_refined, (m_total_refined * 100.0f) / m_total_blocks);
+
+ printf("Total mode 11, 1 partition: %u %3.2f%%\n", m_total_mode11_1part, (m_total_mode11_1part * 100.0f) / m_total_blocks);
+ printf("Total mode 11, 1 partition, constrained weights: %u %3.2f%%\n", m_total_mode11_1part_constrained_weights, (m_total_mode11_1part_constrained_weights * 100.0f) / m_total_blocks);
+ printf("Total mode 11, 2 partition: %u %3.2f%%\n", m_total_mode11_2part, (m_total_mode11_2part * 100.0f) / m_total_blocks);
+
+ printf("Total mode 7, 1 partition: %u %3.2f%%\n", m_total_mode7_1part, (m_total_mode7_1part * 100.0f) / m_total_blocks);
+ printf("Total mode 7, 2 partition: %u %3.2f%%\n", m_total_mode7_2part, (m_total_mode7_2part * 100.0f) / m_total_blocks);
+
+ printf("Total 2 partitions: %u %3.2f%%\n", m_total_2part, (m_total_2part * 100.0f) / m_total_blocks);
+ printf("\n");
+
+ printf("ISE texel weight range histogram mode 11:\n");
+ for (uint32_t i = 1; i <= MODE11_LAST_ISE_RANGE; i++) // The weight range histograms are printed starting at index 1; entry 0 is never shown.
+ printf("%u %u\n", i, m_weight_range_hist_11[i]);
+ printf("\n");
+
+ printf("ISE texel weight range histogram mode 11, 2 partition:\n");
+ for (uint32_t i = 1; i <= MODE11_PART2_LAST_ISE_RANGE; i++)
+ printf("%u %u\n", i, m_weight_range_hist_11_2part[i]);
+ printf("\n");
+
+ printf("ISE texel weight range histogram mode 7:\n");
+ for (uint32_t i = 1; i <= MODE7_PART1_LAST_ISE_RANGE; i++)
+ printf("%u %u\n", i, m_weight_range_hist_7[i]);
+ printf("\n");
+
+ printf("ISE texel weight range histogram mode 7, 2 partition:\n");
+ for (uint32_t i = 1; i <= MODE7_PART2_LAST_ISE_RANGE; i++)
+ printf("%u %u\n", i, m_weight_range_hist_7_2part[i]);
+ printf("\n");
+
+ printf("Mode 11 submode histogram:\n");
+ for (uint32_t i = 0; i <= MODE11_TOTAL_SUBMODES; i++) // +1 because of the extra direct encoding
+ printf("%u %u\n", i, m_mode11_submode_hist[i]);
+ printf("\n");
+
+ printf("Mode 7 submode histogram:\n");
+ for (uint32_t i = 0; i < MODE7_TOTAL_SUBMODES; i++)
+ printf("%u %u\n", i, m_mode7_submode_hist[i]);
+ printf("\n");
+
+ printf("Partition pattern table usage histogram:\n");
+ for (uint32_t i = 0; i < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2; i++) // NOTE(review): the encoder sizes its partition estimates with TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; confirm the BC7 constant is intended here.
+ printf("%u:%u ", i, m_part_hist[i]);
+ printf("\n\n");
+}
+
+} // namespace basisu
+
diff --git a/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.h b/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.h
new file mode 100644
index 0000000000..ee122ff7ce
--- /dev/null
+++ b/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.h
@@ -0,0 +1,224 @@
+// basisu_astc_hdr_enc.h
+#pragma once
+#include "basisu_enc.h"
+#include "basisu_gpu_texture.h"
+#include "../transcoder/basisu_astc_helpers.h"
+#include "../transcoder/basisu_astc_hdr_core.h"
+
+namespace basisu
+{
+ // This MUST be called before encoding any blocks.
+ void astc_hdr_enc_init();
+
+ const uint32_t MODE11_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE11_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS;
+ const uint32_t MODE7_PART1_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE7_PART1_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS;
+ const uint32_t MODE7_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE7_PART2_LAST_ISE_RANGE = astc_helpers::BISE_8_LEVELS;
+ const uint32_t MODE11_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE11_PART2_LAST_ISE_RANGE = astc_helpers::BISE_4_LEVELS;
+ const uint32_t MODE11_TOTAL_SUBMODES = 8; // plus an extra hidden submode, directly encoded, for direct, so really 9 (see tables 99/100 of the ASTC spec)
+ const uint32_t MODE7_TOTAL_SUBMODES = 6;
+
+ struct astc_hdr_codec_options // Tunable settings read by the ASTC HDR block encoder (which modes to try, weight ranges, error weighting).
+ {
+ float m_bc6h_err_weight; // Passed as bc6h_weight to astc_hdr_refine_weights(): share of the blended error taken from the BC6H transcode.
+
+ bool m_use_solid;
+
+ bool m_use_mode11;
+ bool m_mode11_uber_mode;
+ uint32_t m_first_mode11_weight_ise_range;
+ uint32_t m_last_mode11_weight_ise_range;
+ bool m_mode11_direct_only;
+ int32_t m_first_mode11_submode;
+ int32_t m_last_mode11_submode;
+
+ bool m_use_mode7_part1; // Enables the mode 7, 1-subset packing pass.
+ uint32_t m_first_mode7_part1_weight_ise_range;
+ uint32_t m_last_mode7_part1_weight_ise_range;
+
+ bool m_use_mode7_part2; // Enables the mode 7, 2-subset packing pass.
+ uint32_t m_mode7_part2_part_masks;
+ uint32_t m_first_mode7_part2_weight_ise_range;
+ uint32_t m_last_mode7_part2_weight_ise_range;
+
+ bool m_use_mode11_part2; // Enables the mode 11, 2-subset packing pass.
+ uint32_t m_mode11_part2_part_masks;
+ uint32_t m_first_mode11_part2_weight_ise_range;
+ uint32_t m_last_mode11_part2_weight_ise_range;
+
+ float m_r_err_scale, m_g_err_scale; // Error scales applied to the first two channels during weight refinement; the third channel uses 1.0.
+
+ bool m_refine_weights; // When set, the encoder runs astc_hdr_refine_weights() on every result it produced.
+
+ uint32_t m_level;
+
+ bool m_use_estimated_partitions; // When set, 2-subset passes are given an estimated partition list instead of none.
+ uint32_t m_max_estimated_partitions; // Number of estimated partitions requested and passed on to the 2-subset passes.
+
+ // If true, the ASTC HDR compressor is allowed to more aggressively vary weight indices for slightly higher compression in non-fastest mode. This will hurt BC6H quality, however.
+ bool m_allow_uber_mode;
+
+ astc_hdr_codec_options();
+
+ void init();
+
+ // TODO: set_quality_level() is preferred to configure the codec for transcoding purposes.
+ static const int cMinLevel = 0;
+ static const int cMaxLevel = 4;
+ static const int cDefaultLevel = 1;
+ void set_quality_level(int level);
+
+ private:
+ void set_quality_best();
+ void set_quality_normal();
+ void set_quality_fastest();
+ };
+
+ struct astc_hdr_pack_results // One candidate encoding of a 4x4 block, plus bookkeeping used for statistics.
+ {
+ double m_best_block_error;
+ double m_bc6h_block_error; // note this is not used/set by the encoder, here for convenience
+
+ // Encoder results (logical ASTC block)
+ astc_helpers::log_astc_block m_best_blk;
+
+ // For statistical use
+ uint32_t m_best_submodes[2];
+ uint32_t m_best_pat_index;
+ bool m_constrained_weights;
+
+ bool m_improved_via_refinement_flag;
+
+ // Only valid if the block is solid
+ basist::astc_blk m_solid_blk;
+
+ // The BC6H transcoded block
+ basist::bc6h_block m_bc6h_block;
+
+ // Solid color/void extent flag
+ bool m_is_solid;
+
+ void clear() // Resets the result to an empty, non-solid state with a 4x4 weight grid. Note that m_solid_blk is not touched.
+ {
+ m_best_block_error = 1e+30f;
+ m_bc6h_block_error = 1e+30f;
+
+ m_best_blk.clear();
+ m_best_blk.m_grid_width = 4;
+ m_best_blk.m_grid_height = 4;
+ m_best_blk.m_endpoint_ise_range = 20; // 0-255
+
+ clear_obj(m_best_submodes);
+
+ m_best_pat_index = 0;
+ m_constrained_weights = false;
+
+ clear_obj(m_bc6h_block);
+
+ m_is_solid = false;
+ m_improved_via_refinement_flag = false;
+ }
+ };
+
+ void interpolate_qlog12_colors(
+ const int e[2][3],
+ basist::half_float* pDecoded_half,
+ vec3F* pDecoded_float,
+ uint32_t n, uint32_t ise_weight_range);
+
+ bool get_astc_hdr_mode_11_block_colors(
+ const uint8_t* pEndpoints,
+ basist::half_float* pDecoded_half,
+ vec3F* pDecoded_float,
+ uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
+
+ bool get_astc_hdr_mode_7_block_colors(
+ const uint8_t* pEndpoints,
+ basist::half_float* pDecoded_half,
+ vec3F* pDecoded_float,
+ uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
+
+ double eval_selectors(
+ uint32_t num_pixels,
+ uint8_t* pWeights,
+ const basist::half_float* pBlock_pixels_half,
+ uint32_t num_weight_levels,
+ const basist::half_float* pDecoded_half,
+ const astc_hdr_codec_options& coptions,
+ uint32_t usable_selector_bitmask = UINT32_MAX);
+
+ double compute_block_error(const basist::half_float* pOrig_block, const basist::half_float* pPacked_block, const astc_hdr_codec_options& coptions);
+
+ // Encodes a 4x4 ASTC HDR block given a 4x4 array of source block pixels/texels.
+ // Supports solid color blocks, mode 11 (all submodes), mode 7/1 partition (all submodes),
+ // and mode 7/2 partitions (all submodes) - 30 patterns, only the ones also in common with the BC6H format.
+ // The packed ASTC weight grid dimensions are currently always 4x4 texels, but may be also 3x3 in the future.
+ // This function is thread safe, i.e. it may be called from multiple encoding threads simultaneously with different blocks.
+ //
+ // Parameters:
+ // pRGBPixels - An array of 48 (16 RGB) floats: the 4x4 block to pack
+ // coptions - Codec options
+ // all_results - Receives the candidate encodings found for the block; use astc_hdr_pack_results_to_block() to turn one into a packed ASTC HDR block.
+ // Each entry also carries details about how the block was packed, for statistics/debugging purposes.
+ //
+ // Requirements:
+ // astc_hdr_enc_init() MUST have been called first to initialize the codec.
+ // Input pixels are checked and cannot be NaN's, Inf's, signed, or too large (greater than MAX_HALF_FLOAT, or 65504).
+ // Normal values and denormals are okay.
+ bool astc_hdr_enc_block(
+ const float* pRGBPixels,
+ const astc_hdr_codec_options& coptions,
+ basisu::vector<astc_hdr_pack_results> &all_results);
+
+ bool astc_hdr_pack_results_to_block(basist::astc_blk& dst_blk, const astc_hdr_pack_results& results);
+
+ bool astc_hdr_refine_weights(const basist::half_float* pSource_block, astc_hdr_pack_results& cur_results, const astc_hdr_codec_options& coptions, float bc6h_weight, bool* pImproved_flag);
+
+ struct astc_hdr_block_stats // Accumulates per-block encoder statistics; clear(), update() and print() each take m_mutex.
+ {
+ std::mutex m_mutex;
+
+ uint32_t m_total_blocks;
+ uint32_t m_total_2part, m_total_solid;
+ uint32_t m_total_mode7_1part, m_total_mode7_2part;
+ uint32_t m_total_mode11_1part, m_total_mode11_2part;
+ uint32_t m_total_mode11_1part_constrained_weights;
+
+ uint32_t m_weight_range_hist_7[11]; // Weight range histograms are indexed by the block's m_weight_ise_range.
+ uint32_t m_weight_range_hist_7_2part[11];
+ uint32_t m_mode7_submode_hist[6];
+
+ uint32_t m_weight_range_hist_11[11];
+ uint32_t m_weight_range_hist_11_2part[11];
+ uint32_t m_mode11_submode_hist[9];
+
+ uint32_t m_part_hist[32]; // Indexed by m_best_pat_index of 2-partition blocks.
+
+ uint32_t m_total_refined;
+
+ astc_hdr_block_stats() { clear(); } // All counters start at zero.
+
+ void clear() // Zeroes every counter and histogram.
+ {
+ std::lock_guard<std::mutex> lck(m_mutex);
+
+ m_total_blocks = 0;
+ m_total_mode7_1part = 0, m_total_mode7_2part = 0, m_total_mode11_1part = 0, m_total_2part = 0, m_total_solid = 0, m_total_mode11_2part = 0;
+ m_total_mode11_1part_constrained_weights = 0;
+ m_total_refined = 0;
+
+ clear_obj(m_weight_range_hist_11);
+ clear_obj(m_weight_range_hist_11_2part);
+ clear_obj(m_weight_range_hist_7);
+ clear_obj(m_weight_range_hist_7_2part);
+ clear_obj(m_mode7_submode_hist);
+ clear_obj(m_mode11_submode_hist);
+ clear_obj(m_part_hist);
+ }
+
+ void update(const astc_hdr_pack_results& log_blk);
+
+ void print();
+ };
+
+} // namespace basisu
+
diff --git a/thirdparty/basis_universal/encoder/basisu_backend.cpp b/thirdparty/basis_universal/encoder/basisu_backend.cpp
index abb61750a6..3fa3d8892f 100644
--- a/thirdparty/basis_universal/encoder/basisu_backend.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_backend.cpp
@@ -1,5 +1,5 @@
// basisu_backend.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_backend.h b/thirdparty/basis_universal/encoder/basisu_backend.h
index 07778aeb9b..58a9a8aa0e 100644
--- a/thirdparty/basis_universal/encoder/basisu_backend.h
+++ b/thirdparty/basis_universal/encoder/basisu_backend.h
@@ -1,5 +1,5 @@
// basisu_backend.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_basis_file.cpp b/thirdparty/basis_universal/encoder/basisu_basis_file.cpp
index f4c77bef23..77f467f670 100644
--- a/thirdparty/basis_universal/encoder/basisu_basis_file.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_basis_file.cpp
@@ -1,5 +1,5 @@
// basisu_basis_file.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_basis_file.h b/thirdparty/basis_universal/encoder/basisu_basis_file.h
index 98498a0121..57448bccb1 100644
--- a/thirdparty/basis_universal/encoder/basisu_basis_file.h
+++ b/thirdparty/basis_universal/encoder/basisu_basis_file.h
@@ -1,5 +1,5 @@
// basisu_basis_file.h
-// Copyright (C) 2019 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp b/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp
index 22fdfa603f..914e7fbbb9 100644
--- a/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp
@@ -1,5 +1,5 @@
// File: basisu_bc7enc.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -394,6 +394,7 @@ void bc7enc_compress_block_init()
static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSelectors, const bc7enc_vec4F* pSelector_weights, bc7enc_vec4F* pXl, bc7enc_vec4F* pXh, const color_quad_u8 *pColors)
{
// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
+ // https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
// I did this in matrix form first, expanded out all the ops, then optimized it a bit.
double z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
double q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
@@ -1301,6 +1302,7 @@ void check_best_overall_error(const color_cell_compressor_params *pParams, color
for (uint32_t c = 0; c < 4; c++)
colors[i].m_c[c] = (uint8_t)astc_interpolate_linear(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]);
+#ifdef _DEBUG
uint64_t total_err = 0;
for (uint32_t p = 0; p < pParams->m_num_pixels; p++)
{
@@ -1313,6 +1315,7 @@ void check_best_overall_error(const color_cell_compressor_params *pParams, color
total_err += compute_color_distance_rgb(&orig, &packed, pParams->m_perceptual, pParams->m_weights);
}
assert(total_err == pResults->m_best_overall_err);
+#endif
// HACK HACK
//if (total_err != pResults->m_best_overall_err)
diff --git a/thirdparty/basis_universal/encoder/basisu_bc7enc.h b/thirdparty/basis_universal/encoder/basisu_bc7enc.h
index 8d8b7888ca..925d6b2e8d 100644
--- a/thirdparty/basis_universal/encoder/basisu_bc7enc.h
+++ b/thirdparty/basis_universal/encoder/basisu_bc7enc.h
@@ -1,5 +1,5 @@
// File: basisu_bc7enc.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_comp.cpp b/thirdparty/basis_universal/encoder/basisu_comp.cpp
index 4e69e9e2ee..81813257cd 100644
--- a/thirdparty/basis_universal/encoder/basisu_comp.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_comp.cpp
@@ -1,5 +1,5 @@
// basisu_comp.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -16,6 +16,9 @@
#include "basisu_enc.h"
#include <unordered_set>
#include <atomic>
+#include <map>
+
+//#define UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS
// basisu_transcoder.cpp is where basisu_miniz lives now, we just need the declarations here.
#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
@@ -23,6 +26,8 @@
#include "basisu_opencl.h"
+#include "../transcoder/basisu_astc_hdr_core.h"
+
#if !BASISD_SUPPORT_KTX2
#error BASISD_SUPPORT_KTX2 must be enabled (set to 1).
#endif
@@ -34,7 +39,7 @@
// Set to 1 to disable the mipPadding alignment workaround (which only seems to be needed when no key-values are written at all)
#define BASISU_DISABLE_KTX2_ALIGNMENT_WORKAROUND (0)
-// Set to 1 to disable writing all KTX2 key values, triggering the validator bug.
+// Set to 1 to disable writing all KTX2 key values, triggering an early validator bug.
#define BASISU_DISABLE_KTX2_KEY_VALUES (0)
using namespace buminiz;
@@ -46,27 +51,143 @@ using namespace buminiz;
namespace basisu
{
- basis_compressor::basis_compressor() :
- m_pOpenCL_context(nullptr),
+ basis_compressor::basis_compressor() :
+ m_pOpenCL_context(nullptr),
m_basis_file_size(0),
m_basis_bits_per_texel(0.0f),
m_total_blocks(0),
m_any_source_image_has_alpha(false),
- m_opencl_failed(false)
+ m_opencl_failed(false)
{
debug_printf("basis_compressor::basis_compressor\n");
assert(g_library_initialized);
}
- basis_compressor::~basis_compressor()
- {
- if (m_pOpenCL_context)
- {
- opencl_destroy_context(m_pOpenCL_context);
- m_pOpenCL_context = nullptr;
- }
- }
+ basis_compressor::~basis_compressor()
+ {
+ if (m_pOpenCL_context)
+ {
+ opencl_destroy_context(m_pOpenCL_context);
+ m_pOpenCL_context = nullptr;
+ }
+ }
+
+ void basis_compressor::check_for_hdr_inputs() // Turns on m_params.m_hdr when the inputs look like HDR content, then applies the settings HDR mode implies (no alpha filenames, m_uastc forced on).
+ {
+ if ((!m_params.m_source_filenames.size()) && (!m_params.m_source_images.size()))
+ {
+ if (m_params.m_source_images_hdr.size())
+ {
+ // Assume they want UASTC HDR if they've specified any HDR source images.
+ m_params.m_hdr = true;
+ }
+ }
+
+ if (!m_params.m_hdr)
+ {
+ // See if any files are .EXR or .HDR, if so switch the compressor to UASTC HDR mode.
+ for (uint32_t i = 0; i < m_params.m_source_filenames.size(); i++)
+ {
+ std::string filename;
+ string_get_filename(m_params.m_source_filenames[i].c_str(), filename);
+
+ std::string ext(string_get_extension(filename));
+ string_tolower(ext); // Lower-cased so the extension comparison below is case-insensitive.
+
+ if ((ext == "exr") || (ext == "hdr"))
+ {
+ m_params.m_hdr = true;
+ break;
+ }
+ }
+ }
+
+ if (m_params.m_hdr)
+ {
+ if (m_params.m_source_alpha_filenames.size())
+ {
+ debug_printf("Warning: Alpha channel image filenames are not supported in UASTC HDR mode.\n");
+ m_params.m_source_alpha_filenames.clear(); // The alpha filenames are dropped rather than treated as an error.
+ }
+ }
+
+ if (m_params.m_hdr)
+ m_params.m_uastc = true; // HDR mode always sets the UASTC flag as well.
+ }
+
+ bool basis_compressor::sanity_check_input_params() // Returns false (and asserts in debug builds) when the combination of source filenames and LDR/HDR source images in m_params is inconsistent; true otherwise.
+ {
+ // Check for no source filenames specified.
+ if ((m_params.m_read_source_images) && (!m_params.m_source_filenames.size()))
+ {
+ assert(0);
+ return false;
+ }
+
+ // See if they've specified any source filenames, but didn't tell us to read them.
+ if ((!m_params.m_read_source_images) && (m_params.m_source_filenames.size()))
+ {
+ assert(0);
+ return false;
+ }
+
+ // Sanity check the input image parameters.
+ if (m_params.m_read_source_images)
+ {
+ // Caller can't specify their own images if they want us to read source images from files.
+ if (m_params.m_source_images.size() || m_params.m_source_images_hdr.size())
+ {
+ assert(0);
+ return false;
+ }
+
+ if (m_params.m_source_mipmap_images.size() || m_params.m_source_mipmap_images_hdr.size())
+ {
+ assert(0);
+ return false;
+ }
+ }
+ else
+ {
+ // They didn't tell us to read any source files, so check for no LDR/HDR source images.
+ if (!m_params.m_source_images.size() && !m_params.m_source_images_hdr.size())
+ {
+ assert(0);
+ return false;
+ }
+
+ // Now we know we've been supplied LDR and/or HDR source images, check for LDR vs. HDR conflicts.
+
+ if (m_params.m_source_images.size())
+ {
+ // They've supplied LDR images, so make sure they also haven't specified HDR input images.
+ if (m_params.m_source_images_hdr.size() || m_params.m_source_mipmap_images_hdr.size())
+ {
+ assert(0);
+ return false;
+ }
+ }
+ else
+ {
+ // No LDR images, so make sure they haven't specified any LDR mipmaps.
+ if (m_params.m_source_mipmap_images.size())
+ {
+ assert(0);
+ return false;
+ }
+
+ // No LDR images, so ensure they've supplied some HDR images to process.
+ if (!m_params.m_source_images_hdr.size()) // Redundant with the first check in this branch: with no LDR images, that check already required HDR images.
+ {
+ assert(0);
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
bool basis_compressor::init(const basis_compressor_params &params)
{
@@ -85,7 +206,12 @@ namespace basisu
}
m_params = params;
-
+
+ if ((m_params.m_compute_stats) && (!m_params.m_validate_output_data))
+ m_params.m_validate_output_data = true;
+
+ check_for_hdr_inputs();
+
if (m_params.m_debug)
{
debug_printf("basis_compressor::init:\n");
@@ -95,8 +221,10 @@ namespace basisu
#define PRINT_UINT_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast<uint32_t>(m_params.v), m_params.v.was_changed());
#define PRINT_FLOAT_VALUE(v) debug_printf("%s: %f %u\n", BASISU_STRINGIZE2(v), static_cast<float>(m_params.v), m_params.v.was_changed());
- debug_printf("Source images: %u, source filenames: %u, source alpha filenames: %i, Source mipmap images: %u\n",
- m_params.m_source_images.size(), m_params.m_source_filenames.size(), m_params.m_source_alpha_filenames.size(), m_params.m_source_mipmap_images.size());
+ debug_printf("Source LDR images: %u, HDR images: %u, filenames: %u, alpha filenames: %i, LDR mipmap images: %u, HDR mipmap images: %u\n",
+ m_params.m_source_images.size(), m_params.m_source_images_hdr.size(),
+ m_params.m_source_filenames.size(), m_params.m_source_alpha_filenames.size(),
+ m_params.m_source_mipmap_images.size(), m_params.m_source_mipmap_images_hdr.size());
if (m_params.m_source_mipmap_images.size())
{
@@ -106,6 +234,15 @@ namespace basisu
debug_printf("\n");
}
+ if (m_params.m_source_mipmap_images_hdr.size())
+ {
+ debug_printf("m_source_mipmap_images_hdr array sizes:\n");
+ for (uint32_t i = 0; i < m_params.m_source_mipmap_images_hdr.size(); i++)
+ debug_printf("%u ", m_params.m_source_mipmap_images_hdr[i].size());
+ debug_printf("\n");
+ }
+
+ PRINT_BOOL_VALUE(m_hdr);
PRINT_BOOL_VALUE(m_uastc);
PRINT_BOOL_VALUE(m_use_opencl);
PRINT_BOOL_VALUE(m_y_flip);
@@ -117,7 +254,7 @@ namespace basisu
PRINT_BOOL_VALUE(m_no_endpoint_rdo);
PRINT_BOOL_VALUE(m_no_selector_rdo);
PRINT_BOOL_VALUE(m_read_source_images);
- PRINT_BOOL_VALUE(m_write_output_basis_files);
+ PRINT_BOOL_VALUE(m_write_output_basis_or_ktx2_files);
PRINT_BOOL_VALUE(m_compute_stats);
PRINT_BOOL_VALUE(m_check_for_alpha);
PRINT_BOOL_VALUE(m_force_alpha);
@@ -146,6 +283,7 @@ namespace basisu
debug_printf("m_max_endpoint_clusters: %u\n", m_params.m_max_endpoint_clusters);
debug_printf("m_max_selector_clusters: %u\n", m_params.m_max_selector_clusters);
debug_printf("m_quality_level: %i\n", m_params.m_quality_level);
+ debug_printf("UASTC HDR quality level: %u\n", m_params.m_uastc_hdr_options.m_level);
debug_printf("m_tex_type: %u\n", m_params.m_tex_type);
debug_printf("m_userdata0: 0x%X, m_userdata1: 0x%X\n", m_params.m_userdata0, m_params.m_userdata1);
@@ -185,6 +323,9 @@ namespace basisu
}
PRINT_BOOL_VALUE(m_validate_output_data);
+ PRINT_BOOL_VALUE(m_hdr_ldr_srgb_to_linear_conversion);
+ debug_printf("Allow UASTC HDR uber mode: %u\n", m_params.m_uastc_hdr_options.m_allow_uber_mode);
+ PRINT_BOOL_VALUE(m_hdr_favor_astc);
#undef PRINT_BOOL_VALUE
#undef PRINT_INT_VALUE
@@ -192,19 +333,9 @@ namespace basisu
#undef PRINT_FLOAT_VALUE
}
- if ((m_params.m_read_source_images) && (!m_params.m_source_filenames.size()))
- {
- assert(0);
+ if (!sanity_check_input_params())
return false;
- }
-
- if ((m_params.m_compute_stats) && (!m_params.m_validate_output_data))
- {
- m_params.m_validate_output_data = true;
-
- debug_printf("Note: m_compute_stats is true, so forcing m_validate_output_data to true as well\n");
- }
-
+
if ((m_params.m_use_opencl) && opencl_is_available() && !m_pOpenCL_context && !m_opencl_failed)
{
m_pOpenCL_context = opencl_create_context();
@@ -219,6 +350,9 @@ namespace basisu
{
debug_printf("basis_compressor::process\n");
+ if (!read_dds_source_images())
+ return cECFailedReadingSourceImages;
+
if (!read_source_images())
return cECFailedReadingSourceImages;
@@ -228,20 +362,38 @@ namespace basisu
if (m_params.m_create_ktx2_file)
{
if (!validate_ktx2_constraints())
+ {
+ error_printf("Inputs do not satisfy .KTX2 texture constraints: all source images must be the same resolution and have the same number of mipmap levels.\n");
return cECFailedValidating;
+ }
}
if (!extract_source_blocks())
return cECFailedFrontEnd;
- if (m_params.m_uastc)
+ if (m_params.m_hdr)
+ {
+ // UASTC HDR
+ printf("Mode: UASTC HDR Level %u\n", m_params.m_uastc_hdr_options.m_level);
+
+ error_code ec = encode_slices_to_uastc_hdr();
+ if (ec != cECSuccess)
+ return ec;
+ }
+ else if (m_params.m_uastc)
{
+ // UASTC
+ printf("Mode: UASTC LDR Level %u\n", m_params.m_pack_uastc_flags & cPackUASTCLevelMask);
+
error_code ec = encode_slices_to_uastc();
if (ec != cECSuccess)
return ec;
}
else
{
+ // ETC1S
+ printf("Mode: ETC1S Quality %i, Level %i\n", m_params.m_quality_level, (int)m_params.m_compression_level);
+
if (!process_frontend())
return cECFailedFrontEnd;
@@ -254,7 +406,7 @@ namespace basisu
if (!create_basis_file_and_transcode())
return cECFailedCreateBasisFile;
-
+
if (m_params.m_create_ktx2_file)
{
if (!create_ktx2_file())
@@ -267,6 +419,309 @@ namespace basisu
return cECSuccess;
}
+ basis_compressor::error_code basis_compressor::encode_slices_to_uastc_hdr()
+ {
+ debug_printf("basis_compressor::encode_slices_to_uastc_hdr\n");
+ // Encodes every slice in m_slice_descs to UASTC HDR 4x4 blocks and stores the packed bytes plus a CRC16 per slice in m_uastc_backend_output. Returns cECSuccess, or cECFailedEncodeUASTC if any block failed.
+ interval_timer tm;
+ tm.start();
+ // Allocate one cUASTC_HDR_4x4 texture per slice, sized from m_orig_width/m_orig_height of each slice.
+ m_uastc_slice_textures.resize(m_slice_descs.size());
+ for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)
+ m_uastc_slice_textures[slice_index].init(texture_format::cUASTC_HDR_4x4, m_slice_descs[slice_index].m_orig_width, m_slice_descs[slice_index].m_orig_height);
+ // Describe the backend output: UASTC HDR 4x4, not ETC1S, with one data buffer and one CRC entry per slice.
+ m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cUASTC_HDR_4x4;
+ m_uastc_backend_output.m_etc1s = false;
+ m_uastc_backend_output.m_slice_desc = m_slice_descs;
+ m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size());
+ m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size());
+ // In non-perceptual mode the red and green error scales are reset to 1.0.
+ if (!m_params.m_perceptual)
+ {
+ m_params.m_uastc_hdr_options.m_r_err_scale = 1.0f;
+ m_params.m_uastc_hdr_options.m_g_err_scale = 1.0f;
+ }
+ // Weight given to the BC6H error when ranking candidates; m_hdr_favor_astc selects the lower weight (.1 instead of .85).
+ const float DEFAULT_BC6H_ERROR_WEIGHT = .85f;
+ const float LOWEST_BC6H_ERROR_WEIGHT = .1f;
+ m_params.m_uastc_hdr_options.m_bc6h_err_weight = m_params.m_hdr_favor_astc ? LOWEST_BC6H_ERROR_WEIGHT : DEFAULT_BC6H_ERROR_WEIGHT;
+ // Set by the block jobs when a block fails; checked after each slice completes.
+ std::atomic<bool> any_failures;
+ any_failures = false;
+ // Encoder statistics, only updated and printed when m_params.m_debug is set.
+ astc_hdr_block_stats enc_stats;
+ // Key describing a block configuration, used below to count how many blocks share it.
+ struct uastc_blk_desc
+ {
+ uint32_t m_solid_flag;
+ uint32_t m_num_partitions;
+ uint32_t m_cem_index;
+ uint32_t m_weight_ise_range;
+ uint32_t m_endpoint_ise_range;
+ // Lexicographic ordering over the fields in declaration order, so the struct can be used as a std::map key.
+ bool operator< (const uastc_blk_desc& desc) const
+ {
+ if (this == &desc)
+ return false;
+ // COMP returns as soon as one field differs; falling through all of them means the two keys are equal.
+#define COMP(XX) if (XX < desc.XX) return true; else if (XX != desc.XX) return false;
+ COMP(m_solid_flag)
+ COMP(m_num_partitions)
+ COMP(m_cem_index)
+ COMP(m_weight_ise_range)
+ COMP(m_endpoint_ise_range)
+#undef COMP
+
+ return false;
+ }
+ // Equality is derived from operator<: two keys are equal when neither orders before the other.
+ bool operator== (const uastc_blk_desc& desc) const
+ {
+ if (this == &desc)
+ return true;
+ if ((*this < desc) || (desc < *this))
+ return false;
+ return true;
+ }
+
+ bool operator!= (const uastc_blk_desc& desc) const
+ {
+ return !(*this == desc);
+ }
+ };
+ // Per-configuration use counter (plus the blocks themselves when UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS is defined).
+ struct uastc_blk_desc_stats
+ {
+ uastc_blk_desc_stats() : m_count(0) { }
+ uint32_t m_count;
+#ifdef UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS
+ basisu::vector<basist::astc_blk> m_blks;
+#endif
+ };
+ // Histogram of block configurations; the block jobs take unique_block_desc_mutex before updating it.
+ std::map<uastc_blk_desc, uastc_blk_desc_stats> unique_block_descs;
+ std::mutex unique_block_desc_mutex;
+ // Encode the slices one at a time; all jobs of a slice are waited on (wait_for_all) before its data is copied out.
+ for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)
+ {
+ gpu_image& tex = m_uastc_slice_textures[slice_index];
+ basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];
+ (void)slice_desc;
+
+ const uint32_t num_blocks_x = tex.get_blocks_x();
+ const uint32_t num_blocks_y = tex.get_blocks_y();
+ const uint32_t total_blocks = tex.get_total_blocks();
+ const imagef& source_image = m_slice_images_hdr[slice_index];
+ // Progress counter shared by the jobs of this slice.
+ std::atomic<uint32_t> total_blocks_processed;
+ total_blocks_processed = 0;
+ // Blocks are handed out in batches of up to N consecutive block indices; without __EMSCRIPTEN__ each batch is queued as a job.
+ const uint32_t N = 256;
+ for (uint32_t block_index_iter = 0; block_index_iter < total_blocks; block_index_iter += N)
+ {
+ const uint32_t first_index = block_index_iter;
+ const uint32_t last_index = minimum<uint32_t>(total_blocks, block_index_iter + N);
+
+ // FIXME: This sucks, but we're having a stack size related problem with std::function with emscripten.
+#ifndef __EMSCRIPTEN__
+ m_params.m_pJob_pool->add_job([this, first_index, last_index, num_blocks_x, num_blocks_y, total_blocks, &source_image,
+ &tex, &total_blocks_processed, &any_failures, &enc_stats, &unique_block_descs, &unique_block_desc_mutex]
+ {
+#endif
+ BASISU_NOTE_UNUSED(num_blocks_y);
+ // Candidate list passed to astc_hdr_enc_block() for every block of this batch. NOTE(review): presumably the encoder resets it on each call - confirm.
+ basisu::vector<astc_hdr_pack_results> all_results;
+ all_results.reserve(256);
+
+ for (uint32_t block_index = first_index; block_index < last_index; block_index++)
+ {
+ const uint32_t block_x = block_index % num_blocks_x;
+ const uint32_t block_y = block_index / num_blocks_x;
+ // Fetch the 4x4 source pixels of this block with extract_block_clamped().
+ vec4F block_pixels[16];
+
+ source_image.extract_block_clamped(&block_pixels[0], block_x * 4, block_y * 4, 4, 4);
+
+ basist::astc_blk& dest_block = *(basist::astc_blk*)tex.get_block_ptr(block_x, block_y);
+ // Build float and half-float RGB copies of the block; only components 0..2 of each pixel are read.
+ float rgb_pixels[16 * 3];
+ basist::half_float rgb_pixels_half[16 * 3];
+ for (uint32_t i = 0; i < 16; i++)
+ {
+ rgb_pixels[i * 3 + 0] = block_pixels[i][0];
+ rgb_pixels_half[i * 3 + 0] = float_to_half_non_neg_no_nan_inf(block_pixels[i][0]);
+
+ rgb_pixels[i * 3 + 1] = block_pixels[i][1];
+ rgb_pixels_half[i * 3 + 1] = float_to_half_non_neg_no_nan_inf(block_pixels[i][1]);
+
+ rgb_pixels[i * 3 + 2] = block_pixels[i][2];
+ rgb_pixels_half[i * 3 + 2] = float_to_half_non_neg_no_nan_inf(block_pixels[i][2]);
+ }
+ // Ask the encoder for candidate encodings of this block; a failure flags the whole run and skips the block.
+ bool status = astc_hdr_enc_block(&rgb_pixels[0], m_params.m_uastc_hdr_options, all_results);
+ if (!status)
+ {
+ any_failures = true;
+ continue;
+ }
+ // Pick the candidate with the lowest weighted sum of BC6H error and ASTC error.
+ double best_err = 1e+30f;
+ int best_result_index = -1;
+
+ const double bc6h_err_weight = m_params.m_uastc_hdr_options.m_bc6h_err_weight;
+ const double astc_err_weight = (1.0f - bc6h_err_weight);
+
+ for (uint32_t i = 0; i < all_results.size(); i++)
+ {
+ basist::half_float unpacked_bc6h_block[4 * 4 * 3];
+ unpack_bc6h(&all_results[i].m_bc6h_block, unpacked_bc6h_block, false);
+ // The BC6H error is measured on the unpacked candidate BC6H block against the half-float source pixels.
+ all_results[i].m_bc6h_block_error = compute_block_error(rgb_pixels_half, unpacked_bc6h_block, m_params.m_uastc_hdr_options);
+
+ double overall_err = (all_results[i].m_bc6h_block_error * bc6h_err_weight) + (all_results[i].m_best_block_error * astc_err_weight);
+
+ if ((!i) || (overall_err < best_err))
+ {
+ best_err = overall_err;
+ best_result_index = i;
+ }
+ }
+ // NOTE(review): best_result_index stays -1 if all_results is empty - confirm the encoder always returns at least one candidate on success.
+ const astc_hdr_pack_results& best_results = all_results[best_result_index];
+
+ astc_hdr_pack_results_to_block(dest_block, best_results);
+
+ // Verify that this block is valid UASTC HDR and we can successfully transcode it to BC6H.
+ // (Well, except in fastest mode.)
+ if (m_params.m_uastc_hdr_options.m_level > 0)
+ {
+ basist::bc6h_block transcoded_bc6h_blk;
+ bool transcode_results = astc_hdr_transcode_to_bc6h(dest_block, transcoded_bc6h_blk);
+ assert(transcode_results);
+ if ((!transcode_results) && (!any_failures))
+ {
+ error_printf("basis_compressor::encode_slices_to_uastc_hdr: UASTC HDR block transcode check failed!\n");
+
+ any_failures = true;
+ continue;
+ }
+ }
+
+ if (m_params.m_debug)
+ {
+ // enc_stats has its own mutex
+ enc_stats.update(best_results);
+ // Classify the block; for solid blocks only m_solid_flag is filled in after clear_obj().
+ uastc_blk_desc blk_desc;
+ clear_obj(blk_desc);
+
+ blk_desc.m_solid_flag = best_results.m_is_solid;
+ if (!blk_desc.m_solid_flag)
+ {
+ blk_desc.m_num_partitions = best_results.m_best_blk.m_num_partitions;
+ blk_desc.m_cem_index = best_results.m_best_blk.m_color_endpoint_modes[0];
+ blk_desc.m_weight_ise_range = best_results.m_best_blk.m_weight_ise_range;
+ blk_desc.m_endpoint_ise_range = best_results.m_best_blk.m_endpoint_ise_range;
+ }
+
+ {
+ std::lock_guard<std::mutex> lck(unique_block_desc_mutex);
+ // insert() leaves an existing entry untouched, so this finds or creates the counter for blk_desc.
+ auto res = unique_block_descs.insert(std::make_pair(blk_desc, uastc_blk_desc_stats()));
+
+ (res.first)->second.m_count++;
+#ifdef UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS
+ (res.first)->second.m_blks.push_back(dest_block);
+#endif
+ }
+ }
+
+ total_blocks_processed++;
+ // Report progress each time the counter reaches a value whose low 10 bits are all set, if status output is enabled.
+ uint32_t val = total_blocks_processed;
+ if (((val & 1023) == 1023) && m_params.m_status_output)
+ {
+ debug_printf("basis_compressor::encode_slices_to_uastc_hdr: %3.1f%% done\n", static_cast<float>(val) * 100.0f / total_blocks);
+ }
+ }
+
+#ifndef __EMSCRIPTEN__
+ });
+#endif
+
+ } // block_index_iter
+
+#ifndef __EMSCRIPTEN__
+ m_params.m_pJob_pool->wait_for_all();
+#endif
+ // Stop before copying anything out if any block processed so far has failed.
+ if (any_failures)
+ return cECFailedEncodeUASTC;
+ // Copy the packed blocks of this slice into the backend output and record their CRC16.
+ m_uastc_backend_output.m_slice_image_data[slice_index].resize(tex.get_size_in_bytes());
+ memcpy(&m_uastc_backend_output.m_slice_image_data[slice_index][0], tex.get_ptr(), tex.get_size_in_bytes());
+
+ m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(tex.get_ptr(), tex.get_size_in_bytes(), 0);
+
+ } // slice_index
+
+ debug_printf("basis_compressor::encode_slices_to_uastc_hdr: Total time: %3.3f secs\n", tm.get_elapsed_secs());
+ // Debug report: one entry per distinct block configuration, followed by the encoder statistics.
+ if (m_params.m_debug)
+ {
+ debug_printf("\n----- Total unique UASTC block descs: %u\n", (uint32_t)unique_block_descs.size());
+
+ uint32_t c = 0;
+ for (auto it = unique_block_descs.begin(); it != unique_block_descs.end(); ++it)
+ {
+ debug_printf("%u. Total uses: %u %3.2f%%, solid color: %u\n", c, it->second.m_count,
+ ((float)it->second.m_count * 100.0f) / enc_stats.m_total_blocks, it->first.m_solid_flag);
+
+ if (!it->first.m_solid_flag)
+ {
+ debug_printf(" Num partitions: %u\n", it->first.m_num_partitions);
+ debug_printf(" CEM index: %u\n", it->first.m_cem_index);
+ debug_printf(" Weight ISE range: %u (%u levels)\n", it->first.m_weight_ise_range, astc_helpers::get_ise_levels(it->first.m_weight_ise_range));
+ debug_printf(" Endpoint ISE range: %u (%u levels)\n", it->first.m_endpoint_ise_range, astc_helpers::get_ise_levels(it->first.m_endpoint_ise_range));
+ }
+
+#ifdef UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS
+ debug_printf(" -- UASTC HDR block bytes:\n");
+ for (uint32_t j = 0; j < minimum<uint32_t>(4, it->second.m_blks.size()); j++)
+ {
+ basist::astc_blk& blk = it->second.m_blks[j];
+
+ debug_printf(" - UASTC HDR: { ");
+ for (uint32_t k = 0; k < 16; k++)
+ debug_printf("%u%s", ((const uint8_t*)&blk)[k], (k != 15) ? ", " : "");
+ debug_printf(" }\n");
+
+ basist::bc6h_block bc6h_blk;
+ bool res = astc_hdr_transcode_to_bc6h(blk, bc6h_blk);
+ assert(res);
+ if (!res)
+ {
+ error_printf("astc_hdr_transcode_to_bc6h() failed!\n");
+ return cECFailedEncodeUASTC;
+ }
+
+ debug_printf(" - BC6H: { ");
+ for (uint32_t k = 0; k < 16; k++)
+ debug_printf("%u%s", ((const uint8_t*)&bc6h_blk)[k], (k != 15) ? ", " : "");
+ debug_printf(" }\n");
+ }
+#endif
+
+ c++;
+ }
+ printf("\n");
+
+ enc_stats.print();
+ }
+
+ return cECSuccess;
+ }
+
basis_compressor::error_code basis_compressor::encode_slices_to_uastc()
{
debug_printf("basis_compressor::encode_slices_to_uastc\n");
@@ -328,7 +783,7 @@ namespace basisu
total_blocks_processed++;
uint32_t val = total_blocks_processed;
- if ((val & 16383) == 16383)
+ if (((val & 16383) == 16383) && m_params.m_status_output)
{
debug_printf("basis_compressor::encode_slices_to_uastc: %3.1f%% done\n", static_cast<float>(val) * 100.0f / total_blocks);
}
@@ -374,6 +829,57 @@ namespace basisu
return cECSuccess;
}
+ bool basis_compressor::generate_mipmaps(const imagef& img, basisu::vector<imagef>& mips, bool has_alpha)
+ {
+ debug_printf("basis_compressor::generate_mipmaps\n");
+ // HDR (imagef) overload: appends mip levels 1 and up of img to mips using the box filter; returns false if image_resample() fails, true otherwise.
+ interval_timer tm;
+ tm.start();
+ // Count the levels by halving both dimensions until the larger one is no bigger than m_mip_smallest_dimension.
+ uint32_t total_levels = 1;
+ uint32_t w = img.get_width(), h = img.get_height();
+ while (maximum<uint32_t>(w, h) > (uint32_t)m_params.m_mip_smallest_dimension)
+ {
+ w = maximum(w >> 1U, 1U);
+ h = maximum(h >> 1U, 1U);
+ total_levels++;
+ }
+ // Level 0 is img itself, so generation starts at level 1.
+ for (uint32_t level = 1; level < total_levels; level++)
+ {
+ const uint32_t level_width = maximum<uint32_t>(1, img.get_width() >> level);
+ const uint32_t level_height = maximum<uint32_t>(1, img.get_height() >> level);
+ // Append a new image to mips and size it for this level.
+ imagef& level_img = *enlarge_vector(mips, 1);
+ level_img.resize(level_width, level_height);
+ // By default every level is resampled directly from the full-size image.
+ const imagef* pSource_image = &img;
+ // NOTE(review): mips[level - 1] is the previous level only if mips held exactly one entry on entry - confirm against the caller.
+ if (m_params.m_mip_fast)
+ {
+ if (level > 1)
+ pSource_image = &mips[level - 1];
+ }
+
+ bool status = image_resample(*pSource_image, level_img,
+ //m_params.m_mip_filter.c_str(),
+ "box", // TODO: negative lobes in the filter are causing negative colors, try Mitchell
+ m_params.m_mip_scale, m_params.m_mip_wrapping, 0, has_alpha ? 4 : 3);
+ if (!status)
+ {
+ error_printf("basis_compressor::generate_mipmaps: image_resample() failed!\n");
+ return false;
+ }
+ // Each generated level is passed through clean_hdr_image() before moving on.
+ clean_hdr_image(level_img);
+ }
+
+ if (m_params.m_debug)
+ debug_printf("Total mipmap generation time: %3.3f secs\n", tm.get_elapsed_secs());
+
+ return true;
+ }
+
bool basis_compressor::generate_mipmaps(const image &img, basisu::vector<image> &mips, bool has_alpha)
{
debug_printf("basis_compressor::generate_mipmaps\n");
@@ -463,17 +969,224 @@ namespace basisu
return true;
}
+ void basis_compressor::clean_hdr_image(imagef& src_img)
+ {
+ const uint32_t width = src_img.get_width();
+ const uint32_t height = src_img.get_height();
+ // Modifies src_img in place: rescales components 0..2 if any exceeds basist::ASTC_HDR_MAX_VAL, calls clean_astc_hdr_pixels(), then logs the value range.
+ float max_used_val = 0.0f;
+ for (uint32_t y = 0; y < height; y++)
+ {
+ for (uint32_t x = 0; x < width; x++)
+ {
+ vec4F& c = src_img(x, y);
+ for (uint32_t i = 0; i < 3; i++)
+ max_used_val = maximum(max_used_val, c[i]);
+ }
+ }
+ // If the largest value is too big, scale components 0..2 of every pixel down linearly so the maximum lands on ASTC_HDR_MAX_VAL.
+ double hdr_image_scale = 1.0f;
+ if (max_used_val > basist::ASTC_HDR_MAX_VAL)
+ {
+ hdr_image_scale = max_used_val / basist::ASTC_HDR_MAX_VAL;
+
+ const double inv_hdr_image_scale = basist::ASTC_HDR_MAX_VAL / max_used_val;
+
+ for (uint32_t y = 0; y < src_img.get_height(); y++)
+ {
+ for (uint32_t x = 0; x < src_img.get_width(); x++)
+ {
+ vec4F& c = src_img(x, y);
+
+ for (uint32_t i = 0; i < 3; i++)
+ c[i] = (float)minimum<double>(c[i] * inv_hdr_image_scale, basist::ASTC_HDR_MAX_VAL);
+ }
+ }
+
+ printf("Warning: The input HDR image's maximum used float value was %f, which is too high to encode as ASTC HDR. The image's components have been linearly scaled so the maximum used value is %f, by multiplying by %f.\n",
+ max_used_val, basist::ASTC_HDR_MAX_VAL, inv_hdr_image_scale);
+
+ printf("The decoded ASTC HDR texture will have to be scaled up by %f.\n", hdr_image_scale);
+ }
+
+ // TODO: Determine a constant scale factor, apply if > MAX_HALF_FLOAT
+ if (!src_img.clean_astc_hdr_pixels(basist::ASTC_HDR_MAX_VAL))
+ printf("Warning: clean_astc_hdr_pixels() had to modify the input image to encode to ASTC HDR - see previous warning(s).\n");
+ // Gather range statistics over components 0..2 of the cleaned image for the debug message at the end.
+ float lowest_nonzero_val = 1e+30f;
+ float lowest_val = 1e+30f;
+ float highest_val = -1e+30f;
+
+ for (uint32_t y = 0; y < src_img.get_height(); y++)
+ {
+ for (uint32_t x = 0; x < src_img.get_width(); x++)
+ {
+ const vec4F& c = src_img(x, y);
+
+ for (uint32_t i = 0; i < 3; i++)
+ {
+ lowest_val = basisu::minimum(lowest_val, c[i]);
+
+ if (c[i] != 0.0f)
+ lowest_nonzero_val = basisu::minimum(lowest_nonzero_val, c[i]);
+
+ highest_val = basisu::maximum(highest_val, c[i]);
+ }
+ }
+ }
+ // NOTE(review): lowest_nonzero_val stays at 1e+30 when every visited component is zero, so the reported dynamic range is not meaningful in that case.
+ debug_printf("Lowest image value: %e, lowest non-zero value: %e, highest value: %e, dynamic range: %e\n", lowest_val, lowest_nonzero_val, highest_val, highest_val / lowest_nonzero_val);
+ }
+
+ bool basis_compressor::read_dds_source_images()
+ {
+ debug_printf("basis_compressor::read_dds_source_images\n");
+ // If the source filenames are .DDS files, loads them into m_params.m_source_images(_hdr) and m_params.m_source_mipmap_images(_hdr), then disables filename-based reading. Returns false on error, true otherwise (including when there is nothing to do).
+ // Nothing to do if the caller doesn't want us reading source images.
+ if ((!m_params.m_read_source_images) || (!m_params.m_source_filenames.size()))
+ return true;
+
+ // Just bail if the caller has specified their own source images.
+ if (m_params.m_source_images.size() || m_params.m_source_images_hdr.size())
+ return true;
+
+ if (m_params.m_source_mipmap_images.size() || m_params.m_source_mipmap_images_hdr.size())
+ return true;
+
+ // See if any input filenames are .DDS
+ bool any_dds = false, all_dds = true;
+ for (uint32_t i = 0; i < m_params.m_source_filenames.size(); i++)
+ {
+ std::string ext(string_get_extension(m_params.m_source_filenames[i]));
+ if (strcasecmp(ext.c_str(), "dds") == 0)
+ any_dds = true;
+ else
+ all_dds = false;
+ }
+
+ // Bail if no .DDS files specified.
+ if (!any_dds)
+ return true;
+
+ // If any input is .DDS they all must be .DDS, for simplicity.
+ if (!all_dds)
+ {
+ error_printf("If any filename is DDS, all filenames must be DDS.\n");
+ return false;
+ }
+
+ // Can't jam in alpha channel images if any .DDS files specified.
+ if (m_params.m_source_alpha_filenames.size())
+ {
+ error_printf("Source alpha filenames are not supported in DDS mode.\n");
+ return false;
+ }
+
+ bool any_mipmaps = false;
+
+ // Read each .DDS texture file
+ for (uint32_t i = 0; i < m_params.m_source_filenames.size(); i++)
+ {
+ basisu::vector<image> ldr_mips;
+ basisu::vector<imagef> hdr_mips;
+ bool status = read_uncompressed_dds_file(m_params.m_source_filenames[i].c_str(), ldr_mips, hdr_mips);
+ if (!status)
+ return false;
+ // A successful read is expected to yield either LDR levels or HDR levels.
+ assert(ldr_mips.size() || hdr_mips.size());
+
+ if (m_params.m_status_output)
+ {
+ printf("Read DDS file \"%s\", %s, %ux%u, %u mipmap levels\n",
+ m_params.m_source_filenames[i].c_str(),
+ ldr_mips.size() ? "LDR" : "HDR",
+ ldr_mips.size() ? ldr_mips[0].get_width() : hdr_mips[0].get_width(),
+ ldr_mips.size() ? ldr_mips[0].get_height() : hdr_mips[0].get_height(),
+ ldr_mips.size() ? ldr_mips.size() : hdr_mips.size());
+ }
+ // Level 0 becomes the source image; any remaining levels become the mipmap chain stored for that image.
+ if (ldr_mips.size())
+ {
+ if (m_params.m_source_images_hdr.size())
+ {
+ error_printf("All DDS files must be of the same type (all LDR, or all HDR)\n");
+ return false;
+ }
+
+ m_params.m_source_images.push_back(ldr_mips[0]);
+ m_params.m_source_mipmap_images.resize(m_params.m_source_mipmap_images.size() + 1);
+ // Drop level 0 so that only the remaining levels are stored as mipmaps.
+ if (ldr_mips.size() > 1)
+ {
+ ldr_mips.erase(0U);
+
+ m_params.m_source_mipmap_images.back().swap(ldr_mips);
+
+ any_mipmaps = true;
+ }
+ }
+ else
+ {
+ if (m_params.m_source_images.size())
+ {
+ error_printf("All DDS files must be of the same type (all LDR, or all HDR)\n");
+ return false;
+ }
+
+ m_params.m_source_images_hdr.push_back(hdr_mips[0]);
+ m_params.m_source_mipmap_images_hdr.resize(m_params.m_source_mipmap_images_hdr.size() + 1);
+
+ if (hdr_mips.size() > 1)
+ {
+ hdr_mips.erase(0U);
+
+ m_params.m_source_mipmap_images_hdr.back().swap(hdr_mips);
+
+ any_mipmaps = true;
+ }
+ // HDR .DDS input switches on m_hdr and also sets m_uastc.
+ m_params.m_hdr = true;
+ m_params.m_uastc = true;
+ }
+ }
+ // The images now live in m_params, so switch off file reading and forget the filenames.
+ m_params.m_read_source_images = false;
+ m_params.m_source_filenames.clear();
+ m_params.m_source_alpha_filenames.clear();
+ // With no extra levels in any file, clear the empty per-image chains so later size() checks see no mipmaps.
+ if (!any_mipmaps)
+ {
+ m_params.m_source_mipmap_images.clear();
+ m_params.m_source_mipmap_images_hdr.clear();
+ }
+
+ if ((m_params.m_hdr) && (!m_params.m_source_images_hdr.size()))
+ {
+ error_printf("HDR mode enabled, but only LDR .DDS files were loaded. HDR mode requires half or float (HDR) .DDS inputs.\n");
+ return false;
+ }
+
+ return true;
+ }
+
bool basis_compressor::read_source_images()
{
debug_printf("basis_compressor::read_source_images\n");
- const uint32_t total_source_files = m_params.m_read_source_images ? (uint32_t)m_params.m_source_filenames.size() : (uint32_t)m_params.m_source_images.size();
+ const uint32_t total_source_files = m_params.m_read_source_images ? (uint32_t)m_params.m_source_filenames.size() :
+ (m_params.m_hdr ? (uint32_t)m_params.m_source_images_hdr.size() : (uint32_t)m_params.m_source_images.size());
+
if (!total_source_files)
+ {
+ debug_printf("basis_compressor::read_source_images: No source images to process\n");
+
return false;
+ }
m_stats.resize(0);
m_slice_descs.resize(0);
m_slice_images.resize(0);
+ m_slice_images_hdr.resize(0);
m_total_blocks = 0;
uint32_t total_macroblocks = 0;
@@ -481,106 +1194,196 @@ namespace basisu
m_any_source_image_has_alpha = false;
basisu::vector<image> source_images;
+ basisu::vector<imagef> source_images_hdr;
+
basisu::vector<std::string> source_filenames;
+ // TODO: Note HDR images don't support alpha here, currently.
+
// First load all source images, and determine if any have an alpha channel.
for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++)
{
- const char *pSource_filename = "";
+ const char* pSource_filename = "";
image file_image;
-
+ imagef file_image_hdr;
+
if (m_params.m_read_source_images)
{
pSource_filename = m_params.m_source_filenames[source_file_index].c_str();
// Load the source image
- if (!load_image(pSource_filename, file_image))
+ if (m_params.m_hdr)
{
- error_printf("Failed reading source image: %s\n", pSource_filename);
- return false;
+ if (!load_image_hdr(pSource_filename, file_image_hdr, m_params.m_hdr_ldr_srgb_to_linear_conversion))
+ {
+ error_printf("Failed reading source image: %s\n", pSource_filename);
+ return false;
+ }
+
+ // For now, just slam alpha to 1.0f. UASTC HDR doesn't support alpha yet.
+ for (uint32_t y = 0; y < file_image_hdr.get_height(); y++)
+ for (uint32_t x = 0; x < file_image_hdr.get_width(); x++)
+ file_image_hdr(x, y)[3] = 1.0f;
}
+ else
+ {
+ if (!load_image(pSource_filename, file_image))
+ {
+ error_printf("Failed reading source image: %s\n", pSource_filename);
+ return false;
+ }
+ }
+
+ const uint32_t width = m_params.m_hdr ? file_image_hdr.get_width() : file_image.get_width();
+ const uint32_t height = m_params.m_hdr ? file_image_hdr.get_height() : file_image.get_height();
if (m_params.m_status_output)
{
- printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height());
+ printf("Read source image \"%s\", %ux%u\n", pSource_filename, width, height);
}
- // Optionally load another image and put a grayscale version of it into the alpha channel.
- if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size()))
+ if (m_params.m_hdr)
+ {
+ clean_hdr_image(file_image_hdr);
+ }
+ else
{
- const char *pSource_alpha_image = m_params.m_source_alpha_filenames[source_file_index].c_str();
+ // Optionally load another image and put a grayscale version of it into the alpha channel.
+ if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size()))
+ {
+ const char* pSource_alpha_image = m_params.m_source_alpha_filenames[source_file_index].c_str();
- image alpha_data;
+ image alpha_data;
- if (!load_image(pSource_alpha_image, alpha_data))
- {
- error_printf("Failed reading source image: %s\n", pSource_alpha_image);
- return false;
- }
+ if (!load_image(pSource_alpha_image, alpha_data))
+ {
+ error_printf("Failed reading source image: %s\n", pSource_alpha_image);
+ return false;
+ }
- printf("Read source alpha image \"%s\", %ux%u\n", pSource_alpha_image, alpha_data.get_width(), alpha_data.get_height());
+ printf("Read source alpha image \"%s\", %ux%u\n", pSource_alpha_image, alpha_data.get_width(), alpha_data.get_height());
- alpha_data.crop(file_image.get_width(), file_image.get_height());
+ alpha_data.crop(width, height);
- for (uint32_t y = 0; y < file_image.get_height(); y++)
- for (uint32_t x = 0; x < file_image.get_width(); x++)
- file_image(x, y).a = (uint8_t)alpha_data(x, y).get_709_luma();
+ for (uint32_t y = 0; y < height; y++)
+ for (uint32_t x = 0; x < width; x++)
+ file_image(x, y).a = (uint8_t)alpha_data(x, y).get_709_luma();
+ }
}
}
else
{
- file_image = m_params.m_source_images[source_file_index];
+ if (m_params.m_hdr)
+ {
+ file_image_hdr = m_params.m_source_images_hdr[source_file_index];
+ clean_hdr_image(file_image_hdr);
+ }
+ else
+ {
+ file_image = m_params.m_source_images[source_file_index];
+ }
}
- if (m_params.m_renormalize)
- file_image.renormalize_normal_map();
+ if (!m_params.m_hdr)
+ {
+ if (m_params.m_renormalize)
+ file_image.renormalize_normal_map();
+ }
bool alpha_swizzled = false;
+
if (m_params.m_swizzle[0] != 0 ||
m_params.m_swizzle[1] != 1 ||
m_params.m_swizzle[2] != 2 ||
m_params.m_swizzle[3] != 3)
{
- // Used for XY normal maps in RG - puts X in color, Y in alpha
- for (uint32_t y = 0; y < file_image.get_height(); y++)
- for (uint32_t x = 0; x < file_image.get_width(); x++)
+ if (!m_params.m_hdr)
+ {
+ // Used for XY normal maps in RG - puts X in color, Y in alpha
+ for (uint32_t y = 0; y < file_image.get_height(); y++)
+ {
+ for (uint32_t x = 0; x < file_image.get_width(); x++)
+ {
+ const color_rgba& c = file_image(x, y);
+ file_image(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]);
+ }
+ }
+
+ alpha_swizzled = (m_params.m_swizzle[3] != 3);
+ }
+ else
+ {
+ // Used for XY normal maps in RG - puts X in color, Y in alpha
+ for (uint32_t y = 0; y < file_image_hdr.get_height(); y++)
{
- const color_rgba &c = file_image(x, y);
- file_image(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]);
+ for (uint32_t x = 0; x < file_image_hdr.get_width(); x++)
+ {
+ const vec4F& c = file_image_hdr(x, y);
+
+ // For now, alpha is always 1.0f in UASTC HDR.
+ file_image_hdr(x, y).set(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], 1.0f); // c[m_params.m_swizzle[3]]);
+ }
}
- alpha_swizzled = m_params.m_swizzle[3] != 3;
+ }
}
-
+
bool has_alpha = false;
- if (m_params.m_force_alpha || alpha_swizzled)
- has_alpha = true;
- else if (!m_params.m_check_for_alpha)
- file_image.set_alpha(255);
- else if (file_image.has_alpha())
- has_alpha = true;
- if (has_alpha)
- m_any_source_image_has_alpha = true;
+ if (!m_params.m_hdr)
+ {
+ if (m_params.m_force_alpha || alpha_swizzled)
+ has_alpha = true;
+ else if (!m_params.m_check_for_alpha)
+ file_image.set_alpha(255);
+ else if (file_image.has_alpha())
+ has_alpha = true;
+
+ if (has_alpha)
+ m_any_source_image_has_alpha = true;
+ }
+
+ {
+ const uint32_t width = m_params.m_hdr ? file_image_hdr.get_width() : file_image.get_width();
+ const uint32_t height = m_params.m_hdr ? file_image_hdr.get_height() : file_image.get_height();
+
+ debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, width, height, has_alpha);
+ }
- debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, file_image.get_width(), file_image.get_height(), has_alpha);
-
if (m_params.m_y_flip)
- file_image.flip_y();
+ {
+ if (m_params.m_hdr)
+ file_image_hdr.flip_y();
+ else
+ file_image.flip_y();
+ }
#if DEBUG_EXTRACT_SINGLE_BLOCK
- image block_image(4, 4);
const uint32_t block_x = 0;
const uint32_t block_y = 0;
- block_image.blit(block_x * 4, block_y * 4, 4, 4, 0, 0, file_image, 0);
- file_image = block_image;
+
+ if (m_params.m_hdr)
+ {
+ imagef block_image(4, 4);
+ block_image_hdr.blit(block_x * 4, block_y * 4, 4, 4, 0, 0, file_image_hdr, 0);
+ file_image_hdr = block_image;
+ }
+ else
+ {
+ image block_image(4, 4);
+ block_image.blit(block_x * 4, block_y * 4, 4, 4, 0, 0, file_image, 0);
+ file_image = block_image;
+ }
#endif
#if DEBUG_CROP_TEXTURE_TO_64x64
- file_image.resize(64, 64);
+ if (m_params.m_hdr)
+ file_image_hdr.resize(64, 64);
+ else
+ file_image.resize(64, 64);
#endif
- if (m_params.m_resample_width > 0 && m_params.m_resample_height > 0)
+ if ((m_params.m_resample_width > 0) && (m_params.m_resample_height > 0))
{
int new_width = basisu::minimum<int>(m_params.m_resample_width, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);
int new_height = basisu::minimum<int>(m_params.m_resample_height, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);
@@ -588,129 +1391,225 @@ namespace basisu
debug_printf("Resampling to %ix%i\n", new_width, new_height);
// TODO: A box filter - kaiser looks too sharp on video. Let the caller control this.
- image temp_img(new_width, new_height);
- image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser");
- temp_img.swap(file_image);
+ if (m_params.m_hdr)
+ {
+ imagef temp_img(new_width, new_height);
+ image_resample(file_image_hdr, temp_img, "box"); // "kaiser");
+ clean_hdr_image(temp_img);
+ temp_img.swap(file_image_hdr);
+ }
+ else
+ {
+ image temp_img(new_width, new_height);
+ image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser");
+ temp_img.swap(file_image);
+ }
}
else if (m_params.m_resample_factor > 0.0f)
{
- int new_width = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_width() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);
- int new_height = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_height() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);
+ // TODO: A box filter - kaiser looks too sharp on video. Let the caller control this.
+ if (m_params.m_hdr)
+ {
+ int new_width = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image_hdr.get_width() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);
+ int new_height = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image_hdr.get_height() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);
- debug_printf("Resampling to %ix%i\n", new_width, new_height);
+ debug_printf("Resampling to %ix%i\n", new_width, new_height);
- // TODO: A box filter - kaiser looks too sharp on video. Let the caller control this.
- image temp_img(new_width, new_height);
- image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser");
- temp_img.swap(file_image);
+ imagef temp_img(new_width, new_height);
+ image_resample(file_image_hdr, temp_img, "box"); // "kaiser");
+ clean_hdr_image(temp_img);
+ temp_img.swap(file_image_hdr);
+ }
+ else
+ {
+ int new_width = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_width() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);
+ int new_height = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_height() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);
+
+ debug_printf("Resampling to %ix%i\n", new_width, new_height);
+
+ image temp_img(new_width, new_height);
+ image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser");
+ temp_img.swap(file_image);
+ }
}
- if ((!file_image.get_width()) || (!file_image.get_height()))
+ const uint32_t width = m_params.m_hdr ? file_image_hdr.get_width() : file_image.get_width();
+ const uint32_t height = m_params.m_hdr ? file_image_hdr.get_height() : file_image.get_height();
+
+ if ((!width) || (!height))
{
error_printf("basis_compressor::read_source_images: Source image has a zero width and/or height!\n");
return false;
}
- if ((file_image.get_width() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (file_image.get_height() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION))
+ if ((width > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (height > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION))
{
error_printf("basis_compressor::read_source_images: Source image \"%s\" is too large!\n", pSource_filename);
return false;
}
- source_images.enlarge(1)->swap(file_image);
+ if (!m_params.m_hdr)
+ source_images.enlarge(1)->swap(file_image);
+ else
+ source_images_hdr.enlarge(1)->swap(file_image_hdr);
+
source_filenames.push_back(pSource_filename);
}
// Check if the caller has generated their own mipmaps.
- if (m_params.m_source_mipmap_images.size())
+ if (m_params.m_hdr)
{
- // Make sure they've passed us enough mipmap chains.
- if ((m_params.m_source_images.size() != m_params.m_source_mipmap_images.size()) || (total_source_files != m_params.m_source_images.size()))
+ if (m_params.m_source_mipmap_images_hdr.size())
{
- error_printf("basis_compressor::read_source_images(): m_params.m_source_mipmap_images.size() must equal m_params.m_source_images.size()!\n");
- return false;
+ // Make sure they've passed us enough mipmap chains.
+ if ((m_params.m_source_images_hdr.size() != m_params.m_source_mipmap_images_hdr.size()) || (total_source_files != m_params.m_source_images_hdr.size()))
+ {
+ error_printf("basis_compressor::read_source_images(): m_params.m_source_mipmap_images_hdr.size() must equal m_params.m_source_images_hdr.size()!\n");
+ return false;
+ }
}
-
- // Check if any of the user-supplied mipmap levels has alpha.
- // We're assuming the user has already preswizzled their mipmap source images.
- if (!m_any_source_image_has_alpha)
+ }
+ else
+ {
+ if (m_params.m_source_mipmap_images.size())
{
- for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++)
+ // Make sure they've passed us enough mipmap chains.
+ if ((m_params.m_source_images.size() != m_params.m_source_mipmap_images.size()) || (total_source_files != m_params.m_source_images.size()))
{
- for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++)
- {
- const image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index];
+ error_printf("basis_compressor::read_source_images(): m_params.m_source_mipmap_images.size() must equal m_params.m_source_images.size()!\n");
+ return false;
+ }
- if (mip_img.has_alpha())
+ // Check if any of the user-supplied mipmap levels has alpha.
+ if (!m_any_source_image_has_alpha)
+ {
+ for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++)
+ {
+ for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++)
{
- m_any_source_image_has_alpha = true;
- break;
+ const image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index];
+
+ // Be sure to take into account any swizzling which will be applied.
+ if (mip_img.has_alpha(m_params.m_swizzle[3]))
+ {
+ m_any_source_image_has_alpha = true;
+ break;
+ }
}
- }
- if (m_any_source_image_has_alpha)
- break;
+ if (m_any_source_image_has_alpha)
+ break;
+ }
}
}
}
debug_printf("Any source image has alpha: %u\n", m_any_source_image_has_alpha);
+ // Now, for each source image, create the slices corresponding to that image.
for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++)
{
const std::string &source_filename = source_filenames[source_file_index];
-
- // Now, for each source image, create the slices corresponding to that image.
+
basisu::vector<image> slices;
+ basisu::vector<imagef> slices_hdr;
slices.reserve(32);
+ slices_hdr.reserve(32);
// The first (largest) mipmap level.
- image& file_image = source_images[source_file_index];
-
+ image *pFile_image = source_images.size() ? &source_images[source_file_index] : nullptr;
+ imagef *pFile_image_hdr = source_images_hdr.size() ? &source_images_hdr[source_file_index] : nullptr;
+
// Reserve a slot for mip0.
- slices.resize(1);
-
- if (m_params.m_source_mipmap_images.size())
+ if (m_params.m_hdr)
+ slices_hdr.resize(1);
+ else
+ slices.resize(1);
+
+ if ((!m_params.m_hdr) && (m_params.m_source_mipmap_images.size()))
{
// User-provided mipmaps for each layer or image in the texture array.
for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++)
{
image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index];
- if (m_params.m_swizzle[0] != 0 ||
- m_params.m_swizzle[1] != 1 ||
- m_params.m_swizzle[2] != 2 ||
- m_params.m_swizzle[3] != 3)
+ if ((m_params.m_swizzle[0] != 0) ||
+ (m_params.m_swizzle[1] != 1) ||
+ (m_params.m_swizzle[2] != 2) ||
+ (m_params.m_swizzle[3] != 3))
{
// Used for XY normal maps in RG - puts X in color, Y in alpha
for (uint32_t y = 0; y < mip_img.get_height(); y++)
+ {
for (uint32_t x = 0; x < mip_img.get_width(); x++)
{
- const color_rgba &c = mip_img(x, y);
+ const color_rgba& c = mip_img(x, y);
mip_img(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]);
}
+ }
}
slices.push_back(mip_img);
}
}
+ else if ((m_params.m_hdr) && (m_params.m_source_mipmap_images_hdr.size()))
+ {
+ // User-provided mipmaps for each layer or image in the texture array.
+ for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images_hdr[source_file_index].size(); mip_index++)
+ {
+ imagef& mip_img = m_params.m_source_mipmap_images_hdr[source_file_index][mip_index];
+
+ if ((m_params.m_swizzle[0] != 0) ||
+ (m_params.m_swizzle[1] != 1) ||
+ (m_params.m_swizzle[2] != 2) ||
+ (m_params.m_swizzle[3] != 3))
+ {
+ // Used for XY normal maps in RG - puts X in color, Y in alpha
+ for (uint32_t y = 0; y < mip_img.get_height(); y++)
+ {
+ for (uint32_t x = 0; x < mip_img.get_width(); x++)
+ {
+ const vec4F& c = mip_img(x, y);
+
+ // For now, HDR alpha is always 1.0f.
+ mip_img(x, y).set(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], 1.0f); // c[m_params.m_swizzle[3]]);
+ }
+ }
+ }
+
+ clean_hdr_image(mip_img);
+
+ slices_hdr.push_back(mip_img);
+ }
+ }
else if (m_params.m_mip_gen)
{
// Automatically generate mipmaps.
- if (!generate_mipmaps(file_image, slices, m_any_source_image_has_alpha))
- return false;
+ if (m_params.m_hdr)
+ {
+ if (!generate_mipmaps(*pFile_image_hdr, slices_hdr, m_any_source_image_has_alpha))
+ return false;
+ }
+ else
+ {
+ if (!generate_mipmaps(*pFile_image, slices, m_any_source_image_has_alpha))
+ return false;
+ }
}
// Swap in the largest mipmap level here to avoid copying it, because generate_mips() will change the array.
// NOTE: file_image is now blank.
- slices[0].swap(file_image);
+ if (m_params.m_hdr)
+ slices_hdr[0].swap(*pFile_image_hdr);
+ else
+ slices[0].swap(*pFile_image);
- uint_vec mip_indices(slices.size());
- for (uint32_t i = 0; i < slices.size(); i++)
+ uint_vec mip_indices(m_params.m_hdr ? slices_hdr.size() : slices.size());
+ for (uint32_t i = 0; i < (m_params.m_hdr ? slices_hdr.size() : slices.size()); i++)
mip_indices[i] = i;
- if ((m_any_source_image_has_alpha) && (!m_params.m_uastc))
+ if ((!m_params.m_hdr) && (m_any_source_image_has_alpha) && (!m_params.m_uastc))
{
// For ETC1S, if source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB.
basisu::vector<image> alpha_slices;
@@ -745,20 +1644,29 @@ namespace basisu
mip_indices.swap(new_mip_indices);
}
- assert(slices.size() == mip_indices.size());
-
- for (uint32_t slice_index = 0; slice_index < slices.size(); slice_index++)
+ if (m_params.m_hdr)
+ {
+ assert(slices_hdr.size() == mip_indices.size());
+ }
+ else
+ {
+ assert(slices.size() == mip_indices.size());
+ }
+
+ for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? slices_hdr.size() : slices.size()); slice_index++)
{
- image& slice_image = slices[slice_index];
- const uint32_t orig_width = slice_image.get_width();
- const uint32_t orig_height = slice_image.get_height();
+ image *pSlice_image = m_params.m_hdr ? nullptr : &slices[slice_index];
+ imagef *pSlice_image_hdr = m_params.m_hdr ? &slices_hdr[slice_index] : nullptr;
+
+ const uint32_t orig_width = m_params.m_hdr ? pSlice_image_hdr->get_width() : pSlice_image->get_width();
+ const uint32_t orig_height = m_params.m_hdr ? pSlice_image_hdr->get_height() : pSlice_image->get_height();
bool is_alpha_slice = false;
- if (m_any_source_image_has_alpha)
+ if ((!m_params.m_hdr) && (m_any_source_image_has_alpha))
{
if (m_params.m_uastc)
{
- is_alpha_slice = slice_image.has_alpha();
+ is_alpha_slice = pSlice_image->has_alpha();
}
else
{
@@ -767,43 +1675,69 @@ namespace basisu
}
// Enlarge the source image to 4x4 block boundaries, duplicating edge pixels if necessary to avoid introducing extra colors into blocks.
- slice_image.crop_dup_borders(slice_image.get_block_width(4) * 4, slice_image.get_block_height(4) * 4);
+ if (m_params.m_hdr)
+ pSlice_image_hdr->crop_dup_borders(pSlice_image_hdr->get_block_width(4) * 4, pSlice_image_hdr->get_block_height(4) * 4);
+ else
+ pSlice_image->crop_dup_borders(pSlice_image->get_block_width(4) * 4, pSlice_image->get_block_height(4) * 4);
if (m_params.m_debug_images)
{
- save_png(string_format("basis_debug_source_image_%u_slice_%u.png", source_file_index, slice_index).c_str(), slice_image);
+ if (m_params.m_hdr)
+ write_exr(string_format("basis_debug_source_image_%u_slice_%u.exr", source_file_index, slice_index).c_str(), *pSlice_image_hdr, 3, 0);
+ else
+ save_png(string_format("basis_debug_source_image_%u_slice_%u.png", source_file_index, slice_index).c_str(), *pSlice_image);
}
- const uint32_t dest_image_index = m_slice_images.size();
+ const uint32_t dest_image_index = (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size());
enlarge_vector(m_stats, 1);
- enlarge_vector(m_slice_images, 1);
+
+ if (m_params.m_hdr)
+ enlarge_vector(m_slice_images_hdr, 1);
+ else
+ enlarge_vector(m_slice_images, 1);
+
enlarge_vector(m_slice_descs, 1);
-
+
m_stats[dest_image_index].m_filename = source_filename.c_str();
m_stats[dest_image_index].m_width = orig_width;
m_stats[dest_image_index].m_height = orig_height;
-
- debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n", m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(), orig_width, orig_height, slice_image.get_width(), slice_image.get_height());
- basisu_backend_slice_desc &slice_desc = m_slice_descs[dest_image_index];
+ debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n",
+ m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(),
+ orig_width, orig_height,
+ m_params.m_hdr ? pSlice_image_hdr->get_width() : pSlice_image->get_width(),
+ m_params.m_hdr ? pSlice_image_hdr->get_height() : pSlice_image->get_height());
+
+ basisu_backend_slice_desc& slice_desc = m_slice_descs[dest_image_index];
slice_desc.m_first_block_index = m_total_blocks;
slice_desc.m_orig_width = orig_width;
slice_desc.m_orig_height = orig_height;
- slice_desc.m_width = slice_image.get_width();
- slice_desc.m_height = slice_image.get_height();
+ if (m_params.m_hdr)
+ {
+ slice_desc.m_width = pSlice_image_hdr->get_width();
+ slice_desc.m_height = pSlice_image_hdr->get_height();
- slice_desc.m_num_blocks_x = slice_image.get_block_width(4);
- slice_desc.m_num_blocks_y = slice_image.get_block_height(4);
+ slice_desc.m_num_blocks_x = pSlice_image_hdr->get_block_width(4);
+ slice_desc.m_num_blocks_y = pSlice_image_hdr->get_block_height(4);
+ }
+ else
+ {
+ slice_desc.m_width = pSlice_image->get_width();
+ slice_desc.m_height = pSlice_image->get_height();
+
+ slice_desc.m_num_blocks_x = pSlice_image->get_block_width(4);
+ slice_desc.m_num_blocks_y = pSlice_image->get_block_height(4);
+ }
slice_desc.m_num_macroblocks_x = (slice_desc.m_num_blocks_x + 1) >> 1;
slice_desc.m_num_macroblocks_y = (slice_desc.m_num_blocks_y + 1) >> 1;
slice_desc.m_source_file_index = source_file_index;
-
+
slice_desc.m_mip_index = mip_indices[slice_index];
slice_desc.m_alpha = is_alpha_slice;
@@ -818,8 +1752,11 @@ namespace basisu
// Finally, swap in the slice's image to avoid copying it.
// NOTE: slice_image is now blank.
- m_slice_images[dest_image_index].swap(slice_image);
-
+ if (m_params.m_hdr)
+ m_slice_images_hdr[dest_image_index].swap(*pSlice_image_hdr);
+ else
+ m_slice_images[dest_image_index].swap(*pSlice_image);
+
} // slice_index
} // source_file_index
@@ -855,7 +1792,7 @@ namespace basisu
if (m_params.m_status_output)
{
- printf("Total basis file slices: %u\n", (uint32_t)m_slice_descs.size());
+ printf("Total slices: %u\n", (uint32_t)m_slice_descs.size());
}
for (uint32_t i = 0; i < m_slice_descs.size(); i++)
@@ -865,11 +1802,17 @@ namespace basisu
if (m_params.m_status_output)
{
printf("Slice: %u, alpha: %u, orig width/height: %ux%u, width/height: %ux%u, first_block: %u, image_index: %u, mip_level: %u, iframe: %u\n",
- i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height, slice_desc.m_width, slice_desc.m_height, slice_desc.m_first_block_index, slice_desc.m_source_file_index, slice_desc.m_mip_index, slice_desc.m_iframe);
+ i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height,
+ slice_desc.m_width, slice_desc.m_height,
+ slice_desc.m_first_block_index, slice_desc.m_source_file_index, slice_desc.m_mip_index, slice_desc.m_iframe);
}
if (m_any_source_image_has_alpha)
{
+ // HDR doesn't support alpha yet
+ if (m_params.m_hdr)
+ return false;
+
if (!m_params.m_uastc)
{
// For ETC1S, alpha slices must be at odd slice indices.
@@ -903,6 +1846,7 @@ namespace basisu
if ((slice_desc.m_orig_width > slice_desc.m_width) || (slice_desc.m_orig_height > slice_desc.m_height))
return false;
+
if ((slice_desc.m_source_file_index == 0) && (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames))
{
if (!slice_desc.m_iframe)
@@ -924,7 +1868,7 @@ namespace basisu
uint32_t total_basis_images = 0;
- for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++)
+ for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size()); slice_index++)
{
const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index];
@@ -945,7 +1889,7 @@ namespace basisu
uint_vec image_mipmap_levels(total_basis_images);
int width = -1, height = -1;
- for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++)
+ for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size()); slice_index++)
{
const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index];
@@ -982,20 +1926,52 @@ namespace basisu
{
debug_printf("basis_compressor::extract_source_blocks\n");
- m_source_blocks.resize(m_total_blocks);
+ if (m_params.m_hdr)
+ m_source_blocks_hdr.resize(m_total_blocks);
+ else
+ m_source_blocks.resize(m_total_blocks);
- for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++)
+ for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size()); slice_index++)
{
const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];
const uint32_t num_blocks_x = slice_desc.m_num_blocks_x;
const uint32_t num_blocks_y = slice_desc.m_num_blocks_y;
- const image& source_image = m_slice_images[slice_index];
+ const image *pSource_image = m_params.m_hdr ? nullptr : &m_slice_images[slice_index];
+ const imagef *pSource_image_hdr = m_params.m_hdr ? &m_slice_images_hdr[slice_index] : nullptr;
for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
+ {
for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
- source_image.extract_block_clamped(m_source_blocks[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(), block_x * 4, block_y * 4, 4, 4);
+ {
+ if (m_params.m_hdr)
+ {
+ vec4F* pBlock = m_source_blocks_hdr[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr();
+
+ pSource_image_hdr->extract_block_clamped(pBlock, block_x * 4, block_y * 4, 4, 4);
+
+ // Additional (technically optional) early sanity checking of the block texels.
+ for (uint32_t i = 0; i < 16; i++)
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ float v = pBlock[i][c];
+
+ if (std::isnan(v) || std::isinf(v) || (v < 0.0f) || (v > basist::MAX_HALF_FLOAT))
+ {
+ error_printf("basis_compressor::extract_source_blocks: invalid float component\n");
+ return false;
+ }
+ }
+ }
+ }
+ else
+ {
+ pSource_image->extract_block_clamped(m_source_blocks[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(), block_x * 4, block_y * 4, 4, 4);
+ }
+ }
+ }
}
return true;
@@ -1304,6 +2280,8 @@ namespace basisu
m_output_basis_file = comp_data;
uint32_t total_orig_pixels = 0, total_texels = 0, total_orig_texels = 0;
+ (void)total_texels;
+
for (uint32_t i = 0; i < m_slice_descs.size(); i++)
{
const basisu_backend_slice_desc& slice_desc = m_slice_descs[i];
@@ -1335,10 +2313,21 @@ namespace basisu
}
m_decoded_output_textures.resize(m_slice_descs.size());
- m_decoded_output_textures_unpacked.resize(m_slice_descs.size());
- m_decoded_output_textures_bc7.resize(m_slice_descs.size());
- m_decoded_output_textures_unpacked_bc7.resize(m_slice_descs.size());
+ if (m_params.m_hdr)
+ {
+ m_decoded_output_textures_bc6h_hdr_unpacked.resize(m_slice_descs.size());
+
+ m_decoded_output_textures_astc_hdr.resize(m_slice_descs.size());
+ m_decoded_output_textures_astc_hdr_unpacked.resize(m_slice_descs.size());
+ }
+ else
+ {
+ m_decoded_output_textures_unpacked.resize(m_slice_descs.size());
+
+ m_decoded_output_textures_bc7.resize(m_slice_descs.size());
+ m_decoded_output_textures_unpacked_bc7.resize(m_slice_descs.size());
+ }
tm.start();
if (m_params.m_pGlobal_codebooks)
@@ -1360,12 +2349,16 @@ namespace basisu
for (uint32_t i = 0; i < m_slice_descs.size(); i++)
{
+ basisu::texture_format tex_format = m_params.m_hdr ? texture_format::cBC6HUnsigned : (m_params.m_uastc ? texture_format::cUASTC4x4 : texture_format::cETC1);
+ basist::block_format format = m_params.m_hdr ? basist::block_format::cBC6H : (m_params.m_uastc ? basist::block_format::cUASTC_4x4 : basist::block_format::cETC1);
+
gpu_image decoded_texture;
- decoded_texture.init(m_params.m_uastc ? texture_format::cUASTC4x4 : texture_format::cETC1, m_slice_descs[i].m_width, m_slice_descs[i].m_height);
+ decoded_texture.init(
+ tex_format,
+ m_slice_descs[i].m_width, m_slice_descs[i].m_height);
tm.start();
-
- basist::block_format format = m_params.m_uastc ? basist::block_format::cUASTC_4x4 : basist::block_format::cETC1;
+
uint32_t bytes_per_block = m_params.m_uastc ? 16 : 8;
if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i,
@@ -1391,43 +2384,87 @@ namespace basisu
m_decoded_output_textures[i] = decoded_texture;
}
- double total_time_bc7 = 0;
+ double total_alt_transcode_time = 0;
+ tm.start();
- if (basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cUASTC4x4) &&
- basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cETC1S))
+ if (m_params.m_hdr)
{
+ assert(basist::basis_is_format_supported(basist::transcoder_texture_format::cTFASTC_HDR_4x4_RGBA, basist::basis_tex_format::cUASTC_HDR_4x4));
+
for (uint32_t i = 0; i < m_slice_descs.size(); i++)
{
gpu_image decoded_texture;
- decoded_texture.init(texture_format::cBC7, m_slice_descs[i].m_width, m_slice_descs[i].m_height);
+ decoded_texture.init(texture_format::cASTC_HDR_4x4, m_slice_descs[i].m_width, m_slice_descs[i].m_height);
tm.start();
if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i,
- reinterpret_cast<etc_block*>(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cBC7, 16))
+ reinterpret_cast<basist::astc_blk*>(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cASTC_HDR_4x4, 16))
{
- error_printf("Transcoding failed to BC7 on slice %u!\n", i);
+ error_printf("Transcoding failed to ASTC HDR on slice %u!\n", i);
return false;
}
-
- total_time_bc7 += tm.get_elapsed_secs();
-
- m_decoded_output_textures_bc7[i] = decoded_texture;
+
+ m_decoded_output_textures_astc_hdr[i] = decoded_texture;
+ }
+ }
+ else
+ {
+ if (basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cUASTC4x4) &&
+ basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cETC1S))
+ {
+ for (uint32_t i = 0; i < m_slice_descs.size(); i++)
+ {
+ gpu_image decoded_texture;
+ decoded_texture.init(texture_format::cBC7, m_slice_descs[i].m_width, m_slice_descs[i].m_height);
+
+ if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i,
+ reinterpret_cast<etc_block*>(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cBC7, 16))
+ {
+ error_printf("Transcoding failed to BC7 on slice %u!\n", i);
+ return false;
+ }
+
+ m_decoded_output_textures_bc7[i] = decoded_texture;
+ }
}
}
+ total_alt_transcode_time = tm.get_elapsed_secs();
+
for (uint32_t i = 0; i < m_slice_descs.size(); i++)
{
- m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i]);
+ if (m_params.m_hdr)
+ {
+ // BC6H
+ bool status = m_decoded_output_textures[i].unpack_hdr(m_decoded_output_textures_bc6h_hdr_unpacked[i]);
+ assert(status);
+ BASISU_NOTE_UNUSED(status);
+
+ // ASTC HDR
+ status = m_decoded_output_textures_astc_hdr[i].unpack_hdr(m_decoded_output_textures_astc_hdr_unpacked[i]);
+ assert(status);
+ }
+ else
+ {
+ bool status = m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i]);
+ assert(status);
+ BASISU_NOTE_UNUSED(status);
- if (m_decoded_output_textures_bc7[i].get_pixel_width())
- m_decoded_output_textures_bc7[i].unpack(m_decoded_output_textures_unpacked_bc7[i]);
+ if (m_decoded_output_textures_bc7[i].get_pixel_width())
+ {
+ status = m_decoded_output_textures_bc7[i].unpack(m_decoded_output_textures_unpacked_bc7[i]);
+ assert(status);
+ }
+ }
}
- debug_printf("Transcoded to %s in %3.3fms, %f texels/sec\n", m_params.m_uastc ? "ASTC" : "ETC1", total_time_etc1s_or_astc * 1000.0f, total_orig_pixels / total_time_etc1s_or_astc);
+ debug_printf("Transcoded to %s in %3.3fms, %f texels/sec\n",
+ m_params.m_hdr ? "BC6H" : (m_params.m_uastc ? "ASTC" : "ETC1"),
+ total_time_etc1s_or_astc * 1000.0f, total_orig_pixels / total_time_etc1s_or_astc);
- if (total_time_bc7 != 0)
- debug_printf("Transcoded to BC7 in %3.3fms, %f texels/sec\n", total_time_bc7 * 1000.0f, total_orig_pixels / total_time_bc7);
+ if (total_alt_transcode_time != 0)
+ debug_printf("Alternate transcode in %3.3fms, %f texels/sec\n", total_alt_transcode_time * 1000.0f, total_orig_pixels / total_alt_transcode_time);
for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)
{
@@ -1438,17 +2475,82 @@ namespace basisu
assert(m_decoded_output_textures[slice_index].get_total_blocks() == total_blocks);
}
+
} // if (m_params.m_validate_output_data)
return true;
}
+ bool basis_compressor::write_hdr_debug_images(const char* pBasename, const imagef& orig_hdr_img, uint32_t width, uint32_t height)
+ { // Debug helper: writes LDR .PNG previews of an HDR image (clamped sRGB, compressive tonemap, Reinhard at several scales), each filename prefixed with pBasename. Always returns true.
+ // Work on a copy (the input is const) resized to width x height; the callers visible in this file pass the slice's m_orig_width/m_orig_height, i.e. the size before 4x4 block expansion.
+ imagef hdr_img(orig_hdr_img);
+ hdr_img.resize(width, height);
+ // Destination image for the clamped sRGB preview.
+ image srgb_img(width, height);
+ // Convert every texel of the resized copy.
+ for (uint32_t y = 0; y < height; y++)
+ {
+ for (uint32_t x = 0; x < width; x++)
+ {
+ vec4F p(hdr_img(x, y));
+ // RGB is clamped to [0,1], so anything brighter than 1.0 is clipped in this preview.
+ p[0] = clamp(p[0], 0.0f, 1.0f);
+ p[1] = clamp(p[1], 0.0f, 1.0f);
+ p[2] = clamp(p[2], 0.0f, 1.0f);
+ // Each channel goes through linear_to_srgb() (presumably the linear->sRGB transfer curve - confirm), then is scaled to 0..255 and rounded.
+ int rc = (int)std::round(linear_to_srgb(p[0]) * 255.0f);
+ int gc = (int)std::round(linear_to_srgb(p[1]) * 255.0f);
+ int bc = (int)std::round(linear_to_srgb(p[2]) * 255.0f);
+ // Alpha is forced to 255 regardless of the source texel's fourth component.
+ srgb_img.set_clipped(x, y, color_rgba(rc, gc, bc, 255));
+ }
+ }
+ // 1) Clamped sRGB preview: "<pBasename>_linear_clamped_to_srgb.png". The result of save_png() is not checked.
+ {
+ const std::string filename(string_format("%s_linear_clamped_to_srgb.png", pBasename));
+ save_png(filename.c_str(), srgb_img);
+ printf("Wrote .PNG file %s\n", filename.c_str());
+ }
+ // 2) Compressive tonemap preview: "<pBasename>_compressive_tonemapped.png", written only when the tonemap call succeeds.
+ {
+ const std::string filename(string_format("%s_compressive_tonemapped.png", pBasename));
+ image compressive_tonemapped_img;
+ // A false status is only logged (the message attributes it to invalid half-float input); execution continues and the function still returns true.
+ bool status = tonemap_image_compressive(compressive_tonemapped_img, hdr_img);
+ if (!status)
+ {
+ error_printf("basis_compressor::write_hdr_debug_images: tonemap_image_compressive() failed (invalid half-float input)\n");
+ }
+ else
+ {
+ save_png(filename.c_str(), compressive_tonemapped_img);
+ printf("Wrote .PNG file %s\n", filename.c_str());
+ }
+ }
+ // 3) Reinhard tonemap previews; the same tonemapped_img object is reused for every scale.
+ image tonemapped_img;
+ // Sweep 11 scales, from 2^-5 (1/32) up to 2^5 (32).
+ for (int e = -5; e <= 5; e++)
+ {
+ const float scale = powf(2.0f, (float)e);
+ // Tonemap the resized HDR copy at this scale.
+ tonemap_image_reinhard(tonemapped_img, hdr_img, scale);
+ // The scale is embedded in the filename via %f; cImageSaveIgnoreAlpha is passed to save_png() for these images.
+ std::string filename(string_format("%s_reinhard_tonemapped_scale_%f.png", pBasename, scale));
+ save_png(filename.c_str(), tonemapped_img, cImageSaveIgnoreAlpha);
+ printf("Wrote .PNG file %s\n", filename.c_str());
+ }
+ // Unconditional success: no save_png() or tonemap failure is propagated to the caller.
+ return true;
+ }
+
bool basis_compressor::write_output_files_and_compute_stats()
{
debug_printf("basis_compressor::write_output_files_and_compute_stats\n");
const uint8_vec& comp_data = m_params.m_create_ktx2_file ? m_output_ktx2_file : m_basis_file.get_compressed_data();
- if (m_params.m_write_output_basis_files)
+ if (m_params.m_write_output_basis_or_ktx2_files)
{
const std::string& output_filename = m_params.m_out_filename;
@@ -1458,7 +2560,7 @@ namespace basisu
return false;
}
- if (m_params.m_status_output)
+ //if (m_params.m_status_output)
{
printf("Wrote output .basis/.ktx2 file \"%s\"\n", output_filename.c_str());
}
@@ -1485,7 +2587,7 @@ namespace basisu
m_basis_bits_per_texel = comp_size * 8.0f / total_texels;
- debug_printf(".basis file size: %u, LZ compressed file size: %u, %3.2f bits/texel\n",
+ debug_printf("Output file size: %u, LZ compressed file size: %u, %3.2f bits/texel\n",
(uint32_t)comp_data.size(),
(uint32_t)comp_size,
m_basis_bits_per_texel);
@@ -1495,191 +2597,324 @@ namespace basisu
if (m_params.m_validate_output_data)
{
- for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)
+ if (m_params.m_hdr)
{
- const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];
+ if (m_params.m_print_stats)
+ {
+ printf("ASTC/BC6H half float space error metrics (a piecewise linear approximation of log2 error):\n");
+ }
- if (m_params.m_compute_stats)
+ for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)
{
- if (m_params.m_print_stats)
- printf("Slice: %u\n", slice_index);
+ const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];
- image_stats& s = m_stats[slice_index];
+ if (m_params.m_compute_stats)
+ {
+ image_stats& s = m_stats[slice_index];
- // TODO: We used to output SSIM (during heavy encoder development), but this slowed down compression too much. We'll be adding it back.
+ if (m_params.m_print_stats)
+ {
+ printf("Slice: %u\n", slice_index);
+ }
- image_metrics em;
+ image_metrics im;
- // ---- .basis stats
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3);
- if (m_params.m_print_stats)
- em.print(".basis RGB Avg: ");
- s.m_basis_rgb_avg_psnr = em.m_psnr;
+ if (m_params.m_print_stats)
+ {
+ printf("\nASTC channels:\n");
+ for (uint32_t i = 0; i < 3; i++)
+ {
+ im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], i, 1, true);
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 4);
- if (m_params.m_print_stats)
- em.print(".basis RGBA Avg: ");
- s.m_basis_rgba_avg_psnr = em.m_psnr;
+ printf("%c: ", "RGB"[i]);
+ im.print_hp();
+ }
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 1);
- if (m_params.m_print_stats)
- em.print(".basis R Avg: ");
+ printf("BC6H channels:\n");
+ for (uint32_t i = 0; i < 3; i++)
+ {
+ im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], i, 1, true);
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 1, 1);
- if (m_params.m_print_stats)
- em.print(".basis G Avg: ");
+ printf("%c: ", "RGB"[i]);
+ im.print_hp();
+ }
+ }
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 2, 1);
- if (m_params.m_print_stats)
- em.print(".basis B Avg: ");
+ im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], 0, 3, true);
+ s.m_basis_rgb_avg_psnr = (float)im.m_psnr;
- if (m_params.m_uastc)
- {
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 3, 1);
if (m_params.m_print_stats)
- em.print(".basis A Avg: ");
+ {
+ printf("\nASTC RGB: ");
+ im.print_hp();
+#if 0
+ // Validation
+ im.calc_half2(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], 0, 3, true);
+ printf("\nASTC RGB (Alt): ");
+ im.print_hp();
+#endif
+ }
- s.m_basis_a_avg_psnr = em.m_psnr;
+ im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], 0, 3, true);
+ s.m_basis_rgb_avg_bc6h_psnr = (float)im.m_psnr;
+
+ if (m_params.m_print_stats)
+ {
+ printf("BC6H RGB: ");
+ im.print_hp();
+ printf("\n");
+ }
}
+
+ if (m_params.m_debug_images)
+ {
+ std::string out_basename;
+ if (m_params.m_out_filename.size())
+ string_get_filename(m_params.m_out_filename.c_str(), out_basename);
+ else if (m_params.m_source_filenames.size())
+ string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename);
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0);
- if (m_params.m_print_stats)
- em.print(".basis 709 Luma: ");
- s.m_basis_luma_709_psnr = static_cast<float>(em.m_psnr);
- s.m_basis_luma_709_ssim = static_cast<float>(em.m_ssim);
+ string_remove_extension(out_basename);
+ out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index);
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0, true, true);
- if (m_params.m_print_stats)
- em.print(".basis 601 Luma: ");
- s.m_basis_luma_601_psnr = static_cast<float>(em.m_psnr);
+ // Write BC6H .DDS file.
+ {
+ gpu_image bc6h_tex(m_decoded_output_textures[slice_index]);
+ bc6h_tex.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);
+
+ std::string filename(out_basename + "_bc6h.dds");
+ write_compressed_texture_file(filename.c_str(), bc6h_tex, true);
+ printf("Wrote .DDS file %s\n", filename.c_str());
+ }
- if (m_slice_descs.size() == 1)
- {
- const uint32_t output_size = comp_size ? (uint32_t)comp_size : (uint32_t)comp_data.size();
- if (m_params.m_print_stats)
+ // Write ASTC .KTX/.astc files. ("astcenc -dh input.astc output.exr" to decode the astc file.)
+ {
+ gpu_image astc_tex(m_decoded_output_textures_astc_hdr[slice_index]);
+ astc_tex.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);
+
+ std::string filename1(out_basename + "_astc.astc");
+ write_astc_file(filename1.c_str(), astc_tex.get_ptr(), 4, 4, slice_desc.m_orig_width, slice_desc.m_orig_height);
+ printf("Wrote .ASTC file %s\n", filename1.c_str());
+
+ std::string filename2(out_basename + "_astc.ktx");
+ write_compressed_texture_file(filename2.c_str(), astc_tex, true);
+ printf("Wrote .KTX file %s\n", filename2.c_str());
+ }
+
+ // Write unpacked ASTC image to .EXR
{
- debug_printf(".basis RGB PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_rgb_avg_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height)));
- debug_printf(".basis Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_luma_709_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height)));
+ imagef astc_img(m_decoded_output_textures_astc_hdr_unpacked[slice_index]);
+ astc_img.resize(slice_desc.m_orig_width, slice_desc.m_orig_height);
+
+ std::string filename(out_basename + "_unpacked_astc.exr");
+ write_exr(filename.c_str(), astc_img, 3, 0);
+ printf("Wrote .EXR file %s\n", filename.c_str());
}
+
+ // Write unpacked BC6H image to .EXR
+ {
+ imagef bc6h_img(m_decoded_output_textures_bc6h_hdr_unpacked[slice_index]);
+ bc6h_img.resize(slice_desc.m_orig_width, slice_desc.m_orig_height);
+
+ std::string filename(out_basename + "_unpacked_bc6h.exr");
+ write_exr(filename.c_str(), bc6h_img, 3, 0);
+ printf("Wrote .EXR file %s\n", filename.c_str());
+ }
+
+ // Write tonemapped/srgb images
+ write_hdr_debug_images((out_basename + "_source").c_str(), m_slice_images_hdr[slice_index], slice_desc.m_orig_width, slice_desc.m_orig_height);
+ write_hdr_debug_images((out_basename + "_unpacked_astc").c_str(), m_decoded_output_textures_astc_hdr_unpacked[slice_index], slice_desc.m_orig_width, slice_desc.m_orig_height);
+ write_hdr_debug_images((out_basename + "_unpacked_bc6h").c_str(), m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], slice_desc.m_orig_width, slice_desc.m_orig_height);
}
+ }
+ }
+ else
+ {
+ for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)
+ {
+ const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];
- if (m_decoded_output_textures_unpacked_bc7[slice_index].get_width())
+ if (m_params.m_compute_stats)
{
- // ---- BC7 stats
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 3);
if (m_params.m_print_stats)
- em.print("BC7 RGB Avg: ");
- s.m_bc7_rgb_avg_psnr = em.m_psnr;
+ printf("Slice: %u\n", slice_index);
+
+ image_stats& s = m_stats[slice_index];
+
+ image_metrics em;
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 4);
+ // ---- .basis stats
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3);
if (m_params.m_print_stats)
- em.print("BC7 RGBA Avg: ");
- s.m_bc7_rgba_avg_psnr = em.m_psnr;
+ em.print(".basis RGB Avg: ");
+ s.m_basis_rgb_avg_psnr = (float)em.m_psnr;
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 1);
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 4);
if (m_params.m_print_stats)
- em.print("BC7 R Avg: ");
+ em.print(".basis RGBA Avg: ");
+ s.m_basis_rgba_avg_psnr = (float)em.m_psnr;
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 1, 1);
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 1);
if (m_params.m_print_stats)
- em.print("BC7 G Avg: ");
+ em.print(".basis R Avg: ");
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 2, 1);
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 1, 1);
if (m_params.m_print_stats)
- em.print("BC7 B Avg: ");
+ em.print(".basis G Avg: ");
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 2, 1);
+ if (m_params.m_print_stats)
+ em.print(".basis B Avg: ");
if (m_params.m_uastc)
{
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 3, 1);
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 3, 1);
if (m_params.m_print_stats)
- em.print("BC7 A Avg: ");
+ em.print(".basis A Avg: ");
- s.m_bc7_a_avg_psnr = em.m_psnr;
+ s.m_basis_a_avg_psnr = (float)em.m_psnr;
}
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0);
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0);
if (m_params.m_print_stats)
- em.print("BC7 709 Luma: ");
- s.m_bc7_luma_709_psnr = static_cast<float>(em.m_psnr);
- s.m_bc7_luma_709_ssim = static_cast<float>(em.m_ssim);
+ em.print(".basis 709 Luma: ");
+ s.m_basis_luma_709_psnr = static_cast<float>(em.m_psnr);
+ s.m_basis_luma_709_ssim = static_cast<float>(em.m_ssim);
- em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0, true, true);
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0, true, true);
if (m_params.m_print_stats)
- em.print("BC7 601 Luma: ");
- s.m_bc7_luma_601_psnr = static_cast<float>(em.m_psnr);
- }
+ em.print(".basis 601 Luma: ");
+ s.m_basis_luma_601_psnr = static_cast<float>(em.m_psnr);
- if (!m_params.m_uastc)
- {
- // ---- Nearly best possible ETC1S stats
- em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 3);
- if (m_params.m_print_stats)
- em.print("Unquantized ETC1S RGB Avg: ");
- s.m_best_etc1s_rgb_avg_psnr = static_cast<float>(em.m_psnr);
+ if (m_slice_descs.size() == 1)
+ {
+ const uint32_t output_size = comp_size ? (uint32_t)comp_size : (uint32_t)comp_data.size();
+ if (m_params.m_print_stats)
+ {
+ debug_printf(".basis RGB PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_rgb_avg_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height)));
+ debug_printf(".basis Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_luma_709_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height)));
+ }
+ }
- em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0);
- if (m_params.m_print_stats)
- em.print("Unquantized ETC1S 709 Luma: ");
- s.m_best_etc1s_luma_709_psnr = static_cast<float>(em.m_psnr);
- s.m_best_etc1s_luma_709_ssim = static_cast<float>(em.m_ssim);
+ if (m_decoded_output_textures_unpacked_bc7[slice_index].get_width())
+ {
+ // ---- BC7 stats
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 3);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 RGB Avg: ");
+ s.m_bc7_rgb_avg_psnr = (float)em.m_psnr;
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 4);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 RGBA Avg: ");
+ s.m_bc7_rgba_avg_psnr = (float)em.m_psnr;
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 1);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 R Avg: ");
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 1, 1);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 G Avg: ");
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 2, 1);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 B Avg: ");
+
+ if (m_params.m_uastc)
+ {
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 3, 1);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 A Avg: ");
- em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0, true, true);
- if (m_params.m_print_stats)
- em.print("Unquantized ETC1S 601 Luma: ");
- s.m_best_etc1s_luma_601_psnr = static_cast<float>(em.m_psnr);
+ s.m_bc7_a_avg_psnr = (float)em.m_psnr;
+ }
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 709 Luma: ");
+ s.m_bc7_luma_709_psnr = static_cast<float>(em.m_psnr);
+ s.m_bc7_luma_709_ssim = static_cast<float>(em.m_ssim);
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0, true, true);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 601 Luma: ");
+ s.m_bc7_luma_601_psnr = static_cast<float>(em.m_psnr);
+ }
+
+ if (!m_params.m_uastc)
+ {
+ // ---- Nearly best possible ETC1S stats
+ em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 3);
+ //if (m_params.m_print_stats)
+ // em.print("Unquantized ETC1S RGB Avg: ");
+ s.m_best_etc1s_rgb_avg_psnr = static_cast<float>(em.m_psnr);
+
+ em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0);
+ //if (m_params.m_print_stats)
+ // em.print("Unquantized ETC1S 709 Luma: ");
+ s.m_best_etc1s_luma_709_psnr = static_cast<float>(em.m_psnr);
+ s.m_best_etc1s_luma_709_ssim = static_cast<float>(em.m_ssim);
+
+ em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0, true, true);
+ //if (m_params.m_print_stats)
+ // em.print("Unquantized ETC1S 601 Luma: ");
+ s.m_best_etc1s_luma_601_psnr = static_cast<float>(em.m_psnr);
+ }
}
- }
- std::string out_basename;
- if (m_params.m_out_filename.size())
- string_get_filename(m_params.m_out_filename.c_str(), out_basename);
- else if (m_params.m_source_filenames.size())
- string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename);
+ std::string out_basename;
+ if (m_params.m_out_filename.size())
+ string_get_filename(m_params.m_out_filename.c_str(), out_basename);
+ else if (m_params.m_source_filenames.size())
+ string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename);
- string_remove_extension(out_basename);
- out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index);
+ string_remove_extension(out_basename);
+ out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index);
- if ((!m_params.m_uastc) && (m_frontend.get_params().m_debug_images))
- {
- // Write "best" ETC1S debug images
- if (!m_params.m_uastc)
+ if ((!m_params.m_uastc) && (m_frontend.get_params().m_debug_images))
{
- gpu_image best_etc1s_gpu_image(m_best_etc1s_images[slice_index]);
- best_etc1s_gpu_image.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);
- write_compressed_texture_file((out_basename + "_best_etc1s.ktx").c_str(), best_etc1s_gpu_image);
+ // Write "best" ETC1S debug images
+ if (!m_params.m_uastc)
+ {
+ gpu_image best_etc1s_gpu_image(m_best_etc1s_images[slice_index]);
+ best_etc1s_gpu_image.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);
+ write_compressed_texture_file((out_basename + "_best_etc1s.ktx").c_str(), best_etc1s_gpu_image, true);
- image best_etc1s_unpacked;
- best_etc1s_gpu_image.unpack(best_etc1s_unpacked);
- save_png(out_basename + "_best_etc1s.png", best_etc1s_unpacked);
+ image best_etc1s_unpacked;
+ best_etc1s_gpu_image.unpack(best_etc1s_unpacked);
+ save_png(out_basename + "_best_etc1s.png", best_etc1s_unpacked);
+ }
}
- }
- if (m_params.m_debug_images)
- {
- // Write decoded ETC1S/ASTC debug images
+ if (m_params.m_debug_images)
{
- gpu_image decoded_etc1s_or_astc(m_decoded_output_textures[slice_index]);
- decoded_etc1s_or_astc.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);
- write_compressed_texture_file((out_basename + "_transcoded_etc1s_or_astc.ktx").c_str(), decoded_etc1s_or_astc);
+ // Write decoded ETC1S/ASTC debug images
+ {
+ gpu_image decoded_etc1s_or_astc(m_decoded_output_textures[slice_index]);
+ decoded_etc1s_or_astc.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);
+ write_compressed_texture_file((out_basename + "_transcoded_etc1s_or_astc.ktx").c_str(), decoded_etc1s_or_astc, true);
- image temp(m_decoded_output_textures_unpacked[slice_index]);
- temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height);
- save_png(out_basename + "_transcoded_etc1s_or_astc.png", temp);
- }
+ image temp(m_decoded_output_textures_unpacked[slice_index]);
+ temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height);
+ save_png(out_basename + "_transcoded_etc1s_or_astc.png", temp);
+ }
- // Write decoded BC7 debug images
- if (m_decoded_output_textures_bc7[slice_index].get_pixel_width())
- {
- gpu_image decoded_bc7(m_decoded_output_textures_bc7[slice_index]);
- decoded_bc7.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);
- write_compressed_texture_file((out_basename + "_transcoded_bc7.ktx").c_str(), decoded_bc7);
+ // Write decoded BC7 debug images
+ if (m_decoded_output_textures_bc7[slice_index].get_pixel_width())
+ {
+ gpu_image decoded_bc7(m_decoded_output_textures_bc7[slice_index]);
+ decoded_bc7.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);
+ write_compressed_texture_file((out_basename + "_transcoded_bc7.ktx").c_str(), decoded_bc7, true);
- image temp(m_decoded_output_textures_unpacked_bc7[slice_index]);
- temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height);
- save_png(out_basename + "_transcoded_bc7.png", temp);
+ image temp(m_decoded_output_textures_unpacked_bc7[slice_index]);
+ temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height);
+ save_png(out_basename + "_transcoded_bc7.png", temp);
+ }
}
}
- }
+ } // if (m_params.m_hdr)
+
} // if (m_params.m_validate_output_data)
return true;
@@ -1727,10 +2962,27 @@ namespace basisu
}
static uint8_t g_ktx2_etc1s_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF };
- static uint8_t g_ktx2_etc1s_alpha_dfd[60] = { 0x3C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x38,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF,0x40,0x0,0x3F,0xF,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF };
+ static uint8_t g_ktx2_etc1s_alpha_dfd[60] = { 0x3C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x38,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF,0x40,0x0,0x3F,0xF,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF };
+
static uint8_t g_ktx2_uastc_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x4,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF };
- static uint8_t g_ktx2_uastc_alpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF };
-
+ static uint8_t g_ktx2_uastc_alpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF };
+
+ // HDR TODO - what is the best Khronos DFD to use for UASTC HDR?
+ static uint8_t g_ktx2_uastc_hdr_nonalpha_dfd[44] =
+ {
+ 0x2C,0x0,0x0,0x0, // 0 totalSize
+ 0x0,0x0,0x0,0x0, // 1 descriptorType/vendorId
+ 0x2,0x0,0x28,0x0, // 2 descriptorBlockSize/versionNumber
+ 0xA7,0x1,0x1,0x0, // 3 colorModel, colorPrimaries, transferFunction, flags (in byte order)
+ 0x3,0x3,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension3
+ 0x10,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane3
+ 0x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane7
+ 0x0,0x0,0x7F,0x80, // 7 bitOffset/bitLength/channelType and qualifier flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.)
+ 0x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition3
+ 0x0,0x0,0x0,0x0, // 9 sampleLower (0.0)
+ 0x00, 0x00, 0x80, 0x3F // 10 sampleHigher (1.0)
+ };
+
void basis_compressor::get_dfd(uint8_vec &dfd, const basist::ktx2_header &header)
{
const uint8_t* pDFD;
@@ -1738,7 +2990,12 @@ namespace basisu
if (m_params.m_uastc)
{
- if (m_any_source_image_has_alpha)
+ if (m_params.m_hdr)
+ {
+ pDFD = g_ktx2_uastc_hdr_nonalpha_dfd;
+ dfd_len = sizeof(g_ktx2_uastc_hdr_nonalpha_dfd);
+ }
+ else if (m_any_source_image_has_alpha)
{
pDFD = g_ktx2_uastc_alpha_dfd;
dfd_len = sizeof(g_ktx2_uastc_alpha_dfd);
@@ -1772,10 +3029,18 @@ namespace basisu
dfd_bits &= ~(0xFF << 16);
- if (m_params.m_ktx2_srgb_transfer_func)
- dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_SRGB << 16);
- else
+ if (m_params.m_hdr)
+ {
+ // TODO: In HDR mode, always write linear for now.
dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_LINEAR << 16);
+ }
+ else
+ {
+ if (m_params.m_ktx2_srgb_transfer_func)
+ dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_SRGB << 16);
+ else
+ dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_LINEAR << 16);
+ }
basisu::write_le_dword(dfd.data() + 3 * sizeof(uint32_t), dfd_bits);
@@ -1850,7 +3115,12 @@ namespace basisu
header.m_pixel_width = base_width;
header.m_pixel_height = base_height;
header.m_face_count = total_faces;
- header.m_vk_format = basist::KTX2_VK_FORMAT_UNDEFINED;
+
+ if (m_params.m_hdr)
+ header.m_vk_format = basist::KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK;
+ else
+ header.m_vk_format = basist::KTX2_VK_FORMAT_UNDEFINED;
+
header.m_type_size = 1;
header.m_level_count = total_levels;
header.m_layer_count = (total_layers > 1) ? total_layers : 0;
@@ -2061,7 +3331,8 @@ namespace basisu
if (bytes_needed_to_pad < 6)
bytes_needed_to_pad += 16;
- printf("WARNING: Due to a KTX2 validator bug related to mipPadding, we must insert a dummy key into the KTX2 file of %u bytes\n", bytes_needed_to_pad);
+ // Just add the padding. It's likely not necessary anymore, but can't really hurt.
+ //printf("WARNING: Due to a KTX2 validator bug related to mipPadding, we must insert a dummy key into the KTX2 file of %u bytes\n", bytes_needed_to_pad);
// We're not good - need to add a dummy key large enough to force file alignment so the mip level array gets aligned.
// We can't just add some bytes before the mip level array because ktx2check will see that as extra data in the file that shouldn't be there in ktxValidator::validateDataSize().
@@ -2258,18 +3529,34 @@ namespace basisu
return result;
}
- void* basis_compress(
- const basisu::vector<image>& source_images,
+ static void* basis_compress(
+ const basisu::vector<image> *pSource_images,
+ const basisu::vector<imagef> *pSource_images_hdr,
uint32_t flags_and_quality, float uastc_rdo_quality,
size_t* pSize,
image_stats* pStats)
{
+ assert((pSource_images != nullptr) || (pSource_images_hdr != nullptr));
+ assert(!((pSource_images != nullptr) && (pSource_images_hdr != nullptr)));
+
// Check input parameters
- if ((!source_images.size()) || (!pSize))
+ if (pSource_images)
{
- error_printf("basis_compress: Invalid parameter\n");
- assert(0);
- return nullptr;
+ if ((!pSource_images->size()) || (!pSize))
+ {
+ error_printf("basis_compress: Invalid parameter\n");
+ assert(0);
+ return nullptr;
+ }
+ }
+ else
+ {
+ if ((!pSource_images_hdr->size()) || (!pSize))
+ {
+ error_printf("basis_compress: Invalid parameter\n");
+ assert(0);
+ return nullptr;
+ }
}
*pSize = 0;
@@ -2287,40 +3574,70 @@ namespace basisu
comp_params.m_y_flip = (flags_and_quality & cFlagYFlip) != 0;
comp_params.m_debug = (flags_and_quality & cFlagDebug) != 0;
-
+ comp_params.m_debug_images = (flags_and_quality & cFlagDebugImages) != 0;
+
// Copy the largest mipmap level
- comp_params.m_source_images.resize(1);
- comp_params.m_source_images[0] = source_images[0];
+ if (pSource_images)
+ {
+ comp_params.m_source_images.resize(1);
+ comp_params.m_source_images[0] = (*pSource_images)[0];
+
+ // Copy the smaller mipmap levels, if any
+ if (pSource_images->size() > 1)
+ {
+ comp_params.m_source_mipmap_images.resize(1);
+ comp_params.m_source_mipmap_images[0].resize(pSource_images->size() - 1);
- // Copy the smaller mipmap levels, if any
- if (source_images.size() > 1)
+ for (uint32_t i = 1; i < pSource_images->size(); i++)
+ comp_params.m_source_mipmap_images[0][i - 1] = (*pSource_images)[i];
+ }
+ }
+ else
{
- comp_params.m_source_mipmap_images.resize(1);
- comp_params.m_source_mipmap_images[0].resize(source_images.size() - 1);
+ comp_params.m_source_images_hdr.resize(1);
+ comp_params.m_source_images_hdr[0] = (*pSource_images_hdr)[0];
- for (uint32_t i = 1; i < source_images.size(); i++)
- comp_params.m_source_mipmap_images[0][i - 1] = source_images[i];
+ // Copy the smaller mipmap levels, if any
+ if (pSource_images_hdr->size() > 1)
+ {
+ comp_params.m_source_mipmap_images_hdr.resize(1);
+ comp_params.m_source_mipmap_images_hdr[0].resize(pSource_images_hdr->size() - 1);
+
+ // HDR path: pSource_images is nullptr in this branch, so the loop bound must come from
+ // pSource_images_hdr (the vector actually being copied from), not pSource_images.
+ for (uint32_t i = 1; i < pSource_images_hdr->size(); i++)
+ comp_params.m_source_mipmap_images_hdr[0][i - 1] = (*pSource_images_hdr)[i];
+ }
}
comp_params.m_multithreading = (flags_and_quality & cFlagThreaded) != 0;
comp_params.m_use_opencl = (flags_and_quality & cFlagUseOpenCL) != 0;
- comp_params.m_write_output_basis_files = false;
+ comp_params.m_write_output_basis_or_ktx2_files = false;
comp_params.m_perceptual = (flags_and_quality & cFlagSRGB) != 0;
comp_params.m_mip_srgb = comp_params.m_perceptual;
comp_params.m_mip_gen = (flags_and_quality & (cFlagGenMipsWrap | cFlagGenMipsClamp)) != 0;
comp_params.m_mip_wrapping = (flags_and_quality & cFlagGenMipsWrap) != 0;
- comp_params.m_uastc = (flags_and_quality & cFlagUASTC) != 0;
- if (comp_params.m_uastc)
+ if ((pSource_images_hdr) || (flags_and_quality & cFlagHDR))
{
- comp_params.m_pack_uastc_flags = flags_and_quality & cPackUASTCLevelMask;
- comp_params.m_rdo_uastc = (flags_and_quality & cFlagUASTCRDO) != 0;
- comp_params.m_rdo_uastc_quality_scalar = uastc_rdo_quality;
+ // In UASTC HDR mode, the compressor will jam this to true anyway.
+ // And there's no need to set UASTC LDR or ETC1S options.
+ comp_params.m_uastc = true;
}
else
- comp_params.m_quality_level = basisu::maximum<uint32_t>(1, flags_and_quality & 255);
+ {
+ comp_params.m_uastc = (flags_and_quality & cFlagUASTC) != 0;
+ if (comp_params.m_uastc)
+ {
+ comp_params.m_pack_uastc_flags = flags_and_quality & cPackUASTCLevelMask;
+ comp_params.m_rdo_uastc = (flags_and_quality & cFlagUASTCRDO) != 0;
+ comp_params.m_rdo_uastc_quality_scalar = uastc_rdo_quality;
+ }
+ else
+ {
+ comp_params.m_quality_level = basisu::maximum<uint32_t>(1, flags_and_quality & 255);
+ }
+ }
comp_params.m_create_ktx2_file = (flags_and_quality & cFlagKTX2) != 0;
@@ -2337,6 +3654,15 @@ namespace basisu
comp_params.m_print_stats = (flags_and_quality & cFlagPrintStats) != 0;
comp_params.m_status_output = (flags_and_quality & cFlagPrintStatus) != 0;
+ if ((flags_and_quality & cFlagHDR) || (pSource_images_hdr))
+ {
+ comp_params.m_hdr = true;
+ comp_params.m_uastc_hdr_options.set_quality_level(flags_and_quality & cPackUASTCLevelMask);
+ }
+
+ if (flags_and_quality & cFlagHDRLDRImageSRGBToLinearConversion)
+ comp_params.m_hdr_ldr_srgb_to_linear_conversion = true;
+
// Create the compressor, initialize it, and process the input
basis_compressor comp;
if (!comp.init(comp_params))
@@ -2381,6 +3707,24 @@ namespace basisu
}
void* basis_compress(
+ const basisu::vector<image>& source_images,
+ uint32_t flags_and_quality, float uastc_rdo_quality,
+ size_t* pSize,
+ image_stats* pStats)
+ { // LDR overload: forwards to the shared implementation with the LDR image vector set and the HDR vector null.
+ return basis_compress(&source_images, nullptr, flags_and_quality, uastc_rdo_quality, pSize, pStats);
+ }
+
+ void* basis_compress(
+ const basisu::vector<imagef>& source_images_hdr,
+ uint32_t flags_and_quality,
+ size_t* pSize,
+ image_stats* pStats)
+ { // HDR overload: forwards to the shared implementation with the HDR image vector set and the LDR vector null.
+ return basis_compress(nullptr, &source_images_hdr, flags_and_quality, 0.0f, pSize, pStats); // this overload has no RDO parameter, so 0.0f is passed for uastc_rdo_quality
+ }
+
+ void* basis_compress(
const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels,
uint32_t flags_and_quality, float uastc_rdo_quality,
size_t* pSize,
diff --git a/thirdparty/basis_universal/encoder/basisu_comp.h b/thirdparty/basis_universal/encoder/basisu_comp.h
index b6c9fef9e2..1cc75fc8a3 100644
--- a/thirdparty/basis_universal/encoder/basisu_comp.h
+++ b/thirdparty/basis_universal/encoder/basisu_comp.h
@@ -1,5 +1,5 @@
// basisu_comp.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -18,9 +18,10 @@
#include "basisu_basis_file.h"
#include "../transcoder/basisu_transcoder.h"
#include "basisu_uastc_enc.h"
+#include "basisu_astc_hdr_enc.h"
-#define BASISU_LIB_VERSION 116
-#define BASISU_LIB_VERSION_STRING "1.16"
+#define BASISU_LIB_VERSION 150
+#define BASISU_LIB_VERSION_STRING "1.50"
#ifndef BASISD_SUPPORT_KTX2
#error BASISD_SUPPORT_KTX2 is undefined
@@ -81,6 +82,8 @@ namespace basisu
m_basis_luma_601_psnr = 0.0f;
m_basis_luma_709_ssim = 0.0f;
+ m_basis_rgb_avg_bc6h_psnr = 0.0f;
+
m_bc7_rgb_avg_psnr = 0.0f;
m_bc7_rgba_avg_psnr = 0.0f;
m_bc7_a_avg_psnr = 0.0f;
@@ -100,7 +103,7 @@ namespace basisu
uint32_t m_width;
uint32_t m_height;
- // .basis compressed (ETC1S or UASTC statistics)
+ // .basis/.ktx2 compressed (LDR: ETC1S or UASTC statistics, HDR: transcoded BC6H statistics)
float m_basis_rgb_avg_psnr;
float m_basis_rgba_avg_psnr;
float m_basis_a_avg_psnr;
@@ -108,7 +111,10 @@ namespace basisu
float m_basis_luma_601_psnr;
float m_basis_luma_709_ssim;
- // BC7 statistics
+ // UASTC HDR only.
+ float m_basis_rgb_avg_bc6h_psnr;
+
+ // LDR: BC7 statistics
float m_bc7_rgb_avg_psnr;
float m_bc7_rgba_avg_psnr;
float m_bc7_a_avg_psnr;
@@ -116,7 +122,7 @@ namespace basisu
float m_bc7_luma_601_psnr;
float m_bc7_luma_709_ssim;
- // Highest achievable quality ETC1S statistics
+ // LDR: Highest achievable quality ETC1S statistics
float m_best_etc1s_rgb_avg_psnr;
float m_best_etc1s_luma_709_psnr;
float m_best_etc1s_luma_601_psnr;
@@ -256,7 +262,7 @@ namespace basisu
m_no_selector_rdo.clear();
m_selector_rdo_thresh.clear();
m_read_source_images.clear();
- m_write_output_basis_files.clear();
+ m_write_output_basis_or_ktx2_files.clear();
m_compression_level.clear();
m_compute_stats.clear();
m_print_stats.clear();
@@ -317,27 +323,38 @@ namespace basisu
m_validate_output_data.clear();
+ m_hdr_ldr_srgb_to_linear_conversion.clear();
+
+ m_hdr_favor_astc.clear();
+
m_pJob_pool = nullptr;
}
- // True to generate UASTC .basis file data, otherwise ETC1S.
+ // True to generate UASTC .basis/.KTX2 file data, otherwise ETC1S.
bool_param<false> m_uastc;
+ // Set m_hdr to true to switch to UASTC HDR mode.
+ bool_param<false> m_hdr;
+
bool_param<false> m_use_opencl;
- // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG images to read.
- // Otherwise, the compressor processes the images in m_source_images.
+ // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG etc. images to read.
+ // Otherwise, the compressor processes the images in m_source_images or m_source_images_hdr.
basisu::vector<std::string> m_source_filenames;
basisu::vector<std::string> m_source_alpha_filenames;
basisu::vector<image> m_source_images;
+ basisu::vector<imagef> m_source_images_hdr;
+
// Stores mipmaps starting from level 1. Level 0 is still stored in m_source_images, as usual.
// If m_source_mipmaps isn't empty, automatic mipmap generation isn't done. m_source_mipmaps.size() MUST equal m_source_images.size() or the compressor returns an error.
// The compressor applies the user-provided swizzling (in m_swizzle) to these images.
basisu::vector< basisu::vector<image> > m_source_mipmap_images;
+
+ basisu::vector< basisu::vector<imagef> > m_source_mipmap_images_hdr;
- // Filename of the output basis file
+ // Filename of the output basis/ktx2 file
std::string m_out_filename;
// The params are done this way so we can detect when the user has explictly changed them.
@@ -373,8 +390,8 @@ namespace basisu
// Read source images from m_source_filenames/m_source_alpha_filenames
bool_param<false> m_read_source_images;
- // Write the output basis file to disk using m_out_filename
- bool_param<false> m_write_output_basis_files;
+ // Write the output basis/ktx2 file to disk using m_out_filename
+ bool_param<false> m_write_output_basis_or_ktx2_files;
// Compute and display image metrics
bool_param<false> m_compute_stats;
@@ -382,15 +399,15 @@ namespace basisu
// Print stats to stdout, if m_compute_stats is true.
bool_param<true> m_print_stats;
- // Check to see if any input image has an alpha channel, if so then the output basis file will have alpha channels
+ // Check to see if any input image has an alpha channel, if so then the output basis/ktx2 file will have alpha channels
bool_param<true> m_check_for_alpha;
- // Always put alpha slices in the output basis file, even when the input doesn't have alpha
+ // Always put alpha slices in the output basis/ktx2 file, even when the input doesn't have alpha
bool_param<false> m_force_alpha;
bool_param<true> m_multithreading;
- // Split the R channel to RGB and the G channel to alpha, then write a basis file with alpha channels
- char m_swizzle[4];
+ // Split the R channel to RGB and the G channel to alpha, then write a basis/ktx2 file with alpha channels
+ uint8_t m_swizzle[4];
bool_param<false> m_renormalize;
@@ -448,8 +465,17 @@ namespace basisu
param<int> m_ktx2_zstd_supercompression_level;
bool_param<false> m_ktx2_srgb_transfer_func;
+ astc_hdr_codec_options m_uastc_hdr_options;
+
bool_param<false> m_validate_output_data;
+ // If true, LDR images (such as PNG) will be converted to normalized [0,1] linear light (via a sRGB->Linear conversion) and then processed as HDR.
+ // Otherwise, LDR images will be processed as HDR as-is.
+ bool_param<true> m_hdr_ldr_srgb_to_linear_conversion;
+
+ // If true, ASTC HDR quality is favored more than BC6H quality. Otherwise it's a rough balance.
+ bool_param<false> m_hdr_favor_astc;
+
job_pool *m_pJob_pool;
};
@@ -504,6 +530,7 @@ namespace basisu
opencl_context_ptr m_pOpenCL_context;
basisu::vector<image> m_slice_images;
+ basisu::vector<imagef> m_slice_images_hdr;
basisu::vector<image_stats> m_stats;
@@ -515,7 +542,9 @@ namespace basisu
uint32_t m_total_blocks;
basisu_frontend m_frontend;
+
pixel_block_vec m_source_blocks;
+ pixel_block_hdr_vec m_source_blocks_hdr;
basisu::vector<gpu_image> m_frontend_output_textures;
@@ -526,11 +555,17 @@ namespace basisu
basisu_file m_basis_file;
- basisu::vector<gpu_image> m_decoded_output_textures;
+ basisu::vector<gpu_image> m_decoded_output_textures; // BC6H in HDR mode
basisu::vector<image> m_decoded_output_textures_unpacked;
+
basisu::vector<gpu_image> m_decoded_output_textures_bc7;
basisu::vector<image> m_decoded_output_textures_unpacked_bc7;
+ basisu::vector<imagef> m_decoded_output_textures_bc6h_hdr_unpacked; // BC6H in HDR mode
+
+ basisu::vector<gpu_image> m_decoded_output_textures_astc_hdr;
+ basisu::vector<imagef> m_decoded_output_textures_astc_hdr_unpacked;
+
uint8_vec m_output_basis_file;
uint8_vec m_output_ktx2_file;
@@ -541,14 +576,21 @@ namespace basisu
bool m_opencl_failed;
+ void check_for_hdr_inputs();
+ bool sanity_check_input_params();
+ void clean_hdr_image(imagef& src_img);
+ bool read_dds_source_images();
bool read_source_images();
bool extract_source_blocks();
bool process_frontend();
bool extract_frontend_texture_data();
bool process_backend();
bool create_basis_file_and_transcode();
+ bool write_hdr_debug_images(const char* pBasename, const imagef& img, uint32_t width, uint32_t height);
bool write_output_files_and_compute_stats();
+ error_code encode_slices_to_uastc_hdr();
error_code encode_slices_to_uastc();
+ bool generate_mipmaps(const imagef& img, basisu::vector<imagef>& mips, bool has_alpha);
bool generate_mipmaps(const image &img, basisu::vector<image> &mips, bool has_alpha);
bool validate_texture_type_constraints();
bool validate_ktx2_constraints();
@@ -568,7 +610,8 @@ namespace basisu
//
// flags_and_quality: Combination of the above flags logically OR'd with the ETC1S or UASTC level, i.e. "cFlagSRGB | cFlagGenMipsClamp | cFlagThreaded | 128" or "cFlagSRGB | cFlagGenMipsClamp | cFlagUASTC | cFlagThreaded | cPackUASTCLevelDefault".
// In ETC1S mode, the lower 8-bits are the ETC1S quality level which ranges from [1,255] (higher=better quality/larger files)
- // In UASTC mode, the lower 8-bits are the UASTC pack level (see cPackUASTCLevelFastest, etc.). Fastest/lowest quality is 0, so be sure to set it correctly.
+ // In UASTC mode, the lower 8-bits are the UASTC LDR/HDR pack level (see cPackUASTCLevelFastest, etc.). Fastest/lowest quality is 0, so be sure to set it correctly. Valid values are [0,4] for both LDR/HDR.
+ // In UASTC mode, be sure to set this, otherwise it defaults to 0 (fastest/lowest quality).
//
// uastc_rdo_quality: Float UASTC RDO quality level (0=no change, higher values lower quality but increase compressibility, initially try .5-1.5)
//
@@ -594,20 +637,36 @@ namespace basisu
cFlagUASTCRDO = 1 << 18, // use RDO postprocessing when generating UASTC files (must set uastc_rdo_quality to the quality scalar)
cFlagPrintStats = 1 << 19, // print image stats to stdout
- cFlagPrintStatus = 1 << 20 // print status to stdout
+ cFlagPrintStatus = 1 << 20, // print status to stdout
+
+ cFlagHDR = 1 << 21, // Force encoder into HDR mode, even if source image is LDR.
+ cFlagHDRLDRImageSRGBToLinearConversion = 1 << 22, // In HDR mode, convert LDR source images to linear before encoding.
+
+ cFlagDebugImages = 1 << 23 // enable debug image output (sets m_debug_images)
};
// This function accepts an array of source images.
// If more than one image is provided, it's assumed the images form a mipmap pyramid and automatic mipmap generation is disabled.
- // Returns a pointer to the compressed .basis or .ktx2 file data. *pSize is the size of the compressed data. The returned block must be freed using basis_free_data().
+ // Returns a pointer to the compressed .basis or .ktx2 file data. *pSize is the size of the compressed data.
+ // Important: The returned block MUST be manually freed using basis_free_data().
// basisu_encoder_init() MUST be called first!
+ // LDR version. To compress the LDR source image as HDR: Use the cFlagHDR flag.
void* basis_compress(
const basisu::vector<image> &source_images,
uint32_t flags_and_quality, float uastc_rdo_quality,
size_t* pSize,
image_stats* pStats = nullptr);
- // This function only accepts a single source image.
+ // HDR-only version.
+ // Important: The returned block MUST be manually freed using basis_free_data().
+ void* basis_compress(
+ const basisu::vector<imagef>& source_images_hdr,
+ uint32_t flags_and_quality,
+ size_t* pSize,
+ image_stats* pStats = nullptr);
+
+ // This function only accepts a single LDR source image. It's just a wrapper for basis_compress() above.
+ // Important: The returned block MUST be manually freed using basis_free_data().
void* basis_compress(
const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels,
uint32_t flags_and_quality, float uastc_rdo_quality,
@@ -615,6 +674,7 @@ namespace basisu
image_stats* pStats = nullptr);
// Frees the dynamically allocated file data returned by basis_compress().
+ // This MUST be called on the pointer returned by basis_compress() when you're done with it.
void basis_free_data(void* p);
// Runs a short benchmark using synthetic image data to time OpenCL encoding vs. CPU encoding, with multithreading enabled.
diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp
index e87dd636a2..fff98e8301 100644
--- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
@@ -1,5 +1,5 @@
// basisu_enc.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -21,10 +21,20 @@
#include "jpgd.h"
#include "pvpngreader.h"
#include "basisu_opencl.h"
+#include "basisu_astc_hdr_enc.h"
#include <vector>
+#ifndef TINYEXR_USE_ZFP
+#define TINYEXR_USE_ZFP (1)
+#endif
+#include <tinyexr.h>
+
+#ifndef MINIZ_HEADER_FILE_ONLY
#define MINIZ_HEADER_FILE_ONLY
+#endif
+#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
+#endif
#include "basisu_miniz.h"
#if defined(_WIN32)
@@ -165,14 +175,14 @@ namespace basisu
bool g_library_initialized;
std::mutex g_encoder_init_mutex;
-
+
// Encoder library initialization (just call once at startup)
- void basisu_encoder_init(bool use_opencl, bool opencl_force_serialization)
+ bool basisu_encoder_init(bool use_opencl, bool opencl_force_serialization)
{
std::lock_guard<std::mutex> lock(g_encoder_init_mutex);
if (g_library_initialized)
- return;
+ return true;
detect_sse41();
@@ -189,7 +199,11 @@ namespace basisu
interval_timer::init(); // make sure interval_timer globals are initialized from main thread to avoid TSAN reports
+ astc_hdr_enc_init();
+ basist::bc6h_enc_init();
+
g_library_initialized = true;
+ return true;
}
void basisu_encoder_deinit()
@@ -316,6 +330,24 @@ namespace basisu
init();
return ticks * g_timer_freq;
}
+
+ // Applies the sRGB transfer curve to a linear value in [0,1] and passes the result through saturate().
+ float linear_to_srgb(float l)
+ {
+ assert(l >= 0.0f && l <= 1.0f);
+
+ // Small values use the linear segment of the curve, everything else the power segment.
+ const bool use_linear_segment = (l < .0031308f);
+ const float encoded = use_linear_segment ? (l * 12.92f) : (1.055f * powf(l, 1.0f / 2.4f) - .055f);
+ return saturate(encoded);
+ }
+
+ // Inverts the sRGB transfer curve for a value in [0,1] and passes the result through saturate().
+ float srgb_to_linear(float s)
+ {
+ assert(s >= 0.0f && s <= 1.0f);
+
+ // Small values use the linear segment of the curve, everything else the power segment.
+ const bool use_linear_segment = (s < .04045f);
+ const float decoded = use_linear_segment ? (s * (1.0f / 12.92f)) : powf((s + .055f) * (1.0f / 1.055f), 2.4f);
+ return saturate(decoded);
+ }
const uint32_t MAX_32BIT_ALLOC_SIZE = 250000000;
@@ -336,7 +368,7 @@ namespace basisu
if (sizeof(void *) == sizeof(uint32_t))
{
- if ((w * h * n_chans) > MAX_32BIT_ALLOC_SIZE)
+ if (((uint64_t)w * h * n_chans) > MAX_32BIT_ALLOC_SIZE)
{
error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h);
@@ -371,6 +403,11 @@ namespace basisu
return true;
}
+ bool load_qoi(const char* pFilename, image& img) // Placeholder QOI loader: neither argument is used.
+ {
+ return false; // Always reports failure; img is left untouched.
+ }
+
bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename)
{
interval_timer tm;
@@ -433,11 +470,178 @@ namespace basisu
return load_png(pFilename, img);
if (strcasecmp(pExt, "tga") == 0)
return load_tga(pFilename, img);
+ if (strcasecmp(pExt, "qoi") == 0)
+ return load_qoi(pFilename, img);
if ( (strcasecmp(pExt, "jpg") == 0) || (strcasecmp(pExt, "jfif") == 0) || (strcasecmp(pExt, "jpeg") == 0) )
return load_jpg(pFilename, img);
return false;
}
+
+ // Expands an 8-bit-per-channel image into a float image of the same size, scaling each channel by 1/255.
+ // When ldr_srgb_to_linear is set, R, G and B are additionally run through srgb_to_linear(); alpha never is.
+ static void convert_ldr_to_hdr_image(imagef &img, const image &ldr_img, bool ldr_srgb_to_linear)
+ {
+ const float inv_255 = 1.0f / 255.0f;
+
+ img.resize(ldr_img.get_width(), ldr_img.get_height());
+
+ const uint32_t total_rows = ldr_img.get_height();
+ const uint32_t total_cols = ldr_img.get_width();
+
+ for (uint32_t row = 0; row < total_rows; row++)
+ {
+ for (uint32_t col = 0; col < total_cols; col++)
+ {
+ const color_rgba& src = ldr_img(col, row);
+ vec4F& dst = img(col, row);
+
+ for (uint32_t chan = 0; chan < 3; chan++)
+ {
+ const float normalized = src[chan] * inv_255;
+
+ // TODO: Multiply by 100-200 nits?
+ dst[chan] = ldr_srgb_to_linear ? srgb_to_linear(normalized) : normalized;
+ }
+
+ dst[3] = src[3] * inv_255;
+ }
+ }
+ }
+
+ // Builds a float image from an in-memory buffer.
+ // pMem/mem_size: the source bytes; both must be non-null/non-zero.
+ // width/height: only used by the two raw pixel formats (half float and float RGBA), where
+ // mem_size must be exactly width * height * 4 components.
+ // img_type: selects the decoder (raw half, raw float, PNG, EXR or Radiance HDR).
+ // ldr_srgb_to_linear: only used for PNG input, forwarded to convert_ldr_to_hdr_image().
+ // Returns false on any validation or decode failure.
+ bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear)
+ {
+ if ((!pMem) || (!mem_size))
+ {
+ assert(0);
+ return false;
+ }
+
+ switch (img_type)
+ {
+ case hdr_image_type::cHITRGBAHalfFloat:
+ {
+ if ((!width) || (!height))
+ {
+ assert(0);
+ return false;
+ }
+
+ // Compute the expected size in 64 bits: width * height alone is a 32-bit product and can wrap,
+ // which would let an undersized buffer pass this check and be read out of bounds below.
+ const uint64_t expected_size = (uint64_t)width * height * sizeof(basist::half_float) * 4;
+ if ((uint64_t)mem_size != expected_size)
+ {
+ assert(0);
+ return false;
+ }
+
+ const basist::half_float* pSrc_image_h = static_cast<const basist::half_float *>(pMem);
+
+ img.resize(width, height);
+ for (uint32_t y = 0; y < height; y++)
+ {
+ for (uint32_t x = 0; x < width; x++)
+ {
+ const basist::half_float* pSrc_pixel = &pSrc_image_h[x * 4];
+
+ vec4F& dst = img(x, y);
+ dst[0] = basist::half_to_float(pSrc_pixel[0]);
+ dst[1] = basist::half_to_float(pSrc_pixel[1]);
+ dst[2] = basist::half_to_float(pSrc_pixel[2]);
+ dst[3] = basist::half_to_float(pSrc_pixel[3]);
+ }
+
+ // Advance to the next source row (4 half floats per pixel).
+ pSrc_image_h += ((size_t)width * 4);
+ }
+
+ break;
+ }
+ case hdr_image_type::cHITRGBAFloat:
+ {
+ if ((!width) || (!height))
+ {
+ assert(0);
+ return false;
+ }
+
+ // Same 64-bit size validation as the half float path.
+ const uint64_t expected_size = (uint64_t)width * height * sizeof(float) * 4;
+ if ((uint64_t)mem_size != expected_size)
+ {
+ assert(0);
+ return false;
+ }
+
+ img.resize(width, height);
+ // mem_size was just validated to be the exact byte size of the pixel data.
+ memcpy(img.get_ptr(), pMem, mem_size);
+
+ break;
+ }
+ case hdr_image_type::cHITPNGImage:
+ {
+ image ldr_img;
+ if (!load_png(static_cast<const uint8_t *>(pMem), mem_size, ldr_img))
+ return false;
+
+ convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear);
+ break;
+ }
+ case hdr_image_type::cHITEXRImage:
+ {
+ if (!read_exr(pMem, mem_size, img))
+ return false;
+
+ break;
+ }
+ case hdr_image_type::cHITHDRImage:
+ {
+ // read_rgbe() takes a uint8_vec, so the bytes are copied into one first.
+ uint8_vec buf(mem_size);
+ memcpy(buf.get_ptr(), pMem, mem_size);
+
+ rgbe_header_info hdr;
+ if (!read_rgbe(buf, img, hdr))
+ return false;
+
+ break;
+ }
+ default:
+ assert(0);
+ return false;
+ }
+
+ return true;
+ }
+
+ // Loads an image file as float pixels, picking the reader from the filename's extension:
+ // "hdr" -> read_rgbe(), "exr" -> read_exr(), anything else -> load_image() followed by an LDR to float conversion.
+ // Returns false when there is no extension or the selected reader fails.
+ bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear)
+ {
+ const std::string ext(string_get_extension(std::string(pFilename)));
+ if (ext.empty())
+ return false;
+
+ const char* const pExt = ext.c_str();
+
+ if (strcasecmp(pExt, "hdr") == 0)
+ {
+ rgbe_header_info rgbe_info;
+ return read_rgbe(pFilename, img, rgbe_info);
+ }
+
+ if (strcasecmp(pExt, "exr") == 0)
+ {
+ int n_chans = 0;
+ return read_exr(pFilename, img, n_chans);
+ }
+
+ // Try loading image as LDR, then optionally convert to linear light.
+ image ldr_img;
+ if (!load_image(pFilename, ldr_img))
+ return false;
+
+ convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear);
+ return true;
+ }
bool save_png(const char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp)
{
@@ -559,6 +763,45 @@ namespace basisu
return true;
}
+ // Reads the first len bytes of a file into pData.
+ // Returns false if pData is null, len is zero, the file cannot be opened, its size cannot be
+ // determined, it holds fewer than len bytes, or the read comes up short.
+ bool read_file_to_data(const char* pFilename, void *pData, size_t len)
+ {
+ assert(pData && len);
+ if ((!pData) || (!len))
+ return false;
+
+ FILE* pFile = nullptr;
+#ifdef _WIN32
+ fopen_s(&pFile, pFilename, "rb");
+#else
+ pFile = fopen(pFilename, "rb");
+#endif
+ if (!pFile)
+ return false;
+
+ fseek(pFile, 0, SEEK_END);
+#ifdef _WIN32
+ const int64_t total_file_bytes = _ftelli64(pFile);
+#else
+ const int64_t total_file_bytes = ftello(pFile);
+#endif
+
+ // The size query must have worked and the file must hold at least len bytes.
+ bool succeeded = (total_file_bytes >= 0) && ((size_t)total_file_bytes >= len);
+ if (succeeded)
+ {
+ fseek(pFile, 0, SEEK_SET);
+ succeeded = (fread(pData, 1, (size_t)len, pFile) == (size_t)len);
+ }
+
+ fclose(pFile);
+ return succeeded;
+ }
+
bool write_data_to_file(const char* pFilename, const void* pData, size_t len)
{
FILE* pFile = nullptr;
@@ -581,25 +824,7 @@ namespace basisu
return fclose(pFile) != EOF;
}
-
- float linear_to_srgb(float l)
- {
- assert(l >= 0.0f && l <= 1.0f);
- if (l < .0031308f)
- return saturate(l * 12.92f);
- else
- return saturate(1.055f * powf(l, 1.0f/2.4f) - .055f);
- }
-
- float srgb_to_linear(float s)
- {
- assert(s >= 0.0f && s <= 1.0f);
- if (s < .04045f)
- return saturate(s * (1.0f/12.92f));
- else
- return saturate(powf((s + .055f) * (1.0f/1.055f), 2.4f));
- }
-
+
bool image_resample(const image &src, image &dst, bool srgb,
const char *pFilter, float filter_scale,
bool wrapping,
@@ -747,6 +972,121 @@ namespace basisu
return true;
}
+ bool image_resample(const imagef& src, imagef& dst,
+ const char* pFilter, float filter_scale,
+ bool wrapping,
+ uint32_t first_comp, uint32_t num_comps)
+ { // Resamples num_comps float channels of src, starting at first_comp, into dst, whose current dimensions are the target size. Returns false on invalid sizes or a resampler failure.
+ assert((first_comp + num_comps) <= 4);
+
+ const int cMaxComps = 4;
+
+ const uint32_t src_w = src.get_width(), src_h = src.get_height();
+ const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height();
+
+ if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION)
+ {
+ printf("Image is too large!\n");
+ return false;
+ }
+
+ if (!src_w || !src_h || !dst_w || !dst_h)
+ return false;
+
+ if ((num_comps < 1) || (num_comps > cMaxComps))
+ return false;
+
+ if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION))
+ {
+ printf("Image is too large!\n");
+ return false;
+ }
+
+ if ((src_w == dst_w) && (src_h == dst_h))
+ {
+ dst = src; // identical dimensions: plain copy of the whole image, no filtering
+ return true;
+ }
+
+ std::vector<float> samples[cMaxComps];
+ Resampler* resamplers[cMaxComps];
+
+ resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h,
+ wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 1.0f, 0.0f, // no clamping
+ pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0);
+ samples[0].resize(src_w);
+
+ for (uint32_t i = 1; i < num_comps; ++i)
+ {
+ resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h,
+ wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 1.0f, 0.0f, // no clamping
+ pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0); // the remaining resamplers are handed resampler 0's clist_x/clist_y
+ samples[i].resize(src_w);
+ }
+
+ uint32_t dst_y = 0;
+
+ for (uint32_t src_y = 0; src_y < src_h; ++src_y)
+ {
+ const vec4F* pSrc = &src(0, src_y);
+
+ // Put source lines into resampler(s)
+ for (uint32_t x = 0; x < src_w; ++x)
+ {
+ for (uint32_t c = 0; c < num_comps; ++c)
+ {
+ const uint32_t comp_index = first_comp + c;
+ const float v = (*pSrc)[comp_index];
+
+ samples[c][x] = v;
+ }
+
+ pSrc++;
+ }
+
+ for (uint32_t c = 0; c < num_comps; ++c)
+ {
+ if (!resamplers[c]->put_line(&samples[c][0]))
+ {
+ for (uint32_t i = 0; i < num_comps; i++)
+ delete resamplers[i]; // release every resampler before reporting the failure
+ return false;
+ }
+ }
+
+ // Now retrieve any output lines
+ for (;;)
+ {
+ uint32_t c;
+ for (c = 0; c < num_comps; ++c)
+ {
+ const uint32_t comp_index = first_comp + c;
+
+ const float* pOutput_samples = resamplers[c]->get_line();
+ if (!pOutput_samples)
+ break;
+
+ vec4F* pDst = &dst(0, dst_y);
+
+ for (uint32_t x = 0; x < dst_w; x++)
+ {
+ (*pDst)[comp_index] = pOutput_samples[x];
+ pDst++;
+ }
+ }
+ if (c < num_comps)
+ break; // a resampler had no finished line: go feed the next source row
+
+ ++dst_y; // every channel delivered this output row
+ }
+ }
+
+ for (uint32_t i = 0; i < num_comps; ++i)
+ delete resamplers[i];
+
+ return true;
+ }
+
void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms)
{
// See the paper "In-Place Calculation of Minimum Redundancy Codes" by Moffat and Katajainen
@@ -1312,11 +1652,13 @@ namespace basisu
uint32_t a = max_index / num_syms, b = max_index % num_syms;
+ const uint32_t ofs = m_entries_picked.size();
+
m_entries_picked.push_back(a);
m_entries_picked.push_back(b);
for (uint32_t i = 0; i < num_syms; i++)
- if ((i != b) && (i != a))
+ if ((i != m_entries_picked[ofs + 1]) && (i != m_entries_picked[ofs]))
m_entries_to_do.push_back(i);
for (uint32_t i = 0; i < m_entries_to_do.size(); i++)
@@ -1372,6 +1714,235 @@ namespace basisu
}
return which_side;
}
+
+ void image_metrics::calc(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool log)
+ { // Fills the error statistics and abnormal-value flags from the overlapping region of a and b. total_chans == 0 compares luminance instead of individual channels; log compares log2(max(0, v) + 1) values.
+ assert((first_chan < 4U) && (first_chan + total_chans <= 4U));
+
+ const uint32_t width = basisu::minimum(a.get_width(), b.get_width());
+ const uint32_t height = basisu::minimum(a.get_height(), b.get_height());
+
+ double max_e = -1e+30f;
+ double sum = 0.0f, sum_sqr = 0.0f;
+
+ m_has_neg = false;
+ m_any_abnormal = false;
+ m_hf_mag_overflow = false;
+
+ for (uint32_t y = 0; y < height; y++)
+ {
+ for (uint32_t x = 0; x < width; x++)
+ {
+ const vec4F& ca = a(x, y), &cb = b(x, y);
+
+ if (total_chans) // per-channel comparison over [first_chan, first_chan + total_chans)
+ {
+ for (uint32_t c = 0; c < total_chans; c++)
+ {
+ float fa = ca[first_chan + c], fb = cb[first_chan + c];
+
+ if ((fabs(fa) > basist::MAX_HALF_FLOAT) || (fabs(fb) > basist::MAX_HALF_FLOAT))
+ m_hf_mag_overflow = true;
+
+ if ((fa < 0.0f) || (fb < 0.0f))
+ m_has_neg = true;
+
+ if (std::isinf(fa) || std::isinf(fb) || std::isnan(fa) || std::isnan(fb))
+ m_any_abnormal = true;
+
+ const double delta = fabs(fa - fb);
+ max_e = basisu::maximum<double>(max_e, delta); // the maximum always tracks the plain absolute difference, even in log mode
+
+ if (log)
+ {
+ double log2_delta = log2f(basisu::maximum(0.0f, fa) + 1.0f) - log2f(basisu::maximum(0.0f, fb) + 1.0f);
+
+ sum += fabs(log2_delta);
+ sum_sqr += log2_delta * log2_delta;
+ }
+ else
+ {
+ sum += fabs(delta);
+ sum_sqr += delta * delta;
+ }
+ }
+ }
+ else // total_chans == 0: compare luminance; the flags are still gathered from the first three channels
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ float fa = ca[c], fb = cb[c];
+
+ if ((fabs(fa) > basist::MAX_HALF_FLOAT) || (fabs(fb) > basist::MAX_HALF_FLOAT))
+ m_hf_mag_overflow = true;
+
+ if ((fa < 0.0f) || (fb < 0.0f))
+ m_has_neg = true;
+
+ if (std::isinf(fa) || std::isinf(fb) || std::isnan(fa) || std::isnan(fb))
+ m_any_abnormal = true;
+ }
+
+ double ca_l = get_luminance(ca), cb_l = get_luminance(cb);
+
+ double delta = fabs(ca_l - cb_l);
+ max_e = basisu::maximum(max_e, delta);
+
+ if (log)
+ {
+ double log2_delta = log2(basisu::maximum<double>(0.0f, ca_l) + 1.0f) - log2(basisu::maximum<double>(0.0f, cb_l) + 1.0f);
+
+ sum += fabs(log2_delta);
+ sum_sqr += log2_delta * log2_delta;
+ }
+ else
+ {
+ sum += delta;
+ sum_sqr += delta * delta;
+ }
+ }
+ }
+ }
+
+ m_max = (double)(max_e);
+
+ double total_values = (double)width * (double)height;
+ if (avg_comp_error)
+ total_values *= (double)clamp<uint32_t>(total_chans, 1, 4); // average per component rather than per pixel
+
+ m_mean = (float)(sum / total_values);
+ m_mean_squared = (float)(sum_sqr / total_values);
+ m_rms = (float)sqrt(sum_sqr / total_values);
+
+ const double max_val = 1.0f; // PSNR is computed against a peak value of 1.0
+ m_psnr = m_rms ? (float)clamp<double>(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f; // a zero RMS is reported as 1000
+ }
+
+ void image_metrics::calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error)
+ { // Fills the error statistics from the overlapping region of a and b, measuring the integer difference between the float_to_half() results of each selected channel.
+ assert(total_chans);
+ assert((first_chan < 4U) && (first_chan + total_chans <= 4U));
+
+ const uint32_t width = basisu::minimum(a.get_width(), b.get_width());
+ const uint32_t height = basisu::minimum(a.get_height(), b.get_height());
+
+ m_has_neg = false;
+ m_hf_mag_overflow = false;
+ m_any_abnormal = false;
+
+ uint_vec hist(65536); // hist[d] counts how many channel values differed by d
+
+ for (uint32_t y = 0; y < height; y++)
+ {
+ for (uint32_t x = 0; x < width; x++)
+ {
+ const vec4F& ca = a(x, y), &cb = b(x, y);
+
+ for (uint32_t i = 0; i < 4; i++) // the flags look at all four channels, not just the selected ones
+ {
+ if ((ca[i] < 0.0f) || (cb[i] < 0.0f))
+ m_has_neg = true;
+
+ if ((fabs(ca[i]) > basist::MAX_HALF_FLOAT) || (fabs(cb[i]) > basist::MAX_HALF_FLOAT))
+ m_hf_mag_overflow = true;
+
+ if (std::isnan(ca[i]) || std::isnan(cb[i]) || std::isinf(ca[i]) || std::isinf(cb[i]))
+ m_any_abnormal = true;
+ }
+
+ int cah[4] = { basist::float_to_half(ca[0]), basist::float_to_half(ca[1]), basist::float_to_half(ca[2]), basist::float_to_half(ca[3]) };
+ int cbh[4] = { basist::float_to_half(cb[0]), basist::float_to_half(cb[1]), basist::float_to_half(cb[2]), basist::float_to_half(cb[3]) };
+
+ for (uint32_t c = 0; c < total_chans; c++)
+ hist[iabs(cah[first_chan + c] - cbh[first_chan + c]) & 65535]++;
+
+ } // x
+ } // y
+
+ m_max = 0;
+ double sum = 0.0f, sum2 = 0.0f;
+ for (uint32_t i = 0; i < 65536; i++)
+ {
+ if (hist[i])
+ {
+ m_max = basisu::maximum<double>(m_max, (double)i);
+ double v = (double)i * (double)hist[i];
+ sum += v; // total absolute error contributed by difference i
+ sum2 += (double)i * v; // total squared error contributed by difference i
+ }
+ }
+
+ double total_values = (double)width * (double)height;
+ if (avg_comp_error)
+ total_values *= (double)clamp<uint32_t>(total_chans, 1, 4); // average per component rather than per pixel
+
+ const float max_val = 65535.0f; // peak value used for clamping and for the PSNR
+ m_mean = (float)clamp<double>(sum / total_values, 0.0f, max_val);
+ m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, max_val * max_val);
+ m_rms = (float)sqrt(m_mean_squared);
+ m_psnr = m_rms ? (float)clamp<double>(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f; // a zero RMS is reported as 1000
+ }
+
+ // Alt. variant, same as calc_half(), for validation.
+ void image_metrics::calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error)
+ {
+ assert(total_chans);
+ assert((first_chan < 4U) && (first_chan + total_chans <= 4U));
+
+ const uint32_t width = basisu::minimum(a.get_width(), b.get_width());
+ const uint32_t height = basisu::minimum(a.get_height(), b.get_height());
+
+ m_has_neg = false;
+ m_hf_mag_overflow = false;
+ m_any_abnormal = false;
+
+ double sum = 0.0f, sum2 = 0.0f;
+ m_max = 0;
+
+ for (uint32_t y = 0; y < height; y++)
+ {
+ for (uint32_t x = 0; x < width; x++)
+ {
+ const vec4F& ca = a(x, y), & cb = b(x, y);
+
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ if ((ca[i] < 0.0f) || (cb[i] < 0.0f))
+ m_has_neg = true;
+
+ if ((fabs(ca[i]) > basist::MAX_HALF_FLOAT) || (fabs(cb[i]) > basist::MAX_HALF_FLOAT))
+ m_hf_mag_overflow = true;
+
+ if (std::isnan(ca[i]) || std::isnan(cb[i]) || std::isinf(ca[i]) || std::isinf(cb[i]))
+ m_any_abnormal = true;
+ }
+
+ int cah[4] = { basist::float_to_half(ca[0]), basist::float_to_half(ca[1]), basist::float_to_half(ca[2]), basist::float_to_half(ca[3]) };
+ int cbh[4] = { basist::float_to_half(cb[0]), basist::float_to_half(cb[1]), basist::float_to_half(cb[2]), basist::float_to_half(cb[3]) };
+
+ for (uint32_t c = 0; c < total_chans; c++)
+ {
+ int diff = iabs(cah[first_chan + c] - cbh[first_chan + c]);
+ if (diff)
+ m_max = std::max<double>(m_max, (double)diff);
+
+ sum += diff;
+ sum2 += squarei(cah[first_chan + c] - cbh[first_chan + c]);
+ }
+
+ } // x
+ } // y
+
+ double total_values = (double)width * (double)height;
+ if (avg_comp_error)
+ total_values *= (double)clamp<uint32_t>(total_chans, 1, 4);
+
+ const float max_val = 65535.0f;
+ m_mean = (float)clamp<double>(sum / total_values, 0.0f, max_val);
+ m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, max_val * max_val);
+ m_rms = (float)sqrt(m_mean_squared);
+ m_psnr = m_rms ? (float)clamp<double>(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f;
+ }
void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma)
{
@@ -1383,6 +1954,10 @@ namespace basisu
double hist[256];
clear_obj(hist);
+ m_has_neg = false;
+ m_any_abnormal = false;
+ m_hf_mag_overflow = false;
+
for (uint32_t y = 0; y < height; y++)
{
for (uint32_t x = 0; x < width; x++)
@@ -1410,7 +1985,7 @@ namespace basisu
{
if (hist[i])
{
- m_max = basisu::maximum<float>(m_max, (float)i);
+ m_max = basisu::maximum<double>(m_max, (double)i);
double v = i * hist[i];
sum += v;
sum2 += i * v;
@@ -1922,7 +2497,7 @@ namespace basisu
} while (pixels_remaining);
- assert((pDst - &input_line_buf[0]) == width * tga_bytes_per_pixel);
+ assert((pDst - &input_line_buf[0]) == (int)(width * tga_bytes_per_pixel));
pLine_data = &input_line_buf[0];
}
@@ -2052,6 +2627,808 @@ namespace basisu
return read_tga(&filedata[0], (uint32_t)filedata.size(), width, height, n_chans);
}
+ // Decodes one RGBE pixel into float RGB; the output alpha is always 1.
+ // A zero exponent byte decodes to black.
+ static inline void hdr_convert(const color_rgba& rgbe, vec4F& c)
+ {
+ if (rgbe[3] == 0)
+ {
+ c.set(0.0f, 0.0f, 0.0f, 1.0f);
+ return;
+ }
+
+ // Shared scale: 2^(exponent - 128 - 8).
+ const float mantissa_scale = ldexp(1.0f, rgbe[3] - 128 - 8);
+ c.set((float)rgbe[0] * mantissa_scale, (float)rgbe[1] * mantissa_scale, (float)rgbe[2] * mantissa_scale, 1.0f);
+ }
+
+ // Returns true when str starts with pPhrase, compared case-insensitively.
+ // pPhrase must not be empty (asserted).
+ bool string_begins_with(const std::string& str, const char* pPhrase)
+ {
+ const size_t phrase_len = strlen(pPhrase);
+ assert(phrase_len);
+
+ // A string shorter than the phrase cannot begin with it.
+ if (str.size() < phrase_len)
+ return false;
+
+#ifdef _MSC_VER
+ return _strnicmp(pPhrase, str.c_str(), phrase_len) == 0;
+#else
+ return strncasecmp(pPhrase, str.c_str(), phrase_len) == 0;
+#endif
+ }
+
+ // Radiance RGBE (.HDR) image reading.
+ // This code tries to preserve the original logic in Radiance's ray/src/common/color.c code:
+ // https://www.radiance-online.org/cgi-bin/viewcvs.cgi/ray/src/common/color.c?revision=2.26&view=markup&sortby=log
+ // Also see: https://flipcode.com/archives/HDR_Image_Reader.shtml.
+ // https://github.com/LuminanceHDR/LuminanceHDR/blob/master/src/Libpfs/io/rgbereader.cpp.
+ // https://radsite.lbl.gov/radiance/refer/filefmts.pdf
+ // Buggy readers:
+ // stb_image.h: appears to be a clone of rgbe.c, but with goto's (doesn't support old format files, doesn't support mixture of RLE/non-RLE scanlines)
+ // http://www.graphics.cornell.edu/~bjw/rgbe.html - rgbe.c/h
+ // http://www.graphics.cornell.edu/online/formats/rgbe/ - rgbe.c/.h - buggy
+ // Parses a Radiance RGBE (.hdr) file held in memory.
+ // filedata: the complete file contents.
+ // img: receives the decoded pixels, reoriented according to the file's resolution line.
+ // hdr_info: cleared, then filled with the program comment, EXPOSURE and GAMMA values found in the header.
+ // Returns false on any malformed, truncated or unsupported input.
+ bool read_rgbe(const uint8_vec &filedata, imagef& img, rgbe_header_info& hdr_info)
+ {
+ hdr_info.clear();
+
+ const uint32_t MAX_SUPPORTED_DIM = 65536;
+
+ if (filedata.size() < 4)
+ return false;
+
+ // stb_image.h checks for the string "#?RADIANCE" or "#?RGBE" in the header.
+ // The original Radiance header code doesn't care about the specific string.
+ // opencv's reader only checks for "#?", so that's what we're going to do.
+ if ((filedata[0] != '#') || (filedata[1] != '?'))
+ return false;
+
+ //uint32_t width = 0, height = 0;
+ bool is_rgbe = false;
+ size_t cur_ofs = 0;
+
+ // Parse the lines until we encounter a blank line.
+ std::string cur_line;
+ for (; ; )
+ {
+ if (cur_ofs >= filedata.size())
+ return false;
+
+ const uint32_t HEADER_TOO_BIG_SIZE = 4096;
+ if (cur_ofs >= HEADER_TOO_BIG_SIZE)
+ {
+ // Header seems too large - something is likely wrong. Return failure.
+ return false;
+ }
+
+ uint8_t c = filedata[cur_ofs++];
+
+ if (c == '\n')
+ {
+ if (!cur_line.size())
+ break;
+
+ if ((cur_line[0] == '#') && (!string_begins_with(cur_line, "#?")) && (!hdr_info.m_program.size()))
+ {
+ // First plain comment line: keep it (minus the '#' and leading spaces) as the program string.
+ cur_line.erase(0, 1);
+ while (cur_line.size() && (cur_line[0] == ' '))
+ cur_line.erase(0, 1);
+
+ hdr_info.m_program = cur_line;
+ }
+ else if (string_begins_with(cur_line, "EXPOSURE=") && (cur_line.size() > 9))
+ {
+ hdr_info.m_exposure = atof(cur_line.c_str() + 9);
+ hdr_info.m_has_exposure = true;
+ }
+ else if (string_begins_with(cur_line, "GAMMA=") && (cur_line.size() > 6))
+ {
+ // Store the value in m_gamma (not m_exposure), which is the field write_rgbe() emits for GAMMA=.
+ hdr_info.m_gamma = atof(cur_line.c_str() + 6);
+ hdr_info.m_has_gamma = true;
+ }
+ else if (cur_line == "FORMAT=32-bit_rle_rgbe")
+ {
+ is_rgbe = true;
+ }
+
+ cur_line.resize(0);
+ }
+ else
+ cur_line.push_back((char)c);
+ }
+
+ if (!is_rgbe)
+ return false;
+
+ // Assume and require the final line to have the image's dimensions. We're not supporting flipping.
+ for (; ; )
+ {
+ if (cur_ofs >= filedata.size())
+ return false;
+ uint8_t c = filedata[cur_ofs++];
+ if (c == '\n')
+ break;
+ cur_line.push_back((char)c);
+ }
+
+ int comp[2] = { 1, 0 }; // y, x (major, minor)
+ int dir[2] = { -1, 1 }; // -1, 1, (major, minor), for y -1=up
+ uint32_t major_dim = 0, minor_dim = 0;
+
+ // Parse the dimension string, normally it'll be "-Y # +X #" (major, minor), rarely it differs
+ for (uint32_t d = 0; d < 2; d++) // 0=major, 1=minor
+ {
+ // Validate the length before looking at the axis prefix.
+ if (cur_line.size() < 3)
+ return false;
+
+ const bool is_neg_x = (strncmp(&cur_line[0], "-X ", 3) == 0);
+ const bool is_pos_x = (strncmp(&cur_line[0], "+X ", 3) == 0);
+ const bool is_x = is_neg_x || is_pos_x;
+
+ const bool is_neg_y = (strncmp(&cur_line[0], "-Y ", 3) == 0);
+ const bool is_pos_y = (strncmp(&cur_line[0], "+Y ", 3) == 0);
+ const bool is_y = is_neg_y || is_pos_y;
+
+ if (!is_x && !is_y)
+ return false;
+
+ comp[d] = is_x ? 0 : 1;
+ dir[d] = (is_neg_x || is_neg_y) ? -1 : 1;
+
+ uint32_t& dim = d ? minor_dim : major_dim;
+
+ cur_line.erase(0, 3);
+
+ while (cur_line.size())
+ {
+ char c = cur_line[0];
+ if (c != ' ')
+ break;
+ cur_line.erase(0, 1);
+ }
+
+ bool has_digits = false;
+ while (cur_line.size())
+ {
+ char c = cur_line[0];
+ cur_line.erase(0, 1);
+
+ if (c == ' ')
+ break;
+
+ if ((c < '0') || (c > '9'))
+ return false;
+
+ dim = dim * 10 + (c - '0');
+
+ // Bail out as soon as the limit is exceeded, so the accumulator can never wrap around.
+ if (dim > MAX_SUPPORTED_DIM)
+ return false;
+
+ has_digits = true;
+ }
+ if (!has_digits)
+ return false;
+
+ if ((dim < 1) || (dim > MAX_SUPPORTED_DIM))
+ return false;
+ }
+
+ // temp image: width=minor, height=major
+ img.resize(minor_dim, major_dim);
+
+ std::vector<color_rgba> temp_scanline(minor_dim);
+
+ // Read the scanlines.
+ for (uint32_t y = 0; y < major_dim; y++)
+ {
+ vec4F* pDst = &img(0, y);
+
+ if ((filedata.size() - cur_ofs) < 4)
+ return false;
+
+ // Determine if the line uses the new or old format. See the logic in color.c.
+ bool old_decrunch = false;
+ if ((minor_dim < 8) || (minor_dim > 0x7FFF))
+ {
+ // Line is too short or long; must be old format.
+ old_decrunch = true;
+ }
+ else if (filedata[cur_ofs] != 2)
+ {
+ // R is not 2, must be old format
+ old_decrunch = true;
+ }
+ else
+ {
+ // c[0]/red is 2. Check GB and E for validity.
+ color_rgba c;
+ memcpy(&c, &filedata[cur_ofs], 4);
+
+ if ((c[1] != 2) || (c[2] & 0x80))
+ {
+ // G isn't 2, or the high bit of B is set which is impossible (image's > 0x7FFF pixels can't get here). Use old format.
+ old_decrunch = true;
+ }
+ else
+ {
+ // Check B and E. If this isn't the minor_dim in network order, something is wrong. The pixel would also be denormalized, and invalid.
+ uint32_t w = (c[2] << 8) | c[3];
+ if (w != minor_dim)
+ return false;
+
+ cur_ofs += 4;
+ }
+ }
+
+ if (old_decrunch)
+ {
+ uint32_t rshift = 0, x = 0;
+
+ while (x < minor_dim)
+ {
+ if ((filedata.size() - cur_ofs) < 4)
+ return false;
+
+ color_rgba c;
+ memcpy(&c, &filedata[cur_ofs], 4);
+ cur_ofs += 4;
+
+ if ((c[0] == 1) && (c[1] == 1) && (c[2] == 1))
+ {
+ // We'll allow RLE matches to cross scanlines, but not on the very first pixel.
+ if ((!x) && (!y))
+ return false;
+
+ // Each consecutive run marker shifts its count 8 more bits to the left. Shifting a
+ // 32-bit value by 32 or more bits is undefined, so reject such streams.
+ if (rshift >= 32)
+ return false;
+
+ const uint32_t run_len = (uint32_t)c[3] << rshift;
+ const vec4F run_color(pDst[-1]);
+
+ if ((x + run_len) > minor_dim)
+ return false;
+
+ for (uint32_t i = 0; i < run_len; i++)
+ *pDst++ = run_color;
+
+ rshift += 8;
+ x += run_len;
+ }
+ else
+ {
+ rshift = 0;
+
+ hdr_convert(c, *pDst);
+ pDst++;
+ x++;
+ }
+ }
+ continue;
+ }
+
+ // New format: the scanline is stored as four separately run-length coded component planes.
+ for (uint32_t s = 0; s < 4; s++)
+ {
+ uint32_t x_ofs = 0;
+ while (x_ofs < minor_dim)
+ {
+ uint32_t num_remaining = minor_dim - x_ofs;
+
+ if (cur_ofs >= filedata.size())
+ return false;
+
+ uint8_t count = filedata[cur_ofs++];
+ if (count > 128)
+ {
+ // Run packet: (count - 128) copies of the next byte.
+ count -= 128;
+ if (count > num_remaining)
+ return false;
+
+ if (cur_ofs >= filedata.size())
+ return false;
+ const uint8_t val = filedata[cur_ofs++];
+
+ for (uint32_t i = 0; i < count; i++)
+ temp_scanline[x_ofs + i][s] = val;
+
+ x_ofs += count;
+ }
+ else
+ {
+ // Raw packet: count literal bytes follow.
+ if ((!count) || (count > num_remaining))
+ return false;
+
+ for (uint32_t i = 0; i < count; i++)
+ {
+ if (cur_ofs >= filedata.size())
+ return false;
+ const uint8_t val = filedata[cur_ofs++];
+
+ temp_scanline[x_ofs + i][s] = val;
+ }
+
+ x_ofs += count;
+ }
+ } // while (x_ofs < minor_dim)
+ } // s
+
+ // Convert all the RGBE pixels to float now
+ for (uint32_t x = 0; x < minor_dim; x++, pDst++)
+ hdr_convert(temp_scanline[x], *pDst);
+
+ assert((pDst - &img(0, y)) == (int)minor_dim);
+
+ } // y
+
+ // at here:
+ // img(width,height)=image pixels as read from file, x=minor axis, y=major axis
+ // width=minor axis dimension
+ // height=major axis dimension
+ // in file, pixels are emitted in minor order, then major (so major=scanlines in the file)
+
+ imagef final_img;
+ if (comp[0] == 0) // if major axis is X
+ final_img.resize(major_dim, minor_dim);
+ else // major axis is Y, minor is X
+ final_img.resize(minor_dim, major_dim);
+
+ // TODO: optimize the identity case
+ for (uint32_t major_iter = 0; major_iter < major_dim; major_iter++)
+ {
+ for (uint32_t minor_iter = 0; minor_iter < minor_dim; minor_iter++)
+ {
+ const vec4F& p = img(minor_iter, major_iter);
+
+ uint32_t dst_x = 0, dst_y = 0;
+
+ // is the minor dim output x?
+ if (comp[1] == 0)
+ {
+ // minor axis is x, major is y
+
+ // is minor axis (which is output x) flipped?
+ if (dir[1] < 0)
+ dst_x = minor_dim - 1 - minor_iter;
+ else
+ dst_x = minor_iter;
+
+ // is major axis (which is output y) flipped? -1=down in raster order, 1=up
+ if (dir[0] < 0)
+ dst_y = major_iter;
+ else
+ dst_y = major_dim - 1 - major_iter;
+ }
+ else
+ {
+ // minor axis is output y, major is output x
+
+ // is minor axis (which is output y) flipped?
+ if (dir[1] < 0)
+ dst_y = minor_iter;
+ else
+ dst_y = minor_dim - 1 - minor_iter;
+
+ // is major axis (which is output x) flipped?
+ if (dir[0] < 0)
+ dst_x = major_dim - 1 - major_iter;
+ else
+ dst_x = major_iter;
+ }
+
+ final_img(dst_x, dst_y) = p;
+ }
+ }
+
+ final_img.swap(img);
+
+ return true;
+ }
+
+ // File-based overload: reads the whole file into memory, then parses it with the in-memory read_rgbe().
+ // Returns false if the file cannot be read or parsing fails.
+ bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info)
+ {
+ uint8_vec file_bytes;
+ return read_file_to_vec(pFilename, file_bytes) && read_rgbe(file_bytes, img, hdr_info);
+ }
+
+ // Appends the characters of a NUL-terminated string (terminator excluded) to buf and returns buf.
+ static uint8_vec& append_string(uint8_vec& buf, const char* pStr)
+ {
+ const size_t num_chars = strlen(pStr);
+ if (num_chars)
+ {
+ const size_t write_pos = buf.size();
+ buf.resize(write_pos + num_chars);
+ memcpy(&buf[write_pos], pStr, num_chars);
+ }
+
+ return buf;
+ }
+
+ // std::string flavor: forwards str.c_str() to the C-string overload; an empty string leaves buf untouched.
+ static uint8_vec& append_string(uint8_vec& buf, const std::string& str)
+ {
+ return str.empty() ? buf : append_string(buf, str.c_str());
+ }
+
+ // Encodes the RGB part of a float pixel as RGBE (three 8-bit mantissas sharing one exponent byte).
+ // Inputs are expected to be non-negative (asserted); a largest component below 1e-32 clears rgbe.
+ static inline void float2rgbe(color_rgba &rgbe, const vec4F &c)
+ {
+ const float comps[3] = { c[0], c[1], c[2] };
+ assert(comps[0] >= 0.0f && comps[1] >= 0.0f && comps[2] >= 0.0f);
+
+ const float largest = basisu::maximumf(basisu::maximumf(comps[0], comps[1]), comps[2]);
+ if (largest < 1e-32f)
+ {
+ rgbe.clear();
+ return;
+ }
+
+ // frexp() yields the shared exponent; scale maps each component into the 0..255 mantissa range.
+ int shared_exp;
+ const float scale = frexp(largest, &shared_exp) * 256.0f / largest;
+
+ for (uint32_t i = 0; i < 3; i++)
+ rgbe[i] = (uint8_t)(clamp<int>((int)(comps[i] * scale), 0, 255));
+
+ rgbe[3] = (uint8_t)(shared_exp + 128);
+ }
+
+ const bool RGBE_FORCE_RAW = false;
+ const bool RGBE_FORCE_OLD_CRUNCH = false; // note most readers (particularly stb_image.h's) don't properly support this, when they should
+
+ bool write_rgbe(uint8_vec &file_data, imagef& img, rgbe_header_info& hdr_info)
+ { // Serializes img into file_data as a Radiance RGBE stream (header, resolution line, pixel data). Returns false only when img has a zero dimension.
+ if (!img.get_width() || !img.get_height())
+ return false;
+
+ const uint32_t width = img.get_width(), height = img.get_height();
+
+ file_data.resize(0);
+ file_data.reserve(1024 + img.get_width() * img.get_height() * 4);
+
+ append_string(file_data, "#?RADIANCE\n");
+
+ if (hdr_info.m_has_exposure)
+ append_string(file_data, string_format("EXPOSURE=%g\n", hdr_info.m_exposure));
+
+ if (hdr_info.m_has_gamma)
+ append_string(file_data, string_format("GAMMA=%g\n", hdr_info.m_gamma));
+
+ append_string(file_data, "FORMAT=32-bit_rle_rgbe\n\n"); // the empty line terminates the header
+ append_string(file_data, string_format("-Y %u +X %u\n", height, width)); // resolution line: height (major) first, then width (minor)
+
+ if (((width < 8) || (width > 0x7FFF)) || (RGBE_FORCE_RAW)) // widths outside 8..0x7FFF are always written as flat, uncompressed RGBE pixels
+ {
+ for (uint32_t y = 0; y < height; y++)
+ {
+ for (uint32_t x = 0; x < width; x++)
+ {
+ color_rgba rgbe;
+ float2rgbe(rgbe, img(x, y));
+ append_vector(file_data, (const uint8_t *)&rgbe, sizeof(rgbe));
+ }
+ }
+ }
+ else if (RGBE_FORCE_OLD_CRUNCH) // old-style run-length path; only taken when the constant is flipped
+ {
+ for (uint32_t y = 0; y < height; y++)
+ {
+ int prev_r = -1, prev_g = -1, prev_b = -1, prev_e = -1;
+ uint32_t cur_run_len = 0;
+
+ for (uint32_t x = 0; x < width; x++)
+ {
+ color_rgba rgbe;
+ float2rgbe(rgbe, img(x, y));
+
+ if ((rgbe[0] == prev_r) && (rgbe[1] == prev_g) && (rgbe[2] == prev_b) && (rgbe[3] == prev_e))
+ {
+ if (++cur_run_len == 255)
+ {
+ // this ensures rshift stays 0, it's lame but this path is only for testing readers
+ color_rgba f(1, 1, 1, cur_run_len - 1);
+ append_vector(file_data, (const uint8_t*)&f, sizeof(f));
+ append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe));
+ cur_run_len = 0;
+ }
+ }
+ else
+ {
+ if (cur_run_len > 0)
+ {
+ color_rgba f(1, 1, 1, cur_run_len); // run marker: (1, 1, 1, count) repeats the previous pixel
+ append_vector(file_data, (const uint8_t*)&f, sizeof(f));
+
+ cur_run_len = 0;
+ }
+
+ append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe));
+
+ prev_r = rgbe[0];
+ prev_g = rgbe[1];
+ prev_b = rgbe[2];
+ prev_e = rgbe[3];
+ }
+ } // x
+
+ if (cur_run_len > 0) // flush the run still pending at the end of the scanline
+ {
+ color_rgba f(1, 1, 1, cur_run_len);
+ append_vector(file_data, (const uint8_t*)&f, sizeof(f));
+ }
+ } // y
+ }
+ else // new-style path: each scanline is written as four separately run-length coded component planes
+ {
+ uint8_vec temp[4];
+ for (uint32_t c = 0; c < 4; c++)
+ temp[c].resize(width);
+
+ for (uint32_t y = 0; y < height; y++)
+ {
+ color_rgba rgbe(2, 2, width >> 8, width & 0xFF); // scanline marker: 2, 2, then the width as a big-endian 16-bit value
+ append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe));
+
+ for (uint32_t x = 0; x < width; x++)
+ {
+ float2rgbe(rgbe, img(x, y));
+
+ for (uint32_t c = 0; c < 4; c++)
+ temp[c][x] = rgbe[c];
+ }
+
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ int raw_ofs = -1; // offset of the open raw packet's count byte, or -1 when no raw packet is open
+
+ uint32_t x = 0;
+ while (x < width)
+ {
+ const uint32_t num_bytes_remaining = width - x;
+ const uint32_t max_run_len = basisu::minimum<uint32_t>(num_bytes_remaining, 127);
+ const uint8_t cur_byte = temp[c][x];
+
+ uint32_t run_len = 1;
+ while (run_len < max_run_len)
+ {
+ if (temp[c][x + run_len] != cur_byte)
+ break;
+ run_len++;
+ }
+
+ const uint32_t cost_to_keep_raw = ((raw_ofs != -1) ? 0 : 1) + run_len; // 0 or 1 bytes to start a raw run, then the repeated bytes issued as raw
+ const uint32_t cost_to_take_run = 2 + 1; // 2 bytes to issue the RLE, then 1 bytes to start whatever follows it (raw or RLE)
+
+ if ((run_len >= 3) && (cost_to_take_run < cost_to_keep_raw))
+ {
+ file_data.push_back((uint8_t)(128 + run_len)); // run packet: count byte above 128, then the repeated value
+ file_data.push_back(cur_byte);
+
+ x += run_len;
+ raw_ofs = -1;
+ }
+ else
+ {
+ if (raw_ofs < 0)
+ {
+ raw_ofs = (int)file_data.size();
+ file_data.push_back(0); // placeholder count byte, incremented as raw bytes are added
+ }
+
+ if (++file_data[raw_ofs] == 128)
+ raw_ofs = -1; // a raw packet holds at most 128 bytes; the next raw byte opens a new packet
+
+ file_data.push_back(cur_byte);
+
+ x++;
+ }
+ } // x
+
+ } // c
+ } // y
+ }
+
+ return true;
+ }
+
+ // Serializes img as RGBE data into a temporary byte buffer (via the in-memory write_rgbe() overload),
+ // then writes that buffer to pFilename. Returns false if either step fails.
+ bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info)
+ {
+ uint8_vec rgbe_bytes;
+ const bool serialized = write_rgbe(rgbe_bytes, img, hdr_info);
+ return serialized && write_vec_to_file(pFilename, rgbe_bytes);
+ }
+
+ // Loads an .EXR file from disk into img (4 floats per pixel) using TinyEXR's LoadEXRWithLayer().
+ // n_chans is set to 4 once the load has been attempted, because the loader always hands back RGBA data.
+ // Returns false (after logging) if the load fails or if either dimension is outside [1, 65536].
+ bool read_exr(const char* pFilename, imagef& img, int& n_chans)
+ {
+ n_chans = 0;
+
+ int width = 0, height = 0;
+ float* out_rgba = nullptr;
+ const char* err = nullptr;
+
+ int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err);
+ n_chans = 4;
+ if (status != 0)
+ {
+ error_printf("Failed loading .EXR image \"%s\"! (TinyEXR error: %s)\n", pFilename, err ? err : "?");
+ FreeEXRErrorMessage(err);
+ free(out_rgba);
+ return false;
+ }
+
+ const uint32_t MAX_SUPPORTED_DIM = 65536;
+ if ((width < 1) || (height < 1) || (width > (int)MAX_SUPPORTED_DIM) || (height > (int)MAX_SUPPORTED_DIM))
+ {
+ error_printf("Invalid dimensions of .EXR image \"%s\"!\n", pFilename);
+ free(out_rgba);
+ return false;
+ }
+
+ img.resize(width, height);
+
+ // The loader output is RGBA floats, the same layout as the image's pixels, so it is copied in one go.
+ // (The former per-pixel path for n_chans == 1 could never run, since n_chans is always 4 here.)
+ memcpy(img.get_ptr(), out_rgba, sizeof(float) * 4 * img.get_total_pixels());
+
+ free(out_rgba);
+ return true;
+ }
+
+ // Loads an .EXR image held in memory into img (4 floats per pixel) using TinyEXR's LoadEXRFromMemory().
+ // Returns false (after logging) if decoding fails or if either decoded dimension is outside [1, 65536].
+ bool read_exr(const void* pMem, size_t mem_size, imagef& img)
+ {
+ float* out_rgba = nullptr;
+ int width = 0, height = 0;
+ const char* pErr = nullptr;
+ int res = LoadEXRFromMemory(&out_rgba, &width, &height, (const uint8_t*)pMem, mem_size, &pErr);
+ if ((res < 0) || (!out_rgba))
+ {
+ error_printf("Failed loading .EXR image from memory! (TinyEXR error: %s)\n", pErr ? pErr : "?");
+ FreeEXRErrorMessage(pErr);
+ free(out_rgba);
+ return false;
+ }
+
+ // Apply the same dimension limits as the file-based read_exr(); this also keeps the copy size below sane.
+ const uint32_t MAX_SUPPORTED_DIM = 65536;
+ if ((width < 1) || (height < 1) || (width > (int)MAX_SUPPORTED_DIM) || (height > (int)MAX_SUPPORTED_DIM))
+ {
+ error_printf("Invalid dimensions of .EXR image from memory!\n");
+ free(out_rgba);
+ return false;
+ }
+
+ img.resize(width, height);
+
+ // Compute the byte count in size_t: "width * height" evaluated as int can overflow for large images.
+ memcpy(img.get_ptr(), out_rgba, (size_t)width * (size_t)height * sizeof(float) * 4);
+ free(out_rgba);
+
+ return true;
+ }
+
+ // Writes img to pFilename as an .EXR file through TinyEXR.
+ // n_chans selects what is stored: 1 (component 0, named "Y"), 3 ("B","G","R") or 4 ("A","B","G","R"); any other value fails.
+ // flags is a combination of the WRITE_EXR_* bits (linear hint, store 32-bit floats, disable ZIP compression).
+ // Returns true only if TinyEXR reports success; TinyEXR and allocation failures are logged with error_printf().
+ bool write_exr(const char* pFilename, imagef& img, uint32_t n_chans, uint32_t flags)
+ {
+ assert((n_chans == 1) || (n_chans == 3) || (n_chans == 4));
+
+ // Reject bad channel counts before the fixed-size arrays below are indexed by n_chans
+ // (the assert above is compiled out of release builds).
+ if ((n_chans != 1) && (n_chans != 3) && (n_chans != 4))
+ return false;
+
+ const bool linear_hint = (flags & WRITE_EXR_LINEAR_HINT) != 0,
+ store_float = (flags & WRITE_EXR_STORE_FLOATS) != 0,
+ no_compression = (flags & WRITE_EXR_NO_COMPRESSION) != 0;
+
+ const uint32_t width = img.get_width(), height = img.get_height();
+ assert(width && height);
+
+ if (!width || !height)
+ return false;
+
+ // One planar float layer per stored channel.
+ float_vec layers[4];
+ float* image_ptrs[4];
+ for (uint32_t c = 0; c < n_chans; c++)
+ {
+ layers[c].resize(width * height);
+ image_ptrs[c] = layers[c].get_ptr();
+ }
+
+ // chan_order[c] is the source pixel component stored in layer c. Default: ABGR.
+ int chan_order[4] = { 3, 2, 1, 0 };
+
+ if (n_chans == 1)
+ {
+ // Y
+ chan_order[0] = 0;
+ }
+ else if (n_chans == 3)
+ {
+ // BGR
+ chan_order[0] = 2;
+ chan_order[1] = 1;
+ chan_order[2] = 0;
+ }
+
+ for (uint32_t y = 0; y < height; y++)
+ {
+ for (uint32_t x = 0; x < width; x++)
+ {
+ const vec4F& p = img(x, y);
+
+ for (uint32_t c = 0; c < n_chans; c++)
+ layers[c][x + y * width] = p[chan_order[c]];
+ } // x
+ } // y
+
+ EXRHeader header;
+ InitEXRHeader(&header);
+
+ EXRImage image;
+ InitEXRImage(&image);
+
+ image.num_channels = n_chans;
+ image.images = (unsigned char**)image_ptrs;
+ image.width = width;
+ image.height = height;
+
+ header.num_channels = n_chans;
+
+ header.channels = (EXRChannelInfo*)calloc(header.num_channels, sizeof(EXRChannelInfo));
+ header.pixel_types = (int*)calloc(header.num_channels, sizeof(int));
+ header.requested_pixel_types = (int*)calloc(header.num_channels, sizeof(int));
+
+ if ((!header.channels) || (!header.pixel_types) || (!header.requested_pixel_types))
+ {
+ error_printf("Save EXR err: out of memory\n");
+ free(header.channels);
+ free(header.pixel_types);
+ free(header.requested_pixel_types);
+ return false;
+ }
+
+ // Must be (A)BGR order, since most of EXR viewers expect this channel order.
+ for (uint32_t i = 0; i < n_chans; i++)
+ {
+ char c = 'Y';
+ if (n_chans == 3)
+ c = "BGR"[i];
+ else if (n_chans == 4)
+ c = "ABGR"[i];
+
+ header.channels[i].name[0] = c;
+ header.channels[i].name[1] = '\0';
+
+ header.channels[i].p_linear = linear_hint;
+ }
+
+ if (!no_compression)
+ header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP;
+
+ for (int i = 0; i < header.num_channels; i++)
+ {
+ // pixel type of input image
+ header.pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT;
+
+ // pixel type of output image to be stored in .EXR
+ header.requested_pixel_types[i] = store_float ? TINYEXR_PIXELTYPE_FLOAT : TINYEXR_PIXELTYPE_HALF;
+ }
+
+ const char* pErr_msg = nullptr;
+
+ int ret = SaveEXRImageToFile(&image, &header, pFilename, &pErr_msg);
+ if (ret != TINYEXR_SUCCESS)
+ {
+ // The message pointer may be left null, and passing null to %s is undefined.
+ error_printf("Save EXR err: %s\n", pErr_msg ? pErr_msg : "?");
+ FreeEXRErrorMessage(pErr_msg);
+ }
+
+ free(header.channels);
+ free(header.pixel_types);
+ free(header.requested_pixel_types);
+
+ return (ret == TINYEXR_SUCCESS);
+ }
+
void image::debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t scale_x, uint32_t scale_y, const color_rgba& fg, const color_rgba* pBG, bool alpha_only, const char* pFmt, ...)
{
char buf[2048];
@@ -2103,5 +3480,206 @@ namespace basisu
}
}
}
-
+
+ // Very basic global Reinhard tone mapping, output converted to sRGB with no dithering, alpha is carried through unchanged.
+ // Only used for debugging/development.
+ // Global Reinhard operator (debug/development helper).
+ // Each RGB component is scaled by exposure and mapped through v / (1 + v); components that are <= 0 become 0.
+ // All four components are then clamped to [0,1]; RGB goes through linear_to_srgb(), alpha does not.
+ // The results are scaled by 255 and rounded into ldr_img, which is resized to match hdr_img.
+ void tonemap_image_reinhard(image &ldr_img, const imagef &hdr_img, float exposure)
+ {
+ const uint32_t w = hdr_img.get_width();
+ const uint32_t h = hdr_img.get_height();
+
+ ldr_img.resize(w, h);
+
+ for (uint32_t py = 0; py < h; py++)
+ {
+ for (uint32_t px = 0; px < w; px++)
+ {
+ vec4F texel(hdr_img(px, py));
+
+ for (uint32_t comp = 0; comp < 3; comp++)
+ {
+ float& v = texel[comp];
+ if (v <= 0.0f)
+ {
+ v = 0.0f;
+ continue;
+ }
+
+ v *= exposure;
+ v = v / (1.0f + v);
+ }
+
+ texel.clamp(0.0f, 1.0f);
+
+ // Only the color components are sRGB encoded.
+ for (uint32_t comp = 0; comp < 3; comp++)
+ texel[comp] = linear_to_srgb(texel[comp]) * 255.0f;
+ texel[3] = texel[3] * 255.0f;
+
+ color_rgba& dst = ldr_img(px, py);
+ for (uint32_t comp = 0; comp < 4; comp++)
+ dst[comp] = (uint8_t)std::round(texel[comp]);
+ }
+ }
+ }
+
+ // Compressive tone mapper.
+ // Converts the RGB components of hdr_test_img to half floats, shifts the sign bit out, left-justifies all values by the
+ // largest shift the whole image allows, then remaps the distinct high bytes that occur so they are spread evenly over 0..255.
+ // Only the remapped high byte of each RGB component is written to dst_img; this function does not write dst_img's alpha.
+ // Returns false, before dst_img is touched, if any RGB component is negative or larger than basist::MAX_HALF_FLOAT.
+ bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img)
+ {
+ const uint32_t width = hdr_test_img.get_width();
+ const uint32_t height = hdr_test_img.get_height();
+
+ uint16_vec half_img(width * 3 * height);
+
+ // Smallest count of unused leading bits over all non-zero values. 32 means no non-zero value has been seen yet.
+ int max_shift = 32;
+
+ for (uint32_t y = 0; y < height; y++)
+ {
+ for (uint32_t x = 0; x < width; x++)
+ {
+ const vec4F& p = hdr_test_img(x, y);
+
+ for (uint32_t i = 0; i < 3; i++)
+ {
+ if (p[i] < 0.0f)
+ return false;
+ if (p[i] > basist::MAX_HALF_FLOAT)
+ return false;
+
+ uint32_t h = basist::float_to_half(p[i]);
+
+ // Shift the sign bit out of the 16-bit pattern. The mask keeps h within 16 bits even for -0.0f,
+ // which passes the sign test above although its sign bit is set.
+ h = (h << 1) & 0xFFFF;
+
+ half_img[(x + y * width) * 3 + i] = (uint16_t)h;
+
+ // Determine # of leading zero bits within the 16-bit value
+ if (h)
+ {
+ int lz = clz(h) - 16;
+ assert(lz >= 0 && lz <= 15);
+
+ assert((h << lz) <= 0xFFFF);
+
+ max_shift = basisu::minimum<int>(max_shift, lz);
+ }
+ } // i
+ } // x
+ } // y
+
+ // Every value was zero (e.g. an all-black image): nothing needs shifting, and shifting by 32 would be undefined.
+ if (max_shift == 32)
+ max_shift = 0;
+
+ //printf("tonemap_image_compressive: Max leading zeros: %i\n", max_shift);
+
+ // Histogram of the high bytes after left-justification.
+ uint32_t high_hist[256];
+ clear_obj(high_hist);
+
+ for (uint32_t y = 0; y < height; y++)
+ {
+ for (uint32_t x = 0; x < width; x++)
+ {
+ for (uint32_t i = 0; i < 3; i++)
+ {
+ uint16_t& hf = half_img[(x + y * width) * 3 + i];
+
+ assert(((uint32_t)hf << max_shift) <= 65535);
+
+ hf <<= max_shift;
+
+ uint32_t h = (uint8_t)(hf >> 8);
+ high_hist[h]++;
+ }
+ } // x
+ } // y
+
+ // Assign each high byte that occurs its rank (0, 1, 2, ...) in ascending order.
+ uint32_t total_vals_used = 0;
+ int remap_old_to_new[256];
+ for (uint32_t i = 0; i < 256; i++)
+ remap_old_to_new[i] = -1;
+
+ for (uint32_t i = 0; i < 256; i++)
+ {
+ if (high_hist[i] != 0)
+ {
+ remap_old_to_new[i] = total_vals_used;
+ total_vals_used++;
+ }
+ }
+
+ assert(total_vals_used >= 1);
+
+ //printf("tonemap_image_compressive: Total used high byte values: %u, unused: %u\n", total_vals_used, 256 - total_vals_used);
+
+ bool val_used[256];
+ clear_obj(val_used);
+
+ // Inverse mapping; only consulted by the asserts below.
+ int remap_new_to_old[256];
+ for (uint32_t i = 0; i < 256; i++)
+ remap_new_to_old[i] = -1;
+ BASISU_NOTE_UNUSED(remap_new_to_old);
+
+ // Stretch the ranks over 0..255 (with rounding), so the used values are spread evenly.
+ int prev_c = -1;
+ BASISU_NOTE_UNUSED(prev_c);
+ for (uint32_t i = 0; i < 256; i++)
+ {
+ if (remap_old_to_new[i] >= 0)
+ {
+ int c;
+ if (total_vals_used <= 1)
+ c = remap_old_to_new[i];
+ else
+ {
+ c = (remap_old_to_new[i] * 255 + ((total_vals_used - 1) / 2)) / (total_vals_used - 1);
+
+ assert(c > prev_c);
+ }
+
+ assert(!val_used[c]);
+
+ remap_new_to_old[c] = i;
+
+ remap_old_to_new[i] = c;
+ prev_c = c;
+
+ //printf("%u ", c);
+
+ val_used[c] = true;
+ }
+ } // i
+ //printf("\n");
+
+ dst_img.resize(width, height);
+
+ for (uint32_t y = 0; y < height; y++)
+ {
+ for (uint32_t x = 0; x < width; x++)
+ {
+ for (uint32_t c = 0; c < 3; c++)
+ {
+ uint16_t& v16 = half_img[(x + y * width) * 3 + c];
+
+ uint32_t hb = v16 >> 8;
+ //uint32_t lb = v16 & 0xFF;
+
+ assert(remap_old_to_new[hb] != -1);
+ assert(remap_old_to_new[hb] <= 255);
+ assert(remap_new_to_old[remap_old_to_new[hb]] == (int)hb);
+
+ hb = remap_old_to_new[hb];
+
+ //v16 = (uint16_t)((hb << 8) | lb);
+
+ dst_img(x, y)[c] = (uint8_t)hb;
+ }
+ } // x
+ } // y
+
+ return true;
+ }
+
} // namespace basisu
diff --git a/thirdparty/basis_universal/encoder/basisu_enc.h b/thirdparty/basis_universal/encoder/basisu_enc.h
index 0efeaa461f..780605e7b8 100644
--- a/thirdparty/basis_universal/encoder/basisu_enc.h
+++ b/thirdparty/basis_universal/encoder/basisu_enc.h
@@ -1,5 +1,5 @@
// basisu_enc.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -48,7 +48,8 @@ namespace basisu
// Encoder library initialization.
// This function MUST be called before encoding anything!
- void basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false);
+ // Returns false if library initialization fails.
+ bool basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false);
void basisu_encoder_deinit();
// basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=1
@@ -70,6 +71,18 @@ namespace basisu
return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i);
}
+ // Left shifts a signed 32-bit value by shift bits (0..31, asserted).
+ // The shift is carried out on the unsigned bit pattern and the result is converted back to int.
+ inline int left_shift32(int val, int shift)
+ {
+ assert((shift >= 0) && (shift < 32));
+ const uint32_t bits = static_cast<uint32_t>(val);
+ return static_cast<int>(bits << shift);
+ }
+
+ // Unsigned overload: left shifts val by shift bits (0..31, asserted).
+ inline uint32_t left_shift32(uint32_t val, int shift)
+ {
+ assert((shift >= 0) && (shift < 32));
+ const uint32_t shifted = val << shift;
+ return shifted;
+ }
+
inline int32_t clampi(int32_t value, int32_t low, int32_t high)
{
if (value < low)
@@ -130,6 +143,31 @@ namespace basisu
return bits;
}
+
+ // Half-open interval [l, h): asserts l <= v < h and returns v unchanged. The (void) casts keep the parameters "used" when the assert expands to nothing.
+ inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
+ inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
+
+ // Closed interval [l, h]: asserts l <= v <= h and returns v unchanged.
+ inline int bounds_check_incl(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; }
+ inline uint32_t bounds_check_incl(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; }
+
+ // Returns the number of leading zero bits in the 32-bit value x, or 32 when x is 0.
+ inline uint32_t clz(uint32_t x)
+ {
+ uint32_t leading = 32;
+
+ // Every bit position up to and including the highest set bit removes one leading zero.
+ for ( ; x != 0; x >>= 1u)
+ leading--;
+
+ return leading;
+ }
+
+ bool string_begins_with(const std::string& str, const char* pPhrase);
// Hashing
@@ -268,6 +306,7 @@ namespace basisu
public:
enum { num_elements = N };
+ typedef T scalar_type;
inline vec() { }
inline vec(eZero) { set_zero(); }
@@ -291,6 +330,7 @@ namespace basisu
inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; }
inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; }
+ inline void clear() { set_zero(); }
template <uint32_t OtherN, typename OtherT>
inline vec &set(const vec<OtherN, OtherT> &other)
@@ -391,7 +431,7 @@ namespace basisu
inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); }
inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); }
- inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; }
+ inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; }
inline vec &clamp(T l, T h)
{
@@ -722,7 +762,7 @@ namespace basisu
void job_thread(uint32_t index);
};
- // Simple 32-bit color class
+ // Simple 64-bit color class
class color_rgba_i16
{
@@ -1116,7 +1156,9 @@ namespace basisu
{
std::string result(s);
for (size_t i = 0; i < result.size(); i++)
- result[i] = (char)tolower((int)result[i]);
+ {
+ result[i] = (char)tolower((uint8_t)(result[i]));
+ }
return result;
}
@@ -1408,7 +1450,7 @@ namespace basisu
size_t get_total_training_vecs() const { return m_training_vecs.size(); }
const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; }
- array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; }
+ array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; }
void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const
{
@@ -1437,36 +1479,36 @@ namespace basisu
}
void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const
- {
+ {
uint_vec node_stack;
- node_stack.reserve(512);
+ node_stack.reserve(512);
- codebook.resize(0);
- codebook.reserve(max_clusters);
+ codebook.resize(0);
+ codebook.reserve(max_clusters);
- uint32_t node_index = 0;
+ uint32_t node_index = 0;
- while (true)
- {
- const tsvq_node& cur = m_nodes[node_index];
+ while (true)
+ {
+ const tsvq_node& cur = m_nodes[node_index];
- if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))
- {
- codebook.resize(codebook.size() + 1);
- codebook.back() = cur.m_training_vecs;
+ if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))
+ {
+ codebook.resize(codebook.size() + 1);
+ codebook.back() = cur.m_training_vecs;
- if (node_stack.empty())
- break;
+ if (node_stack.empty())
+ break;
- node_index = node_stack.back();
- node_stack.pop_back();
- continue;
- }
+ node_index = node_stack.back();
+ node_stack.pop_back();
+ continue;
+ }
- node_stack.push_back(cur.m_right_index);
- node_index = cur.m_left_index;
- }
- }
+ node_stack.push_back(cur.m_right_index);
+ node_index = cur.m_left_index;
+ }
+ }
bool generate(uint32_t max_size)
{
@@ -2319,6 +2361,14 @@ namespace basisu
m_total_bits = 0;
}
+ // Empties the output byte vector and zeroes the bit buffer, its fill count and the total bit counter.
+ inline void restart()
+ {
+ m_total_bits = 0;
+ m_bit_buffer_size = 0;
+ m_bit_buffer = 0;
+ m_bytes.resize(0);
+ }
+
inline const uint8_vec &get_bytes() const { return m_bytes; }
inline uint64_t get_total_bits() const { return m_total_bits; }
@@ -2920,11 +2970,11 @@ namespace basisu
inline const color_rgba *get_ptr() const { return &m_pixels[0]; }
inline color_rgba *get_ptr() { return &m_pixels[0]; }
- bool has_alpha() const
+ bool has_alpha(uint32_t channel = 3) const
{
for (uint32_t y = 0; y < m_height; ++y)
for (uint32_t x = 0; x < m_width; ++x)
- if ((*this)(x, y).a < 255)
+ if ((*this)(x, y)[channel] < 255)
return true;
return false;
@@ -3130,6 +3180,31 @@ namespace basisu
return *this;
}
+ // Calls crop(w, h), then, if the image grew, fills the added columns and rows by duplicating pixels:
+ // each new pixel takes the value at its own coordinates clamped to the original image's last column/row.
+ // Nothing is filled when the image was empty before the call.
+ imagef& crop_dup_borders(uint32_t w, uint32_t h)
+ {
+ const uint32_t prev_w = m_width;
+ const uint32_t prev_h = m_height;
+
+ crop(w, h);
+
+ if (!prev_w || !prev_h)
+ return *this;
+
+ // Columns to the right of the original image (the loop is empty if the width did not grow).
+ for (uint32_t x = prev_w; x < m_width; x++)
+ {
+ for (uint32_t y = 0; y < m_height; y++)
+ set_clipped(x, y, get_clamped(minimum(x, prev_w - 1U), minimum(y, prev_h - 1U)));
+ }
+
+ // Rows below the original image (the loop is empty if the height did not grow).
+ for (uint32_t y = prev_h; y < m_height; y++)
+ {
+ for (uint32_t x = 0; x < m_width; x++)
+ set_clipped(x, y, get_clamped(minimum(x, prev_w - 1U), minimum(y, prev_h - 1U)));
+ }
+
+ return *this;
+ }
+
inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
@@ -3213,19 +3288,128 @@ namespace basisu
inline const vec4F *get_ptr() const { return &m_pixels[0]; }
inline vec4F *get_ptr() { return &m_pixels[0]; }
+
+ bool clean_astc_hdr_pixels(float highest_mag) // Sanitizes all 4 components of every pixel in place: NaN/Inf/-0 become 0, negatives become 0, values above highest_mag are clamped to it. Returns true only if nothing had to be changed.
+ {
+ bool status = true; // becomes false as soon as any component is modified
+ bool nan_msg = false; // the *_msg flags ensure each warning is printed at most once per call
+ bool inf_msg = false;
+ bool neg_zero_msg = false;
+ bool neg_msg = false;
+ bool clamp_msg = false;
+
+ for (uint32_t iy = 0; iy < m_height; iy++)
+ {
+ for (uint32_t ix = 0; ix < m_width; ix++)
+ {
+ vec4F& c = (*this)(ix, iy);
+
+ for (uint32_t s = 0; s < 4; s++)
+ {
+ float &p = c[s];
+ union { float f; uint32_t u; } x; x.f = p; // x.u exposes the raw bits so -0.0f (0x80000000) can be detected
+
+ if ((std::isnan(p)) || (std::isinf(p)) || (x.u == 0x80000000))
+ {
+ if (std::isnan(p))
+ {
+ if (!nan_msg)
+ {
+ fprintf(stderr, "One or more pixels was NaN, setting to 0.\n");
+ nan_msg = true;
+ }
+ }
+
+ if (std::isinf(p)) // true for both +Inf and -Inf
+ {
+ if (!inf_msg)
+ {
+ fprintf(stderr, "One or more pixels was INF, setting to 0.\n");
+ inf_msg = true;
+ }
+ }
+
+ if (x.u == 0x80000000)
+ {
+ if (!neg_zero_msg)
+ {
+ fprintf(stderr, "One or more pixels was -0, setting them to 0.\n");
+ neg_zero_msg = true;
+ }
+ }
+
+ p = 0.0f;
+ status = false;
+ }
+ else
+ {
+ //const float o = p;
+ if (p < 0.0f)
+ {
+ p = 0.0f;
+
+ if (!neg_msg)
+ {
+ fprintf(stderr, "One or more pixels was negative -- setting these pixel components to 0 because ASTC HDR doesn't support signed values.\n");
+ neg_msg = true;
+ }
+
+ status = false;
+ }
+
+ if (p > highest_mag) // finite value too large: clamp to the caller's limit
+ {
+ p = highest_mag;
+
+ if (!clamp_msg)
+ {
+ fprintf(stderr, "One or more pixels had to be clamped to %f.\n", highest_mag);
+ clamp_msg = true;
+ }
+
+ status = false;
+ }
+ }
+ }
+ }
+ }
+
+ return status;
+ }
+
+ // Flips the image vertically in place: each row in the top half is swapped with its mirror row in the bottom half.
+ // With an odd height the middle row is left where it is.
+ imagef& flip_y()
+ {
+ const uint32_t half_rows = m_height / 2;
+
+ for (uint32_t top = 0; top < half_rows; ++top)
+ {
+ const uint32_t bottom = m_height - 1 - top;
+
+ for (uint32_t x = 0; x < m_width; ++x)
+ std::swap((*this)(x, top), (*this)(x, bottom));
+ }
+
+ return *this;
+ }
private:
uint32_t m_width, m_height, m_pitch; // all in pixels
vec4F_vec m_pixels;
};
+ // REC 709 coefficients
+ const float REC_709_R = 0.212656f, REC_709_G = 0.715158f, REC_709_B = 0.072186f;
+
+ inline float get_luminance(const vec4F &c) // Weighted sum of components 0..2 (R, G, B) using the REC_709_* coefficients; component 3 is ignored.
+ {
+ return c[0] * REC_709_R + c[1] * REC_709_G + c[2] * REC_709_B;
+ }
+
+ float linear_to_srgb(float l);
+ float srgb_to_linear(float s);
+
// Image metrics
class image_metrics
{
public:
// TODO: Add ssim
- float m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim;
+ double m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim;
+ bool m_has_neg, m_hf_mag_overflow, m_any_abnormal;
image_metrics()
{
@@ -3240,10 +3424,17 @@ namespace basisu
m_rms = 0;
m_psnr = 0;
m_ssim = 0;
+ m_has_neg = false;
+ m_hf_mag_overflow = false;
+ m_any_abnormal = false;
}
- void print(const char *pPrefix = nullptr) { printf("%sMax: %3.0f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); }
+ void print(const char *pPrefix = nullptr) { printf("%sMax: %3.3f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); }
+ void print_hp(const char* pPrefix = nullptr) { printf("%sMax: %3.6f Mean: %3.6f RMS: %3.6f PSNR: %2.6f dB, Any Neg: %u, Half float overflow: %u, Any NaN/Inf: %u\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal); }
+ void calc(const imagef& a, const imagef& b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool log = false);
+ void calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error);
+ void calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error);
void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false);
};
@@ -3256,6 +3447,8 @@ namespace basisu
bool load_tga(const char* pFilename, image& img);
inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); }
+ bool load_qoi(const char* pFilename, image& img);
+
bool load_jpg(const char *pFilename, image& img);
inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); }
@@ -3263,9 +3456,64 @@ namespace basisu
bool load_image(const char* pFilename, image& img);
inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); }
+ // Supports .HDR and most (but not all) .EXR's (see TinyEXR).
+ bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear = true);
+ inline bool load_image_hdr(const std::string& filename, imagef& img, bool ldr_srgb_to_linear = true) { return load_image_hdr(filename.c_str(), img, ldr_srgb_to_linear); }
+
+ enum class hdr_image_type // Describes the data handed to the in-memory load_image_hdr() overload.
+ {
+ cHITRGBAHalfFloat = 0, // presumably raw RGBA half-float pixels sized by the caller's width/height -- confirm against load_image_hdr()
+ cHITRGBAFloat = 1, // presumably raw RGBA 32-bit float pixels sized by the caller's width/height -- confirm against load_image_hdr()
+ cHITPNGImage = 2, // presumably a complete .PNG file image -- confirm
+ cHITEXRImage = 3, // presumably a complete .EXR file image -- confirm
+ cHITHDRImage = 4 // presumably a complete .HDR (RGBE) file image -- confirm
+ };
+
+ bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear);
+
uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans);
uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans);
+ struct rgbe_header_info // Header fields passed to/from read_rgbe() and write_rgbe(). No constructor is declared: the numeric and bool members are uninitialized until clear() is called or they are assigned.
+ {
+ std::string m_program; // presumably the program/software name recorded in the file header -- confirm against read_rgbe()
+
+ // Note no validation is done, either gamma or exposure may be 0. Each value has a companion m_has_* flag.
+ double m_gamma;
+ bool m_has_gamma;
+
+ double m_exposure; // watts/steradian/m^2.
+ bool m_has_exposure;
+
+ void clear() // Resets every field: empty program string, gamma and exposure 1.0, both m_has_* flags false.
+ {
+ m_program.clear();
+ m_gamma = 1.0f;
+ m_has_gamma = false;
+ m_exposure = 1.0f;
+ m_has_exposure = false;
+ }
+ };
+
+ bool read_rgbe(const uint8_vec& filedata, imagef& img, rgbe_header_info& hdr_info);
+ bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info &hdr_info);
+
+ bool write_rgbe(uint8_vec& file_data, imagef& img, rgbe_header_info& hdr_info);
+ bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info);
+
+ bool read_exr(const char* pFilename, imagef& img, int& n_chans);
+ bool read_exr(const void* pMem, size_t mem_size, imagef& img);
+
+ enum
+ {
+ WRITE_EXR_LINEAR_HINT = 1, // hint for lossy comp. methods: exr_perceptual_treatment_t, logarithmic or linear, defaults to logarithmic
+ WRITE_EXR_STORE_FLOATS = 2, // use 32-bit floats, otherwise it uses half floats
+ WRITE_EXR_NO_COMPRESSION = 4 // no compression, otherwise it uses ZIP compression (16 scanlines per block)
+ };
+
+ // Supports 1 (Y), 3 (RGB), or 4 (RGBA) channel images.
+ bool write_exr(const char* pFilename, imagef& img, uint32_t n_chans, uint32_t flags);
+
enum
{
cImageSaveGrayscale = 1,
@@ -3276,19 +3524,22 @@ namespace basisu
inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); }
bool read_file_to_vec(const char* pFilename, uint8_vec& data);
-
+ bool read_file_to_data(const char* pFilename, void *pData, size_t len);
+
bool write_data_to_file(const char* pFilename, const void* pData, size_t len);
inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); }
-
- float linear_to_srgb(float l);
- float srgb_to_linear(float s);
-
+
bool image_resample(const image &src, image &dst, bool srgb = false,
const char *pFilter = "lanczos4", float filter_scale = 1.0f,
bool wrapping = false,
uint32_t first_comp = 0, uint32_t num_comps = 4);
+ bool image_resample(const imagef& src, imagef& dst,
+ const char* pFilter = "lanczos4", float filter_scale = 1.0f,
+ bool wrapping = false,
+ uint32_t first_comp = 0, uint32_t num_comps = 4);
+
// Timing
typedef uint64_t timer_ticks;
@@ -3319,6 +3570,8 @@ namespace basisu
bool m_started, m_stopped;
};
+ inline double get_interval_timer() { return interval_timer::ticks_to_secs(interval_timer::get_ticks()); }
+
// 2D array
template<typename T>
@@ -3372,8 +3625,8 @@ namespace basisu
inline const T &operator[] (uint32_t i) const { return m_values[i]; }
inline T &operator[] (uint32_t i) { return m_values[i]; }
- inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); }
- inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); }
+ inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
+ inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
void clear()
{
@@ -3450,7 +3703,327 @@ namespace basisu
}
};
typedef basisu::vector<pixel_block> pixel_block_vec;
-
+
+ // A cPixelBlockWidth x cPixelBlockHeight block of floating point (vec4F) pixels, stored as m_pixels[y][x].
+ struct pixel_block_hdr
+ {
+ vec4F m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x]
+
+ // Pixel accessors; x and y are asserted to lie inside the block.
+ inline const vec4F& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
+ inline vec4F& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
+
+ // Pointer to the first pixel (row 0, column 0).
+ inline const vec4F* get_ptr() const { return &m_pixels[0][0]; }
+ inline vec4F* get_ptr() { return &m_pixels[0][0]; }
+
+ inline void clear() { clear_obj(*this); }
+
+ // Bytewise comparison of two HDR blocks (so 0.0f and -0.0f compare unequal).
+ // The right-hand side must be another HDR block: comparing sizeof(m_pixels) bytes against the smaller
+ // LDR pixel_block would read past the end of that block.
+ inline bool operator== (const pixel_block_hdr& rhs) const
+ {
+ return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0;
+ }
+ };
+ typedef basisu::vector<pixel_block_hdr> pixel_block_hdr_vec;
+
+ void tonemap_image_reinhard(image& ldr_img, const imagef& hdr_img, float exposure);
+ bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img);
+
+ // Intersection
+ enum eClear { cClear = 0 };
+ enum eInitExpand { cInitExpand = 0 };
+
+ template<typename vector_type>
+ class ray // A ray stored as an origin point and a direction vector, both of type vector_type.
+ {
+ public:
+ typedef vector_type vector_t;
+ typedef typename vector_type::scalar_type scalar_type;
+
+ inline ray() { } // leaves origin and direction default-constructed
+ inline ray(eClear) { clear(); }
+ inline ray(const vector_type& origin, const vector_type& direction) : m_origin(origin), m_direction(direction) { } // direction is stored as given (not normalized here)
+
+ inline void clear() // clears both the origin and the direction
+ {
+ m_origin.clear();
+ m_direction.clear();
+ }
+
+ inline const vector_type& get_origin(void) const { return m_origin; }
+ inline void set_origin(const vector_type& origin) { m_origin = origin; }
+
+ inline const vector_type& get_direction(void) const { return m_direction; }
+ inline void set_direction(const vector_type& direction) { m_direction = direction; }
+
+ inline void set_endpoints(const vector_type& start, const vector_type& end) // origin = start, direction = (end - start) normalized in place
+ {
+ m_origin = start;
+
+ m_direction = end - start;
+ m_direction.normalize_in_place();
+ }
+
+ inline vector_type eval(scalar_type t) const // point at parameter t: origin + direction * t
+ {
+ return m_origin + m_direction * t;
+ }
+
+ private:
+ vector_type m_origin;
+ vector_type m_direction;
+ };
+
+ typedef ray<vec2F> ray2F;
+ typedef ray<vec3F> ray3F;
+
+ // An axis-aligned interval (bounding box) over vectors of type T, stored as a low and a high corner.
+ template<typename T>
+ class vec_interval
+ {
+ public:
+ enum { N = T::num_elements };
+ typedef typename T::scalar_type scalar_type;
+
+ // Degenerate interval containing only v.
+ inline vec_interval(const T& v) { m_bounds[0] = v; m_bounds[1] = v; }
+ inline vec_interval(const T& low, const T& high) { m_bounds[0] = low; m_bounds[1] = high; }
+
+ inline vec_interval() { }
+ inline vec_interval(eClear) { clear(); }
+ inline vec_interval(eInitExpand) { init_expand(); }
+
+ inline void clear() { m_bounds[0].clear(); m_bounds[1].clear(); }
+
+ // Sets every component of the low corner to +1e30 and of the high corner to -1e30 (an "inverted" box),
+ // so the first expand() call snaps both corners to that point. Works for any number of components N.
+ inline void init_expand()
+ {
+ for (uint32_t c = 0; c < N; c++)
+ {
+ m_bounds[0][c] = static_cast<scalar_type>(1e+30f);
+ m_bounds[1][c] = static_cast<scalar_type>(-1e+30f);
+ }
+ }
+
+ // Grows the interval so it includes p. Returns a copy of the updated interval.
+ inline vec_interval expand(const T& p)
+ {
+ for (uint32_t c = 0; c < N; c++)
+ {
+ if (p[c] < m_bounds[0][c])
+ m_bounds[0][c] = p[c];
+
+ if (p[c] > m_bounds[1][c])
+ m_bounds[1][c] = p[c];
+ }
+
+ return *this;
+ }
+
+ // Index 0 is the low corner, index 1 the high corner.
+ inline const T& operator[] (uint32_t i) const { assert(i < 2); return m_bounds[i]; }
+ inline T& operator[] (uint32_t i) { assert(i < 2); return m_bounds[i]; }
+
+ const T& get_low() const { return m_bounds[0]; }
+ T& get_low() { return m_bounds[0]; }
+
+ const T& get_high() const { return m_bounds[1]; }
+ T& get_high() { return m_bounds[1]; }
+
+ // Extent of the interval along the given axis (high - low).
+ scalar_type get_dim(uint32_t axis) const { return m_bounds[1][axis] - m_bounds[0][axis]; }
+
+ // Returns true if p lies inside the interval; points exactly on the boundary count as inside.
+ bool contains(const T& p) const
+ {
+ // Both corners are bound by reference, so no copy is made.
+ const T& low = get_low();
+ const T& high = get_high();
+
+ for (uint32_t i = 0; i < N; i++)
+ {
+ if (p[i] < low[i])
+ return false;
+
+ if (p[i] > high[i])
+ return false;
+ }
+ return true;
+ }
+
+ private:
+ T m_bounds[2];
+ };
+
+ typedef vec_interval<vec1F> vec_interval1F;
+ typedef vec_interval<vec2F> vec_interval2F;
+ typedef vec_interval<vec3F> vec_interval3F;
+ typedef vec_interval<vec4F> vec_interval4F;
+
+ typedef vec_interval2F aabb2F;
+ typedef vec_interval3F aabb3F;
+
+ namespace intersection
+ {
+ enum result
+ {
+ cBackfacing = -1,
+ cFailure = 0,
+ cSuccess,
+ cParallel,
+ cInside,
+ };
+
+ // Intersects ray with the axis-aligned box. Returns cInside (origin within the box: coord = origin, t = 0), cSuccess (coord = hit point, t = ray parameter of the hit), or cFailure (no hit; coord may be partially written).
+ // Algorithm: Graphics Gems 1
+ template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type>
+ result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box)
+ {
+ enum
+ {
+ cNumDim = vector_type::num_elements,
+ cRight = 0,
+ cLeft = 1,
+ cMiddle = 2
+ };
+
+ bool inside = true;
+ int quadrant[cNumDim]; // per axis: where the origin lies relative to the box's extent on that axis
+ scalar_type candidate_plane[cNumDim]; // per axis: the box face facing the origin (only set when the origin is outside on that axis)
+
+ for (int i = 0; i < cNumDim; i++)
+ {
+ if (ray.get_origin()[i] < box[0][i])
+ {
+ quadrant[i] = cLeft;
+ candidate_plane[i] = box[0][i];
+ inside = false;
+ }
+ else if (ray.get_origin()[i] > box[1][i])
+ {
+ quadrant[i] = cRight;
+ candidate_plane[i] = box[1][i];
+ inside = false;
+ }
+ else
+ {
+ quadrant[i] = cMiddle;
+ }
+ }
+
+ if (inside)
+ {
+ coord = ray.get_origin();
+ t = 0.0f;
+ return cInside;
+ }
+
+ scalar_type max_t[cNumDim]; // per axis: ray parameter at which the candidate plane is reached, or -1 when there is none
+ for (int i = 0; i < cNumDim; i++)
+ {
+ if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f))
+ max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i];
+ else
+ max_t[i] = -1.0f;
+ }
+
+ int which_plane = 0; // axis with the largest max_t: the last candidate plane the ray crosses
+ for (int i = 1; i < cNumDim; i++)
+ if (max_t[which_plane] < max_t[i])
+ which_plane = i;
+
+ if (max_t[which_plane] < 0.0f) // no candidate plane is reached at a non-negative parameter
+ return cFailure;
+
+ for (int i = 0; i < cNumDim; i++)
+ {
+ if (i != which_plane)
+ {
+ coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i];
+
+ if ((coord[i] < box[0][i]) || (coord[i] > box[1][i])) // the point on the chosen plane falls outside the box on this axis
+ return cFailure;
+ }
+ else
+ {
+ coord[i] = candidate_plane[i];
+ }
+
+ assert(coord[i] >= box[0][i] && coord[i] <= box[1][i]);
+ }
+
+ t = max_t[which_plane];
+ return cSuccess;
+ }
+
+ // Variant of ray_aabb() that also reports whether the ray starts inside the box.
+ // If the origin is outside, this simply forwards to the 4-argument ray_aabb().
+ // If the origin is inside, a second ray is cast back along the original one from a point 1.5 box diagonals
+ // along it; on cSuccess, t is converted back into a non-negative parameter along the original ray
+ // (assumes the ray direction is normalized, so that this start point lies outside the box -- confirm against callers).
+ template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type>
+ result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box)
+ {
+ started_within = box.contains(ray.get_origin());
+ if (!started_within)
+ return ray_aabb(coord, t, ray, box);
+
+ // The box diagonal is computed from its two corners, using the same box[0]/box[1] access the 4-argument overload relies on.
+ const scalar_type diag_dist = static_cast<scalar_type>((box[1] - box[0]).length() * 1.5f);
+ ray_type outside_ray(ray.eval(diag_dist), -ray.get_direction());
+
+ const result res = ray_aabb(coord, t, outside_ray, box);
+ if (res != cSuccess)
+ return res;
+
+ t = basisu::maximum<scalar_type>(0.0f, diag_dist - t);
+ return cSuccess;
+ }
+
+ } // intersect
+
+ // This float->half conversion matches how "F32TO16" works on Intel GPU's.
+ // Input cannot be negative, Inf or Nan (asserted). Float zeros/denormals produce 0; inputs whose exponent is too large for a half produce e == 31, m == 0.
+ inline basist::half_float float_to_half_non_neg_no_nan_inf(float val)
+ {
+ union { float f; int32_t i; uint32_t u; } fi = { val };
+ const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF; // 23-bit mantissa and 8-bit biased exponent of the float
+ int e = 0, m = 0; // half exponent and mantissa fields being built
+
+ assert(((fi.i >> 31) == 0) && (flt_e != 0xFF));
+
+ // not zero or denormal
+ if (flt_e != 0)
+ {
+ int new_exp = flt_e - 127; // unbiased exponent
+ if (new_exp > 15)
+ e = 31;
+ else if (new_exp < -14)
+ m = lrintf((1 << 24) * fabsf(fi.f)); // below the smallest normal half: e stays 0 and the value is rounded to a multiple of 2^-24
+ else
+ {
+ e = new_exp + 15;
+ m = lrintf(flt_m * (1.0f / ((float)(1 << 13)))); // round the 23-bit mantissa down to 10 bits
+ }
+ }
+
+ assert((0 <= m) && (m <= 1024));
+ if (m == 1024) // rounding overflowed the 10-bit mantissa: carry into the exponent
+ {
+ e++;
+ m = 0;
+ }
+
+ assert((e >= 0) && (e <= 31));
+ assert((m >= 0) && (m <= 1023));
+
+ basist::half_float result = (basist::half_float)((e << 10) | m);
+ return result;
+ }
+
+ // Supports positive and denormals only. No NaN or Inf. Converts with one shift and one float multiply instead of branching on the exponent.
+ inline float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h)
+ {
+ assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h));
+
+ union fu32 // lets the same 32 bits be accessed as an integer or as a float
+ {
+ uint32_t u;
+ float f;
+ };
+
+ static const fu32 K = { 0x77800000 }; // bit pattern of the float 2^112, i.e. 2^(127 - 15): the difference between the float and half exponent biases
+
+ fu32 o;
+ o.u = h << 13; // move the half's exponent and mantissa bits into the float's exponent/mantissa positions
+ o.f *= K.f; // rescale to compensate for the bias difference
+
+ return o.f;
+ }
+
} // namespace basisu
diff --git a/thirdparty/basis_universal/encoder/basisu_etc.cpp b/thirdparty/basis_universal/encoder/basisu_etc.cpp
index f8bd0f12e5..ba1c14231d 100644
--- a/thirdparty/basis_universal/encoder/basisu_etc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_etc.cpp
@@ -1,5 +1,5 @@
// basis_etc.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_etc.h b/thirdparty/basis_universal/encoder/basisu_etc.h
index 208f2aac1b..5c44bd4812 100644
--- a/thirdparty/basis_universal/encoder/basisu_etc.h
+++ b/thirdparty/basis_universal/encoder/basisu_etc.h
@@ -1,5 +1,5 @@
// basis_etc.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_frontend.cpp b/thirdparty/basis_universal/encoder/basisu_frontend.cpp
index 1f30a33c70..750f706aa5 100644
--- a/thirdparty/basis_universal/encoder/basisu_frontend.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_frontend.cpp
@@ -1,5 +1,5 @@
// basisu_frontend.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -2347,6 +2347,7 @@ namespace basisu
continue;
uint64_t overall_best_err = 0;
+ (void)overall_best_err;
uint64_t total_err[4][4][4];
clear_obj(total_err);
diff --git a/thirdparty/basis_universal/encoder/basisu_frontend.h b/thirdparty/basis_universal/encoder/basisu_frontend.h
index cda73f3984..69fc8d8ec5 100644
--- a/thirdparty/basis_universal/encoder/basisu_frontend.h
+++ b/thirdparty/basis_universal/encoder/basisu_frontend.h
@@ -1,5 +1,5 @@
// basisu_frontend.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
index dec769d5ac..342446b8fd 100644
--- a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
@@ -1,5 +1,5 @@
// basisu_gpu_texture.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -15,13 +15,15 @@
#include "basisu_gpu_texture.h"
#include "basisu_enc.h"
#include "basisu_pvrtc1_4.h"
-#if BASISU_USE_ASTC_DECOMPRESS
-#include "basisu_astc_decomp.h"
-#endif
+#include "3rdparty/android_astc_decomp.h"
#include "basisu_bc7enc.h"
+#include "../transcoder/basisu_astc_hdr_core.h"
namespace basisu
{
+ //------------------------------------------------------------------------------------------------
+ // ETC2 EAC
+
void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels)
{
static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8");
@@ -56,6 +58,8 @@ namespace basisu
pPixels[15].a = clamp255(base + pTable[pBlock->get_selector(3, 3, selector_bits)] * mul);
}
+ //------------------------------------------------------------------------------------------------
+ // BC1
struct bc1_block
{
enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };
@@ -274,6 +278,9 @@ namespace basisu
return used_punchthrough;
}
+ //------------------------------------------------------------------------------------------------
+ // BC3-5
+
struct bc4_block
{
enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 };
@@ -372,7 +379,8 @@ namespace basisu
unpack_bc4(pBlock_bits, &pPixels[0].r, sizeof(color_rgba));
unpack_bc4((const uint8_t *)pBlock_bits + sizeof(bc4_block), &pPixels[0].g, sizeof(color_rgba));
}
-
+
+ //------------------------------------------------------------------------------------------------
// ATC isn't officially documented, so I'm assuming these references:
// http://www.guildsoftware.com/papers/2012.Converting.DXTC.to.ATC.pdf
// https://github.com/Triang3l/S3TConv/blob/master/s3tconv_atitc.c
@@ -426,6 +434,7 @@ namespace basisu
}
}
+ //------------------------------------------------------------------------------------------------
// BC7 mode 0-7 decompression.
// Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines.
@@ -742,6 +751,255 @@ namespace basisu
return false;
}
+	// Sign-extends the low 'bits' bits of val (two's complement) to a full int.
+	// val must already be masked to 'bits' bits, and bits must be in [1, 31].
+	static inline int bc6h_sign_extend(int val, int bits)
+	{
+		assert((bits >= 1) && (bits < 32));
+		assert((val >= 0) && (val < (1 << bits)));
+
+		// Move the field's top bit into the int's sign bit, then shift back down.
+		// Relies on right shifts of negative ints being arithmetic.
+		const int unused_high_bits = 32 - bits;
+		const int field_at_top = val << unused_high_bits;
+		return field_at_top >> unused_high_bits;
+	}
+
+	// Adds a delta to a base endpoint value, wrapping the sum to num_bits bits.
+	// When is_signed is nonzero the wrapped sum is sign-extended from num_bits bits; otherwise it is returned as-is.
+	static inline int bc6h_apply_delta(int base, int delta, int num_bits, int is_signed)
+	{
+		const int wrapped = (base + delta) & ((1 << num_bits) - 1);
+
+		if (is_signed)
+			return bc6h_sign_extend(wrapped, num_bits);
+
+		return wrapped;
+	}
+
+	// Expands a quantized BC6H endpoint component of 'bits' bits to the decoder's working range.
+	// Unsigned: values of 15+ bits pass through; otherwise 0 maps to 0, the all-ones code maps to 0xFFFF, and the rest are scaled up with rounding.
+	// Signed: values of 16+ bits pass through; otherwise the magnitude is scaled (saturating at 0x7FFF) and the sign is re-applied.
+	static int bc6h_dequantize(int val, int bits, int is_signed)
+	{
+		if (!is_signed)
+		{
+			if (bits >= 15)
+				return val;
+
+			if (val == 0)
+				return 0;
+
+			if (val == ((1 << bits) - 1))
+				return 0xFFFF;
+
+			return ((val << 16) + 0x8000) >> bits;
+		}
+
+		if (bits >= 16)
+			return val;
+
+		// Work on the magnitude and restore the sign at the end.
+		const bool negative = (val < 0);
+		const int magnitude = negative ? -val : val;
+
+		int unquantized;
+		if (magnitude == 0)
+			unquantized = 0;
+		else if (magnitude >= ((1 << (bits - 1)) - 1))
+			unquantized = 0x7FFF;
+		else
+			unquantized = ((magnitude << 15) + 0x4000) >> (bits - 1);
+
+		return negative ? -unquantized : unquantized;
+	}
+
+	// Blends endpoints a and b using the 6-bit fixed-point weight pWeights[index] (0 selects a, 64 selects b), rounding to nearest.
+	static inline int bc6h_interpolate(int a, int b, const uint8_t* pWeights, int index)
+	{
+		const int weight_b = (int)pWeights[index];
+		const int weight_a = 64 - weight_b;
+
+		return (a * weight_a + b * weight_b + 32) >> 6;
+	}
+
+	// Converts an interpolated BC6H value to the final half bit pattern.
+	// Unsigned values are scaled by 31/64. Signed values have their magnitude scaled by 31/32 and the
+	// half sign bit (0x8000) is set only when the scaled magnitude is nonzero, so negative zero is never produced.
+	static inline basist::half_float bc6h_convert_to_half(int val, int is_signed)
+	{
+		if (!is_signed)
+		{
+			// scale by 31/64
+			return (basist::half_float)((val * 31) >> 6);
+		}
+
+		// scale by 31/32, operating on the magnitude so the shift rounds toward zero
+		const bool negative = (val < 0);
+		const int abs_val = negative ? -val : val;
+		const int magnitude = (abs_val * 31) >> 5;
+
+		const int sign_bit = (negative && (magnitude != 0)) ? 0x8000 : 0;
+
+		return (basist::half_float)(sign_bit | magnitude);
+	}
+
+	// Pops the low num_bits bits from the 128-bit bit buffer formed by h (high qword) and l (low qword) and returns them.
+	// Both qwords are shifted down by num_bits (bits move from h into the top of l) and total_bits is advanced by num_bits.
+	// num_bits must be in [1, 32], because the result is returned as a uint32_t.
+	static inline uint32_t bc6h_get_bits(uint32_t num_bits, uint64_t& l, uint64_t& h, uint32_t& total_bits)
+	{
+		assert((num_bits) && (num_bits <= 32));
+
+		// Build the mask in 64 bits: a 32-bit (1U << num_bits) is undefined once num_bits reaches 32.
+		const uint64_t mask = (1ULL << num_bits) - 1ULL;
+		const uint32_t v = (uint32_t)(l & mask);
+
+		l >>= num_bits;
+		l |= (h << (64U - num_bits));
+		h >>= num_bits;
+
+		total_bits += num_bits;
+		assert(total_bits <= 128);
+
+		return v;
+	}
+
+	// Returns the low num_bits bits of v in reversed order (bit 0 becomes bit num_bits-1, and so on).
+	// Bits of v at or above num_bits are ignored.
+	static inline uint32_t bc6h_reverse_bits(uint32_t v, uint32_t num_bits)
+	{
+		uint32_t reversed = 0;
+
+		// Walk the source from its lowest bit upward, shifting each bit into the bottom of the result.
+		for (uint32_t src_bit = 0; src_bit < num_bits; src_bit++)
+			reversed = (reversed << 1u) | ((v >> src_bit) & 1u);
+
+		return reversed;
+	}
+
+	// Reads a 64-bit value from p as two read_le_dword() reads: the first dword forms the low 32 bits, the second the high 32 bits.
+	static inline uint64_t bc6h_read_le_qword(const void* p)
+	{
+		const uint8_t* pBytes = static_cast<const uint8_t*>(p);
+
+		const uint64_t low_dword = read_le_dword(pBytes);
+		const uint64_t high_dword = read_le_dword(pBytes + sizeof(uint32_t));
+
+		return low_dword | (high_dword << 32U);
+	}
+
+	// Unpacks one 16-byte BC6H block to 4x4 RGB half-float texels (3 halfs per texel, no alpha).
+	// pSrc_block: the 128-bit block, read as two little-endian qwords.
+	// pDst_block: destination of basist::half_float values; each row receives 4*3 halfs and rows start dest_pitch_in_halfs halfs apart.
+	// is_signed: true to decode the block as signed BC6H, false for unsigned.
+	// Returns false (after zeroing all 4 output rows) when the mode lookup rejects the block's mode bits, otherwise true.
+	bool unpack_bc6h(const void* pSrc_block, void* pDst_block, bool is_signed, uint32_t dest_pitch_in_halfs)
+	{
+		assert(dest_pitch_in_halfs >= 4 * 3);
+
+		const uint32_t MAX_SUBSETS = 2, MAX_COMPS = 3;
+
+		const uint8_t* pSrc = static_cast<const uint8_t*>(pSrc_block);
+		basist::half_float* pDst = static_cast<basist::half_float*>(pDst_block);
+
+		uint64_t blo = bc6h_read_le_qword(pSrc), bhi = bc6h_read_le_qword(pSrc + sizeof(uint64_t));
+
+		// Unpack mode
+		const int mode = basist::g_bc6h_mode_lookup[blo & 31];
+		if (mode < 0)
+		{
+			// Invalid mode: clear the whole 4x4 output. Each row holds 4 RGB texels (4 * 3 halfs),
+			// so all 12 halfs must be zeroed, not just the first 4.
+			for (int y = 0; y < 4; y++)
+			{
+				memset(pDst, 0, sizeof(basist::half_float) * 4 * 3);
+				pDst += dest_pitch_in_halfs;
+			}
+			return false;
+		}
+
+		// Skip mode bits
+		uint32_t total_bits_read = 0;
+		bc6h_get_bits((mode < 2) ? 2 : 5, blo, bhi, total_bits_read);
+
+		assert(mode < (int)basist::NUM_BC6H_MODES);
+
+		const uint32_t num_subsets = (mode >= 10) ? 1 : 2;
+		const bool is_mode_9_or_10 = (mode == 9) || (mode == 10);
+
+		// Unpack endpoint components
+		int comps[MAX_SUBSETS][MAX_COMPS][2] = { { { 0 } } }; // [subset][comp][l/h]
+		int part_index = 0;
+
+		// Walk this mode's bit layout table until its terminator (m_comp < 0), OR-ing each field into
+		// either an endpoint component or (m_comp == 3) the partition index.
+		uint32_t layout_index = 0;
+		while (layout_index < basist::MAX_BC6H_LAYOUT_INDEX)
+		{
+			const basist::bc6h_bit_layout& layout = basist::g_bc6h_bit_layouts[mode][layout_index];
+
+			if (layout.m_comp < 0)
+				break;
+
+			const int subset = layout.m_index >> 1, lh_index = layout.m_index & 1;
+			assert((layout.m_comp == 3) || ((subset >= 0) && (subset < (int)MAX_SUBSETS)));
+
+			const int last_bit = layout.m_last_bit, first_bit = layout.m_first_bit;
+			assert(last_bit >= 0);
+
+			int& res = (layout.m_comp == 3) ? part_index : comps[subset][layout.m_comp][lh_index];
+
+			if (first_bit < 0)
+			{
+				// Single-bit field placed at bit position last_bit.
+				res |= (bc6h_get_bits(1, blo, bhi, total_bits_read) << last_bit);
+			}
+			else
+			{
+				const int total_bits = iabs(last_bit - first_bit) + 1;
+				const int bit_shift = basisu::minimum(first_bit, last_bit);
+
+				int b = bc6h_get_bits(total_bits, blo, bhi, total_bits_read);
+
+				// A descending range means the field is stored bit-reversed in the block.
+				if (last_bit < first_bit)
+					b = bc6h_reverse_bits(b, total_bits);
+
+				res |= (b << bit_shift);
+			}
+
+			layout_index++;
+		}
+		assert(layout_index != basist::MAX_BC6H_LAYOUT_INDEX);
+
+		// Sign extend/dequantize endpoints
+		const int num_sig_bits = basist::g_bc6h_mode_sig_bits[mode][0];
+		if (is_signed)
+		{
+			for (uint32_t comp = 0; comp < 3; comp++)
+				comps[0][comp][0] = bc6h_sign_extend(comps[0][comp][0], num_sig_bits);
+		}
+
+		// Every endpoint other than [0][comp][0] is sign-extended using the per-component bit counts.
+		if (is_signed || !is_mode_9_or_10)
+		{
+			for (uint32_t subset = 0; subset < num_subsets; subset++)
+				for (uint32_t comp = 0; comp < 3; comp++)
+					for (uint32_t lh = (subset ? 0 : 1); lh < 2; lh++)
+						comps[subset][comp][lh] = bc6h_sign_extend(comps[subset][comp][lh], basist::g_bc6h_mode_sig_bits[mode][1 + comp]);
+		}
+
+		// Outside modes 9 and 10 those endpoints are deltas relative to [0][comp][0].
+		if (!is_mode_9_or_10)
+		{
+			for (uint32_t subset = 0; subset < num_subsets; subset++)
+				for (uint32_t comp = 0; comp < 3; comp++)
+					for (uint32_t lh = (subset ? 0 : 1); lh < 2; lh++)
+						comps[subset][comp][lh] = bc6h_apply_delta(comps[0][comp][0], comps[subset][comp][lh], num_sig_bits, is_signed);
+		}
+
+		for (uint32_t subset = 0; subset < num_subsets; subset++)
+			for (uint32_t comp = 0; comp < 3; comp++)
+				for (uint32_t lh = 0; lh < 2; lh++)
+					comps[subset][comp][lh] = bc6h_dequantize(comps[subset][comp][lh], num_sig_bits, is_signed);
+
+		// Now unpack weights and output texels
+		const int weight_bits = (mode >= 10) ? 4 : 3;
+		const uint8_t* pWeights = (mode >= 10) ? basist::g_bc6h_weight4 : basist::g_bc6h_weight3;
+
+		// pDst advances by 3 halfs per texel below, so only the remainder of the pitch is added per row.
+		dest_pitch_in_halfs -= 4 * 3;
+
+		for (uint32_t y = 0; y < 4; y++)
+		{
+			for (uint32_t x = 0; x < 4; x++)
+			{
+				// Bit 0x80 flags a texel whose weight index is stored with one fewer bit; bit 0 selects the subset.
+				int subset = (num_subsets == 1) ? ((x | y) ? 0 : 0x80) : basist::g_bc6h_2subset_patterns[part_index][y][x];
+				const int num_bits = weight_bits + ((subset & 0x80) ? -1 : 0);
+
+				subset &= 1;
+
+				const int weight_index = bc6h_get_bits(num_bits, blo, bhi, total_bits_read);
+
+				pDst[0] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][0][0], comps[subset][0][1], pWeights, weight_index), is_signed);
+				pDst[1] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][1][0], comps[subset][1][1], pWeights, weight_index), is_signed);
+				pDst[2] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][2][0], comps[subset][2][1], pWeights, weight_index), is_signed);
+
+				pDst += 3;
+			}
+
+			pDst += dest_pitch_in_halfs;
+		}
+
+		assert(total_bits_read == 128);
+		return true;
+	}
+ //------------------------------------------------------------------------------------------------
+ // FXT1 (for fun, and because some modern Intel parts support it, and because a subset is like BC1)
+
struct fxt1_block
{
union
@@ -901,6 +1159,9 @@ namespace basisu
return true;
}
+ //------------------------------------------------------------------------------------------------
+ // PVRTC2 (non-interpolated, hard_flag=1 modulation=0 subset only!)
+
struct pvrtc2_block
{
uint8_t m_modulation[4];
@@ -1015,6 +1276,9 @@ namespace basisu
return true;
}
+ //------------------------------------------------------------------------------------------------
+ // ETC2 EAC R11 or RG11
+
struct etc2_eac_r11
{
uint64_t m_base : 8;
@@ -1085,13 +1349,16 @@ namespace basisu
unpack_etc2_eac_r(pBlock, pPixels, c);
}
}
-
+
+ //------------------------------------------------------------------------------------------------
+ // UASTC
+
void unpack_uastc(const void* p, color_rgba* pPixels)
{
basist::unpack_uastc(*static_cast<const basist::uastc_block*>(p), (basist::color32 *)pPixels, false);
}
-
- // Unpacks to RGBA, R, RG, or A
+
+ // Unpacks to RGBA, R, RG, or A. LDR GPU texture formats only.
bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels)
{
switch (fmt)
@@ -1150,14 +1417,24 @@ namespace basisu
unpack_etc2_eac(pBlock, pPixels);
break;
}
- case texture_format::cASTC4x4:
+ case texture_format::cBC6HSigned:
+ case texture_format::cBC6HUnsigned:
+ case texture_format::cASTC_HDR_4x4:
+ case texture_format::cUASTC_HDR_4x4:
+ {
+ // Can't unpack HDR blocks in unpack_block() because it returns 32bpp pixel data.
+ assert(0);
+ return false;
+ }
+ case texture_format::cASTC_LDR_4x4:
{
-#if BASISU_USE_ASTC_DECOMPRESS
const bool astc_srgb = false;
- basisu_astc::astc::decompress(reinterpret_cast<uint8_t*>(pPixels), static_cast<const uint8_t*>(pBlock), astc_srgb, 4, 4);
-#else
- memset(pPixels, 255, 16 * sizeof(color_rgba));
-#endif
+ bool status = basisu_astc::astc::decompress_ldr(reinterpret_cast<uint8_t*>(pPixels), static_cast<const uint8_t*>(pBlock), astc_srgb, 4, 4);
+ assert(status);
+
+ if (!status)
+ return false;
+
break;
}
case texture_format::cATC_RGB:
@@ -1206,6 +1483,66 @@ namespace basisu
return true;
}
+	// Unpacks a single 4x4 HDR block (ASTC HDR, UASTC HDR, or BC6H signed/unsigned) to 16 float RGBA pixels.
+	// BC6H carries no alpha, so alpha is written as 1.0 for those formats.
+	// Returns false if the block could not be decoded, or (after asserting) if fmt is not one of the formats handled here.
+	bool unpack_block_hdr(texture_format fmt, const void* pBlock, vec4F* pPixels)
+	{
+		switch (fmt)
+		{
+		case texture_format::cASTC_HDR_4x4:
+		case texture_format::cUASTC_HDR_4x4:
+		{
+#if 1
+			bool status = basisu_astc::astc::decompress_hdr(&pPixels[0][0], (uint8_t*)pBlock, 4, 4);
+			assert(status);
+			if (!status)
+				return false;
+#else
+			basist::half_float half_block[16][4];
+
+			astc_helpers::log_astc_block log_blk;
+			if (!astc_helpers::unpack_block(pBlock, log_blk, 4, 4))
+				return false;
+			if (!astc_helpers::decode_block(log_blk, half_block, 4, 4, astc_helpers::cDecodeModeHDR16))
+				return false;
+
+			for (uint32_t p = 0; p < 16; p++)
+			{
+				pPixels[p][0] = basist::half_to_float(half_block[p][0]);
+				pPixels[p][1] = basist::half_to_float(half_block[p][1]);
+				pPixels[p][2] = basist::half_to_float(half_block[p][2]);
+				pPixels[p][3] = basist::half_to_float(half_block[p][3]);
+			}
+
+			//memset(pPixels, 0, sizeof(vec4F) * 16);
+#endif
+			return true;
+		}
+		case texture_format::cBC6HSigned:
+		case texture_format::cBC6HUnsigned:
+		{
+			// Zero-initialized so the pixels written below are well defined even when unpack_bc6h() rejects the block.
+			basist::half_float half_block[16][3] = {};
+
+			// Keep the decode status instead of discarding it, so callers learn about invalid blocks.
+			const bool status = unpack_bc6h(pBlock, half_block, fmt == texture_format::cBC6HSigned);
+
+			for (uint32_t p = 0; p < 16; p++)
+			{
+				pPixels[p][0] = basist::half_to_float(half_block[p][0]);
+				pPixels[p][1] = basist::half_to_float(half_block[p][1]);
+				pPixels[p][2] = basist::half_to_float(half_block[p][2]);
+				pPixels[p][3] = 1.0f;
+			}
+
+			return status;
+		}
+		default:
+		{
+			break;
+		}
+		}
+
+		assert(0);
+		return false;
+	}
+
bool gpu_image::unpack(image& img) const
{
img.resize(get_pixel_width(), get_pixel_height());
@@ -1252,7 +1589,48 @@ namespace basisu
return success;
}
+
+	// Decodes every block of an HDR gpu_image (ASTC HDR, UASTC HDR, or BC6H signed/unsigned) into img.
+	// img is resized to this image's pixel dimensions and cleared to 0 before decoding.
+	// Returns false (and asserts) for any other format, and false if any block failed to unpack;
+	// a failed block's pixel buffer is still copied into img.
+	bool gpu_image::unpack_hdr(imagef& img) const
+	{
+		const bool is_hdr_fmt =
+			(m_fmt == texture_format::cASTC_HDR_4x4) ||
+			(m_fmt == texture_format::cUASTC_HDR_4x4) ||
+			(m_fmt == texture_format::cBC6HUnsigned) ||
+			(m_fmt == texture_format::cBC6HSigned);
+
+		if (!is_hdr_fmt)
+		{
+			// Can't call on LDR images, at least currently. (Could unpack the LDR data and convert to float.)
+			assert(0);
+			return false;
+		}
+
+		img.resize(get_pixel_width(), get_pixel_height());
+		img.set_all(vec4F(0.0f));
+
+		// Nothing to decode for an empty image.
+		if ((img.get_width() == 0) || (img.get_height() == 0))
+			return true;
+
+		assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize));
+		vec4F block_pixels[cMaxBlockSize * cMaxBlockSize];
+		clear_obj(block_pixels);
+
+		bool all_blocks_ok = true;
+
+		for (uint32_t block_y = 0; block_y < m_blocks_y; block_y++)
+		{
+			for (uint32_t block_x = 0; block_x < m_blocks_x; block_x++)
+			{
+				const void* pSrc_block = get_block_ptr(block_x, block_y);
+
+				const bool block_ok = unpack_block_hdr(m_fmt, pSrc_block, block_pixels);
+				all_blocks_ok = all_blocks_ok && block_ok;
+
+				// Clipped copy, so partial blocks at the right/bottom edges stay inside the image.
+				img.set_block_clipped(block_pixels, block_x * m_block_width, block_y * m_block_height, m_block_width, m_block_height);
+			} // block_x
+		} // block_y
+
+		return all_blocks_ok;
+	}
+ // KTX1 texture file writing
static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A };
// KTX/GL enums
@@ -1273,6 +1651,8 @@ namespace basisu
KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278,
KTX_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C,
KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D,
+ KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT = 0x8E8E,
+ KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F,
KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00,
KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02,
KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0,
@@ -1319,6 +1699,7 @@ namespace basisu
uint32_t width = 0, height = 0, total_levels = 0;
basisu::texture_format fmt = texture_format::cInvalidTextureFormat;
+ // Sanity check the input
if (cubemap_flag)
{
if ((gpu_images.size() % 6) != 0)
@@ -1327,7 +1708,7 @@ namespace basisu
return false;
}
}
-
+
for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)
{
const gpu_image_vec &levels = gpu_images[array_index];
@@ -1426,6 +1807,18 @@ namespace basisu
base_internal_fmt = KTX_RGBA;
break;
}
+ case texture_format::cBC6HSigned:
+ {
+ internal_fmt = KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT;
+ base_internal_fmt = KTX_RGBA;
+ break;
+ }
+ case texture_format::cBC6HUnsigned:
+ {
+ internal_fmt = KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT;
+ base_internal_fmt = KTX_RGBA;
+ break;
+ }
case texture_format::cBC7:
{
internal_fmt = KTX_COMPRESSED_RGBA_BPTC_UNORM;
@@ -1443,7 +1836,10 @@ namespace basisu
base_internal_fmt = KTX_RGBA;
break;
}
- case texture_format::cASTC4x4:
+ // We use different enums for HDR vs. LDR ASTC, but internally they are both just ASTC.
+ case texture_format::cASTC_LDR_4x4:
+ case texture_format::cASTC_HDR_4x4:
+ case texture_format::cUASTC_HDR_4x4: // UASTC_HDR is just HDR-only ASTC
{
internal_fmt = KTX_COMPRESSED_RGBA_ASTC_4x4_KHR;
base_internal_fmt = KTX_RGBA;
@@ -1496,17 +1892,17 @@ namespace basisu
return false;
}
}
-
+
ktx_header header;
header.clear();
memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id));
header.m_endianness = KTX_ENDIAN;
-
+
header.m_pixelWidth = width;
header.m_pixelHeight = height;
-
+
header.m_glTypeSize = 1;
-
+
header.m_glInternalFormat = internal_fmt;
header.m_glBaseInternalFormat = base_internal_fmt;
@@ -1517,12 +1913,12 @@ namespace basisu
header.m_numberOfMipmapLevels = total_levels;
header.m_numberOfFaces = cubemap_flag ? 6 : 1;
- append_vector(ktx_data, (uint8_t *)&header, sizeof(header));
+ append_vector(ktx_data, (uint8_t*)&header, sizeof(header));
for (uint32_t level_index = 0; level_index < total_levels; level_index++)
{
uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes();
-
+
if ((header.m_numberOfFaces == 1) || (header.m_numberOfArrayElements > 1))
{
img_size = img_size * header.m_numberOfFaces * maximum<uint32_t>(1, header.m_numberOfArrayElements);
@@ -1531,9 +1927,10 @@ namespace basisu
assert(img_size && ((img_size & 3) == 0));
packed_uint<4> packed_img_size(img_size);
- append_vector(ktx_data, (uint8_t *)&packed_img_size, sizeof(packed_img_size));
+ append_vector(ktx_data, (uint8_t*)&packed_img_size, sizeof(packed_img_size));
uint32_t bytes_written = 0;
+ (void)bytes_written;
for (uint32_t array_index = 0; array_index < maximum<uint32_t>(1, header.m_numberOfArrayElements); array_index++)
{
@@ -1541,11 +1938,11 @@ namespace basisu
{
const gpu_image& img = gpu_images[cubemap_flag ? (array_index * 6 + face_index) : array_index][level_index];
- append_vector(ktx_data, (uint8_t *)img.get_ptr(), img.get_size_in_bytes());
-
+ append_vector(ktx_data, (uint8_t*)img.get_ptr(), img.get_size_in_bytes());
+
bytes_written += img.get_size_in_bytes();
}
-
+
} // array_index
} // level_index
@@ -1553,7 +1950,58 @@ namespace basisu
return true;
}
- bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag)
+	// Returns true for the BC formats listed below (BC1 variants, BC3, BC4, BC5, BC6H signed/unsigned, BC7), false for every other format.
+	bool does_dds_support_format(texture_format fmt)
+	{
+		bool supported = false;
+
+		switch (fmt)
+		{
+		case texture_format::cBC1_NV:
+		case texture_format::cBC1_AMD:
+		case texture_format::cBC1:
+		case texture_format::cBC3:
+		case texture_format::cBC4:
+		case texture_format::cBC5:
+		case texture_format::cBC6HSigned:
+		case texture_format::cBC6HUnsigned:
+		case texture_format::cBC7:
+			supported = true;
+			break;
+		default:
+			break;
+		}
+
+		return supported;
+	}
+
+ // Only supports the basic DirectX BC texture formats.
+ // gpu_images array is: [face/layer][mipmap level]
+ // For cubemap arrays, # of face/layers must be a multiple of 6.
+ // Accepts 2D, 2D mipmapped, 2D array, 2D array mipmapped
+ // and cubemap, cubemap mipmapped, and cubemap array mipmapped.
+ // NOTE: The lines above describe the intended contract. The body here is a stub: it ignores its arguments,
+ // leaves dds_data untouched, and always returns false.
+ bool write_dds_file(uint8_vec &dds_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)
+ {
+ return false;
+ }
+
+	// Builds DDS file data for gpu_images in memory via the buffer overload of write_dds_file(), then writes it to pFilename.
+	// Returns false if the data could not be built, or (after printing a message to stderr) if the file write fails.
+	bool write_dds_file(const char* pFilename, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)
+	{
+		uint8_vec file_bytes;
+
+		const bool built = write_dds_file(file_bytes, gpu_images, cubemap_flag, use_srgb_format);
+		if (!built)
+			return false;
+
+		const bool written = write_vec_to_file(pFilename, file_bytes);
+		if (!written)
+			fprintf(stderr, "write_dds_file: Failed writing DDS file data\n");
+
+		return written;
+	}
+
+ // Stub: no DDS reading is implemented in this body. It ignores pFilename, leaves ldr_mips and hdr_mips untouched,
+ // and always returns false.
+ bool read_uncompressed_dds_file(const char* pFilename, basisu::vector<image> &ldr_mips, basisu::vector<imagef>& hdr_mips)
+ {
+ return false;
+ }
+
+ bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag, bool use_srgb_format)
{
std::string extension(string_tolower(string_get_extension(pFilename)));
@@ -1570,8 +2018,8 @@ namespace basisu
}
else if (extension == "dds")
{
- // TODO
- return false;
+ if (!write_dds_file(filedata, g, cubemap_flag, use_srgb_format))
+ return false;
}
else
{
@@ -1583,11 +2031,18 @@ namespace basisu
return basisu::write_vec_to_file(pFilename, filedata);
}
- bool write_compressed_texture_file(const char* pFilename, const gpu_image& g)
+	// Convenience overload for a single texture's mip chain: wraps g in a one-element array and
+	// forwards to the array overload with cubemap_flag set to false.
+	bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g, bool use_srgb_format)
+	{
+		const bool cubemap_flag = false;
+
+		basisu::vector<gpu_image_vec> single_texture;
+		single_texture.push_back(g);
+
+		return write_compressed_texture_file(pFilename, single_texture, cubemap_flag, use_srgb_format);
+	}
+
+ bool write_compressed_texture_file(const char* pFilename, const gpu_image& g, bool use_srgb_format)
{
basisu::vector<gpu_image_vec> v;
enlarge_vector(v, 1)->push_back(g);
- return write_compressed_texture_file(pFilename, v, false);
+ return write_compressed_texture_file(pFilename, v, false, use_srgb_format);
}
//const uint32_t OUT_FILE_MAGIC = 'TEXC';
@@ -1626,5 +2081,49 @@ namespace basisu
return fclose(pFile) != EOF;
}
+
+	// The .astc texture format is readable using ARM's astcenc, AMD Compressonator, and other engines/tools. It oddly doesn't support mipmaps, limiting
+	// its usefulness/relevance.
+	// https://github.com/ARM-software/astc-encoder/blob/main/Docs/FileFormat.md
+	//
+	// Writes a 2D .astc file: a 16-byte header followed by ceil(dim_x / block_width) * ceil(dim_y / block_height)
+	// 16-byte blocks copied from pBlocks.
+	// Returns false if an argument is invalid or can't be represented in the header (block dimensions are stored in
+	// 1 byte each, image dimensions in 3 bytes each), if the payload size doesn't fit in 32 bits, or if the file write fails.
+	bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y)
+	{
+		assert(pBlocks && (block_width >= 4) && (block_height >= 4) && (dim_x > 0) && (dim_y > 0));
+
+		// Enforce the same preconditions in release builds, rather than copying from a null pointer or dividing by zero below.
+		if (!pBlocks || (block_width < 4) || (block_height < 4) || !dim_x || !dim_y)
+			return false;
+
+		// Larger values would be silently truncated by the byte-wise header writes below.
+		if ((block_width > 0xFF) || (block_height > 0xFF) || (dim_x > 0xFFFFFF) || (dim_y > 0xFFFFFF))
+			return false;
+
+		uint8_vec file_data;
+
+		// Magic number
+		file_data.push_back(0x13);
+		file_data.push_back(0xAB);
+		file_data.push_back(0xA1);
+		file_data.push_back(0x5C);
+
+		// Block dimensions (x, y, z)
+		file_data.push_back((uint8_t)block_width);
+		file_data.push_back((uint8_t)block_height);
+		file_data.push_back(1);
+
+		// Image dimensions (x, y, z), each 24 bits, low byte first
+		file_data.push_back((uint8_t)dim_x);
+		file_data.push_back((uint8_t)(dim_x >> 8));
+		file_data.push_back((uint8_t)(dim_x >> 16));
+
+		file_data.push_back((uint8_t)dim_y);
+		file_data.push_back((uint8_t)(dim_y >> 8));
+		file_data.push_back((uint8_t)(dim_y >> 16));
+
+		file_data.push_back((uint8_t)1);
+		file_data.push_back((uint8_t)0);
+		file_data.push_back((uint8_t)0);
+
+		const uint32_t num_blocks_x = (dim_x + block_width - 1) / block_width;
+		const uint32_t num_blocks_y = (dim_y + block_height - 1) / block_height;
+
+		const size_t cur_size = file_data.size();
+
+		// Size the payload in 64 bits so a huge image can't wrap the 32-bit byte count.
+		const uint64_t total_bytes64 = (uint64_t)num_blocks_x * num_blocks_y * 16;
+		if (total_bytes64 > (0xFFFFFFFFull - cur_size))
+			return false;
+
+		const uint32_t total_bytes = (uint32_t)total_bytes64;
+
+		file_data.resize(cur_size + total_bytes);
+
+		memcpy(&file_data[cur_size], pBlocks, total_bytes);
+
+		return write_vec_to_file(pFilename, file_data);
+	}
+
} // basisu
diff --git a/thirdparty/basis_universal/encoder/basisu_gpu_texture.h b/thirdparty/basis_universal/encoder/basisu_gpu_texture.h
index 619926f5f9..67c2a2bc5e 100644
--- a/thirdparty/basis_universal/encoder/basisu_gpu_texture.h
+++ b/thirdparty/basis_universal/encoder/basisu_gpu_texture.h
@@ -1,5 +1,5 @@
// basisu_gpu_texture.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -48,6 +48,7 @@ namespace basisu
}
inline texture_format get_format() const { return m_fmt; }
+ // Returns whatever is_hdr_texture_format() reports for this image's format (m_fmt).
+ inline bool is_hdr() const { return is_hdr_texture_format(m_fmt); }
// Width/height in pixels
inline uint32_t get_pixel_width() const { return m_width; }
@@ -100,9 +101,13 @@ namespace basisu
m_blocks.resize(m_blocks_x * m_blocks_y * m_qwords_per_block);
}
+ // Unpacks LDR textures only.
bool unpack(image& img) const;
+
+ // Unpacks HDR textures only.
+ bool unpack_hdr(imagef& img) const;
- void override_dimensions(uint32_t w, uint32_t h)
+ inline void override_dimensions(uint32_t w, uint32_t h)
{
m_width = w;
m_height = h;
@@ -116,39 +121,50 @@ namespace basisu
typedef basisu::vector<gpu_image> gpu_image_vec;
- // KTX file writing
-
+ // KTX1 file writing
bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag);
-
- bool write_compressed_texture_file(const char *pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag);
- inline bool write_compressed_texture_file(const char *pFilename, const gpu_image_vec &g)
- {
- basisu::vector<gpu_image_vec> a;
- a.push_back(g);
- return write_compressed_texture_file(pFilename, a, false);
- }
+ bool does_dds_support_format(texture_format fmt);
+ bool write_dds_file(uint8_vec& dds_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format);
+ bool write_dds_file(const char* pFilename, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format);
+
+ // Currently reads 2D 32bpp RGBA, 16-bit HALF RGBA, or 32-bit FLOAT RGBA, with or without mipmaps. No tex arrays or cubemaps, yet.
+ bool read_uncompressed_dds_file(const char* pFilename, basisu::vector<image>& ldr_mips, basisu::vector<imagef>& hdr_mips);
- bool write_compressed_texture_file(const char *pFilename, const gpu_image &g);
+ // Supports DDS and KTX
+ bool write_compressed_texture_file(const char *pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag, bool use_srgb_format);
+ bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g, bool use_srgb_format);
+ bool write_compressed_texture_file(const char *pFilename, const gpu_image &g, bool use_srgb_format);
bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi);
// GPU texture block unpacking
+ // For ETC1, use in basisu_etc.h: bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha)
void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels);
bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha);
void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride);
bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels);
void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels);
bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels);
- bool unpack_bc7(const void* pBlock_bits, color_rgba* pPixels);
+ bool unpack_bc7(const void* pBlock_bits, color_rgba* pPixels); // full format
+ bool unpack_bc6h(const void* pSrc_block, void* pDst_block, bool is_signed, uint32_t dest_pitch_in_halfs = 4 * 3); // full format, outputs HALF values, RGB texels only (not RGBA)
void unpack_atc(const void* pBlock_bits, color_rgba* pPixels);
+ // We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment.
bool unpack_fxt1(const void* p, color_rgba* pPixels);
+ // PVRTC2 is currently limited to only what our transcoder outputs (non-interpolated, hard_flag=1 modulation=0). In this mode, PVRTC2 looks much like BC1/ATC.
bool unpack_pvrtc2(const void* p, color_rgba* pPixels);
void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c);
void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels);
-
+
// unpack_block() is primarily intended to unpack texture data created by the transcoder.
- // For some texture formats (like ETC2 RGB, PVRTC2, FXT1) it's not a complete implementation.
+ // For some texture formats (like ETC2 RGB, PVRTC2, FXT1) it's not yet a complete implementation.
+ // Unpacks LDR texture formats only.
bool unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels);
-
+
+ // Unpacks HDR texture formats only.
+ bool unpack_block_hdr(texture_format fmt, const void* pBlock, vec4F* pPixels);
+
+ bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y);
+
} // namespace basisu
+
diff --git a/thirdparty/basis_universal/encoder/basisu_kernels_declares.h b/thirdparty/basis_universal/encoder/basisu_kernels_declares.h
index b03e2ea6e8..9b85a594ee 100644
--- a/thirdparty/basis_universal/encoder/basisu_kernels_declares.h
+++ b/thirdparty/basis_universal/encoder/basisu_kernels_declares.h
@@ -1,5 +1,5 @@
// basisu_kernels_declares.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_kernels_imp.h b/thirdparty/basis_universal/encoder/basisu_kernels_imp.h
index dcf1ce069a..123862b1dd 100644
--- a/thirdparty/basis_universal/encoder/basisu_kernels_imp.h
+++ b/thirdparty/basis_universal/encoder/basisu_kernels_imp.h
@@ -1,5 +1,5 @@
// basisu_kernels_imp.h - Do not directly include
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp b/thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp
index 4f15a5a12b..36a493d7ed 100644
--- a/thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp
@@ -1,5 +1,5 @@
// basisu_kernels_sse.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -22,22 +22,6 @@
#include <intrin.h>
#endif
-#if !defined(_MSC_VER)
- #if __AVX__ || __AVX2__ || __AVX512F__
- #error Please check your compiler options
- #endif
-
- #if CPPSPMD_SSE2
- #if __SSE4_1__ || __SSE3__ || __SSE4_2__ || __SSSE3__
- #error SSE4.1/SSE3/SSE4.2/SSSE3 cannot be enabled to use this file
- #endif
- #else
- #if !__SSE4_1__ || !__SSE3__ || !__SSSE3__
- #error Please check your compiler options
- #endif
- #endif
-#endif
-
#include "cppspmd_sse.h"
#include "cppspmd_type_aliases.h"
diff --git a/thirdparty/basis_universal/encoder/basisu_miniz.h b/thirdparty/basis_universal/encoder/basisu_miniz.h
index 18de997232..dab38f9f92 100644
--- a/thirdparty/basis_universal/encoder/basisu_miniz.h
+++ b/thirdparty/basis_universal/encoder/basisu_miniz.h
@@ -3,7 +3,7 @@
Forked from the public domain/unlicense version at: https://code.google.com/archive/p/miniz/
- Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+ Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -1973,7 +1973,7 @@ static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahe
(TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) );
if (!probe_len)
{
- *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); break;
+ *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, (mz_uint)TDEFL_MAX_MATCH_LEN); break;
}
else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len)
{
@@ -2101,7 +2101,7 @@ static mz_bool tdefl_compress_fast(tdefl_compressor *d)
total_lz_bytes += cur_match_len;
lookahead_pos += cur_match_len;
- dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE);
+ dict_size = MZ_MIN(dict_size + cur_match_len, (mz_uint)TDEFL_LZ_DICT_SIZE);
cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK;
MZ_ASSERT(lookahead_size >= cur_match_len);
lookahead_size -= cur_match_len;
@@ -2129,7 +2129,7 @@ static mz_bool tdefl_compress_fast(tdefl_compressor *d)
d->m_huff_count[0][lit]++;
lookahead_pos++;
- dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE);
+ dict_size = MZ_MIN(dict_size + 1, (mz_uint)TDEFL_LZ_DICT_SIZE);
cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK;
lookahead_size--;
@@ -2283,7 +2283,7 @@ static mz_bool tdefl_compress_normal(tdefl_compressor *d)
d->m_lookahead_pos += len_to_move;
MZ_ASSERT(d->m_lookahead_size >= len_to_move);
d->m_lookahead_size -= len_to_move;
- d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, TDEFL_LZ_DICT_SIZE);
+ d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE);
// Check if it's time to flush the current LZ codes to the internal output buffer.
if ( (d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) ||
( (d->m_total_lz_bytes > 31*1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) )
diff --git a/thirdparty/basis_universal/encoder/basisu_opencl.cpp b/thirdparty/basis_universal/encoder/basisu_opencl.cpp
index 81e3090a26..e0611c18ee 100644
--- a/thirdparty/basis_universal/encoder/basisu_opencl.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_opencl.cpp
@@ -1,5 +1,5 @@
// basisu_opencl.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_opencl.h b/thirdparty/basis_universal/encoder/basisu_opencl.h
index 4194a08418..2546a18dab 100644
--- a/thirdparty/basis_universal/encoder/basisu_opencl.h
+++ b/thirdparty/basis_universal/encoder/basisu_opencl.h
@@ -1,5 +1,5 @@
// basisu_opencl.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Note: Undefine or set BASISU_SUPPORT_OPENCL to 0 to completely OpenCL support.
//
diff --git a/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp
index 596fc197e6..4bf9516f90 100644
--- a/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp
@@ -1,5 +1,5 @@
// basisu_pvrtc1_4.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h
index db6985a439..a9fe6b27aa 100644
--- a/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h
+++ b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h
@@ -1,5 +1,5 @@
// basisu_pvrtc1_4.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -231,7 +231,18 @@ namespace basisu
inline void set_to_black()
{
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+#endif
memset(m_blocks.get_ptr(), 0, m_blocks.size_in_bytes());
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
}
inline bool get_block_uses_transparent_modulation(uint32_t bx, uint32_t by) const
diff --git a/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp b/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp
index 597cb3f618..46cd837376 100644
--- a/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp
@@ -1,5 +1,5 @@
// basisu_resampler_filters.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_resampler.cpp b/thirdparty/basis_universal/encoder/basisu_resampler.cpp
index f4cedf0031..a00c63335d 100644
--- a/thirdparty/basis_universal/encoder/basisu_resampler.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_resampler.cpp
@@ -1,5 +1,5 @@
// basisu_resampler.cpp
-// Copyright (C) 2019 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_resampler.h b/thirdparty/basis_universal/encoder/basisu_resampler.h
index dc0978caeb..ac1ef73d7f 100644
--- a/thirdparty/basis_universal/encoder/basisu_resampler.h
+++ b/thirdparty/basis_universal/encoder/basisu_resampler.h
@@ -1,5 +1,5 @@
// basisu_resampler.h
-// Copyright (C) 2019 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_resampler_filters.h b/thirdparty/basis_universal/encoder/basisu_resampler_filters.h
index 0ebb51c334..4d66ac2c70 100644
--- a/thirdparty/basis_universal/encoder/basisu_resampler_filters.h
+++ b/thirdparty/basis_universal/encoder/basisu_resampler_filters.h
@@ -1,5 +1,5 @@
// basisu_resampler_filters.h
-// Copyright (C) 2019 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_ssim.cpp b/thirdparty/basis_universal/encoder/basisu_ssim.cpp
index cceb400b88..608ce937fc 100644
--- a/thirdparty/basis_universal/encoder/basisu_ssim.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_ssim.cpp
@@ -1,5 +1,5 @@
// basisu_ssim.cpp
-// Copyright (C) 2019 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_ssim.h b/thirdparty/basis_universal/encoder/basisu_ssim.h
index 986ca3bbdf..51cd2d78fd 100644
--- a/thirdparty/basis_universal/encoder/basisu_ssim.h
+++ b/thirdparty/basis_universal/encoder/basisu_ssim.h
@@ -1,5 +1,5 @@
// basisu_ssim.h
-// Copyright (C) 2019 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp b/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp
index 271bbc6f1d..51f6e979d4 100644
--- a/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp
@@ -1,5 +1,5 @@
// basisu_uastc_enc.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -13,11 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "basisu_uastc_enc.h"
-
-#if BASISU_USE_ASTC_DECOMPRESS
-#include "basisu_astc_decomp.h"
-#endif
-
+#include "3rdparty/android_astc_decomp.h"
#include "basisu_gpu_texture.h"
#include "basisu_bc7enc.h"
@@ -384,6 +380,7 @@ namespace basisu
}
uint32_t total_endpoint_bits = 0;
+ (void)total_endpoint_bits;
for (uint32_t i = 0; i < total_tq_values; i++)
{
@@ -428,6 +425,8 @@ namespace basisu
#endif
uint32_t total_weight_bits = 0;
+ (void)total_weight_bits;
+
const uint32_t plane_shift = (total_planes == 2) ? 1 : 0;
for (uint32_t i = 0; i < 16 * total_planes; i++)
{
@@ -3175,6 +3174,7 @@ namespace basisu
const bool favor_bc7_error = !favor_uastc_error && ((flags & cPackUASTCFavorBC7Error) != 0);
//const bool etc1_perceptual = true;
+ // TODO: This uses 64KB of stack space!
uastc_encode_results results[MAX_ENCODE_RESULTS];
level = clampi(level, cPackUASTCLevelFastest, cPackUASTCLevelVerySlow);
@@ -3567,7 +3567,6 @@ namespace basisu
success = basist::unpack_uastc(temp_block, (basist::color32 *)temp_block_unpacked, false);
VALIDATE(success);
-#if BASISU_USE_ASTC_DECOMPRESS
// Now round trip to packed ASTC and back, then decode to pixels.
uint32_t astc_data[4];
@@ -3580,7 +3579,7 @@ namespace basisu
}
color_rgba decoded_astc_block[4][4];
- success = basisu_astc::astc::decompress((uint8_t*)decoded_astc_block, (uint8_t*)&astc_data, false, 4, 4);
+ success = basisu_astc::astc::decompress_ldr((uint8_t*)decoded_astc_block, (uint8_t*)&astc_data, false, 4, 4);
VALIDATE(success);
for (uint32_t y = 0; y < 4; y++)
@@ -3595,7 +3594,6 @@ namespace basisu
VALIDATE(temp_block_unpacked[y][x].c[3] == decoded_uastc_block[y][x].a);
}
}
-#endif
}
#endif
@@ -3789,8 +3787,9 @@ namespace basisu
{
uint64_t m_sel;
uint32_t m_ofs;
+ uint32_t m_pad; // avoid implicit padding for selector_bitsequence_hash
selector_bitsequence() { }
- selector_bitsequence(uint32_t bit_ofs, uint64_t sel) : m_sel(sel), m_ofs(bit_ofs) { }
+ selector_bitsequence(uint32_t bit_ofs, uint64_t sel) : m_sel(sel), m_ofs(bit_ofs), m_pad(0) { }
bool operator== (const selector_bitsequence& other) const
{
return (m_ofs == other.m_ofs) && (m_sel == other.m_sel);
@@ -3811,7 +3810,7 @@ namespace basisu
{
std::size_t operator()(selector_bitsequence const& s) const noexcept
{
- return static_cast<std::size_t>(hash_hsieh((uint8_t *)&s, sizeof(s)) ^ s.m_sel);
+ return hash_hsieh((const uint8_t*)&s, sizeof(s));
}
};
diff --git a/thirdparty/basis_universal/encoder/basisu_uastc_enc.h b/thirdparty/basis_universal/encoder/basisu_uastc_enc.h
index ba39a558b3..54d39380e6 100644
--- a/thirdparty/basis_universal/encoder/basisu_uastc_enc.h
+++ b/thirdparty/basis_universal/encoder/basisu_uastc_enc.h
@@ -1,5 +1,5 @@
// basisu_uastc_enc.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/cppspmd_flow.h b/thirdparty/basis_universal/encoder/cppspmd_flow.h
index f6930476aa..93934173c4 100644
--- a/thirdparty/basis_universal/encoder/cppspmd_flow.h
+++ b/thirdparty/basis_universal/encoder/cppspmd_flow.h
@@ -1,7 +1,7 @@
// Do not include this header directly.
// Control flow functionality in common between all the headers.
//
-// Copyright 2020-2021 Binomial LLC
+// Copyright 2020-2024 Binomial LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/cppspmd_math.h b/thirdparty/basis_universal/encoder/cppspmd_math.h
index e7b3202b8e..3032df865f 100644
--- a/thirdparty/basis_universal/encoder/cppspmd_math.h
+++ b/thirdparty/basis_universal/encoder/cppspmd_math.h
@@ -1,6 +1,6 @@
// Do not include this header directly.
//
-// Copyright 2020-2021 Binomial LLC
+// Copyright 2020-2024 Binomial LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -646,7 +646,7 @@ CPPSPMD_FORCE_INLINE vint spmd_kernel::count_set_bits(vint x)
{
vint v = x - (VUINT_SHIFT_RIGHT(x, 1) & 0x55555555);
vint v1 = (v & 0x33333333) + (VUINT_SHIFT_RIGHT(v, 2) & 0x33333333);
- return VUINT_SHIFT_RIGHT(((v1 + VUINT_SHIFT_RIGHT(v1, 4) & 0xF0F0F0F) * 0x1010101), 24);
+ return VUINT_SHIFT_RIGHT(((v1 + (VUINT_SHIFT_RIGHT(v1, 4) & 0xF0F0F0F)) * 0x1010101), 24);
}
CPPSPMD_FORCE_INLINE vint cmple_epu16(const vint &a, const vint &b)
diff --git a/thirdparty/basis_universal/encoder/cppspmd_math_declares.h b/thirdparty/basis_universal/encoder/cppspmd_math_declares.h
index cdb6447b62..f76c9b7e38 100644
--- a/thirdparty/basis_universal/encoder/cppspmd_math_declares.h
+++ b/thirdparty/basis_universal/encoder/cppspmd_math_declares.h
@@ -1,7 +1,7 @@
// Do not include this header directly.
// This header defines shared struct spmd_kernel helpers.
//
-// Copyright 2020-2021 Binomial LLC
+// Copyright 2020-2024 Binomial LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/cppspmd_sse.h b/thirdparty/basis_universal/encoder/cppspmd_sse.h
index 4c61bab7b1..79dfa1561a 100644
--- a/thirdparty/basis_universal/encoder/cppspmd_sse.h
+++ b/thirdparty/basis_universal/encoder/cppspmd_sse.h
@@ -450,7 +450,7 @@ struct spmd_kernel
CPPSPMD_FORCE_INLINE explicit operator vint() const;
private:
- vbool& operator=(const vbool&);
+ //vbool& operator=(const vbool&);
};
friend vbool operator!(const vbool& v);
@@ -481,7 +481,7 @@ struct spmd_kernel
CPPSPMD_FORCE_INLINE explicit vfloat(int value) : m_value(_mm_set1_ps((float)value)) { }
private:
- vfloat& operator=(const vfloat&);
+ //vfloat& operator=(const vfloat&);
};
CPPSPMD_FORCE_INLINE vfloat& store(vfloat& dst, const vfloat& src)
@@ -514,7 +514,7 @@ struct spmd_kernel
float* m_pValue;
private:
- float_lref& operator=(const float_lref&);
+ //float_lref& operator=(const float_lref&);
};
CPPSPMD_FORCE_INLINE const float_lref& store(const float_lref& dst, const vfloat& src)
@@ -561,7 +561,7 @@ struct spmd_kernel
float* m_pValue;
private:
- float_vref& operator=(const float_vref&);
+ //float_vref& operator=(const float_vref&);
};
// Varying ref to varying float
@@ -571,7 +571,7 @@ struct spmd_kernel
vfloat* m_pValue;
private:
- vfloat_vref& operator=(const vfloat_vref&);
+ //vfloat_vref& operator=(const vfloat_vref&);
};
// Varying ref to varying int
@@ -581,7 +581,7 @@ struct spmd_kernel
vint* m_pValue;
private:
- vint_vref& operator=(const vint_vref&);
+ //vint_vref& operator=(const vint_vref&);
};
CPPSPMD_FORCE_INLINE const float_vref& store(const float_vref& dst, const vfloat& src);
@@ -624,7 +624,7 @@ struct spmd_kernel
int* m_pValue;
private:
- int_lref& operator=(const int_lref&);
+ //int_lref& operator=(const int_lref&);
};
CPPSPMD_FORCE_INLINE const int_lref& store(const int_lref& dst, const vint& src)
@@ -663,7 +663,7 @@ struct spmd_kernel
int16_t* m_pValue;
private:
- int16_lref& operator=(const int16_lref&);
+ //int16_lref& operator=(const int16_lref&);
};
CPPSPMD_FORCE_INLINE const int16_lref& store(const int16_lref& dst, const vint& src)
@@ -720,7 +720,7 @@ struct spmd_kernel
const int* m_pValue;
private:
- cint_lref& operator=(const cint_lref&);
+ //cint_lref& operator=(const cint_lref&);
};
CPPSPMD_FORCE_INLINE vint load(const cint_lref& src)
@@ -742,7 +742,7 @@ struct spmd_kernel
int* m_pValue;
private:
- int_vref& operator=(const int_vref&);
+ //int_vref& operator=(const int_vref&);
};
// Varying ref to constant ints
@@ -752,7 +752,7 @@ struct spmd_kernel
const int* m_pValue;
private:
- cint_vref& operator=(const cint_vref&);
+ //cint_vref& operator=(const cint_vref&);
};
// Varying int
@@ -810,7 +810,7 @@ struct spmd_kernel
}
private:
- vint& operator=(const vint&);
+ //vint& operator=(const vint&);
};
// Load/store linear int
@@ -1206,7 +1206,7 @@ struct spmd_kernel
CPPSPMD_FORCE_INLINE vint load_all(const vint_vref& src)
{
// TODO: There's surely a better way
- __m128i k;
+ __m128i k = _mm_setzero_si128();
k = insert_x(k, ((int*)(&src.m_pValue[extract_x(src.m_vindex)]))[0]);
k = insert_y(k, ((int*)(&src.m_pValue[extract_y(src.m_vindex)]))[1]);
@@ -1261,7 +1261,7 @@ struct spmd_kernel
}
private:
- lint& operator=(const lint&);
+ //lint& operator=(const lint&);
};
CPPSPMD_FORCE_INLINE lint& store_all(lint& dst, const lint& src)
diff --git a/thirdparty/basis_universal/encoder/cppspmd_type_aliases.h b/thirdparty/basis_universal/encoder/cppspmd_type_aliases.h
index 0dfb28b88f..2600481239 100644
--- a/thirdparty/basis_universal/encoder/cppspmd_type_aliases.h
+++ b/thirdparty/basis_universal/encoder/cppspmd_type_aliases.h
@@ -1,7 +1,7 @@
// cppspmd_type_aliases.h
// Do not include this file directly
//
-// Copyright 2020-2021 Binomial LLC
+// Copyright 2020-2024 Binomial LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/encoder/pvpngreader.cpp b/thirdparty/basis_universal/encoder/pvpngreader.cpp
index 46639f2796..6b32f66cbe 100644
--- a/thirdparty/basis_universal/encoder/pvpngreader.cpp
+++ b/thirdparty/basis_universal/encoder/pvpngreader.cpp
@@ -163,7 +163,7 @@ public:
{
if ((sizeof(size_t) == sizeof(uint32_t)) && (new_size > 0x7FFFFFFFUL))
return 0;
- m_buf.resize(new_size);
+ m_buf.resize((size_t)new_size);
}
memcpy(&m_buf[(size_t)m_ofs], pBuf, len);
@@ -178,11 +178,11 @@ public:
return 0;
uint64_t max_bytes = minimum<uint64_t>(len, m_buf.size() - m_ofs);
- memcpy(pBuf, &m_buf[(size_t)m_ofs], max_bytes);
+ memcpy(pBuf, &m_buf[(size_t)m_ofs], (size_t)max_bytes);
m_ofs += max_bytes;
- return max_bytes;
+ return (size_t)max_bytes;
}
};
@@ -249,11 +249,11 @@ public:
return 0;
uint64_t max_bytes = minimum<uint64_t>(len, m_buf_size - m_ofs);
- memcpy(pBuf, &m_pBuf[(size_t)m_ofs], max_bytes);
+ memcpy(pBuf, &m_pBuf[(size_t)m_ofs], (size_t)max_bytes);
m_ofs += max_bytes;
- return max_bytes;
+ return (size_t)max_bytes;
}
};
@@ -1626,8 +1626,8 @@ int png_decoder::png_decode_start()
if (m_ihdr.m_ilace_type == 1)
{
- int i;
- uint32_t total_lines, lines_processed;
+ //int i;
+ //uint32_t total_lines, lines_processed;
m_adam7_pass_size_x[0] = adam7_pass_size(m_ihdr.m_width, 0, 8);
m_adam7_pass_size_x[1] = adam7_pass_size(m_ihdr.m_width, 4, 8);
@@ -1651,10 +1651,12 @@ int png_decoder::png_decode_start()
m_pass_y_left = 0;
+#if 0
total_lines = lines_processed = 0;
for (i = 0; i < 7; i++)
total_lines += m_adam7_pass_size_y[i];
+#endif
for (; ; )
{
@@ -1675,7 +1677,7 @@ int png_decoder::png_decode_start()
}
}
- lines_processed++;
+ //lines_processed++;
}
m_adam7_decoded_flag = TRUE;
diff --git a/thirdparty/basis_universal/patches/external-jpgd.patch b/thirdparty/basis_universal/patches/external-jpgd.patch
new file mode 100644
index 0000000000..7a805d00cb
--- /dev/null
+++ b/thirdparty/basis_universal/patches/external-jpgd.patch
@@ -0,0 +1,13 @@
+diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp
+index c431ceaf12..e87dd636a2 100644
+--- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
++++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
+@@ -409,7 +409,7 @@ namespace basisu
+ bool load_jpg(const char *pFilename, image& img)
+ {
+ int width = 0, height = 0, actual_comps = 0;
+- uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering);
++ uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagBoxChromaFiltering);
+ if (!pImage_data)
+ return false;
+
diff --git a/thirdparty/basis_universal/patches/external-tinyexr.patch b/thirdparty/basis_universal/patches/external-tinyexr.patch
new file mode 100644
index 0000000000..665af13300
--- /dev/null
+++ b/thirdparty/basis_universal/patches/external-tinyexr.patch
@@ -0,0 +1,23 @@
+diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp
+index 6c0ac0ad370..2bf486a0287 100644
+--- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
++++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
+@@ -27,7 +27,7 @@
+ #ifndef TINYEXR_USE_ZFP
+ #define TINYEXR_USE_ZFP (1)
+ #endif
+-#include "3rdparty/tinyexr.h"
++#include <tinyexr.h>
+
+ #ifndef MINIZ_HEADER_FILE_ONLY
+ #define MINIZ_HEADER_FILE_ONLY
+@@ -3257,7 +3257,8 @@ namespace basisu
+ float* out_rgba = nullptr;
+ const char* err = nullptr;
+
+- int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err, &n_chans);
++ int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err);
++ n_chans = 4;
+ if (status != 0)
+ {
+ error_printf("Failed loading .EXR image \"%s\"! (TinyEXR error: %s)\n", pFilename, err ? err : "?");
diff --git a/thirdparty/basis_universal/patches/remove-tinydds-qoi.patch b/thirdparty/basis_universal/patches/remove-tinydds-qoi.patch
new file mode 100644
index 0000000000..a4d176602d
--- /dev/null
+++ b/thirdparty/basis_universal/patches/remove-tinydds-qoi.patch
@@ -0,0 +1,446 @@
+diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp
+index 2bf486a0287..fff98e83014 100644
+--- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
++++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
+@@ -37,9 +37,6 @@
+ #endif
+ #include "basisu_miniz.h"
+
+-#define QOI_IMPLEMENTATION
+-#include "3rdparty/qoi.h"
+-
+ #if defined(_WIN32)
+ // For QueryPerformanceCounter/QueryPerformanceFrequency
+ #define WIN32_LEAN_AND_MEAN
+@@ -408,16 +405,7 @@ namespace basisu
+
+ bool load_qoi(const char* pFilename, image& img)
+ {
+- qoi_desc desc;
+- clear_obj(desc);
+-
+- void* p = qoi_read(pFilename, &desc, 4);
+- if (!p)
+- return false;
+-
+- img.grant_ownership(static_cast<color_rgba *>(p), desc.width, desc.height);
+-
+- return true;
++ return false;
+ }
+
+ bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename)
+diff --git a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
+index 000869a5337..342446b8fd4 100644
+--- a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
++++ b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
+@@ -19,9 +19,6 @@
+ #include "basisu_bc7enc.h"
+ #include "../transcoder/basisu_astc_hdr_core.h"
+
+-#define TINYDDS_IMPLEMENTATION
+-#include "3rdparty/tinydds.h"
+-
+ namespace basisu
+ {
+ //------------------------------------------------------------------------------------------------
+@@ -1979,208 +1976,8 @@ namespace basisu
+ // Accepts 2D, 2D mipmapped, 2D array, 2D array mipmapped
+ // and cubemap, cubemap mipmapped, and cubemap array mipmapped.
+ bool write_dds_file(uint8_vec &dds_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)
+- {
+- if (!gpu_images.size())
+- {
+- assert(0);
+- return false;
+- }
+-
+- // Sanity check the input
+- uint32_t slices = 1;
+- if (cubemap_flag)
+- {
+- if ((gpu_images.size() % 6) != 0)
+- {
+- assert(0);
+- return false;
+- }
+- slices = gpu_images.size() / 6;
+- }
+- else
+- {
+- slices = gpu_images.size();
+- }
+-
+- uint32_t width = 0, height = 0, total_levels = 0;
+- basisu::texture_format fmt = texture_format::cInvalidTextureFormat;
+-
+- // Sanity check the input for consistent # of dimensions and mip levels
+- for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)
+- {
+- const gpu_image_vec& levels = gpu_images[array_index];
+-
+- if (!levels.size())
+- {
+- // Empty mip chain
+- assert(0);
+- return false;
+- }
+-
+- if (!array_index)
+- {
+- width = levels[0].get_pixel_width();
+- height = levels[0].get_pixel_height();
+- total_levels = (uint32_t)levels.size();
+- fmt = levels[0].get_format();
+- }
+- else
+- {
+- if ((width != levels[0].get_pixel_width()) ||
+- (height != levels[0].get_pixel_height()) ||
+- (total_levels != levels.size()))
+- {
+- // All cubemap/texture array faces must be the same dimension
+- assert(0);
+- return false;
+- }
+- }
+-
+- for (uint32_t level_index = 0; level_index < levels.size(); level_index++)
+- {
+- if (level_index)
+- {
+- if ((levels[level_index].get_pixel_width() != maximum<uint32_t>(1, levels[0].get_pixel_width() >> level_index)) ||
+- (levels[level_index].get_pixel_height() != maximum<uint32_t>(1, levels[0].get_pixel_height() >> level_index)))
+- {
+- // Malformed mipmap chain
+- assert(0);
+- return false;
+- }
+- }
+-
+- if (fmt != levels[level_index].get_format())
+- {
+- // All input textures must use the same GPU format
+- assert(0);
+- return false;
+- }
+- }
+- }
+-
+- // No mipmap levels
+- if (!total_levels)
+- {
+- assert(0);
+- return false;
+- }
+-
+- // Create the DDS mipmap level data
+- uint8_vec mipmaps[32];
+-
+- // See https://learn.microsoft.com/en-us/windows/win32/direct3ddds/dds-file-layout-for-cubic-environment-maps
+- // DDS cubemap organization is cubemap face 0 followed by all mips, then cubemap face 1 followed by all mips, etc.
+- // Unfortunately tinydds.h's writer doesn't handle this case correctly, so we work around it here.
+- // This also applies with 2D texture arrays, too. RenderDoc and ddsview (DirectXTex) views each type (cubemap array and 2D texture array) correctly.
+- // Also see "Using Texture Arrays in Direct3D 10/11":
+- // https://learn.microsoft.com/en-us/windows/win32/direct3ddds/dx-graphics-dds-pguide
+- for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)
+- {
+- const gpu_image_vec& levels = gpu_images[array_index];
+-
+- for (uint32_t level_index = 0; level_index < levels.size(); level_index++)
+- {
+- append_vector(mipmaps[0], (uint8_t*)levels[level_index].get_ptr(), levels[level_index].get_size_in_bytes());
+-
+- } // level_index
+- } // array_index
+-
+-#if 0
+- // This organization, required by tinydds.h's API, is wrong.
+- {
+- for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)
+- {
+- const gpu_image_vec& levels = gpu_images[array_index];
+-
+- for (uint32_t level_index = 0; level_index < levels.size(); level_index++)
+- {
+- append_vector(mipmaps[level_index], (uint8_t*)levels[level_index].get_ptr(), levels[level_index].get_size_in_bytes());
+-
+- } // level_index
+- } // array_index
+- }
+-#endif
+-
+- // Write DDS file using tinydds
+- TinyDDS_WriteCallbacks cbs;
+- cbs.error = [](void* user, char const* msg) { BASISU_NOTE_UNUSED(user); fprintf(stderr, "tinydds: %s\n", msg); };
+- cbs.alloc = [](void* user, size_t size) -> void* { BASISU_NOTE_UNUSED(user); return malloc(size); };
+- cbs.free = [](void* user, void* memory) { BASISU_NOTE_UNUSED(user); free(memory); };
+- cbs.write = [](void* user, void const* buffer, size_t byteCount) { BASISU_NOTE_UNUSED(user); uint8_vec* pVec = (uint8_vec*)user; append_vector(*pVec, (const uint8_t*)buffer, byteCount); };
+-
+- uint32_t mipmap_sizes[32];
+- const void* mipmap_ptrs[32];
+-
+- clear_obj(mipmap_sizes);
+- clear_obj(mipmap_ptrs);
+-
+- assert(total_levels < 32);
+- for (uint32_t i = 0; i < total_levels; i++)
+- {
+- mipmap_sizes[i] = mipmaps[i].size_in_bytes();
+- mipmap_ptrs[i] = mipmaps[i].get_ptr();
+- }
+-
+- // Select tinydds texture format
+- uint32_t tinydds_fmt = 0;
+-
+- switch (fmt)
+- {
+- case texture_format::cBC1_NV:
+- case texture_format::cBC1_AMD:
+- case texture_format::cBC1:
+- tinydds_fmt = use_srgb_format ? TDDS_BC1_RGBA_SRGB_BLOCK : TDDS_BC1_RGBA_UNORM_BLOCK;
+- break;
+- case texture_format::cBC3:
+- tinydds_fmt = use_srgb_format ? TDDS_BC3_SRGB_BLOCK : TDDS_BC3_UNORM_BLOCK;
+- break;
+- case texture_format::cBC4:
+- tinydds_fmt = TDDS_BC4_UNORM_BLOCK;
+- break;
+- case texture_format::cBC5:
+- tinydds_fmt = TDDS_BC5_UNORM_BLOCK;
+- break;
+- case texture_format::cBC6HSigned:
+- tinydds_fmt = TDDS_BC6H_SFLOAT_BLOCK;
+- break;
+- case texture_format::cBC6HUnsigned:
+- tinydds_fmt = TDDS_BC6H_UFLOAT_BLOCK;
+- break;
+- case texture_format::cBC7:
+- tinydds_fmt = use_srgb_format ? TDDS_BC7_SRGB_BLOCK : TDDS_BC7_UNORM_BLOCK;
+- break;
+- default:
+- {
+- fprintf(stderr, "Warning: Unsupported format in write_dds_file().\n");
+- return false;
+- }
+- }
+-
+- // DirectXTex's DDSView doesn't handle odd sizes textures correctly. RenderDoc loads them fine, however.
+- // Trying to work around this here results in invalid mipmaps.
+- //width = (width + 3) & ~3;
+- //height = (height + 3) & ~3;
+-
+- bool status = TinyDDS_WriteImage(&cbs,
+- &dds_data,
+- width,
+- height,
+- 1,
+- slices,
+- total_levels,
+- (TinyDDS_Format)tinydds_fmt,
+- cubemap_flag,
+- true,
+- mipmap_sizes,
+- mipmap_ptrs);
+-
+- if (!status)
+- {
+- fprintf(stderr, "write_dds_file: Failed creating DDS file\n");
+- return false;
+- }
+-
+- return true;
++ {
++ return false;
+ }
+
+ bool write_dds_file(const char* pFilename, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)
+@@ -2201,188 +1998,6 @@ namespace basisu
+
+ bool read_uncompressed_dds_file(const char* pFilename, basisu::vector<image> &ldr_mips, basisu::vector<imagef>& hdr_mips)
+ {
+- const uint32_t MAX_IMAGE_DIM = 16384;
+-
+- TinyDDS_Callbacks cbs;
+-
+- cbs.errorFn = [](void* user, char const* msg) { BASISU_NOTE_UNUSED(user); fprintf(stderr, "tinydds: %s\n", msg); };
+- cbs.allocFn = [](void* user, size_t size) -> void* { BASISU_NOTE_UNUSED(user); return malloc(size); };
+- cbs.freeFn = [](void* user, void* memory) { BASISU_NOTE_UNUSED(user); free(memory); };
+- cbs.readFn = [](void* user, void* buffer, size_t byteCount) -> size_t { return (size_t)fread(buffer, 1, byteCount, (FILE*)user); };
+-
+-#ifdef _MSC_VER
+- cbs.seekFn = [](void* user, int64_t ofs) -> bool { return _fseeki64((FILE*)user, ofs, SEEK_SET) == 0; };
+- cbs.tellFn = [](void* user) -> int64_t { return _ftelli64((FILE*)user); };
+-#else
+- cbs.seekFn = [](void* user, int64_t ofs) -> bool { return fseek((FILE*)user, (long)ofs, SEEK_SET) == 0; };
+- cbs.tellFn = [](void* user) -> int64_t { return (int64_t)ftell((FILE*)user); };
+-#endif
+-
+- FILE* pFile = fopen_safe(pFilename, "rb");
+- if (!pFile)
+- {
+- error_printf("Can't open .DDS file \"%s\"\n", pFilename);
+- return false;
+- }
+-
+- // These are the formats AMD Compressonator supports in its UI.
+- enum dds_fmt
+- {
+- cRGBA32,
+- cRGBA_HALF,
+- cRGBA_FLOAT
+- };
+-
+- bool status = false;
+- dds_fmt fmt = cRGBA32;
+- uint32_t width = 0, height = 0;
+- bool hdr_flag = false;
+- TinyDDS_Format tfmt = TDDS_UNDEFINED;
+-
+- TinyDDS_ContextHandle ctx = TinyDDS_CreateContext(&cbs, pFile);
+- if (!ctx)
+- goto failure;
+-
+- status = TinyDDS_ReadHeader(ctx);
+- if (!status)
+- {
+- error_printf("Failed parsing DDS header in file \"%s\"\n", pFilename);
+- goto failure;
+- }
+-
+- if ((!TinyDDS_Is2D(ctx)) || (TinyDDS_ArraySlices(ctx) > 1) || (TinyDDS_IsCubemap(ctx)))
+- {
+- error_printf("Unsupported DDS texture type in file \"%s\"\n", pFilename);
+- goto failure;
+- }
+-
+- width = TinyDDS_Width(ctx);
+- height = TinyDDS_Height(ctx);
+-
+- if (!width || !height)
+- {
+- error_printf("DDS texture dimensions invalid in file \"%s\"\n", pFilename);
+- goto failure;
+- }
+-
+- if ((width > MAX_IMAGE_DIM) || (height > MAX_IMAGE_DIM))
+- {
+- error_printf("DDS texture dimensions too large in file \"%s\"\n", pFilename);
+- goto failure;
+- }
+-
+- tfmt = TinyDDS_GetFormat(ctx);
+- switch (tfmt)
+- {
+- case TDDS_R8G8B8A8_SRGB:
+- case TDDS_R8G8B8A8_UNORM:
+- case TDDS_B8G8R8A8_SRGB:
+- case TDDS_B8G8R8A8_UNORM:
+- fmt = cRGBA32;
+- break;
+- case TDDS_R16G16B16A16_SFLOAT:
+- fmt = cRGBA_HALF;
+- hdr_flag = true;
+- break;
+- case TDDS_R32G32B32A32_SFLOAT:
+- fmt = cRGBA_FLOAT;
+- hdr_flag = true;
+- break;
+- default:
+- error_printf("File \"%s\" has an unsupported DDS texture format (only supports RGBA/BGRA 32bpp, RGBA HALF float, or RGBA FLOAT)\n", pFilename);
+- goto failure;
+- }
+-
+- if (hdr_flag)
+- hdr_mips.resize(TinyDDS_NumberOfMipmaps(ctx));
+- else
+- ldr_mips.resize(TinyDDS_NumberOfMipmaps(ctx));
+-
+- for (uint32_t level = 0; level < TinyDDS_NumberOfMipmaps(ctx); level++)
+- {
+- const uint32_t level_width = TinyDDS_MipMapReduce(width, level);
+- const uint32_t level_height = TinyDDS_MipMapReduce(height, level);
+- const uint32_t total_level_texels = level_width * level_height;
+-
+- const void* pImage = TinyDDS_ImageRawData(ctx, level);
+- const uint32_t image_size = TinyDDS_ImageSize(ctx, level);
+-
+- if (fmt == cRGBA32)
+- {
+- ldr_mips[level].resize(level_width, level_height);
+-
+- if ((ldr_mips[level].get_total_pixels() * sizeof(uint32_t) != image_size))
+- {
+- assert(0);
+- goto failure;
+- }
+-
+- memcpy(ldr_mips[level].get_ptr(), pImage, image_size);
+-
+- if ((tfmt == TDDS_B8G8R8A8_SRGB) || (tfmt == TDDS_B8G8R8A8_UNORM))
+- {
+- // Swap R and B components.
+- uint32_t *pTexels = (uint32_t *)ldr_mips[level].get_ptr();
+- for (uint32_t i = 0; i < total_level_texels; i++)
+- {
+- const uint32_t v = pTexels[i];
+- const uint32_t r = (v >> 16) & 0xFF;
+- const uint32_t b = v & 0xFF;
+- pTexels[i] = r | (b << 16) | (v & 0xFF00FF00);
+- }
+- }
+- }
+- else if (fmt == cRGBA_FLOAT)
+- {
+- hdr_mips[level].resize(level_width, level_height);
+-
+- if ((hdr_mips[level].get_total_pixels() * sizeof(float) * 4 != image_size))
+- {
+- assert(0);
+- goto failure;
+- }
+-
+- memcpy(hdr_mips[level].get_ptr(), pImage, image_size);
+- }
+- else if (fmt == cRGBA_HALF)
+- {
+- hdr_mips[level].resize(level_width, level_height);
+-
+- if ((hdr_mips[level].get_total_pixels() * sizeof(basist::half_float) * 4 != image_size))
+- {
+- assert(0);
+- goto failure;
+- }
+-
+- // Unpack half to float.
+- const basist::half_float* pSrc_comps = static_cast<const basist::half_float*>(pImage);
+- vec4F* pDst_texels = hdr_mips[level].get_ptr();
+-
+- for (uint32_t i = 0; i < total_level_texels; i++)
+- {
+- (*pDst_texels)[0] = basist::half_to_float(pSrc_comps[0]);
+- (*pDst_texels)[1] = basist::half_to_float(pSrc_comps[1]);
+- (*pDst_texels)[2] = basist::half_to_float(pSrc_comps[2]);
+- (*pDst_texels)[3] = basist::half_to_float(pSrc_comps[3]);
+-
+- pSrc_comps += 4;
+- pDst_texels++;
+- } // y
+- }
+- } // level
+-
+- TinyDDS_DestroyContext(ctx);
+- fclose(pFile);
+-
+- return true;
+-
+- failure:
+- if (ctx)
+- TinyDDS_DestroyContext(ctx);
+-
+- if (pFile)
+- fclose(pFile);
+-
+ return false;
+ }
+
diff --git a/thirdparty/basis_universal/transcoder/basisu.h b/thirdparty/basis_universal/transcoder/basisu.h
index 1230b59ec6..939ee79e62 100644
--- a/thirdparty/basis_universal/transcoder/basisu.h
+++ b/thirdparty/basis_universal/transcoder/basisu.h
@@ -1,5 +1,5 @@
// basisu.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -117,13 +117,26 @@ namespace basisu
typedef basisu::vector<uint64_t> uint64_vec;
typedef basisu::vector<int> int_vec;
typedef basisu::vector<bool> bool_vec;
+ typedef basisu::vector<float> float_vec;
void enable_debug_printf(bool enabled);
void debug_printf(const char *pFmt, ...);
-
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+#endif
+
template <typename T> inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); }
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
+
template <typename T0, typename T1> inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; }
template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; }
@@ -162,10 +175,45 @@ namespace basisu
template<typename T> inline T open_range_check(T v, T minv, T maxv) { assert(v >= minv && v < maxv); BASISU_NOTE_UNUSED(minv); BASISU_NOTE_UNUSED(maxv); return v; }
template<typename T> inline T open_range_check(T v, T maxv) { assert(v < maxv); BASISU_NOTE_UNUSED(maxv); return v; }
+ // Half-open interval: true when l <= v < h (h itself is excluded).
+ inline bool in_bounds(int v, int l, int h)
+ {
+ return (v >= l) && (v < h);
+ }
+
+ // Closed interval: true when l <= v <= h (both ends included).
+ inline bool in_range(int v, int l, int h)
+ {
+ return (v >= l) && (v <= h);
+ }
+
inline uint32_t total_bits(uint32_t v) { uint32_t l = 0; for ( ; v > 0U; ++l) v >>= 1; return l; }
template<typename T> inline T saturate(T val) { return clamp(val, 0.0f, 1.0f); }
+ inline uint32_t get_bit(uint32_t src, int ndx) // Returns bit ndx of src (0 = least significant bit) as 0 or 1.
+ {
+ assert(in_bounds(ndx, 0, 32)); // ndx must be 0..31; only checked when asserts are enabled
+ return (src >> ndx) & 1;
+ }
+
+ inline bool is_bit_set(uint32_t src, int ndx) // True when bit ndx of src is 1; ndx is forwarded to get_bit() unchanged.
+ {
+ return get_bit(src, ndx) != 0;
+ }
+
+ inline uint32_t get_bits(uint32_t val, int low, int high) // Returns bits low..high (both inclusive) of val, shifted down so bit low lands at bit 0.
+ {
+ const int num_bits = (high - low) + 1;
+ assert(in_range(num_bits, 1, 32));
+
+ val >>= low;
+ if (num_bits != 32) // a full 32-bit field is left unmasked; this also avoids evaluating 1u << 32
+ val &= ((1u << num_bits) - 1);
+
+ return val;
+ }
+
template<typename T, typename R> inline void append_vector(T &vec, const R *pObjs, size_t n)
{
if (n)
@@ -267,6 +315,11 @@ namespace basisu
return true;
}
+ static inline uint32_t read_le_word(const uint8_t* pBytes) // Reads two bytes as a little-endian 16-bit value: pBytes[0] is the low byte, pBytes[1] the high byte.
+ {
+ return (pBytes[1] << 8U) | (pBytes[0]);
+ }
+
static inline uint32_t read_le_dword(const uint8_t *pBytes)
{
return (pBytes[3] << 24U) | (pBytes[2] << 16U) | (pBytes[1] << 8U) | (pBytes[0]);
@@ -303,6 +356,10 @@ namespace basisu
return *this;
}
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
inline operator uint32_t() const
{
switch (NumBytes)
@@ -354,6 +411,9 @@ namespace basisu
}
}
}
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
};
enum eZero { cZero };
@@ -402,8 +462,11 @@ namespace basisu
cBC3, // DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block)
cBC4, // DXT5A
cBC5, // 3DC/DXN (two BC4/DXT5A blocks)
+ cBC6HSigned, // HDR
+ cBC6HUnsigned, // HDR
cBC7,
- cASTC4x4, // LDR only
+ cASTC_LDR_4x4, // ASTC 4x4 LDR only
+ cASTC_HDR_4x4, // ASTC 4x4 HDR only (but may use LDR ASTC blocks internally)
cPVRTC1_4_RGB,
cPVRTC1_4_RGBA,
cATC_RGB,
@@ -413,17 +476,22 @@ namespace basisu
cETC2_R11_EAC,
cETC2_RG11_EAC,
cUASTC4x4,
+ cUASTC_HDR_4x4,
cBC1_NV,
cBC1_AMD,
-
+
// Uncompressed/raw pixels
cRGBA32,
cRGB565,
cBGR565,
cRGBA4444,
- cABGR4444
+ cABGR4444,
+ cRGBA_HALF,
+ cRGB_HALF,
+ cRGB_9E5
};
+ // This is bytes per block for GPU formats, or bytes per texel for uncompressed formats.
inline uint32_t get_bytes_per_block(texture_format fmt)
{
switch (fmt)
@@ -443,13 +511,27 @@ namespace basisu
case texture_format::cETC2_R11_EAC:
return 8;
case texture_format::cRGBA32:
- return sizeof(uint32_t) * 16;
+ case texture_format::cRGB_9E5:
+ return sizeof(uint32_t);
+ case texture_format::cRGB_HALF:
+ return sizeof(uint16_t) * 3;
+ case texture_format::cRGBA_HALF:
+ return sizeof(uint16_t) * 4;
+ case texture_format::cRGB565:
+ case texture_format::cBGR565:
+ case texture_format::cRGBA4444:
+ case texture_format::cABGR4444:
+ return sizeof(uint16_t);
+
default:
break;
}
+
+ // Everything else is 16 bytes/block.
return 16;
}
+ // This is qwords per block for GPU formats, or not valid for uncompressed formats.
inline uint32_t get_qwords_per_block(texture_format fmt)
{
return get_bytes_per_block(fmt) >> 3;
@@ -473,6 +555,17 @@ namespace basisu
BASISU_NOTE_UNUSED(fmt);
return 4;
}
+
+ inline bool is_hdr_texture_format(texture_format fmt) // True only for cASTC_HDR_4x4, cUASTC_HDR_4x4, cBC6HSigned and cBC6HUnsigned.
+ {
+ if (fmt == texture_format::cASTC_HDR_4x4)
+ return true;
+ if (fmt == texture_format::cUASTC_HDR_4x4)
+ return true;
+ if ((fmt == texture_format::cBC6HSigned) || (fmt == texture_format::cBC6HUnsigned))
+ return true;
+ return false; // every other format, including the raw cRGBA_HALF / cRGB_HALF / cRGB_9E5 entries, reports false here
+ }
} // namespace basisu
diff --git a/thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h b/thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h
new file mode 100644
index 0000000000..82dcd2bfe1
--- /dev/null
+++ b/thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h
@@ -0,0 +1,102 @@
+// File: basisu_astc_hdr_core.h
+#pragma once
+#include "basisu_astc_helpers.h"
+
+namespace basist
+{
+ struct astc_blk
+ {
+ uint8_t m_vals[16];
+ };
+
+ // ASTC_HDR_MAX_VAL is the maximum color component value that can be encoded.
+ // If the input has values higher than this, they need to be linearly scaled so all values are between [0,ASTC_HDR_MAX_VAL], and the linear scaling inverted in the shader.
+ const float ASTC_HDR_MAX_VAL = 65216.0f; // actually MAX_QLOG12_VAL
+
+ // Maximum usable QLOG encodings, and their floating point equivalent values, that don't result in NaN/Inf's.
+ const uint32_t MAX_QLOG7 = 123;
+ //const float MAX_QLOG7_VAL = 55296.0f;
+
+ const uint32_t MAX_QLOG8 = 247;
+ //const float MAX_QLOG8_VAL = 60416.0f;
+
+ const uint32_t MAX_QLOG9 = 495;
+ //const float MAX_QLOG9_VAL = 62976.0f;
+
+ const uint32_t MAX_QLOG10 = 991;
+ //const float MAX_QLOG10_VAL = 64256.0f;
+
+ const uint32_t MAX_QLOG11 = 1983;
+ //const float MAX_QLOG11_VAL = 64896.0f;
+
+ const uint32_t MAX_QLOG12 = 3967;
+ //const float MAX_QLOG12_VAL = 65216.0f;
+
+ const uint32_t MAX_QLOG16 = 63487;
+ const float MAX_QLOG16_VAL = 65504.0f;
+
+ const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4;
+
+ // Notes:
+ // qlog16_to_half(half_to_qlog16(half_val_as_int)) == half_val_as_int (is lossless)
+ // However, this is not lossless in the general sense.
+ inline half_float qlog16_to_half_slow(uint32_t qlog16) // Converts a 16-bit qlog code to a half_float using integer arithmetic only.
+ {
+ assert(qlog16 <= 0xFFFF);
+
+ int C = qlog16;
+
+ int E = (C & 0xF800) >> 11; // top 5 bits of the code
+ int M = C & 0x7FF; // low 11 bits of the code
+
+ int Mt; // piecewise-linear remap of M; the three segments split at M = 512 and M = 1536
+ if (M < 512)
+ Mt = 3 * M;
+ else if (M >= 1536)
+ Mt = 5 * M - 2048;
+ else
+ Mt = 4 * M - 512;
+
+ int Cf = (E << 10) + (Mt >> 3); // E goes in at bit 10 and up; Mt >> 3 is at most 1023 and fills the 10 bits below
+ return (half_float)Cf;
+ }
+
+ // This is not lossless. Converts a qlog code that is bits wide (7 to 16) by widening it to 16 bits first.
+ inline half_float qlog_to_half_slow(uint32_t qlog, uint32_t bits)
+ {
+ assert((bits >= 7U) && (bits <= 16U));
+ assert(qlog < (1U << bits));
+
+ int C = qlog << (16 - bits); // left-align the code in 16 bits; the new low bits are zero
+ return qlog16_to_half_slow(C);
+ }
+
+ void astc_hdr_core_init();
+
+ void decode_mode7_to_qlog12_ise20(
+ const uint8_t* pEndpoints,
+ int e[2][3],
+ int* pScale);
+
+ bool decode_mode7_to_qlog12(
+ const uint8_t* pEndpoints,
+ int e[2][3],
+ int* pScale,
+ uint32_t ise_endpoint_range);
+
+ void decode_mode11_to_qlog12_ise20(
+ const uint8_t* pEndpoints,
+ int e[2][3]);
+
+ bool decode_mode11_to_qlog12(
+ const uint8_t* pEndpoints,
+ int e[2][3],
+ uint32_t ise_endpoint_range);
+
+ bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk);
+ bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk);
+
+ bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk);
+ bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk);
+
+} // namespace basist
diff --git a/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h b/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h
new file mode 100644
index 0000000000..09a234b2ae
--- /dev/null
+++ b/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h
@@ -0,0 +1,3587 @@
+// basisu_astc_helpers.h
+// Be sure to define ASTC_HELPERS_IMPLEMENTATION somewhere to get the implementation, otherwise you only get the header.
+#pragma once
+#ifndef BASISU_ASTC_HELPERS_HEADER
+#define BASISU_ASTC_HELPERS_HEADER
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <math.h>
+#include <fenv.h>
+
+namespace astc_helpers
+{
+ const uint32_t MAX_WEIGHT_VALUE = 64; // grid texel weights must range from [0,64]
+ const uint32_t MIN_GRID_DIM = 2; // the minimum dimension of a block's weight grid
+ const uint32_t MIN_BLOCK_DIM = 4, MAX_BLOCK_DIM = 12; // the valid block dimensions in texels
+ const uint32_t MAX_GRID_WEIGHTS = 64; // a block may have a maximum of 64 weight grid values
+
+ static const uint32_t NUM_ASTC_BLOCK_SIZES = 14;
+ extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2];
+
+ // The Color Endpoint Modes (CEM's)
+ enum cems
+ {
+ CEM_LDR_LUM_DIRECT = 0,
+ CEM_LDR_LUM_BASE_PLUS_OFS = 1,
+ CEM_HDR_LUM_LARGE_RANGE = 2,
+ CEM_HDR_LUM_SMALL_RANGE = 3,
+ CEM_LDR_LUM_ALPHA_DIRECT = 4,
+ CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS = 5,
+ CEM_LDR_RGB_BASE_SCALE = 6,
+ CEM_HDR_RGB_BASE_SCALE = 7,
+ CEM_LDR_RGB_DIRECT = 8,
+ CEM_LDR_RGB_BASE_PLUS_OFFSET = 9,
+ CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A = 10,
+ CEM_HDR_RGB = 11,
+ CEM_LDR_RGBA_DIRECT = 12,
+ CEM_LDR_RGBA_BASE_PLUS_OFFSET = 13,
+ CEM_HDR_RGB_LDR_ALPHA = 14,
+ CEM_HDR_RGB_HDR_ALPHA = 15
+ };
+
+ // All Bounded Integer Sequence Coding (BISE or ISE) ranges.
+ // Weights: Ranges [0,11] are valid.
+ // Endpoints: Ranges [4,20] are valid.
+ enum bise_levels
+ {
+ BISE_2_LEVELS = 0,
+ BISE_3_LEVELS = 1,
+ BISE_4_LEVELS = 2,
+ BISE_5_LEVELS = 3,
+ BISE_6_LEVELS = 4,
+ BISE_8_LEVELS = 5,
+ BISE_10_LEVELS = 6,
+ BISE_12_LEVELS = 7,
+ BISE_16_LEVELS = 8,
+ BISE_20_LEVELS = 9,
+ BISE_24_LEVELS = 10,
+ BISE_32_LEVELS = 11,
+ BISE_40_LEVELS = 12,
+ BISE_48_LEVELS = 13,
+ BISE_64_LEVELS = 14,
+ BISE_80_LEVELS = 15,
+ BISE_96_LEVELS = 16,
+ BISE_128_LEVELS = 17,
+ BISE_160_LEVELS = 18,
+ BISE_192_LEVELS = 19,
+ BISE_256_LEVELS = 20
+ };
+
+ const uint32_t TOTAL_ISE_RANGES = 21;
+
+ // Valid endpoint ISE ranges
+ const uint32_t FIRST_VALID_ENDPOINT_ISE_RANGE = BISE_6_LEVELS; // 4
+ const uint32_t LAST_VALID_ENDPOINT_ISE_RANGE = BISE_256_LEVELS; // 20
+ const uint32_t TOTAL_ENDPOINT_ISE_RANGES = LAST_VALID_ENDPOINT_ISE_RANGE - FIRST_VALID_ENDPOINT_ISE_RANGE + 1;
+
+ // Valid weight ISE ranges
+ const uint32_t FIRST_VALID_WEIGHT_ISE_RANGE = BISE_2_LEVELS; // 0
+ const uint32_t LAST_VALID_WEIGHT_ISE_RANGE = BISE_32_LEVELS; // 11
+ const uint32_t TOTAL_WEIGHT_ISE_RANGES = LAST_VALID_WEIGHT_ISE_RANGE - FIRST_VALID_WEIGHT_ISE_RANGE + 1;
+
+ // The ISE range table.
+ extern const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3]; // 0=bits (0 to 8), 1=trits (0 or 1), 2=quints (0 or 1)
+
+ // Possible Color Component Select values, used in dual plane mode.
+ // The CCS component will be interpolated using the 2nd weight plane.
+ enum ccs
+ {
+ CCS_GBA_R = 0,
+ CCS_RBA_G = 1,
+ CCS_RGA_B = 2,
+ CCS_RGB_A = 3
+ };
+
+ struct astc_block
+ {
+ uint32_t m_vals[4];
+ };
+
+ const uint32_t MAX_PARTITIONS = 4; // Max # of partitions or subsets for single plane mode
+ const uint32_t MAX_DUAL_PLANE_PARTITIONS = 3; // Max # of partitions or subsets for dual plane mode
+ const uint32_t NUM_PARTITION_PATTERNS = 1024; // Total # of partition pattern seeds (10-bits)
+ const uint32_t MAX_ENDPOINTS = 18; // Maximum # of endpoint values in a block
+
+ struct log_astc_block // Logical (unpacked) description of one ASTC block; pack_astc_block() and unpack_block() convert to and from the physical form.
+ {
+ bool m_error_flag;
+
+ bool m_solid_color_flag_ldr, m_solid_color_flag_hdr;
+ uint16_t m_solid_color[4];
+
+ // Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr
+ uint32_t m_grid_width, m_grid_height; // weight grid dimensions, not the dimension of the block
+
+ bool m_dual_plane;
+
+ uint32_t m_weight_ise_range; // 0-11
+ uint32_t m_endpoint_ise_range; // 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking
+
+ uint32_t m_color_component_selector; // 0-3, 0=GBA R, 1=RBA G, 2=RGA B, 3=RGB A, only used in dual plane mode
+
+ uint32_t m_num_partitions; // or the # of subsets, 1-4 (1-3 if dual plane mode)
+ uint32_t m_partition_id; // 10-bits, must be 0 if m_num_partitions==1
+
+ uint32_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM (see enum cems)
+
+ // ISE weight grid values. In dual plane mode, the order is p0,p1, p0,p1, etc.
+ uint8_t m_weights[MAX_GRID_WEIGHTS];
+
+ // ISE endpoint values
+ // Endpoint order examples:
+ // 1 subset LA : LL0 LH0 AL0 AH0
+ // 1 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0
+ // 1 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0
+ // 2 subset LA : LL0 LH0 AL0 AH0 LL1 LH1 AL1 AH1
+ // 2 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 RL1 RH1 GL1 GH1 BL1 BH1
+ // 2 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 RL1 RH1 GL1 GH1 BL1 BH1 AL1 AH1
+ uint8_t m_endpoints[MAX_ENDPOINTS];
+
+ void clear() // Zero-fills the whole struct, so every flag, range and array entry becomes 0.
+ {
+ memset(this, 0, sizeof(*this));
+ }
+ };
+
+ // Half-open interval [l, h): asserts l <= v < h and returns v unchanged. With asserts disabled it is a plain pass-through.
+ inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
+ inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
+
+ inline uint32_t get_bits(uint32_t val, int low, int high) // Returns bits low..high (both inclusive) of val, shifted down so bit low lands at bit 0.
+ {
+ const int num_bits = (high - low) + 1;
+ assert((num_bits >= 1) && (num_bits <= 32));
+
+ val >>= low;
+ if (num_bits != 32) // a full 32-bit field is left unmasked; this also avoids evaluating 1u << 32
+ val &= ((1u << num_bits) - 1);
+
+ return val;
+ }
+
+ // Returns the number of levels in the given ISE range.
+ inline uint32_t get_ise_levels(uint32_t ise_range)
+ {
+ assert(ise_range < TOTAL_ISE_RANGES);
+ return (1 + 2 * g_ise_range_table[ise_range][1] + 4 * g_ise_range_table[ise_range][2]) << g_ise_range_table[ise_range][0]; // (1, 3 with a trit, or 5 with a quint) times 2^bits; table columns are 0=bits, 1=trits, 2=quints
+ }
+
+ inline int get_ise_sequence_bits(int count, int range) // Returns the total bit count of count values encoded in ISE range `range`; range is used as a g_ise_range_table index without a bounds check.
+ {
+ // See 18.22 Data Size Determination
+ int total_bits = g_ise_range_table[range][0] * count; // plain bits: column 0 bits per value
+ total_bits += (g_ise_range_table[range][1] * 8 * count + 4) / 5; // trit ranges: adds ceil(8 * count / 5) bits, nothing otherwise
+ total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3; // quint ranges: adds ceil(7 * count / 3) bits, nothing otherwise
+ return total_bits;
+ }
+
+ inline uint32_t weight_interpolate(uint32_t l, uint32_t h, uint32_t w) // Blends l and h by weight w in [0,64]: w = 0 returns l, w = 64 returns h.
+ {
+ assert(w <= MAX_WEIGHT_VALUE);
+ return (l * (64 - w) + h * w + 32) >> 6; // +32 rounds to nearest before the divide by 64
+ }
+
+ void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range);
+
+ // Packs a logical ASTC block into a physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions.
+ bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr);
+
+ // Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component.
+ void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a);
+
+ // Pack HDR void extent (16-bit values are FP16/half floats - no NaN/Inf's)
+ void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah);
+
+ // These helpers are all quite slow, but are useful for table preparation.
+
+ // Dequantizes ISE encoded endpoint val to [0,255]
+ uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range); // ISE ranges 4-11 -- NOTE(review): the endpoint range constants in this header span 4-20; confirm which range is intended
+
+ // Dequantizes ISE encoded weight val to [0,64]
+ uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range); // ISE ranges 0-10 -- NOTE(review): the weight range constants in this header span 0-11; confirm which range is intended
+
+ uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range);
+ uint32_t find_nearest_bise_weight(int v, uint32_t ise_range);
+
+ void create_quant_tables(
+ uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
+ uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
+ uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
+ uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
+ uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights
+ bool weight_flag); // false if block endpoints, true if weights
+
+ // True if the CEM is LDR.
+ bool is_cem_ldr(uint32_t mode);
+ inline bool is_cem_hdr(uint32_t mode) { return !is_cem_ldr(mode); }
+
+ // True if the passed in dimensions are a valid ASTC block size. There are 14 supported configs, from 4x4 (8bpp) to 12x12 (.89bpp).
+ bool is_valid_block_size(uint32_t w, uint32_t h);
+
+ bool block_has_any_hdr_cems(const log_astc_block& log_blk);
+ bool block_has_any_ldr_cems(const log_astc_block& log_blk);
+
+ // Returns the # of endpoint values for the given CEM: 2 for CEMs 0-3, 4 for 4-7, 6 for 8-11, 8 for 12-15.
+ inline uint32_t get_num_cem_values(uint32_t cem) { assert(cem <= 15); return 2 + 2 * (cem >> 2); }
+
+ struct dequant_table // Lookup tables for a single ISE range; filled in by dequant_tables::init().
+ {
+ basisu::vector<uint8_t> m_val_to_ise; // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
+ basisu::vector<uint8_t> m_ISE_to_val; // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
+ basisu::vector<uint8_t> m_ISE_to_rank; // returns the level rank index given an ISE symbol, [levels]
+ basisu::vector<uint8_t> m_rank_to_ISE; // returns the ISE symbol given a level rank, inverse of m_ISE_to_rank, [levels]
+
+ void init(bool weight_flag, uint32_t num_levels, bool init_rank_tabs) // Only sizes the vectors; it does not compute any table contents.
+ {
+ m_val_to_ise.resize(weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256); // 65 entries for weights, 256 for endpoints
+ m_ISE_to_val.resize(num_levels);
+ if (init_rank_tabs) // the two rank tables stay empty unless requested
+ {
+ m_ISE_to_rank.resize(num_levels);
+ m_rank_to_ISE.resize(num_levels);
+ }
+ }
+ };
+
+ struct dequant_tables // One dequant_table per valid weight ISE range and per valid endpoint ISE range.
+ {
+ dequant_table m_weights[TOTAL_WEIGHT_ISE_RANGES];
+ dequant_table m_endpoints[TOTAL_ENDPOINT_ISE_RANGES];
+
+ const dequant_table& get_weight_tab(uint32_t range) const // range is the ISE range itself, not an array index
+ {
+ assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
+ return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
+ }
+
+ dequant_table& get_weight_tab(uint32_t range) // mutable overload of the accessor above
+ {
+ assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
+ return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
+ }
+
+ const dequant_table& get_endpoint_tab(uint32_t range) const // range is the ISE range itself; the array is offset by FIRST_VALID_ENDPOINT_ISE_RANGE
+ {
+ assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
+ return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
+ }
+
+ dequant_table& get_endpoint_tab(uint32_t range) // mutable overload of the accessor above
+ {
+ assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
+ return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
+ }
+
+ void init(bool init_rank_tabs) // Sizes and fills every weight and endpoint table; rank tables are built only when init_rank_tabs is true.
+ {
+ for (uint32_t range = FIRST_VALID_WEIGHT_ISE_RANGE; range <= LAST_VALID_WEIGHT_ISE_RANGE; range++)
+ {
+ const uint32_t num_levels = get_ise_levels(range);
+ dequant_table& tab = get_weight_tab(range);
+
+ tab.init(true, num_levels, init_rank_tabs);
+
+ create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, true); // nullptr is passed for the rank tables when they were not requested
+ }
+
+ for (uint32_t range = FIRST_VALID_ENDPOINT_ISE_RANGE; range <= LAST_VALID_ENDPOINT_ISE_RANGE; range++)
+ {
+ const uint32_t num_levels = get_ise_levels(range);
+ dequant_table& tab = get_endpoint_tab(range);
+
+ tab.init(false, num_levels, init_rank_tabs);
+
+ create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, false); // same call as for weights, with weight_flag false
+ }
+ }
+ };
+
+ extern dequant_tables g_dequant_tables;
+ void init_tables(bool init_rank_tabs);
+
+ // Procedurally returns the texel partition/subset index given the block coordinate and config.
+ int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block);
+
+ void blue_contract(
+ int r, int g, int b, int a,
+ int& dr, int& dg, int& db, int& da);
+
+ void bit_transfer_signed(int& a, int& b);
+
+ void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t* pE);
+
+ typedef uint16_t half_float;
+ half_float float_to_half(float val, bool toward_zero);
+ float half_to_float(half_float hval);
+
+ const int MAX_RGB9E5 = 0xff80;
+ void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b);
+ uint32_t pack_rgb9e5(float r, float g, float b);
+
+ enum decode_mode
+ {
+ cDecodeModeSRGB8 = 0, // returns uint8_t's, not valid on HDR blocks
+ cDecodeModeLDR8 = 1, // returns uint8_t's, not valid on HDR blocks
+ cDecodeModeHDR16 = 2, // returns uint16_t's (half floats), valid on all LDR/HDR blocks
+ cDecodeModeRGB9E5 = 3 // returns uint32_t's, packed as RGB 9E5 (shared exponent), see https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
+ };
+
+ // Decodes logical block to output pixels.
+ // pPixels must point to either 32-bit pixel values (SRGB8/LDR8/9E5) or 64-bit pixel values (HDR16)
+ bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode);
+
+ void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t *pBits128, uint32_t bit_ofs);
+
+ // Unpack a physical ASTC encoded GPU texture block to a logical block description.
+ bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height);
+
+} // namespace astc_helpers
+
+#endif // BASISU_ASTC_HELPERS_HEADER
+
+//------------------------------------------------------------------
+
+#ifdef BASISU_ASTC_HELPERS_IMPLEMENTATION
+
+namespace astc_helpers
+{
+ template<typename T> inline T my_min(T a, T b) { return (a < b) ? a : b; } // smaller of a and b; returns b when they compare equal
+ template<typename T> inline T my_max(T a, T b) { return (a > b) ? a : b; } // larger of a and b; returns b when they compare equal
+
+ const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] = {
+ { 4, 4 }, { 5, 4 }, { 5, 5 }, { 6, 5 },
+ { 6, 6 }, { 8, 5 }, { 8, 6 }, { 10, 5 },
+ { 10, 6 }, { 8, 8 }, { 10, 8 }, { 10, 10 },
+ { 12, 10 }, { 12, 12 }
+ };
+
+ const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3] =
+ {
+ //b t q
+ //2 3 5 // rng ise_index notes
+ { 1, 0, 0 }, // 0..1 0
+ { 0, 1, 0 }, // 0..2 1
+ { 2, 0, 0 }, // 0..3 2
+ { 0, 0, 1 }, // 0..4 3
+ { 1, 1, 0 }, // 0..5 4 min endpoint ISE index
+ { 3, 0, 0 }, // 0..7 5
+ { 1, 0, 1 }, // 0..9 6
+ { 2, 1, 0 }, // 0..11 7
+ { 4, 0, 0 }, // 0..15 8
+ { 2, 0, 1 }, // 0..19 9
+ { 3, 1, 0 }, // 0..23 10
+ { 5, 0, 0 }, // 0..31 11 max weight ISE index
+ { 3, 0, 1 }, // 0..39 12
+ { 4, 1, 0 }, // 0..47 13
+ { 6, 0, 0 }, // 0..63 14
+ { 4, 0, 1 }, // 0..79 15
+ { 5, 1, 0 }, // 0..95 16
+ { 7, 0, 0 }, // 0..127 17
+ { 5, 0, 1 }, // 0..159 18
+ { 6, 1, 0 }, // 0..191 19
+ { 8, 0, 0 }, // 0..255 20
+ };
+
+ static inline void astc_set_bits_1_to_9(uint32_t* pDst, uint32_t& bit_offset, uint32_t code, uint32_t codesize) // ORs code (at most 9 bits wide) into pDst's bytes at bit_offset, then advances bit_offset by codesize.
+ {
+ uint8_t* pBuf = reinterpret_cast<uint8_t*>(pDst);
+
+ assert(codesize <= 9);
+ if (codesize) // codesize 0 writes nothing and leaves bit_offset alone
+ {
+ uint32_t byte_bit_offset = bit_offset & 7;
+ uint32_t val = code << byte_bit_offset;
+
+ uint32_t index = bit_offset >> 3;
+ pBuf[index] |= (uint8_t)val; // bits are ORed in, never cleared -- presumably the caller supplies a zeroed buffer; verify against callers
+
+ if (codesize > (8 - byte_bit_offset)) // the code spills past the end of this byte
+ pBuf[index + 1] |= (uint8_t)(val >> 8);
+
+ bit_offset += codesize;
+ }
+ }
+
+ static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high) // Returns bits low..high (both inclusive) of bits, shifted down to bit 0.
+ {
+ return (bits >> low) & ((1 << (high - low + 1)) - 1); // the mask comes from an int shift, so this suits narrow fields only
+ }
+
+ // Writes bits to output in an endian safe way: value (total_bits wide, at most 31) is ORed in byte by byte starting at bit_pos, and bit_pos is advanced by total_bits.
+ static inline void astc_set_bits(uint32_t* pOutput, uint32_t& bit_pos, uint32_t value, uint32_t total_bits)
+ {
+ assert(total_bits <= 31);
+ assert(value < (1u << total_bits));
+
+ uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
+
+ while (total_bits)
+ {
+ const uint32_t bits_to_write = my_min<int>(total_bits, 8 - (bit_pos & 7)); // never more than what is left of the current byte
+
+ pBytes[bit_pos >> 3] |= static_cast<uint8_t>(value << (bit_pos & 7)); // the uint8_t cast drops whatever belongs to later bytes
+
+ bit_pos += bits_to_write;
+ total_bits -= bits_to_write;
+ value >>= bits_to_write; // discard the bits just written
+ }
+ }
+
+ static const uint8_t g_astc_quint_encode[125] =
+ {
+ 0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57,
+ 58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104,
+ 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54,
+ 126, 127, 94, 95, 62, 39, 47, 55, 63, 7 /*31 - results in the same decode as 7*/
+ };
+
+ // Encodes 3 values to output, usable for any range that uses quints and bits. n is the number of plain low bits per value.
+ static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n)
+ {
+ // First extract the quints and the bits from the 3 input values
+ int quints = 0, bits[3];
+ const uint32_t bit_mask = (1 << n) - 1;
+ for (int i = 0; i < 3; i++)
+ {
+ static const int s_muls[3] = { 1, 5, 25 };
+
+ const int t = pValues[i] >> n; // the quint digit is whatever sits above the n low bits
+
+ quints += t * s_muls[i]; // digits form a base-5 number, first value least significant
+ bits[i] = pValues[i] & bit_mask;
+ }
+
+ // Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits.
+ // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
+
+ assert(quints < 125);
+ const int T = g_astc_quint_encode[quints];
+
+ // Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96.
+ astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) |
+ (bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3); // one write of 7 + 3n bits
+ }
+
+ static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39,
+ 43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154,
+ 131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202,
+ 208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224,
+ 225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159,
+ 191, 223, 124, 125, 126 };
+
+ // Encodes 5 values to output, usable for any range that uses trits and bits. n is the number of plain low bits per value.
+ static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n)
+ {
+ // First extract the trits and the bits from the 5 input values
+ int trits = 0, bits[5];
+ const uint32_t bit_mask = (1 << n) - 1;
+ for (int i = 0; i < 5; i++)
+ {
+ static const int s_muls[5] = { 1, 3, 9, 27, 81 };
+
+ const int t = pValues[i] >> n; // the trit digit is whatever sits above the n low bits
+
+ trits += t * s_muls[i]; // digits form a base-3 number, first value least significant
+ bits[i] = pValues[i] & bit_mask;
+ }
+
+ // Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits.
+ // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
+
+ assert(trits < 243);
+ const int T = g_astc_trit_encode[trits];
+
+ // Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94.
+ astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2); // first write: 2n + 2 bits
+
+ astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) |
+ (bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6); // second write: 3n + 6 bits, 5n + 8 in total
+ }
+
+ // Packs num_vals ISE symbols from pSrc_vals with ASTC's BISE, starting at bit offset bit_pos.
+ // The stream is assembled in a zeroed scratch buffer and then OR'd into the first four dwords of pDst,
+ // so bits already present in pDst are preserved.
+ void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range)
+ {
+     uint32_t scratch[5] = { 0 };
+
+     const int bits_per_val = g_ise_range_table[range][0];
+
+     // Trits take priority over quints when selecting the grouping.
+     const bool has_trits = g_ise_range_table[range][1] ? true : false;
+     const bool has_quints = (!has_trits) && (g_ise_range_table[range][2] ? true : false);
+
+#ifndef NDEBUG
+     // Every symbol must be a valid level for this range.
+     const uint32_t num_levels = get_ise_levels(range);
+     for (int i = 0; i < num_vals; i++)
+     {
+         assert(pSrc_vals[i] < num_levels);
+     }
+#endif
+
+     if (!has_trits && !has_quints)
+     {
+         // Plain bit range - each symbol is written on its own.
+         for (int i = 0; i < num_vals; i++)
+             astc_set_bits_1_to_9(scratch, bit_pos, pSrc_vals[i], bits_per_val);
+     }
+     else
+     {
+         // Trit ranges pack 5 symbols per group, quint ranges pack 3.
+         const int group_size = has_trits ? 5 : 3;
+         const int total_groups = (num_vals + group_size - 1) / group_size;
+
+         int first_val = 0;
+         for (int group_index = 0; group_index < total_groups; group_index++, first_val += group_size)
+         {
+             // A final partial group is padded with zero symbols.
+             uint8_t group_vals[5] = { 0 };
+
+             const int num_in_group = my_min(group_size, num_vals - first_val);
+             for (int i = 0; i < num_in_group; i++)
+                 group_vals[i] = pSrc_vals[first_val + i];
+
+             if (has_trits)
+                 astc_encode_trits(scratch, group_vals, bit_pos, bits_per_val);
+             else
+                 astc_encode_quints(scratch, group_vals, bit_pos, bits_per_val);
+         }
+     }
+
+     // TODO: Could this write too many bits on incomplete blocks?
+     for (int i = 0; i < 4; i++)
+         pDst[i] |= scratch[i];
+ }
+
+ // Reverses the order of all 32 bits of `bits` (bit 0 <-> bit 31, bit 1 <-> bit 30, ...).
+ inline uint32_t rev_dword(uint32_t bits)
+ {
+     uint32_t r = bits;
+
+     // Swap progressively larger neighbouring groups: single bits, pairs, nibbles, bytes, halves.
+     r = ((r >> 1) & 0x55555555) | ((r & 0x55555555) << 1);
+     r = ((r >> 2) & 0x33333333) | ((r & 0x33333333) << 2);
+     r = ((r >> 4) & 0x0f0f0f0f) | ((r & 0x0f0f0f0f) << 4);
+     r = ((r >> 8) & 0x00ff00ff) | ((r & 0x00ff00ff) << 8);
+     r = (r >> 16) | (r << 16);
+
+     return r;
+ }
+
+ // True when `value` fits in an unsigned field of num_bits bits, i.e. 0 <= value < 2^num_bits.
+ static inline bool is_packable(int value, int num_bits)
+ {
+     assert((num_bits >= 1) && (num_bits < 31));
+
+     if (value < 0)
+         return false;
+
+     const int limit = 1 << num_bits;
+     return value < limit;
+ }
+
+ static bool get_config_bits(const log_astc_block &log_block, uint32_t &config_bits) // Builds the block-mode field (bits 10..0) from the weight grid size, weight ISE range and dual-plane flag. Returns false, leaving config_bits = 0, when no table row can express the configuration.
+ {
+ config_bits = 0;
+
+ const int W = log_block.m_grid_width, H = log_block.m_grid_height;
+
+ const uint32_t P = log_block.m_weight_ise_range >= 6; // high precision
+ const uint32_t Dp_P = (log_block.m_dual_plane << 1) | P; // pack dual plane+high precision bits
+
+ // See Tables 81-82
+ // Compute p from weight range
+ uint32_t p = 2 + log_block.m_weight_ise_range - (P ? 6 : 0); // ranges 0..5 and 6..11 both map onto p = 2..7
+
+ // Rearrange p's bits to p0 p2 p1
+ p = (p >> 1) + ((p & 1) << 2); // now bit 2 = original bit 0, bits 1..0 = original bits 2..1
+
+ // Try encoding each row of table 82.
+
+ // W+4 H+2
+ if (is_packable(W - 4, 2) && is_packable(H - 2, 2))
+ {
+ config_bits = (Dp_P << 9) | ((W - 4) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | (p & 3);
+ return true;
+ }
+
+ // W+8 H+2
+ if (is_packable(W - 8, 2) && is_packable(H - 2, 2))
+ {
+ config_bits = (Dp_P << 9) | ((W - 8) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 4 | (p & 3);
+ return true;
+ }
+
+ // W+2 H+8
+ if (is_packable(W - 2, 2) && is_packable(H - 8, 2))
+ {
+ config_bits = (Dp_P << 9) | ((H - 8) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 8 | (p & 3);
+ return true;
+ }
+
+ // W+2 H+6
+ if (is_packable(W - 2, 2) && is_packable(H - 6, 1))
+ {
+ config_bits = (Dp_P << 9) | ((H - 6) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3); // H - 6 is 0 or 1, so bit 8 stays clear in this row
+ return true;
+ }
+
+ // W+2 H+2
+ if (is_packable(W - 2, 1) && is_packable(H - 2, 2))
+ {
+ config_bits = (Dp_P << 9) | ((W) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3); // W is 2 or 3 here, so (W << 7) sets bit 8 and puts (W - 2) in bit 7
+ return true;
+ }
+
+ // 12 H+2
+ if ((W == 12) && is_packable(H - 2, 2))
+ {
+ config_bits = (Dp_P << 9) | ((H - 2) << 5) | (p << 2); // from this row on, p occupies bits 4..2 and bits 1..0 are zero
+ return true;
+ }
+
+ // W+2 12
+ if ((H == 12) && is_packable(W - 2, 2))
+ {
+ config_bits = (Dp_P << 9) | (1 << 7) | ((W - 2) << 5) | (p << 2);
+ return true;
+ }
+
+ // 6 10
+ if ((W == 6) && (H == 10))
+ {
+ config_bits = (Dp_P << 9) | (3 << 7) | (p << 2);
+ return true;
+ }
+
+ // 10 6
+ if ((W == 10) && (H == 6))
+ {
+ config_bits = (Dp_P << 9) | (0b1101 << 5) | (p << 2);
+ return true;
+ }
+
+ // W+6 H+6 (no dual plane or high prec)
+ if ((!Dp_P) && is_packable(W - 6, 2) && is_packable(H - 6, 2))
+ {
+ config_bits = ((H - 6) << 9) | 256 | ((W - 6) << 5) | (p << 2); // bits 10..9 carry H - 6 here instead of the dual-plane/precision flags
+ return true;
+ }
+
+ // Failed: unsupported weight grid dimensions or config.
+ return false;
+ }
+
+ bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range) // Packs a logical ASTC block into phys_block. Returns false if any validity check below rejects it. If only the endpoint ISE range is wrong, *pExpected_endpoint_range (when non-null) receives the range that fits; otherwise it is left at -1.
+ {
+ memset(&phys_block, 0, sizeof(phys_block));
+
+ if (pExpected_endpoint_range)
+ *pExpected_endpoint_range = -1; // -1 = no suggestion
+
+ assert(!log_block.m_error_flag);
+ if (log_block.m_error_flag)
+ return false;
+
+ if (log_block.m_solid_color_flag_ldr) // solid-colour blocks are emitted as void-extent blocks and bypass everything below
+ {
+ pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]);
+ return true;
+ }
+ else if (log_block.m_solid_color_flag_hdr)
+ {
+ pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]);
+ return true;
+ }
+
+ if ((log_block.m_num_partitions < 1) || (log_block.m_num_partitions > MAX_PARTITIONS))
+ return false;
+
+ // Max usable weight range is 11
+ if (log_block.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE)
+ return false;
+
+ // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
+ if ((log_block.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_block.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))
+ return false;
+
+ if (log_block.m_color_component_selector > 3)
+ return false;
+
+ uint32_t config_bits = 0;
+ if (!get_config_bits(log_block, config_bits))
+ return false;
+
+ uint32_t bit_pos = 0; // write cursor for the forward-packed fields
+ astc_set_bits(&phys_block.m_vals[0], bit_pos, config_bits, 11);
+
+ const uint32_t total_grid_weights = (log_block.m_dual_plane ? 2 : 1) * (log_block.m_grid_width * log_block.m_grid_height);
+ const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);
+
+ // 18.24 Illegal Encodings
+ if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))
+ return false;
+
+ uint32_t total_extra_bits = 0; // bits stored just below the weight data: spilled CEM bits and/or the 2-bit component selector
+
+ astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_num_partitions - 1, 2);
+
+ if (log_block.m_num_partitions > 1)
+ {
+ if (log_block.m_partition_id >= NUM_PARTITION_PATTERNS)
+ return false;
+
+ astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_partition_id, 10);
+
+ uint32_t highest_cem = 0, lowest_cem = UINT32_MAX;
+ for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
+ {
+ highest_cem = my_max(highest_cem, log_block.m_color_endpoint_modes[j]);
+ lowest_cem = my_min(lowest_cem, log_block.m_color_endpoint_modes[j]);
+ }
+
+ if (highest_cem > 15)
+ return false;
+
+ // Ensure CEM range is contiguous
+ if (((highest_cem >> 2) > (1 + (lowest_cem >> 2)))) // the CEM classes (cem >> 2) in use may span at most two adjacent values
+ return false;
+
+ // See tables 79/80
+ uint32_t encoded_cem = log_block.m_color_endpoint_modes[0] << 2; // all partitions share one CEM: low 2 bits stay 00, the mode follows
+ if (lowest_cem != highest_cem)
+ {
+ encoded_cem = my_min<uint32_t>(3, 1 + (lowest_cem >> 2)); // low 2 bits select the base class (stored as class + 1, capped at 3)
+
+ // See tables at 23.11 Color Endpoint Mode
+ for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
+ {
+ const int M = log_block.m_color_endpoint_modes[j] & 3;
+
+ const int C = (log_block.m_color_endpoint_modes[j] >> 2) - ((encoded_cem & 3) - 1); // class offset from the base class; must be 0 or 1
+ if ((C & 1) != C)
+ return false;
+
+ encoded_cem |= (C << (2 + j)) | (M << (2 + log_block.m_num_partitions + 2 * j)); // layout: 2 selector bits, one C bit per partition, then two M bits per partition
+ }
+
+ total_extra_bits = 3 * log_block.m_num_partitions - 4; // encoded_cem is 2 + 3*partitions bits wide; 6 of them are written inline below
+
+ if ((total_weight_bits + total_extra_bits) > 128)
+ return false;
+
+ uint32_t cem_bit_pos = 128 - total_weight_bits - total_extra_bits; // directly below the weight bits, which occupy the top of the block
+ astc_set_bits(&phys_block.m_vals[0], cem_bit_pos, encoded_cem >> 6, total_extra_bits);
+ }
+
+ astc_set_bits(&phys_block.m_vals[0], bit_pos, encoded_cem & 0x3f, 6);
+ }
+ else
+ {
+ if (log_block.m_partition_id)
+ return false;
+ if (log_block.m_color_endpoint_modes[0] > 15)
+ return false;
+
+ astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_color_endpoint_modes[0], 4);
+ }
+
+ if (log_block.m_dual_plane)
+ {
+ if (log_block.m_num_partitions > 3)
+ return false;
+
+ total_extra_bits += 2;
+
+ uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits; // selector goes below the weights and below any spilled CEM bits
+ astc_set_bits(&phys_block.m_vals[0], ccs_bit_pos, log_block.m_color_component_selector, 2);
+ }
+
+ const uint32_t total_config_bits = bit_pos + total_extra_bits;
+ const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits; // what is left for the endpoint values
+ if (num_remaining_bits < 0)
+ return false;
+
+ uint32_t total_cem_vals = 0;
+ for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
+ total_cem_vals += 2 + 2 * (log_block.m_color_endpoint_modes[j] >> 2); // 2, 4, 6 or 8 values per partition depending on the CEM class
+
+ if (total_cem_vals > MAX_ENDPOINTS)
+ return false;
+
+ int endpoint_ise_range = -1;
+ for (int k = 20; k > 0; k--) // search from range 20 downwards for the first range whose encoded size fits
+ {
+ int bits = get_ise_sequence_bits(total_cem_vals, k);
+ if (bits <= num_remaining_bits)
+ {
+ endpoint_ise_range = k;
+ break;
+ }
+ }
+
+ // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
+ if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
+ return false;
+
+ // Ensure the caller utilized the right endpoint ISE range.
+ if ((int)log_block.m_endpoint_ise_range != endpoint_ise_range)
+ {
+ if (pExpected_endpoint_range)
+ *pExpected_endpoint_range = endpoint_ise_range;
+ return false;
+ }
+
+ // Pack endpoints forwards
+ encode_bise(&phys_block.m_vals[0], log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range);
+
+ // Pack weights backwards
+ uint32_t weight_data[4] = { 0 };
+ encode_bise(weight_data, log_block.m_weights, 0, total_grid_weights, log_block.m_weight_ise_range);
+
+ for (uint32_t i = 0; i < 4; i++)
+ phys_block.m_vals[i] |= rev_dword(weight_data[3 - i]); // reversing dword order and the bits within each dword mirrors the 128-bit stream, so weight bit 0 lands on block bit 127
+
+ return true;
+ }
+
+ // Expands a num_src_bits wide value to num_dst_bits by repeating its bit pattern from the
+ // top of the destination downwards; the last copy is truncated to its upper bits if it does not fit.
+ static inline uint32_t bit_replication_scale(uint32_t src, int num_src_bits, int num_dst_bits)
+ {
+     assert(num_src_bits <= num_dst_bits);
+     assert((src & ((1 << num_src_bits) - 1)) == src);
+
+     uint32_t result = 0;
+
+     // `unfilled` counts the low destination bits that still need a copy of src.
+     int unfilled = num_dst_bits;
+     while (unfilled > 0)
+     {
+         const int shift = unfilled - num_src_bits;
+
+         if (shift >= 0)
+             result |= (src << shift);
+         else
+             result |= (src >> -shift);
+
+         unfilled -= num_src_bits;
+     }
+
+     return result;
+ }
+
+ uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range) // Dequantizes the endpoint ISE symbol `val` of range `ise_range`: bit-only ranges use bit replication to 8 bits, trit/quint ranges use the A/B/C/D formula below. Unsupported ranges assert and return 0.
+ {
+ assert((ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE));
+ assert(val < get_ise_levels(ise_range));
+
+ uint32_t u = 0;
+
+ switch (ise_range)
+ {
+ case 5:
+ {
+ u = bit_replication_scale(val, 3, 8);
+ break;
+ }
+ case 8:
+ {
+ u = bit_replication_scale(val, 4, 8);
+ break;
+ }
+ case 11:
+ {
+ u = bit_replication_scale(val, 5, 8);
+ break;
+ }
+ case 14:
+ {
+ u = bit_replication_scale(val, 6, 8);
+ break;
+ }
+ case 17:
+ {
+ u = bit_replication_scale(val, 7, 8);
+ break;
+ }
+ case 20:
+ {
+ u = val; // already a full 8-bit value
+ break;
+ }
+ case 4:
+ case 6:
+ case 7:
+ case 9:
+ case 10:
+ case 12:
+ case 13:
+ case 15:
+ case 16:
+ case 18:
+ case 19:
+ {
+ const uint32_t num_bits = g_ise_range_table[ise_range][0];
+ const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
+ const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
+
+ // compute Table 103 row index
+ const int range_index = (num_bits * 2 + (num_quints ? 1 : 0)) - 2; // even = trit range, odd = quint range; 1 bit + trit maps to 0
+
+ assert(range_index >= 0 && range_index <= 10);
+
+ uint32_t bits = val & ((1 << num_bits) - 1); // plain-bit part of the symbol
+ uint32_t tval = val >> num_bits; // trit or quint part of the symbol
+
+ assert(tval < (num_trits ? 3U : 5U));
+
+ uint32_t a = bits & 1;
+ uint32_t b = (bits >> 1) & 1;
+ uint32_t c = (bits >> 2) & 1;
+ uint32_t d = (bits >> 3) & 1;
+ uint32_t e = (bits >> 4) & 1;
+ uint32_t f = (bits >> 5) & 1;
+
+ uint32_t A = a ? 511 : 0; // the lowest bit replicated across 9 bits
+ uint32_t B = 0; // stays 0 for range_index 0 and 1, which have no bits above `a`
+
+ switch (range_index)
+ {
+ case 2:
+ {
+ // 876543210
+ // b000b0bb0
+ B = (b << 1) | (b << 2) | (b << 4) | (b << 8);
+ break;
+ }
+ case 3:
+ {
+ // 876543210
+ // b0000bb00
+ B = (b << 2) | (b << 3) | (b << 8);
+ break;
+ }
+ case 4:
+ {
+ // 876543210
+ // cb000cbcb
+ B = b | (c << 1) | (b << 2) | (c << 3) | (b << 7) | (c << 8);
+ break;
+ }
+ case 5:
+ {
+ // 876543210
+ // cb0000cbc
+ B = c | (b << 1) | (c << 2) | (b << 7) | (c << 8);
+ break;
+ }
+ case 6:
+ {
+ // 876543210
+ // dcb000dcb
+ B = b | (c << 1) | (d << 2) | (b << 6) | (c << 7) | (d << 8);
+ break;
+ }
+ case 7:
+ {
+ // 876543210
+ // dcb0000dc
+ B = c | (d << 1) | (b << 6) | (c << 7) | (d << 8);
+ break;
+ }
+ case 8:
+ {
+ // 876543210
+ // edcb000ed
+ B = d | (e << 1) | (b << 5) | (c << 6) | (d << 7) | (e << 8);
+ break;
+ }
+ case 9:
+ {
+ // 876543210
+ // edcb0000e
+ B = e | (b << 5) | (c << 6) | (d << 7) | (e << 8);
+ break;
+ }
+ case 10:
+ {
+ // 876543210
+ // fedcb000f
+ B = f | (b << 4) | (c << 5) | (d << 6) | (e << 7) | (f << 8);
+ break;
+ }
+ default:
+ break;
+ }
+
+ static uint8_t C_vals[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 }; // per-row multiplier C, indexed by range_index
+ uint32_t C = C_vals[range_index];
+ uint32_t D = tval;
+
+ u = D * C + B;
+ u = u ^ A;
+ u = (A & 0x80) | (u >> 2); // drop two low bits and take the top bit from A
+
+ break;
+ }
+ default:
+ {
+ assert(0);
+ break;
+ }
+ }
+
+ return u;
+ }
+
+ uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range) // Dequantizes the weight ISE symbol `val` of range `ise_range` (0..11). A 0..63 value is computed first and then adjusted at the end so the maximum becomes 64. Unsupported ranges assert and return 0.
+ {
+ assert(val < get_ise_levels(ise_range));
+
+ uint32_t u = 0;
+ switch (ise_range)
+ {
+ case 0:
+ {
+ u = val ? 63 : 0;
+ break;
+ }
+ case 1: // 0-2
+ {
+ const uint8_t s_tab_0_2[3] = { 0, 32, 63 };
+ u = s_tab_0_2[val];
+ break;
+ }
+ case 2: // 0-3
+ {
+ u = bit_replication_scale(val, 2, 6);
+ break;
+ }
+ case 3: // 0-4
+ {
+ const uint8_t s_tab_0_4[5] = { 0, 16, 32, 47, 63 };
+ u = s_tab_0_4[val];
+ break;
+ }
+ case 5: // 0-7
+ {
+ u = bit_replication_scale(val, 3, 6);
+ break;
+ }
+ case 8: // 0-15
+ {
+ u = bit_replication_scale(val, 4, 6);
+ break;
+ }
+ case 11: // 0-31
+ {
+ u = bit_replication_scale(val, 5, 6);
+ break;
+ }
+ case 4: // 0-5
+ case 6: // 0-9
+ case 7: // 0-11
+ case 9: // 0-19
+ case 10: // 0-23
+ {
+ const uint32_t num_bits = g_ise_range_table[ise_range][0];
+ const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
+ const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
+
+ // compute Table 103 row index
+ const int range_index = num_bits * 2 + (num_quints ? 1 : 0); // 2..6 for the five ranges handled here
+
+ // Extract bits and tris/quints from value
+ const uint32_t bits = val & ((1u << num_bits) - 1);
+ const uint32_t D = val >> num_bits;
+
+ assert(D < (num_trits ? 3U : 5U));
+
+ // Now dequantize
+ // See Table 103. ASTC weight unquantization parameters
+ static const uint32_t C_table[5] = { 50, 28, 23, 13, 11 }; // multiplier C, indexed by range_index - 2
+
+ const uint32_t a = bits & 1, b = (bits >> 1) & 1, c = (bits >> 2) & 1;
+
+ const uint32_t A = (a == 0) ? 0 : 0x7F; // the lowest bit replicated across 7 bits
+
+ uint32_t B = 0; // stays 0 for range_index 2 and 3 (single-bit ranges)
+ if (range_index == 4)
+ B = ((b << 6) | (b << 2) | (b << 0));
+ else if (range_index == 5)
+ B = ((b << 6) | (b << 1));
+ else if (range_index == 6)
+ B = ((c << 6) | (b << 5) | (c << 1) | (b << 0));
+
+ const uint32_t C = C_table[range_index - 2];
+
+ u = D * C + B;
+ u = u ^ A;
+ u = (A & 0x20) | (u >> 2); // drop two low bits and take the top bit from A
+ break;
+ }
+ default:
+ assert(0);
+ break;
+ }
+
+ if (u > 32) // values above 32 are bumped by one, so 63 becomes 64 and 33 is never returned
+ u++;
+
+ return u;
+ }
+
+ // Returns the ISE symbol whose dequantized value is closest to the [0,255] endpoint value v.
+ // Ties go to the lowest symbol index; the scan stops as soon as an exact match is found.
+ uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range)
+ {
+     assert(ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE && ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE);
+
+     const uint32_t num_levels = get_ise_levels(ise_range);
+
+     int lowest_err = INT_MAX;
+     int nearest_sym = 0;
+
+     for (uint32_t sym = 0; (sym < num_levels) && (lowest_err != 0); sym++)
+     {
+         const int dequantized = dequant_bise_endpoint(sym, ise_range);
+         const int err = labs(v - dequantized);
+
+         if (err >= lowest_err)
+             continue;
+
+         lowest_err = err;
+         nearest_sym = sym;
+     }
+
+     return nearest_sym;
+ }
+
+ // Returns the ISE symbol whose dequantized weight is closest to the [0,64] weight value v.
+ // Ties go to the lowest symbol index; the scan stops as soon as an exact match is found.
+ uint32_t find_nearest_bise_weight(int v, uint32_t ise_range)
+ {
+     assert(ise_range >= FIRST_VALID_WEIGHT_ISE_RANGE && ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
+     assert(v <= (int)MAX_WEIGHT_VALUE);
+
+     const uint32_t num_levels = get_ise_levels(ise_range);
+
+     int lowest_err = INT_MAX;
+     int nearest_sym = 0;
+
+     for (uint32_t sym = 0; (sym < num_levels) && (lowest_err != 0); sym++)
+     {
+         const int dequantized = dequant_bise_weight(sym, ise_range);
+         const int err = labs(v - dequantized);
+
+         if (err >= lowest_err)
+             continue;
+
+         lowest_err = err;
+         nearest_sym = sym;
+     }
+
+     return nearest_sym;
+ }
+
+ // Builds the lookup tables for one ISE range. Every output pointer is optional (may be null).
+ // Note pISE_to_val is only written for symbols that are the nearest match of at least one input value.
+ void create_quant_tables(
+     uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
+     uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
+     uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
+     uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
+     uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights
+     bool weight_flag) // false if block endpoints, true if weights
+ {
+     const uint32_t total_input_vals = weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256;
+
+     // Pass 1: value -> nearest symbol, and symbol -> dequantized value.
+     for (uint32_t v = 0; v < total_input_vals; v++)
+     {
+         uint32_t sym;
+         if (weight_flag)
+             sym = astc_helpers::find_nearest_bise_weight(v, ise_range);
+         else
+             sym = astc_helpers::find_nearest_bise_endpoint(v, ise_range);
+
+         if (pVal_to_ise)
+             pVal_to_ise[v] = (uint8_t)sym;
+
+         if (pISE_to_val)
+         {
+             if (weight_flag)
+                 pISE_to_val[sym] = (uint8_t)astc_helpers::dequant_bise_weight(sym, ise_range);
+             else
+                 pISE_to_val[sym] = (uint8_t)astc_helpers::dequant_bise_endpoint(sym, ise_range);
+         }
+     }
+
+     // Pass 2: rank tables, only when at least one was requested.
+     if (!pISE_to_rank && !pRank_to_ISE)
+         return;
+
+     const uint32_t num_levels = get_ise_levels(ise_range);
+
+     const bool bits_only = !g_ise_range_table[ise_range][1] && !g_ise_range_table[ise_range][2];
+     if (bits_only)
+     {
+         // Only bits: symbol order already equals rank order.
+         for (uint32_t sym = 0; sym < num_levels; sym++)
+         {
+             if (pISE_to_rank)
+                 pISE_to_rank[sym] = (uint8_t)sym;
+
+             if (pRank_to_ISE)
+                 pRank_to_ISE[sym] = (uint8_t)sym;
+         }
+         return;
+     }
+
+     // Range has trits or quints: sort the symbols by their dequantized value.
+     // Each key holds the dequantized value in the high 16 bits and the ISE symbol in the low bits.
+     uint32_t keys[256];
+     for (uint32_t sym = 0; sym < num_levels; sym++)
+     {
+         uint32_t dequantized;
+         if (weight_flag)
+             dequantized = astc_helpers::dequant_bise_weight(sym, ise_range);
+         else
+             dequantized = astc_helpers::dequant_bise_endpoint(sym, ise_range);
+
+         keys[sym] = (dequantized << 16) | sym;
+     }
+
+     std::sort(keys, keys + num_levels);
+
+     for (uint32_t rank = 0; rank < num_levels; rank++)
+     {
+         // The low byte of the key is the ISE symbol.
+         const uint32_t sym = (uint8_t)keys[rank];
+
+         if (pISE_to_rank)
+             pISE_to_rank[sym] = (uint8_t)rank;
+
+         if (pRank_to_ISE)
+             pRank_to_ISE[rank] = (uint8_t)sym;
+     }
+ }
+
+ // Writes a 16-byte LDR void-extent block: every byte starts as 0xFF, bytes 0-1 get the header
+ // pattern, and bytes 8-15 receive the four 16-bit channel values (r, g, b, a), low byte first.
+ void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah)
+ {
+     uint8_t* pBytes = (uint8_t*)&blk.m_vals[0];
+     memset(pBytes, 0xFF, 16);
+
+     pBytes[0] = 0b11111100;
+     pBytes[1] = 0b11111101;
+
+     const uint16_t chans[4] = { rh, gh, bh, ah };
+     for (uint32_t c = 0; c < 4; c++)
+     {
+         pBytes[8 + c * 2] = (uint8_t)chans[c];
+         pBytes[9 + c * 2] = (uint8_t)(chans[c] >> 8);
+     }
+ }
+
+ // rh-ah are half-floats
+ // Writes a 16-byte HDR void-extent block: every byte starts as 0xFF, byte 0 gets the header
+ // pattern (byte 1 stays 0xFF), and bytes 8-15 receive the four channel values, low byte first.
+ void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah)
+ {
+     uint8_t* pBytes = (uint8_t*)&blk.m_vals[0];
+     memset(pBytes, 0xFF, 16);
+
+     pBytes[0] = 0b11111100;
+
+     const uint16_t chans[4] = { rh, gh, bh, ah };
+     for (uint32_t c = 0; c < 4; c++)
+     {
+         pBytes[8 + c * 2] = (uint8_t)chans[c];
+         pBytes[9 + c * 2] = (uint8_t)(chans[c] >> 8);
+     }
+ }
+
+ // Returns true if `mode` is one of the ten LDR colour endpoint modes listed below, false for anything else.
+ bool is_cem_ldr(uint32_t mode)
+ {
+     const bool is_lum_mode =
+         (mode == CEM_LDR_LUM_DIRECT) ||
+         (mode == CEM_LDR_LUM_BASE_PLUS_OFS) ||
+         (mode == CEM_LDR_LUM_ALPHA_DIRECT) ||
+         (mode == CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS);
+
+     const bool is_rgb_mode =
+         (mode == CEM_LDR_RGB_BASE_SCALE) ||
+         (mode == CEM_LDR_RGB_DIRECT) ||
+         (mode == CEM_LDR_RGB_BASE_PLUS_OFFSET) ||
+         (mode == CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A);
+
+     const bool is_rgba_mode =
+         (mode == CEM_LDR_RGBA_DIRECT) ||
+         (mode == CEM_LDR_RGBA_BASE_PLUS_OFFSET);
+
+     return is_lum_mode || is_rgb_mode || is_rgba_mode;
+ }
+
+ // Returns true if w x h is one of the 14 block footprints listed in the table below.
+ // Order matters: e.g. 5x4 is accepted but 4x5 is not.
+ bool is_valid_block_size(uint32_t w, uint32_t h)
+ {
+     assert((w >= MIN_BLOCK_DIM) && (w <= MAX_BLOCK_DIM));
+     assert((h >= MIN_BLOCK_DIM) && (h <= MAX_BLOCK_DIM));
+
+     static const uint8_t s_valid_sizes[14][2] =
+     {
+         { 4, 4 }, { 5, 4 },
+         { 5, 5 },
+         { 6, 5 }, { 6, 6 },
+         { 8, 5 }, { 8, 6 }, { 10, 5 }, { 10, 6 },
+         { 8, 8 }, { 10, 8 }, { 10, 10 },
+         { 12, 10 }, { 12, 12 }
+     };
+
+     for (uint32_t i = 0; i < 14; i++)
+     {
+         if ((w == s_valid_sizes[i][0]) && (h == s_valid_sizes[i][1]))
+             return true;
+     }
+
+     return false;
+ }
+
+ // Returns true if at least one partition of the block uses a CEM for which is_cem_hdr() is true.
+ bool block_has_any_hdr_cems(const log_astc_block& log_blk)
+ {
+     assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
+
+     bool found_hdr = false;
+     for (uint32_t part = 0; (part < log_blk.m_num_partitions) && !found_hdr; part++)
+     {
+         if (is_cem_hdr(log_blk.m_color_endpoint_modes[part]))
+             found_hdr = true;
+     }
+
+     return found_hdr;
+ }
+
+ // Returns true if at least one partition of the block uses a CEM for which is_cem_hdr() is false.
+ bool block_has_any_ldr_cems(const log_astc_block& log_blk)
+ {
+     assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
+
+     bool found_ldr = false;
+     for (uint32_t part = 0; (part < log_blk.m_num_partitions) && !found_ldr; part++)
+     {
+         if (!is_cem_hdr(log_blk.m_color_endpoint_modes[part]))
+             found_ldr = true;
+     }
+
+     return found_ldr;
+ }
+
+ dequant_tables g_dequant_tables; // global dequantization tables; populated by init_tables()
+
+ void precompute_texel_partitions_4x4(); // forward declaration, defined further down in this file
+
+ void init_tables(bool init_rank_tabs) // Initializes g_dequant_tables (forwarding init_rank_tabs to its init()) and fills the precomputed 4x4 texel partition table.
+ {
+ g_dequant_tables.init(init_rank_tabs);
+
+ precompute_texel_partitions_4x4();
+ }
+
+ struct weighted_sample // One bilinear tap set for a block texel, as produced by compute_upsample_weights().
+ {
+ uint8_t m_src_x; // weight-grid X of the [0][0] tap; the other column is m_src_x + 1
+ uint8_t m_src_y; // weight-grid Y of the [0][0] tap; the other row is m_src_y + 1
+ uint8_t m_weights[2][2]; // [y][x], scaled by 16, round by adding 8
+ };
+
+ // Computes, for every texel of a block_width x block_height block, which weight-grid cell it
+ // samples and the four bilinear factors (which sum to 16) used to interpolate the grid weights.
+ static void compute_upsample_weights(
+     int block_width, int block_height,
+     int weight_grid_width, int weight_grid_height,
+     weighted_sample* pWeights) // there will be block_width * block_height bilinear samples
+ {
+     const uint32_t scaleY = (1024 + block_height / 2) / (block_height - 1);
+     const uint32_t scaleX = (1024 + block_width / 2) / (block_width - 1);
+
+     // Samples are written in row-major order, so a running pointer visits them in sequence.
+     weighted_sample* pSample = pWeights;
+
+     for (int ty = 0; ty < block_height; ty++)
+     {
+         // The vertical terms are identical for every texel in this row.
+         const uint32_t gridY = (scaleY * ty * (weight_grid_height - 1) + 32) >> 6;
+         const uint32_t cellY = gridY >> 4;
+         const uint32_t fracY = gridY & 0xf;
+
+         for (int tx = 0; tx < block_width; tx++, pSample++)
+         {
+             const uint32_t gridX = (scaleX * tx * (weight_grid_width - 1) + 32) >> 6;
+             const uint32_t cellX = gridX >> 4;
+             const uint32_t fracX = gridX & 0xf;
+
+             // 4.4 fixed point fractions -> four factors
+             const uint32_t f11 = (fracX * fracY + 8) >> 4;
+             const uint32_t f10 = fracY - f11;
+             const uint32_t f01 = fracX - f11;
+             const uint32_t f00 = 16 - fracX - fracY + f11;
+
+             pSample->m_src_x = (uint8_t)cellX;
+             pSample->m_src_y = (uint8_t)cellY;
+             pSample->m_weights[0][0] = (uint8_t)f00;
+             pSample->m_weights[0][1] = (uint8_t)f01;
+             pSample->m_weights[1][0] = (uint8_t)f10;
+             pSample->m_weights[1][1] = (uint8_t)f11;
+         }
+     }
+ }
+
+ // Should be dequantized [0,64] weights
+ // Bilinearly resamples a wx x wy weight grid to the bx x by block. When both have the same
+ // number of entries the source is copied unchanged.
+ static void upsample_weight_grid(
+     uint32_t bx, uint32_t by, // destination/to dimension
+     uint32_t wx, uint32_t wy, // source/from dimension
+     const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
+     uint8_t* pDst_weights) // [by][bx]
+ {
+     assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12));
+     assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by));
+
+     const uint32_t num_src = wx * wy;
+     const uint32_t num_dst = bx * by;
+
+     if (num_src == num_dst)
+     {
+         memcpy(pDst_weights, pSrc_weights, num_src);
+         return;
+     }
+
+     weighted_sample samples[12 * 12];
+     compute_upsample_weights(bx, by, wx, wy, samples);
+
+     // Samples and destination weights share the same row-major order.
+     for (uint32_t i = 0; i < num_dst; i++)
+     {
+         const weighted_sample& s = samples[i];
+
+         const uint32_t f00 = s.m_weights[0][0];
+         const uint32_t f01 = s.m_weights[0][1];
+         const uint32_t f10 = s.m_weights[1][0];
+         const uint32_t f11 = s.m_weights[1][1];
+
+         assert(f00 || f01 || f10 || f11);
+
+         // Index of the top-left tap; the other taps are +1 (right) and +wx (below).
+         const uint32_t top_left = s.m_src_x + s.m_src_y * wx;
+
+         // Start at 8 so the final >> 4 rounds to nearest. Taps with a zero factor are never read.
+         uint32_t acc = 8;
+         if (f00) acc += pSrc_weights[bounds_check(top_left, 0U, num_src)] * f00;
+         if (f01) acc += pSrc_weights[bounds_check(top_left + 1, 0U, num_src)] * f01;
+         if (f10) acc += pSrc_weights[bounds_check(top_left + wx, 0U, num_src)] * f10;
+         if (f11) acc += pSrc_weights[bounds_check(top_left + wx + 1, 0U, num_src)] * f11;
+
+         pDst_weights[i] = (uint8_t)(acc >> 4);
+     }
+ }
+
+ // 32-bit integer mixing function; compute_texel_partition() feeds it the partition seed.
+ // The sequence of shift/xor/add steps must not be reordered.
+ inline uint32_t hash52(uint32_t v)
+ {
+     uint32_t mixed = v;
+
+     mixed ^= mixed >> 15;
+     mixed -= mixed << 17;
+     mixed += mixed << 7;
+     mixed += mixed << 4;
+     mixed ^= mixed >> 5;
+     mixed += mixed << 16;
+     mixed ^= mixed >> 7;
+     mixed ^= mixed >> 3;
+     mixed ^= mixed << 6;
+     mixed ^= mixed >> 17;
+
+     return mixed;
+ }
+
+ int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block) // Returns the partition index (0..3) of texel (xIn, yIn) for partition seed seedIn. Only zIn == 0 is supported (asserted). With small_block set, the coordinates are doubled before use.
+ {
+ assert(zIn == 0);
+
+ const uint32_t x = small_block ? xIn << 1 : xIn;
+ const uint32_t y = small_block ? yIn << 1 : yIn;
+ const uint32_t z = small_block ? zIn << 1 : zIn;
+ const uint32_t seed = seedIn + 1024 * (num_partitions - 1); // each partition count gets its own block of 1024 seeds
+ const uint32_t rnum = hash52(seed);
+
+ uint8_t seed1 = (uint8_t)(rnum & 0xf); // twelve 4-bit fields sliced out of the hash
+ uint8_t seed2 = (uint8_t)((rnum >> 4) & 0xf);
+ uint8_t seed3 = (uint8_t)((rnum >> 8) & 0xf);
+ uint8_t seed4 = (uint8_t)((rnum >> 12) & 0xf);
+ uint8_t seed5 = (uint8_t)((rnum >> 16) & 0xf);
+ uint8_t seed6 = (uint8_t)((rnum >> 20) & 0xf);
+ uint8_t seed7 = (uint8_t)((rnum >> 24) & 0xf);
+ uint8_t seed8 = (uint8_t)((rnum >> 28) & 0xf);
+ uint8_t seed9 = (uint8_t)((rnum >> 18) & 0xf);
+ uint8_t seed10 = (uint8_t)((rnum >> 22) & 0xf);
+ uint8_t seed11 = (uint8_t)((rnum >> 26) & 0xf);
+ uint8_t seed12 = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf);
+
+ seed1 = (uint8_t)(seed1 * seed1); // square each field (at most 15 * 15 = 225, still fits in 8 bits)
+ seed2 = (uint8_t)(seed2 * seed2);
+ seed3 = (uint8_t)(seed3 * seed3);
+ seed4 = (uint8_t)(seed4 * seed4);
+ seed5 = (uint8_t)(seed5 * seed5);
+ seed6 = (uint8_t)(seed6 * seed6);
+ seed7 = (uint8_t)(seed7 * seed7);
+ seed8 = (uint8_t)(seed8 * seed8);
+ seed9 = (uint8_t)(seed9 * seed9);
+ seed10 = (uint8_t)(seed10 * seed10);
+ seed11 = (uint8_t)(seed11 * seed11);
+ seed12 = (uint8_t)(seed12 * seed12);
+
+ const int shA = (seed & 2) != 0 ? 4 : 5; // shift amounts are chosen from the seed's low bits and the partition count
+ const int shB = (num_partitions == 3) ? 6 : 5;
+ const int sh1 = (seed & 1) != 0 ? shA : shB;
+ const int sh2 = (seed & 1) != 0 ? shB : shA;
+ const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2;
+
+ seed1 = (uint8_t)(seed1 >> sh1); // odd-numbered x coefficients use sh1, y coefficients sh2, z coefficients sh3
+ seed2 = (uint8_t)(seed2 >> sh2);
+ seed3 = (uint8_t)(seed3 >> sh1);
+ seed4 = (uint8_t)(seed4 >> sh2);
+ seed5 = (uint8_t)(seed5 >> sh1);
+ seed6 = (uint8_t)(seed6 >> sh2);
+ seed7 = (uint8_t)(seed7 >> sh1);
+ seed8 = (uint8_t)(seed8 >> sh2);
+ seed9 = (uint8_t)(seed9 >> sh3);
+ seed10 = (uint8_t)(seed10 >> sh3);
+ seed11 = (uint8_t)(seed11 >> sh3);
+ seed12 = (uint8_t)(seed12 >> sh3);
+
+ const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14)); // one 6-bit score per candidate partition
+ const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10));
+ const int c = (num_partitions >= 3) ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0; // unused partitions score 0
+ const int d = (num_partitions >= 4) ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0;
+
+ return (a >= b && a >= c && a >= d) ? 0 // highest score wins; ties go to the lower partition index
+ : (b >= c && b >= d) ? 1
+ : (c >= d) ? 2
+ : 3;
+ }
+
+ // Precomputed partition indices for 4x4 blocks: [seed][0] holds the 2-partition pattern and
+ // [seed][1] the 3-partition pattern. Each dword packs 16 two-bit indices; texel (x, y) lives
+ // at bit offset x * 2 + y * 8.
+ static uint32_t g_texel_partitions_4x4[1024][2];
+
+ // Fills g_texel_partitions_4x4 for all 1024 seeds using compute_texel_partition() with
+ // small_block = true.
+ void precompute_texel_partitions_4x4()
+ {
+     for (uint32_t p = 0; p < 1024; p++)
+     {
+         uint32_t v2 = 0, v3 = 0;
+
+         for (uint32_t y = 0; y < 4; y++)
+         {
+             for (uint32_t x = 0; x < 4; x++)
+             {
+                 const uint32_t shift = x * 2 + y * 8;
+
+                 // Convert to unsigned BEFORE shifting: shift reaches 30, and shifting the signed
+                 // int result (e.g. 2 << 30) would move a bit into the sign position.
+                 const uint32_t part2 = (uint32_t)compute_texel_partition(p, x, y, 0, 2, true);
+                 const uint32_t part3 = (uint32_t)compute_texel_partition(p, x, y, 0, 3, true);
+
+                 v2 |= (part2 << shift);
+                 v3 |= (part3 << shift);
+             }
+         }
+
+         g_texel_partitions_4x4[p][0] = v2;
+         g_texel_partitions_4x4[p][1] = v3;
+     }
+ }
+
+ // Looks up the partition index of texel (x, y) of a 4x4 block in g_texel_partitions_4x4.
+ // Only 2 and 3 partitions are tabulated; the table must have been filled beforehand.
+ static inline int get_precompute_texel_partitions_4x4(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions)
+ {
+     assert(g_texel_partitions_4x4[1][0]);
+     assert(seed < 1024);
+     assert((x <= 3) && (y <= 3));
+     assert((num_partitions >= 2) && (num_partitions <= 3));
+
+     // Two bits per texel, eight bits per row.
+     const uint32_t packed = g_texel_partitions_4x4[seed][num_partitions - 2];
+     const uint32_t bit_ofs = (y << 3) + (x << 1);
+
+     return (packed >> bit_ofs) & 3;
+ }
+
+ // Blue contraction: red and green are each averaged with blue (rounding down via >> 1),
+ // blue and alpha pass through unchanged. Results are returned through dr/dg/db/da.
+ void blue_contract(
+     int r, int g, int b, int a,
+     int &dr, int &dg, int &db, int &da)
+ {
+     const int red_blue_avg = (r + b) >> 1;
+     const int green_blue_avg = (g + b) >> 1;
+
+     dr = red_blue_avg;
+     dg = green_blue_avg;
+     db = b;
+     da = a;
+ }
+
+ // Moves bit 7 of a into bit 7 of b (after shifting b right by one), then turns the remaining
+ // bits 6..1 of a into a signed 6-bit value in the range [-32, 31].
+ inline void bit_transfer_signed(int& a, int& b)
+ {
+     // b: make room at the top, then take over a's bit 7.
+     b = b >> 1;
+     if (a & 0x80)
+         b |= 0x80;
+
+     // a: keep bits 6..1 as a 6-bit field and sign-extend it from bit 5.
+     a = (a >> 1) & 0x3F;
+     if (a >= 0x20)
+         a -= 0x40;
+ }
+
+ // Limits a to the inclusive range [l, h]. The lower bound is tested first.
+ static inline int clamp(int a, int l, int h)
+ {
+     return (a < l) ? l : ((a > h) ? h : a);
+ }
+
+ // Limits a to the inclusive range [l, h]. The lower bound is tested first; a value that
+ // compares neither below l nor above h is returned unchanged.
+ static inline float clampf(float a, float l, float h)
+ {
+     return (a < l) ? l : ((a > h) ? h : a);
+ }
+
+ // Interprets the low num_src_bits bits of src as a two's complement number and returns it
+ // sign-extended to int. Bits of src above the field are ignored.
+ //   src          - value holding the field in its low bits
+ //   num_src_bits - field width, 2..31
+ inline int sign_extend(int src, int num_src_bits)
+ {
+     assert((num_src_bits >= 2) && (num_src_bits <= 31));
+
+     // Build the masks in unsigned arithmetic: with num_src_bits == 31 the signed expression
+     // (1 << 31) - 1 would overflow int.
+     const uint32_t field_mask = (1u << num_src_bits) - 1u;
+     const uint32_t sign_bit = 1u << (num_src_bits - 1);
+     const uint32_t value = (uint32_t)src;
+
+     if (value & sign_bit)
+         return (int)(value | ~field_mask); // negative: fill everything above the field with ones
+
+     return (int)(value & field_mask); // non-negative: clear everything above the field
+ }
+
+ // endpoints is [4][2]
+ void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t *pE)
+ {
+ assert(cem_index <= CEM_HDR_RGB_HDR_ALPHA);
+
+ int v0 = pE[0], v1 = pE[1];
+
+ int& e0_r = pEndpoints[0][0], &e0_g = pEndpoints[1][0], &e0_b = pEndpoints[2][0], &e0_a = pEndpoints[3][0];
+ int& e1_r = pEndpoints[0][1], &e1_g = pEndpoints[1][1], &e1_b = pEndpoints[2][1], &e1_a = pEndpoints[3][1];
+
+ switch (cem_index)
+ {
+ case CEM_LDR_LUM_DIRECT:
+ {
+ e0_r = v0; e1_r = v1;
+ e0_g = v0; e1_g = v1;
+ e0_b = v0; e1_b = v1;
+ e0_a = 0xFF; e1_a = 0xFF;
+ break;
+ }
+ case CEM_LDR_LUM_BASE_PLUS_OFS:
+ {
+ int l0 = (v0 >> 2) | (v1 & 0xc0);
+ int l1 = l0 + (v1 & 0x3f);
+
+ if (l1 > 0xFF)
+ l1 = 0xFF;
+
+ e0_r = l0; e1_r = l1;
+ e0_g = l0; e1_g = l1;
+ e0_b = l0; e1_b = l1;
+ e0_a = 0xFF; e1_a = 0xFF;
+ break;
+ }
+ case CEM_LDR_LUM_ALPHA_DIRECT:
+ {
+ int v2 = pE[2], v3 = pE[3];
+
+ e0_r = v0; e1_r = v1;
+ e0_g = v0; e1_g = v1;
+ e0_b = v0; e1_b = v1;
+ e0_a = v2; e1_a = v3;
+ break;
+ }
+ case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
+ {
+ int v2 = pE[2], v3 = pE[3];
+
+ bit_transfer_signed(v1, v0);
+ bit_transfer_signed(v3, v2);
+
+ e0_r = v0; e1_r = v0 + v1;
+ e0_g = v0; e1_g = v0 + v1;
+ e0_b = v0; e1_b = v0 + v1;
+ e0_a = v2; e1_a = v2 + v3;
+
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
+ pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
+ }
+
+ break;
+ }
+ case CEM_LDR_RGB_BASE_SCALE:
+ {
+ int v2 = pE[2], v3 = pE[3];
+
+ e0_r = (v0 * v3) >> 8; e1_r = v0;
+ e0_g = (v1 * v3) >> 8; e1_g = v1;
+ e0_b = (v2 * v3) >> 8; e1_b = v2;
+ e0_a = 0xFF; e1_a = 0xFF;
+
+ break;
+ }
+ case CEM_LDR_RGB_DIRECT:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
+
+ if ((v1 + v3 + v5) >= (v0 + v2 + v4))
+ {
+ e0_r = v0; e1_r = v1;
+ e0_g = v2; e1_g = v3;
+ e0_b = v4; e1_b = v5;
+ e0_a = 0xFF; e1_a = 0xFF;
+ }
+ else
+ {
+ blue_contract(v1, v3, v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
+ blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
+ }
+
+ break;
+ }
+ case CEM_LDR_RGB_BASE_PLUS_OFFSET:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
+
+ bit_transfer_signed(v1, v0);
+ bit_transfer_signed(v3, v2);
+ bit_transfer_signed(v5, v4);
+
+ if ((v1 + v3 + v5) >= 0)
+ {
+ e0_r = v0; e1_r = v0 + v1;
+ e0_g = v2; e1_g = v2 + v3;
+ e0_b = v4; e1_b = v4 + v5;
+ e0_a = 0xFF; e1_a = 0xFF;
+ }
+ else
+ {
+ blue_contract(v0 + v1, v2 + v3, v4 + v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
+ blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
+ }
+
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
+ pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
+ }
+
+ break;
+ }
+ case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
+
+ e0_r = (v0 * v3) >> 8; e1_r = v0;
+ e0_g = (v1 * v3) >> 8; e1_g = v1;
+ e0_b = (v2 * v3) >> 8; e1_b = v2;
+ e0_a = v4; e1_a = v5;
+
+ break;
+ }
+ case CEM_LDR_RGBA_DIRECT:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
+
+ if ((v1 + v3 + v5) >= (v0 + v2 + v4))
+ {
+ e0_r = v0; e1_r = v1;
+ e0_g = v2; e1_g = v3;
+ e0_b = v4; e1_b = v5;
+ e0_a = v6; e1_a = v7;
+ }
+ else
+ {
+ blue_contract(v1, v3, v5, v7, e0_r, e0_g, e0_b, e0_a);
+ blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
+ }
+
+ break;
+ }
+ case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
+
+ bit_transfer_signed(v1, v0);
+ bit_transfer_signed(v3, v2);
+ bit_transfer_signed(v5, v4);
+ bit_transfer_signed(v7, v6);
+
+ if ((v1 + v3 + v5) >= 0)
+ {
+ e0_r = v0; e1_r = v0 + v1;
+ e0_g = v2; e1_g = v2 + v3;
+ e0_b = v4; e1_b = v4 + v5;
+ e0_a = v6; e1_a = v6 + v7;
+ }
+ else
+ {
+ blue_contract(v0 + v1, v2 + v3, v4 + v5, v6 + v7, e0_r, e0_g, e0_b, e0_a);
+ blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
+ }
+
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
+ pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
+ }
+
+ break;
+ }
+ case CEM_HDR_LUM_LARGE_RANGE:
+ {
+ int y0, y1;
+ if (v1 >= v0)
+ {
+ y0 = (v0 << 4);
+ y1 = (v1 << 4);
+ }
+ else
+ {
+ y0 = (v1 << 4) + 8;
+ y1 = (v0 << 4) - 8;
+ }
+
+ e0_r = y0; e1_r = y1;
+ e0_g = y0; e1_g = y1;
+ e0_b = y0; e1_b = y1;
+ e0_a = 0x780; e1_a = 0x780;
+
+ break;
+ }
+ case CEM_HDR_LUM_SMALL_RANGE:
+ {
+ int y0, y1, d;
+
+ if ((v0 & 0x80) != 0)
+ {
+ y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);
+ d = (v1 & 0x1F) << 2;
+ }
+ else
+ {
+ y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);
+ d = (v1 & 0x0F) << 1;
+ }
+
+ y1 = y0 + d;
+ if (y1 > 0xFFF)
+ y1 = 0xFFF;
+
+ e0_r = y0; e1_r = y1;
+ e0_g = y0; e1_g = y1;
+ e0_b = y0; e1_b = y1;
+ e0_a = 0x780; e1_a = 0x780;
+
+ break;
+ }
+ case CEM_HDR_RGB_BASE_SCALE:
+ {
+ int v2 = pE[2], v3 = pE[3];
+
+ int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
+
+ int majcomp, mode;
+ if ((modeval & 0xC) != 0xC)
+ {
+ majcomp = modeval >> 2;
+ mode = modeval & 3;
+ }
+ else if (modeval != 0xF)
+ {
+ majcomp = modeval & 3;
+ mode = 4;
+ }
+ else
+ {
+ majcomp = 0;
+ mode = 5;
+ }
+
+ int red = v0 & 0x3f;
+ int green = v1 & 0x1f;
+ int blue = v2 & 0x1f;
+ int scale = v3 & 0x1f;
+
+ int x0 = (v1 >> 6) & 1;
+ int x1 = (v1 >> 5) & 1;
+ int x2 = (v2 >> 6) & 1;
+ int x3 = (v2 >> 5) & 1;
+ int x4 = (v3 >> 7) & 1;
+ int x5 = (v3 >> 6) & 1;
+ int x6 = (v3 >> 5) & 1;
+
+ int ohm = 1 << mode;
+ if (ohm & 0x30) green |= x0 << 6;
+ if (ohm & 0x3A) green |= x1 << 5;
+ if (ohm & 0x30) blue |= x2 << 6;
+ if (ohm & 0x3A) blue |= x3 << 5;
+ if (ohm & 0x3D) scale |= x6 << 5;
+ if (ohm & 0x2D) scale |= x5 << 6;
+ if (ohm & 0x04) scale |= x4 << 7;
+ if (ohm & 0x3B) red |= x4 << 6;
+ if (ohm & 0x04) red |= x3 << 6;
+ if (ohm & 0x10) red |= x5 << 7;
+ if (ohm & 0x0F) red |= x2 << 7;
+ if (ohm & 0x05) red |= x1 << 8;
+ if (ohm & 0x0A) red |= x0 << 8;
+ if (ohm & 0x05) red |= x0 << 9;
+ if (ohm & 0x02) red |= x6 << 9;
+ if (ohm & 0x01) red |= x3 << 10;
+ if (ohm & 0x02) red |= x5 << 10;
+
+ static const int s_shamts[6] = { 1,1,2,3,4,5 };
+
+ const int shamt = s_shamts[mode];
+ red <<= shamt;
+ green <<= shamt;
+ blue <<= shamt;
+ scale <<= shamt;
+
+ if (mode != 5)
+ {
+ green = red - green;
+ blue = red - blue;
+ }
+
+ if (majcomp == 1)
+ std::swap(red, green);
+
+ if (majcomp == 2)
+ std::swap(red, blue);
+
+ e1_r = clamp(red, 0, 0xFFF);
+ e1_g = clamp(green, 0, 0xFFF);
+ e1_b = clamp(blue, 0, 0xFFF);
+ e1_a = 0x780;
+
+ e0_r = clamp(red - scale, 0, 0xFFF);
+ e0_g = clamp(green - scale, 0, 0xFFF);
+ e0_b = clamp(blue - scale, 0, 0xFFF);
+ e0_a = 0x780;
+
+ break;
+ }
+ case CEM_HDR_RGB_HDR_ALPHA:
+ case CEM_HDR_RGB_LDR_ALPHA:
+ case CEM_HDR_RGB:
+ {
+ int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
+
+ int majcomp = ((v4 & 0x80) >> 7) | ((v5 & 0x80) >> 6);
+
+ e0_a = 0x780;
+ e1_a = 0x780;
+
+ if (majcomp == 3)
+ {
+ e0_r = v0 << 4;
+ e0_g = v2 << 4;
+ e0_b = (v4 & 0x7f) << 5;
+
+ e1_r = v1 << 4;
+ e1_g = v3 << 4;
+ e1_b = (v5 & 0x7f) << 5;
+ }
+ else
+ {
+ int mode = ((v1 & 0x80) >> 7) | ((v2 & 0x80) >> 6) | ((v3 & 0x80) >> 5);
+ int va = v0 | ((v1 & 0x40) << 2);
+ int vb0 = v2 & 0x3f;
+ int vb1 = v3 & 0x3f;
+ int vc = v1 & 0x3f;
+ int vd0 = v4 & 0x7f;
+ int vd1 = v5 & 0x7f;
+
+ static const int s_dbitstab[8] = { 7,6,7,6,5,6,5,6 };
+ vd0 = sign_extend(vd0, s_dbitstab[mode]);
+ vd1 = sign_extend(vd1, s_dbitstab[mode]);
+
+ int x0 = (v2 >> 6) & 1;
+ int x1 = (v3 >> 6) & 1;
+ int x2 = (v4 >> 6) & 1;
+ int x3 = (v5 >> 6) & 1;
+ int x4 = (v4 >> 5) & 1;
+ int x5 = (v5 >> 5) & 1;
+
+ int ohm = 1 << mode;
+ if (ohm & 0xA4) va |= x0 << 9;
+ if (ohm & 0x08) va |= x2 << 9;
+ if (ohm & 0x50) va |= x4 << 9;
+ if (ohm & 0x50) va |= x5 << 10;
+ if (ohm & 0xA0) va |= x1 << 10;
+ if (ohm & 0xC0) va |= x2 << 11;
+ if (ohm & 0x04) vc |= x1 << 6;
+ if (ohm & 0xE8) vc |= x3 << 6;
+ if (ohm & 0x20) vc |= x2 << 7;
+ if (ohm & 0x5B) vb0 |= x0 << 6;
+ if (ohm & 0x5B) vb1 |= x1 << 6;
+ if (ohm & 0x12) vb0 |= x2 << 7;
+ if (ohm & 0x12) vb1 |= x3 << 7;
+
+ int shamt = (mode >> 1) ^ 3;
+ va = (uint32_t)va << shamt;
+ vb0 = (uint32_t)vb0 << shamt;
+ vb1 = (uint32_t)vb1 << shamt;
+ vc = (uint32_t)vc << shamt;
+ vd0 = (uint32_t)vd0 << shamt;
+ vd1 = (uint32_t)vd1 << shamt;
+
+ e1_r = clamp(va, 0, 0xFFF);
+ e1_g = clamp(va - vb0, 0, 0xFFF);
+ e1_b = clamp(va - vb1, 0, 0xFFF);
+
+ e0_r = clamp(va - vc, 0, 0xFFF);
+ e0_g = clamp(va - vb0 - vc - vd0, 0, 0xFFF);
+ e0_b = clamp(va - vb1 - vc - vd1, 0, 0xFFF);
+
+ if (majcomp == 1)
+ {
+ std::swap(e0_r, e0_g);
+ std::swap(e1_r, e1_g);
+ }
+ else if (majcomp == 2)
+ {
+ std::swap(e0_r, e0_b);
+ std::swap(e1_r, e1_b);
+ }
+ }
+
+ if (cem_index == CEM_HDR_RGB_LDR_ALPHA)
+ {
+ int v6 = pE[6], v7 = pE[7];
+
+ e0_a = v6;
+ e1_a = v7;
+ }
+ else if (cem_index == CEM_HDR_RGB_HDR_ALPHA)
+ {
+ int v6 = pE[6], v7 = pE[7];
+
+ // Extract mode bits
+ int mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
+ v6 &= 0x7F;
+ v7 &= 0x7F;
+
+ if (mode == 3)
+ {
+ e0_a = v6 << 5;
+ e1_a = v7 << 5;
+ }
+ else
+ {
+ v6 |= (v7 << (mode + 1)) & 0x780;
+ v7 &= (0x3F >> mode);
+ v7 ^= (0x20 >> mode);
+ v7 -= (0x20 >> mode);
+ v6 <<= (4 - mode);
+ v7 <<= (4 - mode);
+
+ v7 += v6;
+ v7 = clamp(v7, 0, 0xFFF);
+ e0_a = v6;
+ e1_a = v7;
+ }
+ }
+
+ break;
+ }
+ default:
+ {
+ assert(0);
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ pEndpoints[c][0] = 0;
+ pEndpoints[c][1] = 0;
+ }
+ break;
+ }
+ }
+ }
+
+ // Reports whether a half-float bit pattern encodes +/-Inf or a NaN.
+ // The test is get_bits(v, 10, 14) == 31 (presumably the 5-bit exponent field being all ones).
+ static inline bool is_half_inf_or_nan(half_float v)
+ {
+     const auto exponent_field = get_bits(v, 10, 14);
+     return (exponent_field == 31);
+ }
+
+ // This float->half conversion matches how "F32TO16" works on Intel GPU's.
+ //
+ // val:         the 32-bit float to convert.
+ // toward_zero: when true the mantissa is truncated (truncf); otherwise it is rounded with lrintf().
+ //
+ // Returns the 16-bit half-float bit pattern (sign << 15 | exponent << 10 | mantissa).
+ // Float zeros and float denormals produce a signed zero, Inf and anything whose unbiased exponent
+ // exceeds 15 produce +/-Inf, and every NaN collapses to mantissa 1.
+ half_float float_to_half(float val, bool toward_zero)
+ {
+     union { float f; int32_t i; uint32_t u; } bits = { val };
+
+     // Split the IEEE single into its three fields.
+     const int src_mantissa = bits.i & 0x7FFFFF;
+     const int src_exponent = (bits.i >> 23) & 0xFF;
+     const int sign = (bits.i >> 31) & 0x1;
+
+     int half_exp = 0, half_man = 0;
+
+     if (src_exponent == 0xff)
+     {
+         // Inf keeps a zero mantissa, NaN gets mantissa 1.
+         half_exp = 31;
+         half_man = (src_mantissa != 0) ? 1 : 0;
+     }
+     else if (src_exponent != 0)
+     {
+         // Neither zero nor a float denormal.
+         const int unbiased_exp = src_exponent - 127;
+
+         if (unbiased_exp > 15)
+         {
+             // Magnitude too large for a half: overflow to infinity.
+             half_exp = 31;
+         }
+         else if (unbiased_exp < -14)
+         {
+             // Result is a half denormal: express the magnitude in units of 2^-24.
+             const float scaled = (1 << 24) * fabsf(bits.f);
+             if (toward_zero)
+                 half_man = (int)truncf(scaled);
+             else
+                 half_man = lrintf(scaled);
+         }
+         else
+         {
+             // Normal half: rebias the exponent and drop the low 13 mantissa bits.
+             half_exp = unbiased_exp + 15;
+
+             const float scaled = (float)src_mantissa * (1.0f / (float)(1 << 13));
+             if (toward_zero)
+                 half_man = (int)truncf(scaled);
+             else
+                 half_man = lrintf(scaled);
+         }
+     }
+
+     // Rounding may carry out of the 10-bit mantissa; propagate the carry into the exponent.
+     assert((0 <= half_man) && (half_man <= 1024));
+     if (half_man == 1024)
+     {
+         half_exp++;
+         half_man = 0;
+     }
+
+     assert((sign >= 0) && (sign <= 1));
+     assert((half_exp >= 0) && (half_exp <= 31));
+     assert((half_man >= 0) && (half_man <= 1023));
+
+     return (half_float)((sign << 15) | (half_exp << 10) | half_man);
+ }
+
+ // Expands a 16-bit half-float bit pattern to a 32-bit float.
+ // Signed zeros, denormals, normals, infinities and NaNs (payload shifted up by 13 bits) are all handled.
+ float half_to_float(half_float hval)
+ {
+     union { float f; uint32_t u; } out = { 0 };
+
+     const uint32_t half_bits = (uint32_t)hval;
+     const uint32_t sign = (half_bits >> 15) & 1;
+     const uint32_t half_exp = (half_bits >> 10) & 0x1F;
+     uint32_t mantissa = half_bits & 0x3FF;
+
+     if (half_exp == 31)
+     {
+         // +/- INF (mantissa == 0) or +/- NaN (mantissa != 0, payload preserved).
+         out.u = (sign << 31) | 0x7f800000 | (mantissa << 13);
+         return out.f;
+     }
+
+     uint32_t float_exp;
+
+     if (half_exp != 0)
+     {
+         // Normal value: only the exponent bias changes.
+         float_exp = half_exp + (127 - 15);
+     }
+     else
+     {
+         if (mantissa == 0)
+         {
+             // +- 0
+             out.u = sign << 31;
+             return out.f;
+         }
+
+         // Denormalized: shift the mantissa up until its leading one reaches bit 10,
+         // lowering the exponent by one for every shift.
+         uint32_t num_shifts = 0;
+         while ((mantissa & 0x00000400) == 0)
+         {
+             mantissa <<= 1;
+             num_shifts++;
+         }
+
+         // Drop the now-implicit leading one.
+         mantissa &= ~0x00000400;
+
+         float_exp = (127 - 15 + 1) - num_shifts;
+     }
+
+     mantissa <<= 13;
+
+     assert(sign <= 1);
+     assert(mantissa <= 0x7FFFFF);
+     assert(float_exp <= 255);
+
+     out.u = (sign << 31) | (float_exp << 23) | mantissa;
+     return out.f;
+ }
+
+ // Converts a 16-bit "qlog16" value (5-bit exponent in bits 11..15, 11-bit mantissa in bits 0..10)
+ // into a half-float bit pattern, using a 3-segment piecewise-linear remap of the mantissa.
+ static inline half_float qlog16_to_half(int k)
+ {
+     assert((k >= 0) && (k <= 0xFFFF));
+
+     const int exponent = (k >> 11) & 0x1F;
+     const int mantissa = k & 0x7FF;
+
+     // Piecewise-linear mantissa remap; the segments meet at 512 and 1536.
+     int remapped;
+     if (mantissa < 512)
+         remapped = 3 * mantissa;
+     else if (mantissa < 1536)
+         remapped = 4 * mantissa - 512;
+     else
+         remapped = 5 * mantissa - 2048;
+
+     return (half_float)((exponent << 10) + (remapped >> 3));
+ }
+
+ // See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
+ // Layout parameters of the shared-exponent RGB9E5 format: 9-bit mantissas, 5-bit exponent biased by 15.
+ const int RGB9E5_EXPONENT_BITS = 5;
+ const int RGB9E5_MANTISSA_BITS = 9;
+ const int RGB9E5_EXP_BIAS = 15;
+ const int RGB9E5_MAX_VALID_BIASED_EXP = 31;
+ // Largest unbiased exponent (16).
+ const int MAX_RGB9E5_EXP = RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS;
+ // Number of distinct mantissa values (512) and the largest of them (511).
+ const int RGB9E5_MANTISSA_VALUES = 1 << RGB9E5_MANTISSA_BITS;
+ const int MAX_RGB9E5_MANTISSA = RGB9E5_MANTISSA_VALUES - 1;
+ //const int MAX_RGB9E5 = (int)(((float)MAX_RGB9E5_MANTISSA) / RGB9E5_MANTISSA_VALUES * (1 << MAX_RGB9E5_EXP));
+ // NOTE: the float expression below is (1/512)/32768, so the cast to int truncates this constant to 0.
+ const int EPSILON_RGB9E5 = (int)((1.0f / (float)RGB9E5_MANTISSA_VALUES) / (float)(1 << RGB9E5_EXP_BIAS));
+
+ // Unpacks an RGB9E5 texel into three floats.
+ // packed: bits 0..8 red mantissa, 9..17 green mantissa, 18..26 blue mantissa, 27..31 shared biased exponent.
+ // r, g, b: receive mantissa * 2^(exponent - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS).
+ void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b)
+ {
+     const int biased_exp = (int)((packed >> 27) & 31);
+     const int mant_r = (int)(packed & 511);
+     const int mant_g = (int)((packed >> 9) & 511);
+     const int mant_b = (int)((packed >> 18) & 511);
+
+     // A single scale factor is shared by all three channels.
+     const int unbiased_exp = biased_exp - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS;
+     const float scale = powf(2.0f, static_cast<float>(unbiased_exp));
+
+     r = mant_r * scale;
+     g = mant_g * scale;
+     b = mant_b * scale;
+ }
+
+ // floor_log2 is not correct for the denorm and zero values, but we are going to do a max of this value with the minimum rgb9e5 exponent that will hide these problem cases.
+ // Returns the unbiased IEEE-754 exponent of x (the raw 8-bit exponent field minus 127).
+ static inline int floor_log2(float x)
+ {
+     // Copy out the raw bit pattern of the float.
+     uint32_t raw_bits;
+     memcpy(&raw_bits, &x, sizeof(raw_bits));
+
+     // Extract float exponent
+     const int biased_exponent = (int)((raw_bits >> 23) & 0xFF);
+     return biased_exponent - 127;
+ }
+
+ // Returns the larger of two ints (b when they compare equal).
+ static inline int maximumi(int a, int b)
+ {
+     if (a > b)
+         return a;
+     return b;
+ }
+ // Returns the larger of two floats. b is returned whenever (a > b) is false, which includes the case where either argument is NaN.
+ static inline float maximumf(float a, float b)
+ {
+     if (a > b)
+         return a;
+     return b;
+ }
+
+ // Packs three floats into an RGB9E5 texel (bits 0..8 red, 9..17 green, 18..26 blue, 27..31 shared exponent).
+ // Each input is first clamped to [0, MAX_RGB9E5]; mantissas are rounded to nearest (floor(x + .5)).
+ uint32_t pack_rgb9e5(float r, float g, float b)
+ {
+     r = clampf(r, 0.0f, MAX_RGB9E5);
+     g = clampf(g, 0.0f, MAX_RGB9E5);
+     b = clampf(b, 0.0f, MAX_RGB9E5);
+
+     // The largest channel determines the shared exponent.
+     const float largest = maximumf(maximumf(r, g), b);
+
+     int biased_exp = maximumi(-RGB9E5_EXP_BIAS - 1, floor_log2(largest)) + 1 + RGB9E5_EXP_BIAS;
+     assert((biased_exp >= 0) && (biased_exp <= RGB9E5_MAX_VALID_BIASED_EXP));
+
+     // Value of one mantissa step at this exponent.
+     float step = powf(2.0f, (float)(biased_exp - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
+
+     const int largest_mantissa = (int)floorf((largest / step) + 0.5f);
+     if (largest_mantissa != (MAX_RGB9E5_MANTISSA + 1))
+     {
+         assert(largest_mantissa <= MAX_RGB9E5_MANTISSA);
+     }
+     else
+     {
+         // Rounding overflowed the 9-bit mantissa: move up to the next exponent.
+         step *= 2;
+         biased_exp += 1;
+         assert(biased_exp <= RGB9E5_MAX_VALID_BIASED_EXP);
+     }
+
+     const int mant_r = (int)floorf((r / step) + 0.5f);
+     const int mant_g = (int)floorf((g / step) + 0.5f);
+     const int mant_b = (int)floorf((b / step) + 0.5f);
+
+     assert((mant_r >= 0) && (mant_r <= MAX_RGB9E5_MANTISSA));
+     assert((mant_g >= 0) && (mant_g <= MAX_RGB9E5_MANTISSA));
+     assert((mant_b >= 0) && (mant_b <= MAX_RGB9E5_MANTISSA));
+
+     return (biased_exp << 27) | (mant_b << 18) | (mant_g << 9) | mant_r;
+ }
+
+ // Counts the leading zero bits of x when viewed as a 17-bit value (bit 16 is the most significant).
+ // Returns 17 for x == 0. x must not exceed 0x1FFFF.
+ static inline int clz17(uint32_t x)
+ {
+     assert(x <= 0x1FFFF);
+     x &= 0x1FFFF;
+
+     // Walk a probe bit down from bit 16 until it lands on a set bit.
+     int leading_zeros = 0;
+     for (uint32_t probe = 0x10000; probe != 0; probe >>= 1)
+     {
+         if (x & probe)
+             break;
+         leading_zeros++;
+     }
+
+     return leading_zeros;
+ }
+
+ // Packs three 16-bit LDR channel values (as produced by the ASTC weight interpolation) into an RGB9E5 texel.
+ // The channels are normalized by a common left shift so the largest one uses the top mantissa bits;
+ // a channel equal to 65535 is treated as 65536 and disables the normalization shift.
+ static inline uint32_t pack_rgb9e5_ldr_astc(int Cr, int Cg, int Cb)
+ {
+     // OR-ing in 1 keeps the shift at 16 or less even when all channels are zero.
+     int norm_shift = clz17(Cr | Cg | Cb | 1);
+
+     const bool r_is_max = (Cr == 65535);
+     const bool g_is_max = (Cg == 65535);
+     const bool b_is_max = (Cb == 65535);
+
+     if (r_is_max) Cr = 65536;
+     if (g_is_max) Cg = 65536;
+     if (b_is_max) Cb = 65536;
+
+     if (r_is_max || g_is_max || b_is_max)
+         norm_shift = 0;
+
+     // Keep the 9 most significant bits of each normalized channel.
+     const int mant_r = ((Cr << norm_shift) >> 8) & 0x1FF;
+     const int mant_g = ((Cg << norm_shift) >> 8) & 0x1FF;
+     const int mant_b = ((Cb << norm_shift) >> 8) & 0x1FF;
+
+     const uint32_t exponent = 16 - norm_shift;
+
+     return (exponent << 27) | (mant_b << 18) | (mant_g << 9) | mant_r;
+ }
+
+ // Packs three half-float bit patterns into an RGB9E5 texel using integer operations only.
+ // Values above 0x7c00 (NaN's, and anything with the sign bit set) become 0; 0x7c00 (+Inf) becomes 0x7bff.
+ // The channel with the largest exponent supplies the shared exponent, and the other channels'
+ // mantissas are shifted right to match it (truncating).
+ static inline uint32_t pack_rgb9e5_hdr_astc(int Cr, int Cg, int Cb)
+ {
+     int chan[3] = { Cr, Cg, Cb };
+     int raw_exp[3]; // 5-bit half exponent field
+     int eff_exp[3]; // same, except that denormals (field == 0) use exponent 1
+
+     for (int i = 0; i < 3; i++)
+     {
+         if (chan[i] > 0x7c00)
+             chan[i] = 0;
+         else if (chan[i] == 0x7c00)
+             chan[i] = 0x7bff;
+
+         raw_exp[i] = (chan[i] >> 10) & 0x1F;
+         eff_exp[i] = (raw_exp[i] == 0) ? 1 : raw_exp[i];
+     }
+
+     uint32_t shared_exp;
+     uint32_t shift[3];
+
+     if ((raw_exp[0] | raw_exp[1] | raw_exp[2]) == 0)
+     {
+         // Every channel is zero or denormal: exponent and shifts are all just the
+         // OR of the channels' top mantissa bits (bit 9).
+         const uint32_t top_mantissa_bit = ((chan[0] | chan[1] | chan[2]) & 0x200) >> 9;
+
+         shared_exp = top_mantissa_bit;
+         shift[0] = top_mantissa_bit;
+         shift[1] = top_mantissa_bit;
+         shift[2] = top_mantissa_bit;
+     }
+     else
+     {
+         // Pick the channel with the largest exponent (ties favor red, then green).
+         int lead;
+         if ((raw_exp[0] >= raw_exp[1]) && (raw_exp[0] >= raw_exp[2]))
+             lead = 0;
+         else if (raw_exp[1] >= raw_exp[2])
+             lead = 1;
+         else
+             lead = 2;
+
+         shared_exp = eff_exp[lead] + 1;
+
+         // The leading channel is shifted by 2, the others by 2 plus their exponent deficit.
+         for (int i = 0; i < 3; i++)
+             shift[i] = eff_exp[lead] - eff_exp[i] + 2;
+     }
+
+     uint32_t texel = shared_exp << 27;
+
+     for (int i = 0; i < 3; i++)
+     {
+         // 10-bit mantissa plus the implicit leading one for normal values.
+         int mantissa = (chan[i] & 0x3FF) | ((raw_exp[i] == 0) ? 0 : 0x400);
+         mantissa = (mantissa >> shift[i]) & 0x1FF;
+
+         // Red occupies bits 0..8, green 9..17, blue 18..26.
+         texel |= (uint32_t)mantissa << (9 * i);
+     }
+
+     return texel;
+ }
+
+ // Decodes one logical ASTC block to texels, written to pPixels in row-major order (index = x + y * blk_width).
+ //
+ // Important: pPixels is either 32-bit/texel or 64-bit/texel.
+ //   cDecodeModeHDR16:  4 half_float's per texel (64-bit/texel).
+ //   cDecodeModeRGB9E5: 1 packed uint32_t per texel.
+ //   all other modes:   4 uint8_t's per texel (32-bit/texel).
+ //
+ // The whole block is filled with an error color before anything else is checked (0xFF bytes for HDR16,
+ // pack_rgb9e5(1, 0, 1) for RGB9E5, 0xFFFF00FF otherwise), so the output is initialized even on failure.
+ //
+ // Returns false when the dequantization tables are empty, the block's error flag is set,
+ // the block's configuration fails validation, an HDR solid color block is decoded to an 8-bit mode, or
+ // (8-bit modes only) any texel belongs to a subset with HDR endpoints. Returns true otherwise.
+ bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode)
+ {
+     assert(is_valid_block_size(blk_width, blk_height));
+
+     assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size());
+     if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size())
+         return false;
+
+     const uint32_t num_blk_pixels = blk_width * blk_height;
+
+     // Write block error color
+     if (dec_mode == cDecodeModeHDR16)
+     {
+         // NaN's
+         memset(pPixels, 0xFF, num_blk_pixels * sizeof(half_float) * 4);
+     }
+     else if (dec_mode == cDecodeModeRGB9E5)
+     {
+         const uint32_t purple_9e5 = pack_rgb9e5(1.0f, 0.0f, 1.0f);
+
+         for (uint32_t i = 0; i < num_blk_pixels; i++)
+             ((uint32_t*)pPixels)[i] = purple_9e5;
+     }
+     else
+     {
+         for (uint32_t i = 0; i < num_blk_pixels; i++)
+             ((uint32_t*)pPixels)[i] = 0xFFFF00FF;
+     }
+
+     if (log_blk.m_error_flag)
+     {
+         // Should this return false? It's not an invalid logical block config, though.
+         return false;
+     }
+
+     // Handle solid color blocks
+     if (log_blk.m_solid_color_flag_ldr)
+     {
+         // LDR solid block
+         if (dec_mode == cDecodeModeHDR16)
+         {
+             // Convert LDR pixels to half-float
+             half_float h[4];
+             for (uint32_t c = 0; c < 4; c++)
+                 h[c] = (log_blk.m_solid_color[c] == 0xFFFF) ? 0x3C00 : float_to_half((float)log_blk.m_solid_color[c] * (1.0f / 65536.0f), true);
+
+             for (uint32_t i = 0; i < num_blk_pixels; i++)
+                 memcpy((uint16_t*)pPixels + i * 4, h, sizeof(half_float) * 4);
+         }
+         else if (dec_mode == cDecodeModeRGB9E5)
+         {
+             float r = (log_blk.m_solid_color[0] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[0] * (1.0f / 65536.0f));
+             float g = (log_blk.m_solid_color[1] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[1] * (1.0f / 65536.0f));
+             float b = (log_blk.m_solid_color[2] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[2] * (1.0f / 65536.0f));
+
+             const uint32_t packed = pack_rgb9e5(r, g, b);
+
+             for (uint32_t i = 0; i < num_blk_pixels; i++)
+                 ((uint32_t*)pPixels)[i] = packed;
+         }
+         else
+         {
+             // Convert LDR pixels to 8-bits
+             for (uint32_t i = 0; i < num_blk_pixels; i++)
+                 for (uint32_t c = 0; c < 4; c++)
+                     ((uint8_t*)pPixels)[i * 4 + c] = (log_blk.m_solid_color[c] >> 8);
+         }
+
+         return true;
+     }
+     else if (log_blk.m_solid_color_flag_hdr)
+     {
+         // HDR solid block, decode mode must be half-float or RGB9E5
+         if (dec_mode == cDecodeModeHDR16)
+         {
+             for (uint32_t i = 0; i < num_blk_pixels; i++)
+                 memcpy((uint16_t*)pPixels + i * 4, log_blk.m_solid_color, sizeof(half_float) * 4);
+         }
+         else if (dec_mode == cDecodeModeRGB9E5)
+         {
+             float r = half_to_float(log_blk.m_solid_color[0]);
+             float g = half_to_float(log_blk.m_solid_color[1]);
+             float b = half_to_float(log_blk.m_solid_color[2]);
+
+             const uint32_t packed = pack_rgb9e5(r, g, b);
+
+             for (uint32_t i = 0; i < num_blk_pixels; i++)
+                 ((uint32_t*)pPixels)[i] = packed;
+         }
+         else
+         {
+             return false;
+         }
+
+         return true;
+     }
+
+     // Sanity check block's config
+     if ((log_blk.m_grid_width < 2) || (log_blk.m_grid_height < 2))
+         return false;
+     if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
+         return false;
+
+     if ((log_blk.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_blk.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))
+         return false;
+     if ((log_blk.m_weight_ise_range < FIRST_VALID_WEIGHT_ISE_RANGE) || (log_blk.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE))
+         return false;
+     if ((log_blk.m_num_partitions < 1) || (log_blk.m_num_partitions > MAX_PARTITIONS))
+         return false;
+     if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > MAX_DUAL_PLANE_PARTITIONS))
+         return false;
+     if (log_blk.m_partition_id >= NUM_PARTITION_PATTERNS)
+         return false;
+     if ((log_blk.m_num_partitions == 1) && (log_blk.m_partition_id > 0))
+         return false;
+     if (log_blk.m_color_component_selector > 3)
+         return false;
+
+     const uint32_t total_endpoint_levels = get_ise_levels(log_blk.m_endpoint_ise_range);
+     const uint32_t total_weight_levels = get_ise_levels(log_blk.m_weight_ise_range);
+
+     bool is_ldr_endpoints[MAX_PARTITIONS];
+
+     // Check CEM's
+     uint32_t total_cem_vals = 0;
+     for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
+     {
+         if (log_blk.m_color_endpoint_modes[i] > 15)
+             return false;
+
+         total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[i]);
+
+         is_ldr_endpoints[i] = is_cem_ldr(log_blk.m_color_endpoint_modes[i]);
+     }
+
+     if (total_cem_vals > MAX_ENDPOINTS)
+         return false;
+
+     const dequant_table& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range);
+     const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data();
+
+     // Dequantized endpoints to [0,255]
+     uint8_t dequantized_endpoints[MAX_ENDPOINTS];
+     for (uint32_t i = 0; i < total_cem_vals; i++)
+     {
+         if (log_blk.m_endpoints[i] >= total_endpoint_levels)
+             return false;
+         dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]];
+     }
+
+     // Dequantize weights to [0,64]
+     uint8_t dequantized_weights[2][12 * 12];
+
+     const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range);
+     const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data();
+
+     // In dual plane mode the two planes' weights are interleaved in m_weights (even index = plane 0, odd = plane 1).
+     const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height;
+     for (uint32_t i = 0; i < total_weight_vals; i++)
+     {
+         if (log_blk.m_weights[i] >= total_weight_levels)
+             return false;
+
+         const uint32_t plane_index = log_blk.m_dual_plane ? (i & 1) : 0;
+         const uint32_t grid_index = log_blk.m_dual_plane ? (i >> 1) : i;
+
+         dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]];
+     }
+
+     // Upsample weight grid. [0,64] weights
+     uint8_t upsampled_weights[2][12 * 12];
+
+     upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[0][0], &upsampled_weights[0][0]);
+     if (log_blk.m_dual_plane)
+         upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[1][0], &upsampled_weights[1][0]);
+
+     // Decode CEM's
+     int endpoints[4][4][2]; // [subset][comp][l/h]
+
+     uint32_t endpoint_val_index = 0;
+     for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++)
+     {
+         const uint32_t cem_index = log_blk.m_color_endpoint_modes[subset];
+
+         decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]);
+
+         endpoint_val_index += get_num_cem_values(cem_index);
+     }
+
+     // Decode texels
+     const bool small_block = num_blk_pixels < 31;
+     const bool use_precomputed_texel_partitions = (blk_width == 4) && (blk_height == 4) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);
+     // Component that uses the second weight plane; UINT32_MAX never matches a component index in single plane mode.
+     const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX;
+
+     bool success = true;
+
+     if (dec_mode == cDecodeModeRGB9E5)
+     {
+         // returns uint32_t's
+         for (uint32_t y = 0; y < blk_height; y++)
+         {
+             for (uint32_t x = 0; x < blk_width; x++)
+             {
+                 const uint32_t pixel_index = x + y * blk_width;
+                 const uint32_t subset = (log_blk.m_num_partitions > 1) ?
+                     (use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
+                     : 0;
+
+                 int comp[3];
+
+                 for (uint32_t c = 0; c < 3; c++)
+                 {
+                     const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
+
+                     if (is_ldr_endpoints[subset])
+                     {
+                         assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
+                         assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
+
+                         int le = endpoints[subset][c][0];
+                         int he = endpoints[subset][c][1];
+
+                         // Expand 8-bit endpoints to 16 bits by replicating the byte.
+                         le = (le << 8) | le;
+                         he = (he << 8) | he;
+
+                         int k = weight_interpolate(le, he, w);
+                         assert((k >= 0) && (k <= 0xFFFF));
+
+                         comp[c] = k; // 1.0
+                     }
+                     else
+                     {
+                         assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
+                         assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
+
+                         // Expand 12-bit HDR endpoints to 16 bits.
+                         int le = endpoints[subset][c][0] << 4;
+                         int he = endpoints[subset][c][1] << 4;
+
+                         int qlog16 = weight_interpolate(le, he, w);
+
+                         comp[c] = qlog16_to_half(qlog16);
+
+                         // Inf/NaN results are replaced with 0x7BFF.
+                         if (is_half_inf_or_nan((half_float)comp[c]))
+                             comp[c] = 0x7BFF;
+                     }
+
+                 } // c
+
+                 uint32_t packed;
+                 if (is_ldr_endpoints[subset])
+                     packed = pack_rgb9e5_ldr_astc(comp[0], comp[1], comp[2]);
+                 else
+                     packed = pack_rgb9e5_hdr_astc(comp[0], comp[1], comp[2]);
+
+                 ((uint32_t*)pPixels)[pixel_index] = packed;
+
+             } // x
+         } // y
+     }
+     else if (dec_mode == cDecodeModeHDR16)
+     {
+         // Note: must round towards zero when converting float to half for ASTC (18.19 Weight Application)
+
+         // returns half floats
+         for (uint32_t y = 0; y < blk_height; y++)
+         {
+             for (uint32_t x = 0; x < blk_width; x++)
+             {
+                 const uint32_t pixel_index = x + y * blk_width;
+                 const uint32_t subset = (log_blk.m_num_partitions > 1) ?
+                     (use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
+                     : 0;
+
+                 for (uint32_t c = 0; c < 4; c++)
+                 {
+                     const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
+
+                     half_float o;
+
+                     // LDR endpoints, or the LDR alpha channel of an otherwise HDR endpoint mode.
+                     if ( (is_ldr_endpoints[subset]) ||
+                         ((log_blk.m_color_endpoint_modes[subset] == CEM_HDR_RGB_LDR_ALPHA) && (c == 3)) )
+                     {
+                         assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
+                         assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
+
+                         int le = endpoints[subset][c][0];
+                         int he = endpoints[subset][c][1];
+
+                         le = (le << 8) | le;
+                         he = (he << 8) | he;
+
+                         int k = weight_interpolate(le, he, w);
+                         assert((k >= 0) && (k <= 0xFFFF));
+
+                         if (k == 0xFFFF)
+                             o = 0x3C00; // 1.0
+                         else
+                             o = float_to_half((float)k * (1.0f / 65536.0f), true);
+                     }
+                     else
+                     {
+                         assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
+                         assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
+
+                         int le = endpoints[subset][c][0] << 4;
+                         int he = endpoints[subset][c][1] << 4;
+
+                         int qlog16 = weight_interpolate(le, he, w);
+
+                         o = qlog16_to_half(qlog16);
+
+                         if (is_half_inf_or_nan(o))
+                             o = 0x7BFF;
+                     }
+
+                     ((half_float*)pPixels)[pixel_index * 4 + c] = o;
+                 }
+
+             } // x
+         } // y
+     }
+     else
+     {
+         // returns uint8_t's
+         for (uint32_t y = 0; y < blk_height; y++)
+         {
+             for (uint32_t x = 0; x < blk_width; x++)
+             {
+                 const uint32_t pixel_index = x + y * blk_width;
+
+                 const uint32_t subset = (log_blk.m_num_partitions > 1) ?
+                     (use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
+                     : 0;
+
+                 if (!is_ldr_endpoints[subset])
+                 {
+                     // HDR endpoints can't be decoded to 8-bit output: write the error color and fail.
+                     // The output holds one uint32_t per texel in this mode, so the element index is pixel_index
+                     // (indexing with pixel_index * 4 would write far past the end of the block's buffer).
+                     ((uint32_t*)pPixels)[pixel_index] = 0xFFFF00FF;
+                     success = false;
+                 }
+                 else
+                 {
+                     for (uint32_t c = 0; c < 4; c++)
+                     {
+                         const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
+
+                         int le = endpoints[subset][c][0];
+                         int he = endpoints[subset][c][1];
+
+                         // FIXME: the spec is apparently wrong? this matches ARM's and Google's decoder
+                         //if ((dec_mode == cDecodeModeSRGB8) && (c <= 2))
+                         // See https://github.com/ARM-software/astc-encoder/issues/447
+                         if (dec_mode == cDecodeModeSRGB8)
+                         {
+                             le = (le << 8) | 0x80;
+                             he = (he << 8) | 0x80;
+                         }
+                         else
+                         {
+                             le = (le << 8) | le;
+                             he = (he << 8) | he;
+                         }
+
+                         uint32_t k = weight_interpolate(le, he, w);
+
+                         // FIXME: This is what the spec says to do in LDR mode, but this is not what ARM's decoder does
+                         // See decompress_symbolic_block(), decode_texel() and unorm16_to_sf16.
+                         // It seems to effectively divide by 65535.0 and convert to FP16, then back to float, mul by 255.0, add .5 and then convert to 8-bit.
+                         ((uint8_t*)pPixels)[pixel_index * 4 + c] = (uint8_t)(k >> 8);
+                     }
+                 }
+
+             } // x
+         } // y
+     }
+
+     return success;
+ }
+
+ //------------------------------------------------
+ // Physical to logical block decoding
+
+ // unsigned 128-bit int, with some signed helpers
+ class uint128
+ {
+ uint64_t m_lo, m_hi;
+
+ public:
+ uint128() = default;
+ inline uint128(uint64_t lo) : m_lo(lo), m_hi(0) { }
+ inline uint128(uint64_t lo, uint64_t hi) : m_lo(lo), m_hi(hi) { }
+ inline uint128(const uint128& other) : m_lo(other.m_lo), m_hi(other.m_hi) { }
+
+ inline uint128& set_signed(int64_t lo) { m_lo = lo; m_hi = (lo < 0) ? UINT64_MAX : 0; return *this; }
+ inline uint128& set(uint64_t lo) { m_lo = lo; m_hi = 0; return *this; }
+
+ inline explicit operator uint8_t () const { return (uint8_t)m_lo; }
+ inline explicit operator uint16_t () const { return (uint16_t)m_lo; }
+ inline explicit operator uint32_t () const { return (uint32_t)m_lo; }
+ inline explicit operator uint64_t () const { return m_lo; }
+
+ inline uint128& operator= (const uint128& rhs) { m_lo = rhs.m_lo; m_hi = rhs.m_hi; return *this; }
+ inline uint128& operator= (const uint64_t val) { m_lo = val; m_hi = 0; return *this; }
+
+ inline uint64_t get_low() const { return m_lo; }
+ inline uint64_t& get_low() { return m_lo; }
+
+ inline uint64_t get_high() const { return m_hi; }
+ inline uint64_t& get_high() { return m_hi; }
+
+ inline bool operator== (const uint128& rhs) const { return (m_lo == rhs.m_lo) && (m_hi == rhs.m_hi); }
+ inline bool operator!= (const uint128& rhs) const { return (m_lo != rhs.m_lo) || (m_hi != rhs.m_hi); }
+
+ inline bool operator< (const uint128& rhs) const
+ {
+ if (m_hi < rhs.m_hi)
+ return true;
+
+ if (m_hi == rhs.m_hi)
+ {
+ if (m_lo < rhs.m_lo)
+ return true;
+ }
+
+ return false;
+ }
+
+ inline bool operator> (const uint128& rhs) const { return (rhs < *this); }
+
+ inline bool operator<= (const uint128& rhs) const { return (*this == rhs) || (*this < rhs); }
+ inline bool operator>= (const uint128& rhs) const { return (*this == rhs) || (*this > rhs); }
+
+ inline bool is_zero() const { return (m_lo == 0) && (m_hi == 0); }
+ inline bool is_all_ones() const { return (m_lo == UINT64_MAX) && (m_hi == UINT64_MAX); }
+ inline bool is_non_zero() const { return (m_lo != 0) || (m_hi != 0); }
+ inline explicit operator bool() const { return is_non_zero(); }
+ inline bool is_signed() const { return ((int64_t)m_hi) < 0; }
+
+ inline bool signed_less(const uint128& rhs) const
+ {
+ const bool l_signed = is_signed(), r_signed = rhs.is_signed();
+
+ if (l_signed == r_signed)
+ return *this < rhs;
+
+ if (l_signed && !r_signed)
+ return true;
+
+ assert(!l_signed && r_signed);
+ return false;
+ }
+
+ inline bool signed_greater(const uint128& rhs) const { return rhs.signed_less(*this); }
+ inline bool signed_less_equal(const uint128& rhs) const { return !rhs.signed_less(*this); }
+ inline bool signed_greater_equal(const uint128& rhs) const { return !signed_less(rhs); }
+
+ double get_double() const
+ {
+ double res = 0;
+
+ if (m_hi)
+ res = (double)m_hi * pow(2.0f, 64.0f);
+
+ res += (double)m_lo;
+
+ return res;
+ }
+
+ double get_signed_double() const
+ {
+ if (is_signed())
+ return -(uint128(*this).abs().get_double());
+ else
+ return get_double();
+ }
+
+ inline uint128 abs() const
+ {
+ uint128 res(*this);
+ if (res.is_signed())
+ res = -res;
+ return res;
+ }
+
+ inline uint128& operator<<= (int shift)
+ {
+ assert(shift >= 0);
+ if (shift < 0)
+ return *this;
+
+ m_hi = (shift >= 64) ? ((shift >= 128) ? 0 : (m_lo << (shift - 64))) : (m_hi << shift);
+
+ if ((shift) && (shift < 64))
+ m_hi |= (m_lo >> (64 - shift));
+
+ m_lo = (shift >= 64) ? 0 : (m_lo << shift);
+
+ return *this;
+ }
+
+ inline uint128 operator<< (int shift) const { uint128 res(*this); res <<= shift; return res; }
+
+ inline uint128& operator>>= (int shift)
+ {
+ assert(shift >= 0);
+ if (shift < 0)
+ return *this;
+
+ m_lo = (shift >= 64) ? ((shift >= 128) ? 0 : (m_hi >> (shift - 64))) : (m_lo >> shift);
+
+ if ((shift) && (shift < 64))
+ m_lo |= (m_hi << (64 - shift));
+
+ m_hi = (shift >= 64) ? 0 : (m_hi >> shift);
+
+ return *this;
+ }
+
+ inline uint128 operator>> (int shift) const { uint128 res(*this); res >>= shift; return res; }
+
+ inline uint128 signed_shift_right(int shift) const
+ {
+ uint128 res(*this);
+ res >>= shift;
+
+ if (is_signed())
+ {
+ uint128 x(0U);
+ x = ~x;
+ x >>= shift;
+ res |= (~x);
+ }
+
+ return res;
+ }
+
+ inline uint128& operator |= (const uint128& rhs) { m_lo |= rhs.m_lo; m_hi |= rhs.m_hi; return *this; }
+ inline uint128 operator | (const uint128& rhs) const { uint128 res(*this); res |= rhs; return res; }
+
+ inline uint128& operator &= (const uint128& rhs) { m_lo &= rhs.m_lo; m_hi &= rhs.m_hi; return *this; }
+ inline uint128 operator & (const uint128& rhs) const { uint128 res(*this); res &= rhs; return res; }
+
+ inline uint128& operator ^= (const uint128& rhs) { m_lo ^= rhs.m_lo; m_hi ^= rhs.m_hi; return *this; }
+ inline uint128 operator ^ (const uint128& rhs) const { uint128 res(*this); res ^= rhs; return res; }
+
+ inline uint128 operator ~() const { return uint128(~m_lo, ~m_hi); }
+
+ inline uint128 operator -() const { uint128 res(~*this); if (++res.m_lo == 0) ++res.m_hi; return res; }
+
+ // prefix
+ inline uint128 operator ++()
+ {
+ if (++m_lo == 0)
+ ++m_hi;
+ return *this;
+ }
+
+ // postfix
+ inline uint128 operator ++(int)
+ {
+ uint128 res(*this);
+ if (++m_lo == 0)
+ ++m_hi;
+ return res;
+ }
+
+ // prefix
+ inline uint128 operator --()
+ {
+ const uint64_t t = m_lo;
+ if (--m_lo > t)
+ --m_hi;
+ return *this;
+ }
+
+ // postfix
+ inline uint128 operator --(int)
+ {
+ const uint64_t t = m_lo;
+ uint128 res(*this);
+ if (--m_lo > t)
+ --m_hi;
+ return res;
+ }
+
+ inline uint128& operator+= (const uint128& rhs)
+ {
+ const uint64_t t = m_lo + rhs.m_lo;
+ m_hi = m_hi + rhs.m_hi + (t < m_lo);
+ m_lo = t;
+ return *this;
+ }
+
+ inline uint128 operator+ (const uint128& rhs) const { uint128 res(*this); res += rhs; return res; }
+
+ inline uint128& operator-= (const uint128& rhs)
+ {
+ const uint64_t t = m_lo - rhs.m_lo;
+ m_hi = m_hi - rhs.m_hi - (t > m_lo);
+ m_lo = t;
+ return *this;
+ }
+
+ inline uint128 operator- (const uint128& rhs) const { uint128 res(*this); res -= rhs; return res; }
+
+ // computes bit by bit, very slow
+ uint128& operator*=(const uint128& rhs)
+ {
+ uint128 temp(*this), result(0U);
+
+ for (uint128 bitmask(rhs); bitmask; bitmask >>= 1, temp <<= 1)
+ if (bitmask.get_low() & 1)
+ result += temp;
+
+ *this = result;
+ return *this;
+ }
+
+ uint128 operator*(const uint128& rhs) const { uint128 res(*this); res *= rhs; return res; }
+
+ // Binary long division, one dividend bit per iteration (very slow).
+ // Returns the quotient and stores the remainder in 'remainder'.
+ // A zero divisor asserts; with asserts disabled it returns all ones and leaves 'remainder' at zero.
+ friend uint128 divide(const uint128& dividend, const uint128& divisor, uint128& remainder)
+ {
+     remainder = 0;
+
+     if (!divisor)
+     {
+         assert(0);
+         return ~uint128(0U);
+     }
+
+     uint128 quotient(0), one(1);
+
+     // Walk the dividend from its most significant bit down to bit 0.
+     int bit = 127;
+     while (bit >= 0)
+     {
+         // Bring the next dividend bit down into the running remainder.
+         const uint128 incoming_bit((dividend >> bit) & one);
+         remainder = (remainder << 1) | incoming_bit;
+
+         // If the divisor fits, subtract it and record a 1 in this quotient position.
+         if (remainder >= divisor)
+         {
+             remainder -= divisor;
+             quotient |= (one << bit);
+         }
+
+         bit--;
+     }
+
+     return quotient;
+ }
+
+ // Quotient of *this / rhs, computed by divide(); the remainder is discarded.
+ uint128 operator/(const uint128& rhs) const
+ {
+     uint128 unused_remainder;
+     return divide(*this, rhs, unused_remainder);
+ }
+ // In-place division via divide(). Returns a copy of the updated value (by value, not by reference).
+ uint128 operator/=(const uint128& rhs)
+ {
+     uint128 unused_remainder;
+     const uint128 quotient(divide(*this, rhs, unused_remainder));
+     *this = quotient;
+     return *this;
+ }
+
+ // Remainder of *this / rhs, computed by divide(); the quotient is discarded.
+ uint128 operator%(const uint128& rhs) const
+ {
+     uint128 rem;
+     divide(*this, rhs, rem);
+     return rem;
+ }
+ // In-place modulo via divide(). Returns a copy of the updated value (by value, not by reference).
+ uint128 operator%=(const uint128& rhs)
+ {
+     uint128 rem;
+     divide(*this, rhs, rem);
+     *this = rem;
+     return *this;
+ }
+
+ // Writes the value to pFile as "0x" followed by 32 zero-padded lowercase hex digits (high half first).
+ void print_hex(FILE* pFile) const
+ {
+     const unsigned long long int hi_part = (unsigned long long int)m_hi;
+     const unsigned long long int lo_part = (unsigned long long int)m_lo;
+     fprintf(pFile, "0x%016llx%016llx", hi_part, lo_part);
+ }
+
+ // Appends the unsigned decimal representation of this value to res (res is not cleared first).
+ void format_unsigned(std::string& res) const
+ {
+     // Digits come out least-significant first; the largest 128-bit value has 39 decimal digits.
+     basisu::vector<uint8_t> rev_digits;
+     rev_digits.reserve(39 + 1);
+
+     uint128 remaining(*this), ten(10);
+     do
+     {
+         uint128 digit;
+         remaining = divide(remaining, ten, digit);
+         rev_digits.push_back((uint8_t)digit);
+     } while (remaining);
+
+     // Emit the most significant digit first.
+     int idx = (int)rev_digits.size();
+     while (idx-- > 0)
+         res += ('0' + rev_digits[idx]);
+ }
+
+ // Appends the decimal representation of this value, interpreted as signed, to res:
+ // when is_signed() reports true, a '-' is written followed by the digits of the negated value.
+ void format_signed(std::string& res) const
+ {
+     uint128 magnitude(*this);
+
+     const bool negative = magnitude.is_signed();
+     if (negative)
+     {
+         res.push_back('-');
+         magnitude = -magnitude;
+     }
+
+     magnitude.format_unsigned(res);
+ }
+
+ // Writes the unsigned decimal representation of this value to pFile.
+ // Declared const because it only reads the value (format_unsigned() is const), so it can be
+ // called on const objects, consistent with print_hex().
+ void print_unsigned(FILE* pFile) const
+ {
+     std::string str;
+     format_unsigned(str);
+     fprintf(pFile, "%s", str.c_str());
+ }
+
+ // Writes the signed decimal representation of this value to pFile.
+ // Declared const because it only reads the value (format_signed() is const), so it can be
+ // called on const objects, consistent with print_hex().
+ void print_signed(FILE* pFile) const
+ {
+     std::string str;
+     format_signed(str);
+     fprintf(pFile, "%s", str.c_str());
+ }
+
+ // Returns this value with the order of its 128 bits reversed.
+ // Operates on the two 64-bit halves directly rather than viewing the object as uint32_t[4]:
+ // that avoids accessing the object through an unrelated pointer type (strict aliasing) and
+ // makes the result independent of host byte order and member layout.
+ // NOTE(review): relies on rev_dword() (defined elsewhere) reversing the bits of a 32-bit value - confirm.
+ uint128 get_reversed_bits() const
+ {
+     // Reversing a 64-bit half = reverse each 32-bit dword and swap the two dwords.
+     const uint64_t rev_of_hi = ((uint64_t)rev_dword((uint32_t)m_hi) << 32) | (uint64_t)rev_dword((uint32_t)(m_hi >> 32));
+     const uint64_t rev_of_lo = ((uint64_t)rev_dword((uint32_t)m_lo) << 32) | (uint64_t)rev_dword((uint32_t)(m_lo >> 32));
+
+     // The reversed high half becomes the new low half, and vice versa.
+     return uint128(rev_of_hi, rev_of_lo);
+ }
+
+ // Returns a copy whose 16 bytes of object representation are in reverse order.
+ uint128 get_byteswapped() const
+ {
+     uint128 swapped;
+
+     const uint8_t* pIn = (const uint8_t*)this;
+     uint8_t* pOut = (uint8_t*)&swapped;
+
+     // Mirror the bytes: input byte k lands at output byte 15 - k.
+     for (uint32_t k = 0; k < 16; k++)
+         pOut[15 - k] = pIn[k];
+
+     return swapped;
+ }
+
+ // Extracts bit_len bits (1..64) starting at bit position bit_ofs and returns them in the low bits of a uint64_t.
+ // The field must lie entirely inside the 128-bit value (asserted).
+ inline uint64_t get_bits64(uint32_t bit_ofs, uint32_t bit_len) const
+ {
+     assert(bit_ofs < 128);
+     assert(bit_len && (bit_len <= 64) && ((bit_ofs + bit_len) <= 128));
+
+     // Move the requested field down to bit 0.
+     uint128 shifted(*this);
+     shifted >>= bit_ofs;
+
+     // A full 64-bit field needs the all-ones mask; shifting 1ull by 64 is not valid.
+     uint64_t keep_mask = UINT64_MAX;
+     if (bit_len != 64)
+         keep_mask = (1ull << bit_len) - 1;
+
+     return shifted.get_low() & keep_mask;
+ }
+
+ // 32-bit convenience wrapper around get_bits64(): extracts up to 32 bits starting at bit_ofs.
+ inline uint32_t get_bits(uint32_t bit_ofs, uint32_t bit_len) const
+ {
+     assert(bit_len <= 32);
+     const uint64_t field = get_bits64(bit_ofs, bit_len);
+     return (uint32_t)field;
+ }
+
+ // Sequential reader: returns the len-bit field (1..32 bits) at bit_ofs, then advances bit_ofs past it.
+ inline uint32_t next_bits(uint32_t& bit_ofs, uint32_t len) const
+ {
+     assert(len && (len <= 32));
+     const uint32_t field = get_bits(bit_ofs, len);
+     bit_ofs += len;
+     return field;
+ }
+
+ // Overwrites the num_bits-wide field (1..64 bits) at bit position bit_ofs with val.
+ // val must fit in num_bits bits and the field must lie inside the 128-bit value (both asserted).
+ inline uint128& set_bits(uint64_t val, uint32_t bit_ofs, uint32_t num_bits)
+ {
+     assert(bit_ofs < 128);
+     assert(num_bits && (num_bits <= 64) && ((bit_ofs + num_bits) <= 128));
+
+     // field_mask starts as num_bits ones in the low bits.
+     uint128 field_mask(1);
+     field_mask = (field_mask << num_bits) - 1;
+     assert(uint128(val) <= field_mask);
+
+     // Clear the destination field, then OR the new value into place.
+     field_mask <<= bit_ofs;
+     *this &= ~field_mask;
+     *this |= (uint128(val) << bit_ofs);
+
+     return *this;
+ }
+ };
+
+ // Decodes a void-extent (solid color) block: validates bits 10..11 and the four extent
+ // coordinates, then records the LDR/HDR flag and the four 16-bit color components in log_blk.
+ // Returns false if the encoding is rejected.
+ static bool decode_void_extent(const uint128& bits, log_astc_block& log_blk)
+ {
+     // Bits 10..11 must both be set.
+     if (bits.get_bits(10, 2) != 0b11)
+         return false;
+
+     // Four consecutive 13-bit fields starting at bit 12: [0]=min_s, [1]=max_s, [2]=min_t, [3]=max_t.
+     uint32_t extents[4];
+     uint32_t bit_ofs = 12;
+     for (uint32_t e = 0; e < 4; e++)
+         extents[e] = bits.next_bits(bit_ofs, 13);
+     assert(bit_ofs == 64);
+
+     bool all_extents_all_ones = true;
+     for (uint32_t e = 0; e < 4; e++)
+         all_extents_all_ones = all_extents_all_ones && (extents[e] == 0x1FFF);
+
+     // Unless every extent field is all ones, each min must be strictly less than its max.
+     if (!all_extents_all_ones)
+     {
+         if ((extents[0] >= extents[1]) || (extents[2] >= extents[3]))
+             return false;
+     }
+
+     // Bit 9 selects HDR vs. LDR.
+     if (bits.get_bits(9, 1) != 0)
+         log_blk.m_solid_color_flag_hdr = true;
+     else
+         log_blk.m_solid_color_flag_ldr = true;
+
+     // The color components are four consecutive 16-bit fields in bits 64..127.
+     for (uint32_t c = 0; c < 4; c++)
+         log_blk.m_solid_color[c] = (uint16_t)bits.get_bits(64 + 16 * c, 16);
+
+     // HDR components are checked with is_half_inf_or_nan(); any hit rejects the block.
+     if (log_blk.m_solid_color_flag_hdr)
+     {
+         for (uint32_t c = 0; c < 4; c++)
+         {
+             if (is_half_inf_or_nan(log_blk.m_solid_color[c]))
+                 return false;
+         }
+     }
+
+     return true;
+ }
+
+ // One row of the block-mode layout table used by decode_config().
+ // Member order matters: s_dec_rows initializes these positionally.
+ struct astc_dec_row
+ {
+     int8_t Dp_ofs;  // bit offset of the dual-plane flag; negative = flag not read
+     int8_t P_ofs;   // bit offset of the P flag; negative = flag not read
+     int8_t W_ofs;   // bit offset of the grid-width field
+     int8_t W_size;  // bit count of the grid-width field; 0 = no field, width is W_bias
+     int8_t H_ofs;   // bit offset of the grid-height field
+     int8_t H_size;  // bit count of the grid-height field; 0 = no field, height is H_bias
+     int8_t W_bias;  // constant added to the grid-width field
+     int8_t H_bias;  // constant added to the grid-height field
+     int8_t p0_ofs;  // bit offset of bit 0 of the 3-bit p value
+     int8_t p1_ofs;  // bit offset of bit 1 of the 3-bit p value
+     int8_t p2_ofs;  // bit offset of bit 2 of the 3-bit p value
+ };
+
+ static const astc_dec_row s_dec_rows[10] = // Block-mode layout rows; decode_config() selects the row index from the low mode bits.
+ {
+ // Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs;
+ { 10, 9, 7, 2, 5, 2, 4, 2, 4, 0, 1 }, // row 0: W_bias 4, H_bias 2
+ { 10, 9, 7, 2, 5, 2, 8, 2, 4, 0, 1 }, // row 1: W_bias 8, H_bias 2
+ { 10, 9, 5, 2, 7, 2, 2, 8, 4, 0, 1 }, // row 2: W_bias 2, H_bias 8
+ { 10, 9, 5, 2, 7, 1, 2, 6, 4, 0, 1 }, // row 3: W_bias 2, H_bias 6
+
+ { 10, 9, 7, 1, 5, 2, 2, 2, 4, 0, 1 }, // row 4: W_bias 2, H_bias 2
+ { 10, 9, 0, 0, 5, 2, 12, 2, 4, 2, 3 }, // row 5: fixed W 12 (W_size 0), H_bias 2
+ { 10, 9, 5, 2, 0, 0, 2, 12, 4, 2, 3 }, // row 6: W_bias 2, fixed H 12 (H_size 0)
+ { 10, 9, 0, 0, 0, 0, 6, 10, 4, 2, 3 }, // row 7: fixed W 6, fixed H 10
+
+ { 10, 9, 0, 0, 0, 0, 10, 6, 4, 2, 3 }, // row 8: fixed W 10, fixed H 6
+ { -1, -1, 5, 2, 9, 2, 6, 6, 4, 2, 3 }, // row 9: W_bias 6, H_bias 6; negative Dp_ofs/P_ofs mean those flags are not read and stay false
+ };
+
+ static bool decode_config(const uint128& bits, log_astc_block& log_blk) // Parses the block-mode bits into grid width/height, weight ISE range and dual-plane flag (or hands off to decode_void_extent()). Returns false for rejected encodings.
+ {
+ // Reserved: the low four mode bits are all zero
+ if (bits.get_bits(0, 4) == 0)
+ return false;
+
+ // Reserved: bits 0..1 are zero and bits 6..8 are all set, unless bits 2..5 are also all set (that pattern is the void-extent case tested next)
+ if ((bits.get_bits(0, 2) == 0) && (bits.get_bits(6, 3) == 0b111))
+ {
+ if (bits.get_bits(2, 4) != 0b1111)
+ return false;
+ }
+
+ // Void extent
+ if (bits.get_bits(0, 9) == 0b111111100)
+ return decode_void_extent(bits, log_blk);
+
+ // Check rows: pick the s_dec_rows entry that matches the mode bits
+ const uint32_t x0_2 = bits.get_bits(0, 2), x2_2 = bits.get_bits(2, 2);
+ const uint32_t x5_4 = bits.get_bits(5, 4), x8_1 = bits.get_bits(8, 1);
+ const uint32_t x7_2 = bits.get_bits(7, 2);
+
+ int row_index = -1; // stays -1 when no layout matches
+ if (x0_2 == 0)
+ {
+ if (x7_2 == 0b00)
+ row_index = 5;
+ else if (x7_2 == 0b01)
+ row_index = 6;
+ else if (x5_4 == 0b1100)
+ row_index = 7;
+ else if (x5_4 == 0b1101)
+ row_index = 8;
+ else if (x7_2 == 0b10)
+ row_index = 9;
+ }
+ else
+ {
+ if (x2_2 == 0b00)
+ row_index = 0;
+ else if (x2_2 == 0b01)
+ row_index = 1;
+ else if (x2_2 == 0b10)
+ row_index = 2;
+ else if ((x2_2 == 0b11) && (x8_1 == 0))
+ row_index = 3;
+ else if ((x2_2 == 0b11) && (x8_1 == 1))
+ row_index = 4;
+ }
+ if (row_index < 0)
+ return false;
+
+ const astc_dec_row& r = s_dec_rows[row_index];
+
+ bool P = false, Dp = false; // flags default to false when the row has no offset for them
+ uint32_t W = r.W_bias, H = r.H_bias; // start from the row's biases; optional bit fields are added below
+
+ if (r.P_ofs >= 0)
+ P = bits.get_bits(r.P_ofs, 1) != 0;
+
+ if (r.Dp_ofs >= 0)
+ Dp = bits.get_bits(r.Dp_ofs, 1) != 0;
+
+ if (r.W_size)
+ W += bits.get_bits(r.W_ofs, r.W_size);
+
+ if (r.H_size)
+ H += bits.get_bits(r.H_ofs, r.H_size);
+
+ assert((W >= MIN_GRID_DIM) && (W <= MAX_BLOCK_DIM));
+ assert((H >= MIN_GRID_DIM) && (H <= MAX_BLOCK_DIM));
+
+ int p0 = bits.get_bits(r.p0_ofs, 1);
+ int p1 = bits.get_bits(r.p1_ofs, 1);
+ int p2 = bits.get_bits(r.p2_ofs, 1);
+
+ uint32_t p = p0 | (p1 << 1) | (p2 << 2); // 3-bit value assembled from the three single bits
+ if (p < 2) // values 0 and 1 are rejected
+ return false;
+
+ log_blk.m_grid_width = W;
+ log_blk.m_grid_height = H;
+
+ log_blk.m_weight_ise_range = (p - 2) + (P * BISE_10_LEVELS); // a set P flag offsets the range by BISE_10_LEVELS
+ assert(log_blk.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
+
+ log_blk.m_dual_plane = Dp;
+
+ return true;
+ }
+
+ // Reads four bytes at pBytes as a little-endian 32-bit value (pBytes[0] is the least significant byte).
+ // Each byte is widened to uint32_t before shifting: a uint8_t operand would be promoted to signed int,
+ // and shifting a byte >= 0x80 left by 24 would then shift into the sign bit.
+ static inline uint32_t read_le_dword(const uint8_t* pBytes)
+ {
+     const uint32_t b0 = pBytes[0];
+     const uint32_t b1 = pBytes[1];
+     const uint32_t b2 = pBytes[2];
+     const uint32_t b3 = pBytes[3];
+     return b0 | (b1 << 8U) | (b2 << 16U) | (b3 << 24U);
+ }
+
+ // See 18.12.Integer Sequence Encoding - tables computed by executing the decoder functions with all possible 8/7-bit inputs. s_trit_decode[T] holds the five trit values (each 0..2) for the packed 8-bit code T; decode_trit_block() indexes it with the T it assembles from the bitstream.
+ static const uint8_t s_trit_decode[256][5] =
+ {
+ {0,0,0,0,0},{1,0,0,0,0},{2,0,0,0,0},{0,0,2,0,0},{0,1,0,0,0},{1,1,0,0,0},{2,1,0,0,0},{1,0,2,0,0},
+ {0,2,0,0,0},{1,2,0,0,0},{2,2,0,0,0},{2,0,2,0,0},{0,2,2,0,0},{1,2,2,0,0},{2,2,2,0,0},{2,0,2,0,0},
+ {0,0,1,0,0},{1,0,1,0,0},{2,0,1,0,0},{0,1,2,0,0},{0,1,1,0,0},{1,1,1,0,0},{2,1,1,0,0},{1,1,2,0,0},
+ {0,2,1,0,0},{1,2,1,0,0},{2,2,1,0,0},{2,1,2,0,0},{0,0,0,2,2},{1,0,0,2,2},{2,0,0,2,2},{0,0,2,2,2},
+ {0,0,0,1,0},{1,0,0,1,0},{2,0,0,1,0},{0,0,2,1,0},{0,1,0,1,0},{1,1,0,1,0},{2,1,0,1,0},{1,0,2,1,0},
+ {0,2,0,1,0},{1,2,0,1,0},{2,2,0,1,0},{2,0,2,1,0},{0,2,2,1,0},{1,2,2,1,0},{2,2,2,1,0},{2,0,2,1,0},
+ {0,0,1,1,0},{1,0,1,1,0},{2,0,1,1,0},{0,1,2,1,0},{0,1,1,1,0},{1,1,1,1,0},{2,1,1,1,0},{1,1,2,1,0},
+ {0,2,1,1,0},{1,2,1,1,0},{2,2,1,1,0},{2,1,2,1,0},{0,1,0,2,2},{1,1,0,2,2},{2,1,0,2,2},{1,0,2,2,2},
+ {0,0,0,2,0},{1,0,0,2,0},{2,0,0,2,0},{0,0,2,2,0},{0,1,0,2,0},{1,1,0,2,0},{2,1,0,2,0},{1,0,2,2,0},
+ {0,2,0,2,0},{1,2,0,2,0},{2,2,0,2,0},{2,0,2,2,0},{0,2,2,2,0},{1,2,2,2,0},{2,2,2,2,0},{2,0,2,2,0},
+ {0,0,1,2,0},{1,0,1,2,0},{2,0,1,2,0},{0,1,2,2,0},{0,1,1,2,0},{1,1,1,2,0},{2,1,1,2,0},{1,1,2,2,0},
+ {0,2,1,2,0},{1,2,1,2,0},{2,2,1,2,0},{2,1,2,2,0},{0,2,0,2,2},{1,2,0,2,2},{2,2,0,2,2},{2,0,2,2,2},
+ {0,0,0,0,2},{1,0,0,0,2},{2,0,0,0,2},{0,0,2,0,2},{0,1,0,0,2},{1,1,0,0,2},{2,1,0,0,2},{1,0,2,0,2},
+ {0,2,0,0,2},{1,2,0,0,2},{2,2,0,0,2},{2,0,2,0,2},{0,2,2,0,2},{1,2,2,0,2},{2,2,2,0,2},{2,0,2,0,2},
+ {0,0,1,0,2},{1,0,1,0,2},{2,0,1,0,2},{0,1,2,0,2},{0,1,1,0,2},{1,1,1,0,2},{2,1,1,0,2},{1,1,2,0,2},
+ {0,2,1,0,2},{1,2,1,0,2},{2,2,1,0,2},{2,1,2,0,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,0,2,2,2},
+ {0,0,0,0,1},{1,0,0,0,1},{2,0,0,0,1},{0,0,2,0,1},{0,1,0,0,1},{1,1,0,0,1},{2,1,0,0,1},{1,0,2,0,1},
+ {0,2,0,0,1},{1,2,0,0,1},{2,2,0,0,1},{2,0,2,0,1},{0,2,2,0,1},{1,2,2,0,1},{2,2,2,0,1},{2,0,2,0,1},
+ {0,0,1,0,1},{1,0,1,0,1},{2,0,1,0,1},{0,1,2,0,1},{0,1,1,0,1},{1,1,1,0,1},{2,1,1,0,1},{1,1,2,0,1},
+ {0,2,1,0,1},{1,2,1,0,1},{2,2,1,0,1},{2,1,2,0,1},{0,0,1,2,2},{1,0,1,2,2},{2,0,1,2,2},{0,1,2,2,2},
+ {0,0,0,1,1},{1,0,0,1,1},{2,0,0,1,1},{0,0,2,1,1},{0,1,0,1,1},{1,1,0,1,1},{2,1,0,1,1},{1,0,2,1,1},
+ {0,2,0,1,1},{1,2,0,1,1},{2,2,0,1,1},{2,0,2,1,1},{0,2,2,1,1},{1,2,2,1,1},{2,2,2,1,1},{2,0,2,1,1},
+ {0,0,1,1,1},{1,0,1,1,1},{2,0,1,1,1},{0,1,2,1,1},{0,1,1,1,1},{1,1,1,1,1},{2,1,1,1,1},{1,1,2,1,1},
+ {0,2,1,1,1},{1,2,1,1,1},{2,2,1,1,1},{2,1,2,1,1},{0,1,1,2,2},{1,1,1,2,2},{2,1,1,2,2},{1,1,2,2,2},
+ {0,0,0,2,1},{1,0,0,2,1},{2,0,0,2,1},{0,0,2,2,1},{0,1,0,2,1},{1,1,0,2,1},{2,1,0,2,1},{1,0,2,2,1},
+ {0,2,0,2,1},{1,2,0,2,1},{2,2,0,2,1},{2,0,2,2,1},{0,2,2,2,1},{1,2,2,2,1},{2,2,2,2,1},{2,0,2,2,1},
+ {0,0,1,2,1},{1,0,1,2,1},{2,0,1,2,1},{0,1,2,2,1},{0,1,1,2,1},{1,1,1,2,1},{2,1,1,2,1},{1,1,2,2,1},
+ {0,2,1,2,1},{1,2,1,2,1},{2,2,1,2,1},{2,1,2,2,1},{0,2,1,2,2},{1,2,1,2,2},{2,2,1,2,2},{2,1,2,2,2},
+ {0,0,0,1,2},{1,0,0,1,2},{2,0,0,1,2},{0,0,2,1,2},{0,1,0,1,2},{1,1,0,1,2},{2,1,0,1,2},{1,0,2,1,2},
+ {0,2,0,1,2},{1,2,0,1,2},{2,2,0,1,2},{2,0,2,1,2},{0,2,2,1,2},{1,2,2,1,2},{2,2,2,1,2},{2,0,2,1,2},
+ {0,0,1,1,2},{1,0,1,1,2},{2,0,1,1,2},{0,1,2,1,2},{0,1,1,1,2},{1,1,1,1,2},{2,1,1,1,2},{1,1,2,1,2},
+ {0,2,1,1,2},{1,2,1,1,2},{2,2,1,1,2},{2,1,2,1,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,1,2,2,2}
+ };
+
+ static const uint8_t s_quint_decode[128][3] = // s_quint_decode[Q] holds the three quint values (each 0..4) for the packed 7-bit code Q; indexed by decode_quint_block().
+ {
+ {0,0,0},{1,0,0},{2,0,0},{3,0,0},{4,0,0},{0,4,0},{4,4,0},{4,4,4},
+ {0,1,0},{1,1,0},{2,1,0},{3,1,0},{4,1,0},{1,4,0},{4,4,1},{4,4,4},
+ {0,2,0},{1,2,0},{2,2,0},{3,2,0},{4,2,0},{2,4,0},{4,4,2},{4,4,4},
+ {0,3,0},{1,3,0},{2,3,0},{3,3,0},{4,3,0},{3,4,0},{4,4,3},{4,4,4},
+ {0,0,1},{1,0,1},{2,0,1},{3,0,1},{4,0,1},{0,4,1},{4,0,4},{0,4,4},
+ {0,1,1},{1,1,1},{2,1,1},{3,1,1},{4,1,1},{1,4,1},{4,1,4},{1,4,4},
+ {0,2,1},{1,2,1},{2,2,1},{3,2,1},{4,2,1},{2,4,1},{4,2,4},{2,4,4},
+ {0,3,1},{1,3,1},{2,3,1},{3,3,1},{4,3,1},{3,4,1},{4,3,4},{3,4,4},
+ {0,0,2},{1,0,2},{2,0,2},{3,0,2},{4,0,2},{0,4,2},{2,0,4},{3,0,4},
+ {0,1,2},{1,1,2},{2,1,2},{3,1,2},{4,1,2},{1,4,2},{2,1,4},{3,1,4},
+ {0,2,2},{1,2,2},{2,2,2},{3,2,2},{4,2,2},{2,4,2},{2,2,4},{3,2,4},
+ {0,3,2},{1,3,2},{2,3,2},{3,3,2},{4,3,2},{3,4,2},{2,3,4},{3,3,4},
+ {0,0,3},{1,0,3},{2,0,3},{3,0,3},{4,0,3},{0,4,3},{0,0,4},{1,0,4},
+ {0,1,3},{1,1,3},{2,1,3},{3,1,3},{4,1,3},{1,4,3},{0,1,4},{1,1,4},
+ {0,2,3},{1,2,3},{2,2,3},{3,2,3},{4,2,3},{2,4,3},{0,2,4},{1,2,4},
+ {0,3,3},{1,3,3},{2,3,3},{3,3,3},{4,3,3},{3,4,3},{0,3,4},{1,3,4}
+ };
+
+ // Decodes one trit block of 1..5 values from bits, starting at bit_ofs (which is advanced past the block).
+ // Each value contributes bits_per_val plain bits, followed by a slice of the packed 8-bit trit code.
+ static void decode_trit_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
+ {
+     assert((num_vals >= 1) && (num_vals <= 5));
+
+     // Width of the packed-code slice that follows each value's plain bits.
+     static const uint8_t s_t_bits[5] = { 2, 2, 1, 2, 1 };
+
+     uint32_t low_bits[5] = { 0 };
+     uint32_t packed_code = 0;
+     uint32_t packed_shift = 0;
+
+     for (uint32_t v = 0; v < num_vals; v++)
+     {
+         if (bits_per_val)
+             low_bits[v] = bits.next_bits(bit_ofs, bits_per_val);
+
+         const uint32_t slice_len = s_t_bits[v];
+         packed_code |= (bits.next_bits(bit_ofs, slice_len) << packed_shift);
+         packed_shift += slice_len;
+     }
+
+     // Expand the packed code into trits, then place each trit above its value's plain bits.
+     const uint8_t* pTrits = s_trit_decode[packed_code];
+
+     for (uint32_t v = 0; v < num_vals; v++)
+         pVals[v] = (uint8_t)((pTrits[v] << bits_per_val) | low_bits[v]);
+ }
+
+ // Decodes one quint block of 1..3 values from bits, starting at bit_ofs (which is advanced past the block).
+ // Each value contributes bits_per_val plain bits, followed by a slice of the packed 7-bit quint code.
+ static void decode_quint_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
+ {
+     assert((num_vals >= 1) && (num_vals <= 3));
+
+     // Width of the packed-code slice that follows each value's plain bits.
+     static const uint8_t s_t_bits[3] = { 3, 2, 2 };
+
+     uint32_t low_bits[3] = { 0 };
+     uint32_t packed_code = 0;
+     uint32_t packed_shift = 0;
+
+     for (uint32_t v = 0; v < num_vals; v++)
+     {
+         if (bits_per_val)
+             low_bits[v] = bits.next_bits(bit_ofs, bits_per_val);
+
+         const uint32_t slice_len = s_t_bits[v];
+         packed_code |= (bits.next_bits(bit_ofs, slice_len) << packed_shift);
+         packed_shift += slice_len;
+     }
+
+     // Expand the packed code into quints, then place each quint above its value's plain bits.
+     const uint8_t* pQuints = s_quint_decode[packed_code];
+
+     for (uint32_t v = 0; v < num_vals; v++)
+         pVals[v] = (uint8_t)((pQuints[v] << bits_per_val) | low_bits[v]);
+ }
+
+ // Decodes num_vals ISE-encoded values of the given ISE range from bits, starting at bit_ofs, into pVals.
+ // g_ise_range_table[ise_range] supplies { plain bits per value, uses-trits flag, uses-quints flag }.
+ static void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t bit_ofs)
+ {
+     assert(num_vals && (ise_range < TOTAL_ISE_RANGES));
+
+     const uint32_t bits_per_val = g_ise_range_table[ise_range][0];
+
+     if (g_ise_range_table[ise_range][1])
+     {
+         // Trits+bits, 5 vals per block, 7 bits extra per block
+         const uint32_t total_blocks = (num_vals + 4) / 5;
+
+         uint32_t b = 0;
+         while (b < total_blocks)
+         {
+             // The last block may hold fewer than 5 values.
+             const uint32_t num_vals_in_block = std::min<int>(num_vals - 5 * b, 5);
+             decode_trit_block(pVals + 5 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
+             b++;
+         }
+         return;
+     }
+
+     if (g_ise_range_table[ise_range][2])
+     {
+         // Quints+bits, 3 vals per block, 8 bits extra per block
+         const uint32_t total_blocks = (num_vals + 2) / 3;
+
+         uint32_t b = 0;
+         while (b < total_blocks)
+         {
+             // The last block may hold fewer than 3 values.
+             const uint32_t num_vals_in_block = std::min<int>(num_vals - 3 * b, 3);
+             decode_quint_block(pVals + 3 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
+             b++;
+         }
+         return;
+     }
+
+     assert(bits_per_val);
+
+     // Only bits
+     for (uint32_t v = 0; v < num_vals; v++)
+         pVals[v] = (uint8_t)bits.next_bits(bit_ofs, bits_per_val);
+ }
+
+ // Byte-pointer overload: loads 16 bytes at pBits128 as a little-endian 128-bit value and
+ // forwards to the uint128 overload of decode_bise().
+ void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t* pBits128, uint32_t bit_ofs)
+ {
+     const uint64_t dword0 = (uint64_t)read_le_dword(pBits128);
+     const uint64_t dword1 = (uint64_t)read_le_dword(pBits128 + sizeof(uint32_t));
+     const uint64_t dword2 = (uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 2);
+     const uint64_t dword3 = (uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 3);
+
+     // First constructor argument is the low 64 bits, second is the high 64 bits.
+     const uint128 bits(dword0 | (dword1 << 32), dword2 | (dword3 << 32));
+
+     decode_bise(ise_range, pVals, num_vals, bits, bit_ofs);
+ }
+
+ // Decodes a physical ASTC block to a logical ASTC block.
+ // blk_width/blk_height are only used to validate the weight grid's dimensions. Returns false (leaving log_blk.m_error_flag set) if the block is rejected; on success the flag is cleared and true is returned.
+ bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height)
+ {
+ assert(is_valid_block_size(blk_width, blk_height));
+
+ const uint8_t* pS = (uint8_t*)pASTC_block;
+
+ log_blk.clear();
+ log_blk.m_error_flag = true; // assume failure; cleared only on the success paths below
+
+ const uint128 bits( // the 16 input bytes, read as a little-endian 128-bit value
+ (uint64_t)read_le_dword(pS) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t))) << 32),
+ (uint64_t)read_le_dword(pS + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t) * 3)) << 32));
+
+ const uint128 rev_bits(bits.get_reversed_bits()); // used at the end to decode the weights
+
+ if (!decode_config(bits, log_blk))
+ return false;
+
+ if (log_blk.m_solid_color_flag_hdr || log_blk.m_solid_color_flag_ldr)
+ {
+ // Void extent
+ log_blk.m_error_flag = false;
+ return true;
+ }
+
+ // Check grid dimensions
+ if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
+ return false;
+
+ // Now we have the grid width/height, dual plane, weight ISE range
+
+ const uint32_t total_grid_weights = (log_blk.m_dual_plane ? 2 : 1) * (log_blk.m_grid_width * log_blk.m_grid_height);
+ const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_blk.m_weight_ise_range);
+
+ // 18.24 Illegal Encodings
+ if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))
+ return false;
+
+ const uint32_t end_of_weight_bit_ofs = 128 - total_weight_bits; // lowest bit position occupied by the weight bits
+
+ uint32_t total_extra_bits = 0; // extra CEM bits plus CCS bits located just below the weight bits
+
+ // Right before the weight bits, there may be extra CEM bits, then the 2 CCS bits if dual plane.
+
+ log_blk.m_num_partitions = bits.get_bits(11, 2) + 1;
+ if (log_blk.m_num_partitions == 1)
+ log_blk.m_color_endpoint_modes[0] = bits.get_bits(13, 4); // read CEM bits
+ else
+ {
+ // 2 or more partitions
+ if (log_blk.m_dual_plane && (log_blk.m_num_partitions == 4))
+ return false;
+
+ log_blk.m_partition_id = bits.get_bits(13, 10);
+
+ uint32_t cem_bits = bits.get_bits(23, 6);
+
+ if ((cem_bits & 3) == 0)
+ {
+ // All CEM's the same
+ for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
+ log_blk.m_color_endpoint_modes[i] = cem_bits >> 2;
+ }
+ else
+ {
+ // CEM's different, but within up to 2 adjacent classes
+ const uint32_t first_cem_index = ((cem_bits & 3) - 1) * 4;
+
+ total_extra_bits = 3 * log_blk.m_num_partitions - 4; // 2, 5 or 8 extra bits for 2, 3 or 4 partitions
+
+ if ((total_weight_bits + total_extra_bits) > 128)
+ return false;
+
+ uint32_t cem_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
+
+ uint32_t c[4] = { 0 }, m[4] = { 0 }; // per partition: c = class-select bit, m = 2-bit mode within the class
+
+ cem_bits >>= 2;
+ for (uint32_t i = 0; i < log_blk.m_num_partitions; i++, cem_bits >>= 1)
+ c[i] = cem_bits & 1;
+
+ switch (log_blk.m_num_partitions) // whatever is left in cem_bits holds the first m bits; the rest come from the extra bits
+ {
+ case 2:
+ {
+ m[0] = cem_bits & 3;
+ m[1] = bits.next_bits(cem_bit_pos, 2);
+ break;
+ }
+ case 3:
+ {
+ m[0] = cem_bits & 1;
+ m[0] |= (bits.next_bits(cem_bit_pos, 1) << 1);
+ m[1] = bits.next_bits(cem_bit_pos, 2);
+ m[2] = bits.next_bits(cem_bit_pos, 2);
+ break;
+ }
+ case 4:
+ {
+ for (uint32_t i = 0; i < 4; i++)
+ m[i] = bits.next_bits(cem_bit_pos, 2);
+ break;
+ }
+ default:
+ {
+ assert(0);
+ break;
+ }
+ }
+
+ assert(cem_bit_pos == end_of_weight_bit_ofs);
+
+ for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
+ {
+ log_blk.m_color_endpoint_modes[i] = first_cem_index + (c[i] * 4) + m[i];
+ assert(log_blk.m_color_endpoint_modes[i] <= 15);
+ }
+ }
+ }
+
+ // Now we have all the CEM indices.
+
+ if (log_blk.m_dual_plane)
+ {
+ // Read CCS bits, beneath any CEM bits
+ total_extra_bits += 2;
+
+ if (total_extra_bits > end_of_weight_bit_ofs)
+ return false;
+
+ uint32_t ccs_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
+ log_blk.m_color_component_selector = bits.get_bits(ccs_bit_pos, 2);
+ }
+
+ uint32_t config_bit_pos = 11 + 2; // config+num_parts
+ if (log_blk.m_num_partitions == 1)
+ config_bit_pos += 4; // CEM bits
+ else
+ config_bit_pos += 10 + 6; // part_id+CEM bits
+
+ // config+num_parts+total_extra_bits (CEM extra+CCS)
+ uint32_t total_config_bits = config_bit_pos + total_extra_bits;
+
+ // Compute number of remaining bits in block
+ const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;
+ if (num_remaining_bits < 0)
+ return false;
+
+ // Compute total number of ISE encoded color endpoint mode values
+ uint32_t total_cem_vals = 0;
+ for (uint32_t j = 0; j < log_blk.m_num_partitions; j++)
+ total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[j]);
+
+ if (total_cem_vals > MAX_ENDPOINTS)
+ return false;
+
+ // Infer endpoint ISE range based off the # of values we need to encode, and the # of remaining bits in the block
+ int endpoint_ise_range = -1; // stays -1 if no range from 20 down to 1 fits
+ for (int k = 20; k > 0; k--)
+ {
+ int b = get_ise_sequence_bits(total_cem_vals, k);
+ if (b <= num_remaining_bits)
+ {
+ endpoint_ise_range = k;
+ break;
+ }
+ }
+
+ // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
+ if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
+ return false;
+
+ log_blk.m_endpoint_ise_range = endpoint_ise_range;
+
+ // Decode endpoints forwards in block
+ decode_bise(log_blk.m_endpoint_ise_range, log_blk.m_endpoints, total_cem_vals, bits, config_bit_pos);
+
+ // Decode grid weights backwards in block
+ decode_bise(log_blk.m_weight_ise_range, log_blk.m_weights, total_grid_weights, rev_bits, 0);
+
+ log_blk.m_error_flag = false;
+
+ return true;
+ }
+
+} // namespace astc_helpers
+
+#endif //BASISU_ASTC_HELPERS_IMPLEMENTATION
diff --git a/thirdparty/basis_universal/transcoder/basisu_containers.h b/thirdparty/basis_universal/transcoder/basisu_containers.h
index d3e14369ba..bfc51bb499 100644
--- a/thirdparty/basis_universal/transcoder/basisu_containers.h
+++ b/thirdparty/basis_universal/transcoder/basisu_containers.h
@@ -188,8 +188,9 @@ namespace basisu
#define BASISU_IS_SCALAR_TYPE(T) (scalar_type<T>::cFlag)
-#if defined(__GNUC__) && __GNUC__<5
- #define BASISU_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
+#if !defined(BASISU_HAVE_STD_TRIVIALLY_COPYABLE) && defined(__GNUC__) && __GNUC__<5
+ //#define BASISU_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
+ #define BASISU_IS_TRIVIALLY_COPYABLE(...) __is_trivially_copyable(__VA_ARGS__)
#else
#define BASISU_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
#endif
@@ -286,8 +287,19 @@ namespace basisu
if (BASISU_IS_BITWISE_COPYABLE(T))
{
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+#endif
if ((m_p) && (other.m_p))
memcpy(m_p, other.m_p, m_size * sizeof(T));
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
}
else
{
@@ -330,8 +342,19 @@ namespace basisu
if (BASISU_IS_BITWISE_COPYABLE(T))
{
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+#endif
if ((m_p) && (other.m_p))
memcpy(m_p, other.m_p, other.m_size * sizeof(T));
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
}
else
{
@@ -501,7 +524,7 @@ namespace basisu
if (new_capacity > m_capacity)
{
- if (!increase_capacity(new_capacity, false))
+ if (!increase_capacity(new_capacity, false, true))
return false;
}
else if (new_capacity < m_capacity)
@@ -509,7 +532,8 @@ namespace basisu
// Must work around the lack of a "decrease_capacity()" method.
// This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize.
vector tmp;
- tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false);
+ if (!tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false, true))
+ return false;
tmp = *this;
swap(tmp);
}
@@ -750,7 +774,21 @@ namespace basisu
}
// Copy "down" the objects to preserve, filling in the empty slots.
+
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+#endif
+
memmove(pDst, pSrc, num_to_move * sizeof(T));
+
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
}
else
{
@@ -1003,7 +1041,21 @@ namespace basisu
inline void set_all(const T& o)
{
if ((sizeof(T) == 1) && (scalar_type<T>::cFlag))
+ {
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+#endif
memset(m_p, *reinterpret_cast<const uint8_t*>(&o), m_size);
+
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
+ }
else
{
T* pDst = m_p;
@@ -1029,7 +1081,7 @@ namespace basisu
// Important: This method is used in Basis Universal. If you change how this container allocates memory, you'll need to change any users of this method.
inline bool grant_ownership(T* p, uint32_t size, uint32_t capacity)
{
- // To to prevent the caller from obviously shooting themselves in the foot.
+ // To prevent the caller from obviously shooting themselves in the foot.
if (((p + capacity) > m_p) && (p < (m_p + m_capacity)))
{
// Can grant ownership of a block inside the container itself!
diff --git a/thirdparty/basis_universal/transcoder/basisu_containers_impl.h b/thirdparty/basis_universal/transcoder/basisu_containers_impl.h
index d5cb61569b..60c0b3d89f 100644
--- a/thirdparty/basis_universal/transcoder/basisu_containers_impl.h
+++ b/thirdparty/basis_universal/transcoder/basisu_containers_impl.h
@@ -19,23 +19,30 @@ namespace basisu
if (m_capacity >= min_new_capacity)
return true;
- size_t new_capacity = min_new_capacity;
- if ((grow_hint) && (!helpers::is_power_of_2((uint64_t)new_capacity)))
- {
- new_capacity = (size_t)helpers::next_pow2((uint64_t)new_capacity);
-
- assert(new_capacity && (new_capacity > m_capacity));
+ uint64_t new_capacity_u64 = min_new_capacity;
+ if ((grow_hint) && (!helpers::is_power_of_2(new_capacity_u64)))
+ new_capacity_u64 = helpers::next_pow2(new_capacity_u64);
- if (new_capacity < min_new_capacity)
- {
- if (nofail)
- return false;
- fprintf(stderr, "vector too large\n");
- abort();
- }
+ size_t new_capacity = (size_t)new_capacity_u64;
+ if (new_capacity != new_capacity_u64)
+ {
+ if (nofail)
+ return false;
+ fprintf(stderr, "elemental_vector::increase_capacity: vector too large\n");
+ abort();
}
- const size_t desired_size = element_size * new_capacity;
+ const uint64_t desired_size_u64 = (uint64_t)element_size * new_capacity;
+
+ const size_t desired_size = (size_t)desired_size_u64;
+ if (desired_size_u64 != desired_size)
+ {
+ if (nofail)
+ return false;
+ fprintf(stderr, "elemental_vector::increase_capacity: vector too large\n");
+ abort();
+ }
+
size_t actual_size = 0;
if (!pMover)
{
@@ -46,11 +53,7 @@ namespace basisu
return false;
char buf[256];
-#ifdef _MSC_VER
- sprintf_s(buf, sizeof(buf), "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size);
-#else
- sprintf(buf, "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size);
-#endif
+ snprintf(buf, sizeof(buf), "elemental_vector::increase_capacity: realloc() failed allocating %zu bytes", desired_size);
fprintf(stderr, "%s", buf);
abort();
}
@@ -75,11 +78,7 @@ namespace basisu
return false;
char buf[256];
-#ifdef _MSC_VER
- sprintf_s(buf, sizeof(buf), "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size);
-#else
- sprintf(buf, "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size);
-#endif
+ snprintf(buf, sizeof(buf), "elemental_vector::increase_capacity: malloc() failed allocating %zu bytes", desired_size);
fprintf(stderr, "%s", buf);
abort();
}
diff --git a/thirdparty/basis_universal/transcoder/basisu_file_headers.h b/thirdparty/basis_universal/transcoder/basisu_file_headers.h
index 4316d738e6..d29e3feb03 100644
--- a/thirdparty/basis_universal/transcoder/basisu_file_headers.h
+++ b/thirdparty/basis_universal/transcoder/basisu_file_headers.h
@@ -1,5 +1,5 @@
// basis_file_headers.h
-// Copyright (C) 2019-2020 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -89,7 +89,8 @@ namespace basist
enum class basis_tex_format
{
cETC1S = 0,
- cUASTC4x4 = 1
+ cUASTC4x4 = 1,
+ cUASTC_HDR_4x4 = 2
};
struct basis_file_header
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
index c698861f3b..32018cd282 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
@@ -1,5 +1,5 @@
// basisu_transcoder.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -17,6 +17,11 @@
#include <limits.h>
#include "basisu_containers_impl.h"
+#define BASISU_ASTC_HELPERS_IMPLEMENTATION
+#include "basisu_astc_helpers.h"
+
+#include "basisu_astc_hdr_core.h"
+
#ifndef BASISD_IS_BIG_ENDIAN
// TODO: This doesn't work on OSX. How can this be so difficult?
//#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN)
@@ -139,6 +144,10 @@
#endif
#endif
+#ifndef BASISD_SUPPORT_UASTC_HDR
+ #define BASISD_SUPPORT_UASTC_HDR 1
+#endif
+
#define BASISD_WRITE_NEW_BC7_MODE5_TABLES 0
#define BASISD_WRITE_NEW_DXT1_TABLES 0
#define BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES 0
@@ -1908,17 +1917,24 @@ namespace basist
void basisu_transcoder_init()
{
if (g_transcoder_initialized)
- {
- BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");
+ {
+ BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");
return;
- }
+ }
- BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");
+ BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");
#if BASISD_SUPPORT_UASTC
uastc_init();
#endif
+#if BASISD_SUPPORT_UASTC_HDR
+ // TODO: Examine this, optimize for startup time/mem utilization.
+ astc_helpers::init_tables(false);
+
+ astc_hdr_core_init();
+#endif
+
#if BASISD_SUPPORT_ASTC
transcoder_init_astc();
#endif
@@ -2027,6 +2043,10 @@ namespace basist
transcoder_init_pvrtc2();
#endif
+#if BASISD_SUPPORT_UASTC_HDR
+ bc6h_enc_init();
+#endif
+
g_transcoder_initialized = true;
}
@@ -6928,7 +6948,7 @@ namespace basist
static inline int sq(int x) { return x * x; }
- // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0.
+ // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0.
// This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha!
// And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it.
static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook)
@@ -7515,6 +7535,8 @@ namespace basist
}
#endif // BASISD_SUPPORT_PVRTC2
+ //------------------------------------------------------------------------------------------------
+
basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder() :
m_pGlobal_codebook(nullptr),
m_selector_history_buf_size(0)
@@ -8620,7 +8642,7 @@ namespace basist
// Now make sure the output buffer is large enough, or we'll overwrite memory.
if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels))
{
- BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
+ BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
return false;
}
}
@@ -8632,7 +8654,7 @@ namespace basist
if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1)
{
- BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
+ BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
return false;
}
}
@@ -8640,7 +8662,7 @@ namespace basist
{
if (output_blocks_buf_size_in_blocks_or_pixels < total_slice_blocks)
{
- BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n");
+ BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n");
return false;
}
}
@@ -9242,13 +9264,17 @@ namespace basist
return status;
}
+
+ //------------------------------------------------------------------------------------------------
// Nothing to set up at construction time: the constructor body is empty.
basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder() = default;
- bool basisu_lowlevel_uastc_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
- uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
+ bool basisu_lowlevel_uastc_transcoder::transcode_slice(
+ void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+ uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
+ const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
{
BASISU_NOTE_UNUSED(pState);
@@ -9784,6 +9810,317 @@ namespace basist
return status;
}
+
+ //------------------------------------------------------------------------------------------------
+
+ // Nothing to set up at construction time: the constructor body is empty.
+ basisu_lowlevel_uastc_hdr_transcoder::basisu_lowlevel_uastc_hdr_transcoder() = default;
+
+ // Transcodes one slice of UASTC HDR 4x4 data (an array of astc_blk, row-major, num_blocks_x * num_blocks_y entries)
+ // into the requested block_format.
+ //
+ // Supported values of fmt:
+ //   cUASTC_HDR_4x4 / cASTC_HDR_4x4 - each source block is copied unchanged (UASTC HDR is just ASTC).
+ //   cBC6H                          - each block is converted with astc_hdr_transcode_to_bc6h().
+ //   cRGB_9E5 / cRGBA_HALF / cRGB_HALF - each block is unpacked and decoded to pixels, then written into the
+ //                                    destination image, cropped to the output pitch/row count.
+ // Any other fmt asserts and makes the call fail.
+ //
+ // pDst_blocks                            - destination buffer (blocks or pixels, depending on fmt).
+ // pImage_data / image_data_size          - source blocks; must hold at least num_blocks_x * num_blocks_y astc_blk's.
+ // output_block_or_pixel_stride_in_bytes  - byte step between consecutive destination blocks.
+ // output_row_pitch_in_blocks_or_pixels   - 0 selects orig_width (uncompressed fmt) or num_blocks_x (block fmt).
+ // output_rows_in_pixels                  - 0 selects orig_height; only used for uncompressed fmt.
+ // pState, bc1_allow_threecolor_blocks, has_alpha, channel0, channel1 and decode_flags are not used here.
+ //
+ // Returns false if the transcoder was not globally initialized, the source buffer is too small, any block
+ // fails to unpack/decode/convert, or UASTC HDR support was compiled out (BASISD_SUPPORT_UASTC_HDR is 0).
+ bool basisu_lowlevel_uastc_hdr_transcoder::transcode_slice(
+     void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+     uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
+     const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
+     basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
+ {
+     BASISU_NOTE_UNUSED(pState);
+     BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
+     BASISU_NOTE_UNUSED(has_alpha);
+     BASISU_NOTE_UNUSED(channel0);
+     BASISU_NOTE_UNUSED(channel1);
+     BASISU_NOTE_UNUSED(decode_flags);
+
+     assert(g_transcoder_initialized);
+     if (!g_transcoder_initialized)
+     {
+         BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder not globally initialized.\n");
+         return false;
+     }
+
+#if BASISD_SUPPORT_UASTC_HDR
+     // 64-bit on purpose: the block count comes from file data and the 32-bit product can wrap.
+     const uint64_t total_blocks = (uint64_t)num_blocks_x * num_blocks_y;
+
+     if (!output_row_pitch_in_blocks_or_pixels)
+     {
+         if (basis_block_format_is_uncompressed(fmt))
+             output_row_pitch_in_blocks_or_pixels = orig_width;
+         else
+             output_row_pitch_in_blocks_or_pixels = num_blocks_x;
+     }
+
+     if (basis_block_format_is_uncompressed(fmt))
+     {
+         if (!output_rows_in_pixels)
+             output_rows_in_pixels = orig_height;
+     }
+
+     // Same test as "image_data_size < sizeof(astc_blk) * total_blocks", written as a division so it cannot overflow.
+     if (total_blocks > ((uint64_t)image_data_size / sizeof(astc_blk)))
+     {
+         BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
+         return false;
+     }
+
+     const astc_blk* pSource_block = reinterpret_cast<const astc_blk*>(pImage_data);
+
+     bool status = false;
+
+     // TODO: Optimize pure memcpy() case.
+
+     for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
+     {
+         void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
+
+         for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes)
+         {
+             switch (fmt)
+             {
+             case block_format::cUASTC_HDR_4x4:
+             case block_format::cASTC_HDR_4x4:
+             {
+                 // Nothing to do, UASTC HDR is just ASTC.
+                 memcpy(pDst_block, pSource_block, sizeof(astc_blk));
+                 status = true;
+                 break;
+             }
+             case block_format::cBC6H:
+             {
+                 status = astc_hdr_transcode_to_bc6h(*pSource_block, *(bc6h_block *)pDst_block);
+                 break;
+             }
+             case block_format::cRGB_9E5:
+             {
+                 astc_helpers::log_astc_block log_blk;
+                 status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
+                 if (status)
+                 {
+                     uint32_t* pDst_pixels = reinterpret_cast<uint32_t*>(
+                         static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t)
+                     );
+
+                     uint32_t blk_texels[4][4];
+
+                     status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeRGB9E5);
+
+                     if (status)
+                     {
+                         // Kept signed: a block lying entirely outside the output area yields a value <= 0 and must
+                         // copy nothing, rather than wrapping to a huge unsigned count.
+                         const int max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
+                         const int max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
+
+                         if (max_x > 0)
+                         {
+                             for (int y = 0; y < max_y; y++)
+                             {
+                                 memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * (size_t)max_x);
+
+                                 pDst_pixels += output_row_pitch_in_blocks_or_pixels;
+                             } // y
+                         }
+                     }
+                 }
+
+                 break;
+             }
+             case block_format::cRGBA_HALF:
+             {
+                 astc_helpers::log_astc_block log_blk;
+                 status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
+                 if (status)
+                 {
+                     half_float* pDst_pixels = reinterpret_cast<half_float*>(
+                         static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4
+                     );
+
+                     half_float blk_texels[4][4][4];
+                     status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
+
+                     if (status)
+                     {
+                         // Signed so that out-of-range blocks produce empty loops (see cRGB_9E5 above).
+                         const int max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
+                         const int max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
+
+                         for (int y = 0; y < max_y; y++)
+                         {
+                             for (int x = 0; x < max_x; x++)
+                             {
+                                 pDst_pixels[0 + 4 * x] = blk_texels[y][x][0];
+                                 pDst_pixels[1 + 4 * x] = blk_texels[y][x][1];
+                                 pDst_pixels[2 + 4 * x] = blk_texels[y][x][2];
+                                 pDst_pixels[3 + 4 * x] = blk_texels[y][x][3];
+                             } // x
+
+                             pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4;
+                         } // y
+                     }
+                 }
+
+                 break;
+             }
+             case block_format::cRGB_HALF:
+             {
+                 astc_helpers::log_astc_block log_blk;
+                 status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
+                 if (status)
+                 {
+                     half_float* pDst_pixels =
+                         reinterpret_cast<half_float*>(static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3);
+
+                     // The decoder still produces 4 components per texel; only the first 3 are written out.
+                     half_float blk_texels[4][4][4];
+                     status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
+                     if (status)
+                     {
+                         // Signed so that out-of-range blocks produce empty loops (see cRGB_9E5 above).
+                         const int max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
+                         const int max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
+
+                         for (int y = 0; y < max_y; y++)
+                         {
+                             for (int x = 0; x < max_x; x++)
+                             {
+                                 pDst_pixels[0 + 3 * x] = blk_texels[y][x][0];
+                                 pDst_pixels[1 + 3 * x] = blk_texels[y][x][1];
+                                 pDst_pixels[2 + 3 * x] = blk_texels[y][x][2];
+                             } // x
+
+                             pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3;
+                         } // y
+                     }
+                 }
+
+                 break;
+             }
+             default:
+                 // Unsupported target format: status keeps its initial false value, so the call fails below.
+                 assert(0);
+                 break;
+
+             }
+
+             if (!status)
+             {
+                 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder failed to unpack a UASTC HDR block - this is a bug, or the data was corrupted\n");
+                 return false;
+             }
+
+         } // block_x
+
+     } // block_y
+
+     return true;
+#else
+     BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: UASTC_HDR is unsupported\n");
+
+     // pState, bc1_allow_threecolor_blocks, has_alpha, channel0, channel1 and decode_flags were already
+     // marked unused at the top of the function.
+     BASISU_NOTE_UNUSED(orig_width);
+     BASISU_NOTE_UNUSED(orig_height);
+     BASISU_NOTE_UNUSED(output_rows_in_pixels);
+     BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
+     BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
+     BASISU_NOTE_UNUSED(fmt);
+     BASISU_NOTE_UNUSED(image_data_size);
+     BASISU_NOTE_UNUSED(pImage_data);
+     BASISU_NOTE_UNUSED(num_blocks_x);
+     BASISU_NOTE_UNUSED(num_blocks_y);
+     BASISU_NOTE_UNUSED(pDst_blocks);
+
+     return false;
+#endif
+ }
+
+ // Container independent entry point: transcodes one UASTC HDR image (slice) to target_format.
+ //
+ // The slice occupies [slice_offset, slice_offset + slice_length) inside pCompressed_data; the call fails if that
+ // range does not fit in compressed_data_length, or if basis_validate_output_buffer_size() rejects the output buffer.
+ // Supported targets are cTFASTC_HDR_4x4_RGBA, cTFBC6H, cTFRGB_HALF, cTFRGBA_HALF and cTFRGB_9E5; each one is
+ // forwarded to transcode_slice() with the matching block_format. Any other target asserts and returns false.
+ //
+ // is_video, level_index and decode_flags are not used here.
+ // Returns the status reported by transcode_slice(), or false on any validation failure.
+ bool basisu_lowlevel_uastc_hdr_transcoder::transcode_image(
+     transcoder_texture_format target_format,
+     void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
+     const uint8_t* pCompressed_data, uint32_t compressed_data_length,
+     uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
+     uint32_t slice_offset, uint32_t slice_length,
+     uint32_t decode_flags,
+     bool has_alpha,
+     bool is_video,
+     uint32_t output_row_pitch_in_blocks_or_pixels,
+     basisu_transcoder_state* pState,
+     uint32_t output_rows_in_pixels,
+     int channel0, int channel1)
+ {
+     BASISU_NOTE_UNUSED(is_video);
+     BASISU_NOTE_UNUSED(level_index);
+     BASISU_NOTE_UNUSED(decode_flags);
+
+     // Summed in 64 bits so that offset + length cannot wrap.
+     if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
+     {
+         BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: source data buffer too small\n");
+         return false;
+     }
+
+     const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
+     const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
+
+     if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks))
+     {
+         BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: output buffer size too small\n");
+         return false;
+     }
+
+     bool status = false;
+
+     switch (target_format)
+     {
+     case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+     {
+         status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_4x4,
+             bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
+
+         if (!status)
+         {
+             BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n");
+         }
+         break;
+     }
+     case transcoder_texture_format::cTFBC6H:
+     {
+         status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H,
+             bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
+         if (!status)
+         {
+             BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to BC6H failed\n");
+         }
+         break;
+     }
+     case transcoder_texture_format::cTFRGB_HALF:
+     {
+         status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF,
+             bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+         if (!status)
+         {
+             BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n");
+         }
+         break;
+     }
+     case transcoder_texture_format::cTFRGBA_HALF:
+     {
+         status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF,
+             bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+         if (!status)
+         {
+             BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
+         }
+         break;
+     }
+     case transcoder_texture_format::cTFRGB_9E5:
+     {
+         status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5,
+             bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+         if (!status)
+         {
+             // Names the format that actually failed (this message previously said RGBA_HALF).
+             BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGB_9E5 failed\n");
+         }
+         break;
+     }
+     default:
+     {
+         assert(0);
+         BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: Invalid format\n");
+         break;
+     }
+     }
+
+     return status;
+ }
+
+ //------------------------------------------------------------------------------------------------
basisu_transcoder::basisu_transcoder() :
m_ready_to_transcode(false)
@@ -10390,7 +10727,7 @@ namespace basist
}
else
{
- // Nothing special to do for UASTC.
+ // Nothing special to do for UASTC/UASTC HDR.
if (m_lowlevel_etc1s_decoder.m_local_endpoints.size())
{
m_lowlevel_etc1s_decoder.clear();
@@ -10510,7 +10847,14 @@ namespace basist
return false;
}
- if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
+ if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
+ {
+ return m_lowlevel_uastc_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
+ pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
+ fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
+ output_rows_in_pixels, channel0, channel1, decode_flags);
+ }
+ else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
{
return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
@@ -10742,7 +11086,18 @@ namespace basist
memset(static_cast<uint8_t*>(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel);
}
- if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
+ if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
+ {
+ const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
+
+ // Use the container independent image transcode method.
+ status = m_lowlevel_uastc_hdr_decoder.transcode_image(fmt,
+ pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
+ (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
+ pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
+ decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+ }
+ else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
{
const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
@@ -10808,20 +11163,27 @@ namespace basist
return 8;
case transcoder_texture_format::cTFBC7_RGBA:
case transcoder_texture_format::cTFBC7_ALT:
+ case transcoder_texture_format::cTFBC6H:
case transcoder_texture_format::cTFETC2_RGBA:
case transcoder_texture_format::cTFBC3_RGBA:
case transcoder_texture_format::cTFBC5_RG:
case transcoder_texture_format::cTFASTC_4x4_RGBA:
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
case transcoder_texture_format::cTFATC_RGBA:
case transcoder_texture_format::cTFFXT1_RGB:
case transcoder_texture_format::cTFETC2_EAC_RG11:
return 16;
case transcoder_texture_format::cTFRGBA32:
+ case transcoder_texture_format::cTFRGB_9E5:
return sizeof(uint32_t);
case transcoder_texture_format::cTFRGB565:
case transcoder_texture_format::cTFBGR565:
case transcoder_texture_format::cTFRGBA4444:
return sizeof(uint16_t);
+ case transcoder_texture_format::cTFRGB_HALF:
+ return sizeof(half_float) * 3;
+ case transcoder_texture_format::cTFRGBA_HALF:
+ return sizeof(half_float) * 4;
default:
assert(0);
BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
@@ -10845,17 +11207,22 @@ namespace basist
case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA";
case transcoder_texture_format::cTFBC5_RG: return "BC5_RG";
case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA";
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return "ASTC_HDR_RGBA";
case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB";
case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA";
case transcoder_texture_format::cTFRGBA32: return "RGBA32";
case transcoder_texture_format::cTFRGB565: return "RGB565";
case transcoder_texture_format::cTFBGR565: return "BGR565";
case transcoder_texture_format::cTFRGBA4444: return "RGBA4444";
+ case transcoder_texture_format::cTFRGBA_HALF: return "RGBA_HALF";
+ case transcoder_texture_format::cTFRGB_9E5: return "RGB_9E5";
+ case transcoder_texture_format::cTFRGB_HALF: return "RGB_HALF";
case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB";
case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB";
case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11";
case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11";
+ case transcoder_texture_format::cTFBC6H: return "BC6H";
default:
assert(0);
BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
@@ -10881,7 +11248,13 @@ namespace basist
case block_format::cRGB565: return "RGB565";
case block_format::cBGR565: return "BGR565";
case block_format::cRGBA4444: return "RGBA4444";
+ case block_format::cRGBA_HALF: return "RGBA_HALF";
+ case block_format::cRGB_HALF: return "RGB_HALF";
+ case block_format::cRGB_9E5: return "RGB_9E5";
case block_format::cUASTC_4x4: return "UASTC_4x4";
+ case block_format::cUASTC_HDR_4x4: return "UASTC_HDR_4x4";
+ case block_format::cBC6H: return "BC6H";
+ case block_format::cASTC_HDR_4x4: return "ASTC_HDR_4x4";
case block_format::cFXT1_RGB: return "FXT1_RGB";
case block_format::cPVRTC2_4_RGB: return "PVRTC2_4_RGB";
case block_format::cPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
@@ -10914,11 +11287,13 @@ namespace basist
bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt)
{
+ // TODO: Technically ASTC_HDR does support alpha, but UASTC_HDR doesn't yet support it. Unsure what to do here.
switch (fmt)
{
case transcoder_texture_format::cTFETC2_RGBA:
case transcoder_texture_format::cTFBC3_RGBA:
case transcoder_texture_format::cTFASTC_4x4_RGBA:
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
case transcoder_texture_format::cTFBC7_RGBA:
case transcoder_texture_format::cTFBC7_ALT:
case transcoder_texture_format::cTFPVRTC1_4_RGBA:
@@ -10926,6 +11301,23 @@ namespace basist
case transcoder_texture_format::cTFATC_RGBA:
case transcoder_texture_format::cTFRGBA32:
case transcoder_texture_format::cTFRGBA4444:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ return true;
+ default:
+ break;
+ }
+ return false;
+ }
+
+ bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt)
+ {
+ switch (fmt)
+ {
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+ case transcoder_texture_format::cTFBC6H:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ case transcoder_texture_format::cTFRGB_HALF:
+ case transcoder_texture_format::cTFRGB_9E5:
return true;
default:
break;
@@ -10947,13 +11339,18 @@ namespace basist
case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA;
case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3;
case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5;
- case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC4x4;
+ case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC_LDR_4x4;
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return basisu::texture_format::cASTC_HDR_4x4;
+ case transcoder_texture_format::cTFBC6H: return basisu::texture_format::cBC6HUnsigned;
case transcoder_texture_format::cTFATC_RGB: return basisu::texture_format::cATC_RGB;
case transcoder_texture_format::cTFATC_RGBA: return basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA;
case transcoder_texture_format::cTFRGBA32: return basisu::texture_format::cRGBA32;
case transcoder_texture_format::cTFRGB565: return basisu::texture_format::cRGB565;
case transcoder_texture_format::cTFBGR565: return basisu::texture_format::cBGR565;
case transcoder_texture_format::cTFRGBA4444: return basisu::texture_format::cRGBA4444;
+ case transcoder_texture_format::cTFRGBA_HALF: return basisu::texture_format::cRGBA_HALF;
+ case transcoder_texture_format::cTFRGB_9E5: return basisu::texture_format::cRGB_9E5;
+ case transcoder_texture_format::cTFRGB_HALF: return basisu::texture_format::cRGB_HALF;
case transcoder_texture_format::cTFFXT1_RGB: return basisu::texture_format::cFXT1_RGB;
case transcoder_texture_format::cTFPVRTC2_4_RGB: return basisu::texture_format::cPVRTC2_4_RGBA;
case transcoder_texture_format::cTFPVRTC2_4_RGBA: return basisu::texture_format::cPVRTC2_4_RGBA;
@@ -10975,6 +11372,9 @@ namespace basist
case transcoder_texture_format::cTFRGB565:
case transcoder_texture_format::cTFBGR565:
case transcoder_texture_format::cTFRGBA4444:
+ case transcoder_texture_format::cTFRGB_HALF:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ case transcoder_texture_format::cTFRGB_9E5:
return true;
default:
break;
@@ -10995,6 +11395,9 @@ namespace basist
case block_format::cRGBA4444_COLOR:
case block_format::cRGBA4444_ALPHA:
case block_format::cRGBA4444_COLOR_OPAQUE:
+ case block_format::cRGBA_HALF:
+ case block_format::cRGB_HALF:
+ case block_format::cRGB_9E5:
return true;
default:
break;
@@ -11007,11 +11410,16 @@ namespace basist
switch (fmt)
{
case transcoder_texture_format::cTFRGBA32:
+ case transcoder_texture_format::cTFRGB_9E5:
return sizeof(uint32_t);
case transcoder_texture_format::cTFRGB565:
case transcoder_texture_format::cTFBGR565:
case transcoder_texture_format::cTFRGBA4444:
return sizeof(uint16_t);
+ case transcoder_texture_format::cTFRGB_HALF:
+ return sizeof(half_float) * 3;
+ case transcoder_texture_format::cTFRGBA_HALF:
+ return sizeof(half_float) * 4;
default:
break;
}
@@ -11038,8 +11446,26 @@ namespace basist
bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt)
{
- if (fmt == basis_tex_format::cUASTC4x4)
+ if (fmt == basis_tex_format::cUASTC_HDR_4x4)
+ {
+ // UASTC HDR
+#if BASISD_SUPPORT_UASTC_HDR
+ switch (tex_type)
+ {
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+ case transcoder_texture_format::cTFBC6H:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ case transcoder_texture_format::cTFRGB_HALF:
+ case transcoder_texture_format::cTFRGB_9E5:
+ return true;
+ default:
+ break;
+ }
+#endif
+ }
+ else if (fmt == basis_tex_format::cUASTC4x4)
{
+ // UASTC LDR
#if BASISD_SUPPORT_UASTC
switch (tex_type)
{
@@ -11049,6 +11475,12 @@ namespace basist
case transcoder_texture_format::cTFATC_RGB:
case transcoder_texture_format::cTFATC_RGBA:
case transcoder_texture_format::cTFFXT1_RGB:
+ // UASTC LDR doesn't support transcoding to HDR formats
+ case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+ case transcoder_texture_format::cTFBC6H:
+ case transcoder_texture_format::cTFRGBA_HALF:
+ case transcoder_texture_format::cTFRGB_HALF:
+ case transcoder_texture_format::cTFRGB_9E5:
return false;
default:
return true;
@@ -11057,6 +11489,7 @@ namespace basist
}
else
{
+ // ETC1S
switch (tex_type)
{
// ETC1 and uncompressed are always supported.
@@ -11812,7 +12245,7 @@ namespace basist
// Encodes 3 values to output, usable for any range that uses quints and bits
static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
{
- // First extract the trits and the bits from the 5 input values
+ // First extract the quints and the bits from the 3 input values
int quints = 0, bits[3];
const uint32_t bit_mask = (1 << n) - 1;
for (int i = 0; i < 3; i++)
@@ -12131,11 +12564,13 @@ namespace basist
return bits & ((1U << codesize) - 1U);
}
-
- uint32_t byte_bit_offset = bit_offset & 7U;
- const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]);
- bit_offset += codesize;
- return (w >> byte_bit_offset)& ((1U << codesize) - 1U);
+ else
+ {
+ uint32_t byte_bit_offset = bit_offset & 7U;
+ const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]);
+ bit_offset += codesize;
+ return (w >> byte_bit_offset) & ((1U << codesize) - 1U);
+ }
}
bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints)
@@ -12170,6 +12605,7 @@ namespace basist
return false;
unpacked.m_mode = mode;
+ unpacked.m_common_pattern = 0;
uint32_t bit_ofs = g_uastc_mode_huff_codes[mode][1];
@@ -16663,10 +17099,12 @@ namespace basist
memcpy(&m_header, pData, sizeof(m_header));
- // We only support UASTC and ETC1S
- if (m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED)
+ // We only support UASTC LDR, UASTC HDR and ETC1S.
+ // Note the DFD's contents are what we are guided by for decoding the KTX2 file, not this format field (currently).
+ if ((m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) &&
+ (m_header.m_vk_format != basist::KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK))
{
- BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC format\n");
+ BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC LDR/HDR format\n");
return false;
}
@@ -16890,6 +17328,16 @@ namespace basist
// We're assuming "DATA" means RGBA so it has alpha.
m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
}
+ else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_HDR)
+ {
+ m_format = basist::basis_tex_format::cUASTC_HDR_4x4;
+
+ m_dfd_samples = 1;
+ m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
+
+ // We're assuming "DATA" means RGBA so it has alpha.
+ m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
+ }
else
{
// Unsupported DFD color model.
@@ -17167,7 +17615,8 @@ namespace basist
return false;
}
}
- else if (m_format == basist::basis_tex_format::cUASTC4x4)
+ else if ((m_format == basist::basis_tex_format::cUASTC4x4) ||
+ (m_format == basist::basis_tex_format::cUASTC_HDR_4x4))
{
// Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices.
assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length);
@@ -17188,14 +17637,29 @@ namespace basist
return false;
}
- if (!m_uastc_transcoder.transcode_image(fmt,
- pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
- (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
- 0, (uint32_t)total_2D_image_size,
- decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
+ if (m_format == basist::basis_tex_format::cUASTC_HDR_4x4)
{
- BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
- return false;
+ if (!m_uastc_hdr_transcoder.transcode_image(fmt,
+ pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
+ (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
+ 0, (uint32_t)total_2D_image_size,
+ decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
+ {
+ BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
+ return false;
+ }
+ }
+ else
+ {
+ if (!m_uastc_transcoder.transcode_image(fmt,
+ pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
+ (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
+ 0, (uint32_t)total_2D_image_size,
+ decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
+ {
+ BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
+ return false;
+ }
}
}
else
@@ -17476,4 +17940,1531 @@ namespace basist
#endif
}
+ //-------------------------------
+
+#ifdef BASISD_SUPPORT_UASTC_HDR
+ // Converts a 32-bit float to a 16-bit half float (returned as its raw bit pattern).
+ // This float->half conversion matches how "F32TO16" works on Intel GPU's.
+ //
+ // - Inf stays Inf; NaN becomes a NaN with mantissa 1. The sign bit is always carried over.
+ // - Unbiased exponents above 15 overflow to Inf.
+ // - Unbiased exponents below -14 are encoded as half denormals.
+ // - float zeros and float denormals become a (signed) zero.
+ //
+ // The float's bits are fetched with memcpy() instead of through a union: reading a union member
+ // other than the one last written is undefined behavior in C++.
+ basist::half_float float_to_half(float val)
+ {
+ 	static_assert(sizeof(float) == sizeof(uint32_t), "float_to_half() requires a 32-bit float");
+
+ 	uint32_t bits = 0;
+ 	memcpy(&bits, &val, sizeof(bits));
+
+ 	const int flt_m = (int)(bits & 0x7FFFFF);
+ 	const int flt_e = (int)((bits >> 23) & 0xFF);
+ 	const int s = (int)(bits >> 31);
+ 	int e = 0, m = 0;
+
+ 	if (flt_e == 0xff)
+ 	{
+ 		// inf/NaN
+ 		e = 31;
+ 		if (flt_m != 0) // NaN
+ 			m = 1;
+ 	}
+ 	else if (flt_e != 0)
+ 	{
+ 		// not zero or denormal
+ 		const int new_exp = flt_e - 127;
+ 		if (new_exp > 15)
+ 			e = 31; // too large for a half: becomes Inf
+ 		else if (new_exp < -14)
+ 			m = lrintf((1 << 24) * fabsf(val)); // half denormal: mantissa counts units of 2^-24
+ 		else
+ 		{
+ 			e = new_exp + 15;
+ 			m = lrintf(flt_m * (1.0f / ((float)(1 << 13)))); // 23-bit mantissa scaled down to 10 bits
+ 		}
+ 	}
+
+ 	// Rounding can carry out of the 10-bit mantissa; bump the exponent instead.
+ 	assert((0 <= m) && (m <= 1024));
+ 	if (m == 1024)
+ 	{
+ 		e++;
+ 		m = 0;
+ 	}
+
+ 	assert((s >= 0) && (s <= 1));
+ 	assert((e >= 0) && (e <= 31));
+ 	assert((m >= 0) && (m <= 1023));
+
+ 	return (basist::half_float)((s << 15) | (e << 10) | m);
+ }
+
+ //------------------------------------------------------------------------------------------------
+ // HDR support
+ //
+ // Originally from bc6h_enc.cpp
+ // BC6H decoder fuzzed vs. DirectXTex's for unsigned/signed
+
+ const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4] = // per mode: base bits, r, g, b - pack_bc6h_block() checks endpoint [c][0] against the base bits and endpoints [c][1..3] against the r/g/b entry of component c
+ {
+ // 2 subsets
+ { 10, 5, 5, 5, }, // 0, mode 1 in MS/D3D docs
+ { 7, 6, 6, 6, }, // 1
+ { 11, 5, 4, 4, }, // 2
+ { 11, 4, 5, 4, }, // 3
+ { 11, 4, 4, 5, }, // 4
+ { 9, 5, 5, 5, }, // 5
+ { 8, 6, 5, 5, }, // 6
+ { 8, 5, 6, 5, }, // 7
+ { 8, 5, 5, 6, }, // 8
+ { 6, 6, 6, 6, }, // 9, endpoints not delta encoded, mode 10 in MS/D3D docs
+ // 1 subset
+ { 10, 10, 10, 10, }, // 10, endpoints not delta encoded, mode 11 in MS/D3D docs
+ { 11, 9, 9, 9, }, // 11
+ { 12, 8, 8, 8, }, // 12
+ { 16, 4, 4, 4, } // 13, also useful for solid blocks
+ };
+
+ const int8_t g_bc6h_mode_lookup[32] = { 0, 1, 2, 10, 0, 1, 3, 11, 0, 1, 4, 12, 0, 1, 5, 13, 0, 1, 6, -1, 0, 1, 7, -1, 0, 1, 8, -1, 0, 1, 9, -1 }; // inverse of the s_mode_bits table in pack_bc6h_block(): bit pattern -> mode index (modes 0/1 only use 2 bits, so they repeat); -1 = no mode uses this pattern. Presumably indexed by the low 5 bits of a packed block - TODO confirm against the decoder.
+
+ const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX] =
+ {
+ // comp_index, subset*2+lh_index, last_bit, first_bit. As consumed by pack_bc6h_block(): comp_index 3 selects the partition pattern, comp_index -1 ends a mode's list, first_bit == -1 writes only the single bit 'last_bit', and first_bit > last_bit writes the bit range in reversed order.
+ //------------------------ mode 0: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (10.555, 10.555, 10.555), delta
+ { { 1, 2, 4, -1 }, { 2, 2, 4, -1 }, { 2, 3, 4, -1 }, { 0, 0, 9, 0 }, { 1, 0, 9, 0 }, { 2, 0, 9, 0 }, { 0, 1, 4, 0 },
+ { 1, 3, 4, -1 }, { 1, 2, 3, 0 }, { 1, 1, 4, 0 }, { 2, 3, 0, -1 }, { 1, 3, 3, 0 }, { 2, 1, 4, 0 }, { 2, 3, 1, -1 },
+ { 2, 2, 3, 0 }, { 0, 2, 4, 0 }, { 2, 3, 2, -1 }, { 0, 3, 4, 0 }, { 2, 3, 3, -1 }, { 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 1: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (7.666, 7.666, 7.666), delta
+ { { 1, 2, 5, -1 },{ 1, 3, 4, -1 },{ 1, 3, 5, -1 },{ 0, 0, 6, 0 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },
+ { 1, 0, 6, 0 },{ 2, 2, 5, -1 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 6, 0 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },
+ { 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },
+ { 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 2: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.555, 11.444, 11.444), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 4, 0 },{ 0, 0, 10, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },{ 1, 0, 10, -1 },
+ { 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },
+ { 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 3: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.555, 11.444), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },
+ { 1, 0, 10, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 0, -1 },
+ { 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 1, 2, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 4: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.444, 11.555), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 2, 2, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },
+ { 1, 0, 10, -1 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 0, 10, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 1, -1 },
+ { 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 2, 3, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 5: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (9.555, 9.555, 9.555), delta
+ { { 0, 0, 8, 0 },{ 2, 2, 4, -1 },{ 1, 0, 8, 0 },{ 1, 2, 4, -1 },{ 2, 0, 8, 0 },{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },
+ { 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },
+ { 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 6: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.666, 8.555, 8.555), delta
+ { { 0, 0, 7, 0 },{ 1, 3, 4, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 3, -1 },
+ { 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
+ { 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 7: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.555, 8.666, 8.555), delta
+ { { 0, 0, 7, 0 },{ 2, 3, 0, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 1, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 1, 3, 5, -1 },
+ { 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
+ { 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 8: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.555, 8.555, 8.666), delta
+ { { 0, 0, 7, 0 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 5, -1 },
+ { 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },
+ { 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 9: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (6.6.6.6, 6.6.6.6, 6.6.6.6), NO delta
+ { { 0, 0, 5, 0 },{ 1, 3, 4, -1 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 5, 0 },{ 1, 2, 5, -1 },{ 2, 2, 5, -1 },
+ { 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 5, 0 },{ 1, 3, 5, -1 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },
+ { 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 10: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (10.10, 10.10, 10.10), NO delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 9, 0 },{ 1, 1, 9, 0 },{ 2, 1, 9, 0 }, {-1, 0, 0, 0} },
+ //------------------------ mode 11: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (11.9, 11.9, 11.9), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 8, 0 },{ 0, 0, 10, -1 },{ 1, 1, 8, 0 },{ 1, 0, 10, -1 },{ 2, 1, 8, 0 },{ 2, 0, 10, -1 }, {-1, 0, 0, 0} },
+ //------------------------ mode 12: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (12.8, 12.8, 12.8), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 7, 0 },{ 0, 0, 10, 11 },{ 1, 1, 7, 0 },{ 1, 0, 10, 11 },{ 2, 1, 7, 0 },{ 2, 0, 10, 11 }, {-1, 0, 0, 0} },
+ //------------------------ mode 13: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (16.4, 16.4, 16.4), delta
+ { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, 15 },{ 1, 1, 3, 0 },{ 1, 0, 10, 15 },{ 2, 1, 3, 0 },{ 2, 0, 10, 15 }, {-1, 0, 0, 0} }
+ };
+
+ // The same as the first 32 2-subset patterns in BC7.
+ // Bit 7 is a flag indicating that the weight uses 1 less bit than usual. The remaining low bits give the pixel's subset index (0 or 1), which is how the encoders in this file read them (pPat[i] & 0x7F, pPat[i] & 1).
+ const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4] = // [pat][y][x]
+ {
+ { {0x80, 0, 1, 1}, { 0, 0, 1, 1 }, { 0, 0, 1, 1 }, { 0, 0, 1, 0x81 }}, { {0x80, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0x81} },
+ { {0x80, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 0x81} }, { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} },
+ { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 0x81} },
+ { {0x80, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 0x81} },
+ { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 1, 0}, {1, 1, 1, 0x81} }, { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 1, 0x81, 1}, {0, 0, 1, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} },
+ { {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 1, 0, 0}, {1, 1, 1, 0} },
+ { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} }, { {0x80, 1, 1, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 0, 0, 0x81} },
+ { {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} },
+ { {0x80, 1, 0x81, 0}, {0, 1, 1, 0}, {0, 1, 1, 0}, {0, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {0, 1, 1, 0}, {0, 1, 1, 0}, {1, 1, 0, 0} },
+ { {0x80, 0, 0, 1}, {0, 1, 1, 1}, {0x81, 1, 1, 0}, {1, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {0x81, 1, 1, 1}, {0, 0, 0, 0} },
+ { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {1, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {1, 0, 0, 1}, {1, 0, 0, 1}, {1, 1, 0, 0} }
+ };
+
+ const uint8_t g_bc6h_weight3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; // 8 entries (one per 3-bit weight index) rising from 0 to 64; presumably interpolation weights out of 64 - TODO confirm against the decoder
+ const uint8_t g_bc6h_weight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; // 16 entries (one per 4-bit weight index) rising from 0 to 64; presumably interpolation weights out of 64 - TODO confirm against the decoder
+
+ // Unpacked ("logical") description of one BC6H block; pack_bc6h_block() turns it into the
+ // 16-byte physical block.
+ struct bc6h_logical_block
+ {
+ 	uint32_t m_mode;               // mode index, pack_bc6h_block() requires it to be < NUM_BC6H_MODES
+ 	uint32_t m_partition_pattern;  // must be 0 if 1 subset
+ 	uint32_t m_endpoints[3][4];    // [comp][subset*2+lh_index] - must be already properly packed
+ 	uint8_t m_weights[16];         // weights must be of the proper size, taking into account skipped MSB's which must be 0
+
+ 	// Resets every member through basisu::clear_obj() (presumably a zero fill - confirm in basisu).
+ 	void clear() { basisu::clear_obj(*this); }
+ };
+
+ // Appends the low num_bits bits of val to a 128-bit little-endian bit buffer held in two
+ // 64-bit halves: l covers bit positions 0-63 and h covers positions 64-127.
+ //   val      - value to write; must fit in num_bits bits.
+ //   num_bits - 1..63.
+ //   bit_pos  - current write position (must be < 128 on entry); advanced by num_bits.
+ //   l, h     - buffer halves; bits are OR'ed in, so the target bits must still be zero.
+ static inline void write_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
+ {
+ 	assert((num_bits != 0) && (num_bits < 64) && (bit_pos < 128));
+ 	assert(val < (1ULL << num_bits));
+
+ 	const uint32_t start = bit_pos;
+ 	const uint32_t end = start + num_bits;
+
+ 	if (start >= 64)
+ 	{
+ 		// Entirely inside the high half.
+ 		h |= (val << (start - 64));
+ 	}
+ 	else
+ 	{
+ 		l |= (val << start);
+
+ 		// The value straddles the 64-bit boundary: spill the upper part into the high half.
+ 		if (end > 64)
+ 			h |= (val >> (64 - start));
+ 	}
+
+ 	bit_pos = end;
+ 	assert(bit_pos <= 128);
+ }
+
+ // Same contract as write_bits(), but the num_bits bits of val are emitted one at a time
+ // starting with the most significant, so they land in the buffer in reversed bit order.
+ static inline void write_rev_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
+ {
+ 	assert((num_bits != 0) && (num_bits < 64) && (bit_pos < 128));
+ 	assert(val < (1ULL << num_bits));
+
+ 	for (uint32_t remaining = num_bits; remaining > 0; remaining--)
+ 	{
+ 		const uint64_t bit = (val >> (remaining - 1u)) & 1;
+ 		write_bits(bit, 1, bit_pos, l, h);
+ 	}
+ }
+
+ // Serializes a logical BC6H block into its 128-bit physical encoding.
+ //   dst_blk - receives the 16 packed bytes (written as four little-endian dwords).
+ //   log_blk - mode, partition pattern, already-packed endpoints and weights to encode.
+ // The output is: mode signature bits, then the endpoint/partition fields in the order given by
+ // g_bc6h_bit_layouts[mode], then the 16 weights. All inputs are only validated with asserts.
+ static void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk)
+ {
+ 	// Mode signature bits, indexed by mode. Modes 0 and 1 use 2 bits, all other modes 5 bits.
+ 	const uint8_t s_mode_bits[NUM_BC6H_MODES] = { 0b00, 0b01, 0b00010, 0b00110, 0b01010, 0b01110, 0b10010, 0b10110, 0b11010, 0b11110, 0b00011, 0b00111, 0b01011, 0b01111 };
+
+ 	const uint32_t mode = log_blk.m_mode;
+ 	assert(mode < NUM_BC6H_MODES);
+
+ 	const bool two_subsets = (mode < BC6H_FIRST_1SUBSET_MODE_INDEX);
+
+ 	assert((two_subsets && (log_blk.m_partition_pattern < TOTAL_BC6H_PARTITION_PATTERNS)) ||
+ 		(!two_subsets && (!log_blk.m_partition_pattern)));
+
+ 	// Sanity checks: slot 0 is the base endpoint (base bits); slots 1-3 use the per-component
+ 	// size (these are deltas except for modes 9,10).
+ 	for (uint32_t c = 0; c < 3; c++)
+ 	{
+ 		for (uint32_t slot = 0; slot < 4; slot++)
+ 		{
+ 			const uint32_t slot_bits = g_bc6h_mode_sig_bits[mode][(slot == 0) ? 0 : (c + 1)];
+ 			BASISU_NOTE_UNUSED(slot_bits);
+ 			assert(log_blk.m_endpoints[c][slot] < (1u << slot_bits));
+ 		}
+ 	}
+
+ 	uint64_t l = s_mode_bits[mode], h = 0;
+ 	uint32_t bit_pos = (mode < 2) ? 2 : 5;
+
+ 	// Endpoint and partition fields, in layout order. A comp index of -1 terminates the list.
+ 	for (const bc6h_bit_layout* pField = &g_bc6h_bit_layouts[mode][0]; pField->m_comp != -1; pField++)
+ 	{
+ 		uint32_t v = (pField->m_comp == 3) ? log_blk.m_partition_pattern : log_blk.m_endpoints[pField->m_comp][pField->m_index];
+
+ 		if (pField->m_first_bit == -1)
+ 		{
+ 			// Single bit field: only bit 'm_last_bit' of the value is written.
+ 			write_bits((v >> pField->m_last_bit) & 1, 1, bit_pos, l, h);
+ 			continue;
+ 		}
+
+ 		const uint32_t total_bits = basisu::iabs(pField->m_last_bit - pField->m_first_bit) + 1;
+
+ 		v >>= basisu::minimum(pField->m_first_bit, pField->m_last_bit);
+ 		v &= ((1 << total_bits) - 1);
+
+ 		// first_bit > last_bit marks a range that is stored in reversed bit order.
+ 		if (pField->m_first_bit > pField->m_last_bit)
+ 			write_rev_bits(v, total_bits, bit_pos, l, h);
+ 		else
+ 			write_bits(v, total_bits, bit_pos, l, h);
+ 	}
+
+ 	// Weights: 3 bits each with two subsets, 4 bits with one. Anchor pixels (bit 7 set in the
+ 	// pattern table, or pixel 0 with one subset) are stored with one bit less.
+ 	const uint32_t full_weight_bits = two_subsets ? 3 : 4;
+ 	const uint8_t* pPat = &g_bc6h_2subset_patterns[log_blk.m_partition_pattern][0][0];
+
+ 	for (uint32_t i = 0; i < 16; i++)
+ 	{
+ 		const bool drop_msb = two_subsets ? ((pPat[i] >> 7) != 0) : (i == 0);
+ 		const uint32_t num_bits = drop_msb ? (full_weight_bits - 1) : full_weight_bits;
+
+ 		const uint32_t sel = log_blk.m_weights[i];
+ 		assert(sel < (1u << num_bits));
+
+ 		write_bits(sel, num_bits, bit_pos, l, h);
+ 	}
+
+ 	assert(bit_pos == 128);
+
+ 	basisu::write_le_dword(&dst_blk.m_bytes[0], (uint32_t)l);
+ 	basisu::write_le_dword(&dst_blk.m_bytes[4], (uint32_t)(l >> 32u));
+ 	basisu::write_le_dword(&dst_blk.m_bytes[8], (uint32_t)h);
+ 	basisu::write_le_dword(&dst_blk.m_bytes[12], (uint32_t)(h >> 32u));
+ }
+
+#if 0 // Disabled: everything up to the matching #endif is excluded from compilation.
+ static inline uint32_t bc6h_blog_dequantize_to_blog16(uint32_t comp, uint32_t bits_per_comp) // Expands a bits_per_comp-bit value 'comp' to a 16-bit range value.
+ {
+ int unq;
+
+ if (bits_per_comp >= 15)
+ unq = comp; // 15 or more bits: passed through unchanged
+ else if (comp == 0)
+ unq = 0; // zero stays zero
+ else if (comp == ((1u << bits_per_comp) - 1u))
+ unq = 0xFFFFu; // the all-ones value maps to the 16-bit maximum
+ else
+ unq = ((comp << 16u) + 0x8000u) >> bits_per_comp; // otherwise scale by 2^16 / 2^bits_per_comp, with a 0x8000 bias added before the shift
+
+ return unq;
+ }
+#endif
+
+ // Quantizes a half float bit pattern h to a num_bits-bit "blog" endpoint value using integer
+ // math only: (h * 64 + 30) / (31 * 2^(16 - num_bits)).
+ // h must not exceed MAX_BC6H_HALF_FLOAT_AS_UINT; num_bits must not exceed 16.
+ // Suboptimal, but very close.
+ static inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits)
+ {
+ 	assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
+
+ 	const auto numerator = h * 64 + 30;
+ 	const auto denominator = 31 * (1 << (16 - num_bits));
+
+ 	return numerator / denominator;
+ }
+
+ // Bit depths (besides 16) accepted by half_to_blog_tab(): 6,7,8,9,10,11,12
+ const uint32_t BC6H_BLOG_TAB_MIN = 6;
+ const uint32_t BC6H_BLOG_TAB_MAX = 12;
+ //const uint32_t BC6H_BLOG_TAB_NUM = BC6H_BLOG_TAB_MAX - BC6H_BLOG_TAB_MIN + 1;
+
+ // Quantizes half float h to num_bits bits. Handles 16, or 6-12 bits. Others assert.
+ // Note: This used to be done using a table lookup, but it required ~224KB of tables. Computing the
+ // value with bc6h_half_to_blog() isn't quite as accurate, but the error is very slight
+ // (+-1 half values as ints).
+ static inline uint32_t half_to_blog_tab(half_float h, uint32_t num_bits)
+ {
+ 	BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MIN);
+ 	BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MAX);
+
+ 	assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
+
+ 	// 16 bits is always accepted; any other depth must lie inside the former table range.
+ 	assert((num_bits == 16) || ((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX)));
+
+ 	return bc6h_half_to_blog(h, num_bits);
+ }
+
+ // Set once bc6h_enc_init() has run; the bc6h_enc_block_*() functions assert on it.
+ bool g_bc6h_enc_initialized;
+
+ // One-time initialization of the BC6H encoder. Currently this only raises the
+ // g_bc6h_enc_initialized flag; calling it again is a no-op.
+ void bc6h_enc_init()
+ {
+ 	if (!g_bc6h_enc_initialized)
+ 	{
+ 		g_bc6h_enc_initialized = true;
+ 	}
+ }
+
+ // Encodes a 1-subset block in mode 10 (direct, non-delta 10-bit endpoints) with 4-bit weights.
+ //   pPacked_block - receives the packed block.
+ //   pEndpoints    - [comp][lh_index] endpoints as half floats.
+ //   pWeights      - 16 weights, each 0..15.
+ // The first weight is stored without its MSB, so when that bit is set all weights are
+ // inverted (15 - w) and the two endpoints trade places.
+ void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+ {
+ 	assert(g_bc6h_enc_initialized);
+
+ 	bc6h_logical_block log_blk;
+ 	log_blk.clear();
+ 	log_blk.m_mode = BC6H_FIRST_1SUBSET_MODE_INDEX;
+
+ 	const bool flip = (pWeights[0] & 8) != 0;
+
+ 	// Convert half endpoints to blog10 (mode 10 doesn't use delta encoding), storing them
+ 	// swapped when the weights get inverted.
+ 	const uint32_t low_slot = flip ? 1 : 0;
+ 	const uint32_t high_slot = flip ? 0 : 1;
+
+ 	for (uint32_t c = 0; c < 3; c++)
+ 	{
+ 		log_blk.m_endpoints[c][low_slot] = half_to_blog_tab(pEndpoints[c][0], 10);
+ 		log_blk.m_endpoints[c][high_slot] = half_to_blog_tab(pEndpoints[c][1], 10);
+ 	}
+
+ 	for (uint32_t i = 0; i < 16; i++)
+ 	{
+ 		assert(pWeights[i] <= 15);
+ 		log_blk.m_weights[i] = flip ? (uint8_t)(15 - pWeights[i]) : pWeights[i];
+ 	}
+
+ 	pack_bc6h_block(*pPacked_block, log_blk);
+ }
+
+ // Encodes a 1-subset block with 4-bit weights.
+ // Tries modes 11-13 (delta endpoint) encoding, falling back to mode 10 only when necessary.
+ // Modes are tried from BC6H_LAST_MODE_INDEX downwards; the first one whose signed endpoint
+ // deltas all fit is used.
+ //   pPacked_block - receives the packed block.
+ //   pEndpoints    - [comp][lh_index] endpoints as half floats.
+ //   pWeights      - 16 weights, each 0..15.
+ void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+ {
+ 	assert(g_bc6h_enc_initialized);
+
+ 	bc6h_logical_block log_blk;
+ 	log_blk.clear();
+
+ 	// Ensure first weight MSB is 0: if it is set, invert every weight and remember to swap
+ 	// the endpoints. This does not depend on the mode being tried.
+ 	const bool flip = (pWeights[0] & 8) != 0;
+
+ 	for (uint32_t i = 0; i < 16; i++)
+ 	{
+ 		assert(pWeights[i] <= 15);
+ 		log_blk.m_weights[i] = flip ? (uint8_t)(15 - pWeights[i]) : pWeights[i];
+ 	}
+
+ 	for (uint32_t mode = BC6H_LAST_MODE_INDEX; mode > BC6H_FIRST_1SUBSET_MODE_INDEX; mode--)
+ 	{
+ 		const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
+ 		const uint32_t num_delta_bits = g_bc6h_mode_sig_bits[mode][1];
+
+ 		const int base_bitmask = (1 << num_base_bits) - 1;
+ 		const int delta_bitmask = (1 << num_delta_bits) - 1;
+ 		BASISU_NOTE_UNUSED(base_bitmask);
+
+ 		assert(num_delta_bits < num_base_bits);
+ 		assert((num_delta_bits == g_bc6h_mode_sig_bits[mode][2]) && (num_delta_bits == g_bc6h_mode_sig_bits[mode][3]));
+
+ 		// Convert half endpoints to blog 16, 12, or 11, swapped if the weights were inverted
+ 		uint32_t blog_endpoints[3][2];
+
+ 		for (uint32_t c = 0; c < 3; c++)
+ 		{
+ 			const uint32_t q0 = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
+ 			assert((int)q0 <= base_bitmask);
+
+ 			const uint32_t q1 = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
+ 			assert((int)q1 <= base_bitmask);
+
+ 			blog_endpoints[c][0] = flip ? q1 : q0;
+ 			blog_endpoints[c][1] = flip ? q0 : q1;
+ 		}
+
+ 		// Signed range representable by the delta field.
+ 		const int max_delta = (1 << (num_delta_bits - 1)) - 1;
+ 		const int min_delta = -(max_delta + 1);
+ 		assert((max_delta - min_delta) == delta_bitmask);
+
+ 		bool all_fit = true;
+
+ 		for (uint32_t c = 0; c < 3; c++)
+ 		{
+ 			log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
+
+ 			const int delta = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
+ 			if ((delta < min_delta) || (delta > max_delta))
+ 			{
+ 				all_fit = false;
+ 				break;
+ 			}
+
+ 			log_blk.m_endpoints[c][1] = delta & delta_bitmask;
+ 		}
+
+ 		if (all_fit)
+ 		{
+ 			log_blk.m_mode = mode;
+ 			pack_bc6h_block(*pPacked_block, log_blk);
+ 			return;
+ 		}
+ 	}
+
+ 	// Worst case fall back to mode 10, which can handle any endpoints
+ 	bc6h_enc_block_mode10(pPacked_block, pEndpoints, pWeights);
+ }
+
+ // Mode 9 (direct endpoint encoding), 3-bit weights, but only 1 subset.
+ // Mode 9 is a 2-subset mode, so partition pattern 0 is used and both subsets are given the
+ // same endpoint pair.
+ //   pPacked_block - receives the packed block.
+ //   pEndpoints    - [comp][lh_index] endpoints as half floats.
+ //   pWeights      - 16 weights, each 0..7.
+ void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+ {
+ 	assert(g_bc6h_enc_initialized);
+
+ 	for (uint32_t i = 0; i < 16; i++)
+ 	{
+ 		assert(pWeights[i] <= 7);
+ 	}
+
+ 	const uint32_t pat_index = 0;
+ 	const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+ 	bc6h_logical_block log_blk;
+ 	log_blk.clear();
+ 	log_blk.m_mode = 9;
+ 	log_blk.m_partition_pattern = pat_index;
+
+ 	// Convert half endpoints to blog6 (mode 9 doesn't use delta encoding) and duplicate them
+ 	// into the second subset.
+ 	for (uint32_t c = 0; c < 3; c++)
+ 	{
+ 		const uint32_t q0 = half_to_blog_tab(pEndpoints[c][0], 6);
+ 		log_blk.m_endpoints[c][0] = q0;
+ 		log_blk.m_endpoints[c][2] = q0;
+
+ 		const uint32_t q1 = half_to_blog_tab(pEndpoints[c][1], 6);
+ 		log_blk.m_endpoints[c][1] = q1;
+ 		log_blk.m_endpoints[c][3] = q1;
+ 	}
+
+ 	memcpy(log_blk.m_weights, pWeights, 16);
+
+ 	// The weight tested for each subset (pixel 0 for subset 0, pixel 15 for subset 1) must have
+ 	// a clear MSB. If it is set, swap that subset's endpoints and invert that subset's weights.
+ 	const uint32_t anchor_pixel[2] = { 0, 15 };
+
+ 	for (uint32_t s = 0; s < 2; s++)
+ 	{
+ 		if ((log_blk.m_weights[anchor_pixel[s]] & 4) == 0)
+ 			continue;
+
+ 		for (uint32_t c = 0; c < 3; c++)
+ 			std::swap(log_blk.m_endpoints[c][s * 2 + 0], log_blk.m_endpoints[c][s * 2 + 1]);
+
+ 		for (uint32_t i = 0; i < 16; i++)
+ 		{
+ 			if ((uint32_t)(pPat[i] & 0x7F) == s)
+ 				log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+ 		}
+ 	}
+
+ 	pack_bc6h_block(*pPacked_block, log_blk);
+ }
+
+ // Encodes a 1-subset block with 3-bit weights using the 2-subset BC6H modes: partition
+ // pattern 0 is used and both subsets are given the same endpoint pair.
+ // Tries modes 0-8, falls back to mode 9
+ //   pPacked_block - receives the packed block.
+ //   pEndpoints    - [comp][lh_index] endpoints as half floats.
+ //   pWeights      - 16 weights, each 0..7.
+ // Modes 0-8 are delta encoded; the first mode (in s_mode_order) whose deltas all fit wins.
+ void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+ {
+ 	assert(g_bc6h_enc_initialized);
+
+ 	for (uint32_t i = 0; i < 16; i++)
+ 	{
+ 		assert(pWeights[i] <= 7);
+ 	}
+
+ 	bc6h_logical_block log_blk;
+ 	log_blk.clear();
+
+ 	// Everything about the weights is independent of the mode being tried, so do it once.
+ 	const uint32_t pat_index = 0;
+ 	const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+ 	memcpy(log_blk.m_weights, pWeights, 16);
+
+ 	// The weight tested for each subset (pixel 0 for subset 0, pixel 15 for subset 1) must have
+ 	// a clear MSB. If it is set, that subset's weights are inverted here and its endpoints are
+ 	// swapped below.
+ 	const uint32_t anchor_pixel[2] = { 0, 15 };
+ 	bool swap_subset[2] = { false, false };
+
+ 	for (uint32_t s = 0; s < 2; s++)
+ 	{
+ 		if ((log_blk.m_weights[anchor_pixel[s]] & 4) == 0)
+ 			continue;
+
+ 		swap_subset[s] = true;
+
+ 		for (uint32_t i = 0; i < 16; i++)
+ 		{
+ 			if ((uint32_t)(pPat[i] & 0x7F) == s)
+ 				log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+ 		}
+ 	}
+
+ 	static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least
+
+ 	for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
+ 	{
+ 		const uint32_t mode = s_mode_order[mode_iter];
+
+ 		const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
+ 		const int base_bitmask = (1 << num_base_bits) - 1;
+ 		BASISU_NOTE_UNUSED(base_bitmask);
+
+ 		const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
+ 		const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
+
+ 		// Convert half endpoints to blog 7-11 and place them (swapped where required) into
+ 		// both subsets.
+ 		uint32_t blog_endpoints[3][4];
+
+ 		for (uint32_t c = 0; c < 3; c++)
+ 		{
+ 			const uint32_t q0 = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
+ 			assert((int)q0 <= base_bitmask);
+
+ 			const uint32_t q1 = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
+ 			assert((int)q1 <= base_bitmask);
+
+ 			for (uint32_t s = 0; s < 2; s++)
+ 			{
+ 				blog_endpoints[c][s * 2 + 0] = swap_subset[s] ? q1 : q0;
+ 				blog_endpoints[c][s * 2 + 1] = swap_subset[s] ? q0 : q1;
+ 			}
+ 		}
+
+ 		// Try packing the endpoints: slot 0 is stored directly, slots 1-3 as signed deltas
+ 		// from slot 0. The mode is rejected as soon as one delta doesn't fit.
+ 		bool failed_flag = false;
+
+ 		for (uint32_t c = 0; c < 3; c++)
+ 		{
+ 			const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
+
+ 			const int min_delta = -(max_delta + 1);
+ 			assert((max_delta - min_delta) == delta_bitmasks[c]);
+
+ 			log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
+
+ 			const int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
+ 			const int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
+ 			const int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
+
+ 			if ((delta0 < min_delta) || (delta0 > max_delta) ||
+ 				(delta1 < min_delta) || (delta1 > max_delta) ||
+ 				(delta2 < min_delta) || (delta2 > max_delta))
+ 			{
+ 				failed_flag = true;
+ 				break;
+ 			}
+
+ 			log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
+ 			log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
+ 			log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
+ 		}
+
+ 		if (failed_flag)
+ 			continue;
+
+ 		log_blk.m_mode = mode;
+ 		log_blk.m_partition_pattern = pat_index;
+ 		pack_bc6h_block(*pPacked_block, log_blk);
+
+ 		return;
+
+ 	} // mode_iter
+
+ 	bc6h_enc_block_1subset_mode9_3bit_weights(pPacked_block, pEndpoints, pWeights);
+ }
+
+ // Encodes a 2-subset block in mode 9 (direct, non-delta 6-bit endpoints) with 3-bit weights.
+ //   pPacked_block     - receives the packed block.
+ //   common_part_index - index into basist::g_astc_bc7_common_partitions2.
+ //   pEndpoints        - pEndpoints[subset][comp][lh_index], as half floats.
+ //   pWeights          - 16 weights, each 0..7.
+ void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
+ {
+ 	assert(g_bc6h_enc_initialized);
+ 	assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2);
+
+ 	for (uint32_t i = 0; i < 16; i++)
+ 	{
+ 		assert(pWeights[i] <= 7);
+ 	}
+
+ 	bc6h_logical_block log_blk;
+ 	log_blk.clear();
+
+ 	// When the common partition entry is flagged as inverted, the two subsets' endpoints
+ 	// trade places.
+ 	const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
+
+ 	// Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
+ 	for (uint32_t s = 0; s < 2; s++)
+ 	{
+ 		const uint32_t dst_subset = invert_flag ? (1 - s) : s;
+
+ 		for (uint32_t c = 0; c < 3; c++)
+ 		{
+ 			log_blk.m_endpoints[c][dst_subset * 2 + 0] = half_to_blog_tab(pEndpoints[s][c][0], 6);
+ 			log_blk.m_endpoints[c][dst_subset * 2 + 1] = half_to_blog_tab(pEndpoints[s][c][1], 6);
+ 		}
+ 	}
+
+ 	memcpy(log_blk.m_weights, pWeights, 16);
+
+ 	const uint32_t pat_index = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
+ 	assert(pat_index < 32);
+ 	const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+ 	// Anchor pixels (bit 7 set in the pattern) are stored without their MSB. Find the subsets
+ 	// whose anchor weight has the MSB set.
+ 	bool swap_flags[2] = { false, false };
+
+ 	for (uint32_t i = 0; i < 16; i++)
+ 	{
+ 		const bool is_anchor = (pPat[i] & 0x80) != 0;
+ 		if (is_anchor && (log_blk.m_weights[i] & 4))
+ 			swap_flags[pPat[i] & 1] = true;
+ 	}
+
+ 	// For each such subset, swap its endpoints and invert its weights.
+ 	for (uint32_t s = 0; s < 2; s++)
+ 	{
+ 		if (!swap_flags[s])
+ 			continue;
+
+ 		for (uint32_t c = 0; c < 3; c++)
+ 			std::swap(log_blk.m_endpoints[c][s * 2 + 0], log_blk.m_endpoints[c][s * 2 + 1]);
+
+ 		for (uint32_t i = 0; i < 16; i++)
+ 		{
+ 			if ((uint32_t)(pPat[i] & 0x7F) == s)
+ 				log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+ 		}
+ 	}
+
+ 	log_blk.m_mode = 9;
+ 	log_blk.m_partition_pattern = pat_index;
+ 	pack_bc6h_block(*pPacked_block, log_blk);
+ }
+
+ // Encodes a 2-subset block with 3-bit weights.
+ // Tries the delta encoded modes 0-8 (in s_mode_order) and uses the first one whose endpoint
+ // deltas all fit; falls back to mode 9 (direct endpoints) otherwise.
+ //   pPacked_block     - receives the packed block.
+ //   common_part_index - index into basist::g_astc_bc7_common_partitions2.
+ //   pEndpoints        - pEndpoints[subset][comp][lh_index], as half floats.
+ //   pWeights          - 16 weights, each 0..7.
+ void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
+ {
+ 	assert(g_bc6h_enc_initialized);
+ 	assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2);
+
+ 	for (uint32_t i = 0; i < 16; i++)
+ 	{
+ 		assert(pWeights[i] <= 7);
+ 	}
+
+ 	bc6h_logical_block log_blk;
+ 	log_blk.clear();
+
+ 	// The partition lookup and all weight handling are independent of the mode being tried,
+ 	// so they are done once up front.
+ 	//const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc;
+ 	const uint32_t pat_index = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
+ 	assert(pat_index < 32);
+ 	const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+ 	// When the common partition entry is flagged as inverted, the two subsets' endpoints
+ 	// trade places.
+ 	const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
+
+ 	memcpy(log_blk.m_weights, pWeights, 16);
+
+ 	// Anchor pixels (bit 7 set in the pattern) are stored without their MSB. Find the subsets
+ 	// whose anchor weight has the MSB set.
+ 	bool swap_flags[2] = { false, false };
+
+ 	for (uint32_t i = 0; i < 16; i++)
+ 	{
+ 		if ((pPat[i] & 0x80) == 0)
+ 			continue;
+
+ 		if (log_blk.m_weights[i] & 4)
+ 			swap_flags[pPat[i] & 1] = true;
+ 	}
+
+ 	// Invert the weights of those subsets; their endpoints are swapped below.
+ 	for (uint32_t s = 0; s < 2; s++)
+ 	{
+ 		if (!swap_flags[s])
+ 			continue;
+
+ 		for (uint32_t i = 0; i < 16; i++)
+ 		{
+ 			if ((uint32_t)(pPat[i] & 0x7F) == s)
+ 				log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+ 		}
+ 	}
+
+ 	static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least
+
+ 	for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
+ 	{
+ 		const uint32_t mode = s_mode_order[mode_iter];
+
+ 		const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
+ 		const int base_bitmask = (1 << num_base_bits) - 1;
+ 		BASISU_NOTE_UNUSED(base_bitmask);
+
+ 		const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
+ 		const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
+
+ 		// Convert half endpoints to blog 7-11, applying the subset inversion and the
+ 		// per-subset endpoint swap while storing them.
+ 		uint32_t blog_endpoints[3][4];
+
+ 		for (uint32_t s = 0; s < 2; s++)
+ 		{
+ 			const uint32_t dst_subset = invert_flag ? (1 - s) : s;
+
+ 			for (uint32_t c = 0; c < 3; c++)
+ 			{
+ 				const uint32_t q0 = half_to_blog_tab(pEndpoints[s][c][0], num_base_bits);
+ 				const uint32_t q1 = half_to_blog_tab(pEndpoints[s][c][1], num_base_bits);
+
+ 				blog_endpoints[c][dst_subset * 2 + 0] = swap_flags[dst_subset] ? q1 : q0;
+ 				blog_endpoints[c][dst_subset * 2 + 1] = swap_flags[dst_subset] ? q0 : q1;
+ 			}
+ 		}
+
+ 		// Try packing the endpoints: slot 0 is stored directly, slots 1-3 as signed deltas
+ 		// from slot 0. The mode is rejected as soon as one delta doesn't fit.
+ 		bool failed_flag = false;
+
+ 		for (uint32_t c = 0; c < 3; c++)
+ 		{
+ 			const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
+
+ 			const int min_delta = -(max_delta + 1);
+ 			assert((max_delta - min_delta) == delta_bitmasks[c]);
+
+ 			log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
+
+ 			const int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
+ 			const int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
+ 			const int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
+
+ 			if ((delta0 < min_delta) || (delta0 > max_delta) ||
+ 				(delta1 < min_delta) || (delta1 > max_delta) ||
+ 				(delta2 < min_delta) || (delta2 > max_delta))
+ 			{
+ 				failed_flag = true;
+ 				break;
+ 			}
+
+ 			log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
+ 			log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
+ 			log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
+ 		}
+
+ 		if (failed_flag)
+ 			continue;
+
+ 		log_blk.m_mode = mode;
+ 		log_blk.m_partition_pattern = pat_index;
+ 		pack_bc6h_block(*pPacked_block, log_blk);
+
+ 		//half_float blk[16 * 3];
+ 		//unpack_bc6h(pPacked_block, blk, false);
+
+ 		return;
+ 	}
+
+ 	bc6h_enc_block_2subset_mode9_3bit_weights(pPacked_block, common_part_index, pEndpoints, pWeights);
+ }
+
+ // Encodes a block where every pixel has the same color.
+ //   pPacked_block - receives the packed block.
+ //   pColor        - the r, g, b color as half floats.
+ // Returns false, without writing anything, if any component has its sign bit (0x8000) set;
+ // returns true once the block has been written.
+ bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3])
+ {
+ 	assert(g_bc6h_enc_initialized);
+
+ 	const auto combined_bits = pColor[0] | pColor[1] | pColor[2];
+ 	if (combined_bits & 0x8000)
+ 		return false;
+
+ 	// ASTC block unpacker won't allow Inf/NaN's to come through.
+ 	//if (is_half_inf_or_nan(pColor[0]) || is_half_inf_or_nan(pColor[1]) || is_half_inf_or_nan(pColor[2]))
+ 	//	return false;
+
+ 	// Both endpoints are the color itself and every weight is 0.
+ 	half_float endpoints[3][2];
+ 	for (uint32_t c = 0; c < 3; c++)
+ 	{
+ 		endpoints[c][0] = pColor[c];
+ 		endpoints[c][1] = pColor[c];
+ 	}
+
+ 	uint8_t weights[16] = { 0 };
+
+ 	bc6h_enc_block_1subset_4bit_weights(pPacked_block, endpoints, weights);
+
+ 	return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+ // basisu_astc_hdr_core.cpp
+
+ // Set once astc_hdr_core_init() has completed.
+ static bool g_astc_hdr_core_initialized;
+
+ // Maps an ASTC partition pattern id (0..1023) to its index in
+ // basist::g_astc_bc7_common_partitions2, or -1 if the pattern has no entry there.
+ static int8_t g_astc_partition_id_to_common_bc7_pat_index[1024];
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Builds g_astc_partition_id_to_common_bc7_pat_index from the first
+ // TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 entries of the common partition table.
+ // Only the first call does any work.
+ void astc_hdr_core_init()
+ {
+ 	if (!g_astc_hdr_core_initialized)
+ 	{
+ 		// Start with every pattern marked as "no common partition" (-1).
+ 		for (uint32_t i = 0; i < 1024; i++)
+ 			g_astc_partition_id_to_common_bc7_pat_index[i] = -1;
+
+ 		for (uint32_t part_index = 0; part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; ++part_index)
+ 		{
+ 			const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc;
+ 			//const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
+
+ 			assert(astc_pattern < 1024);
+ 			g_astc_partition_id_to_common_bc7_pat_index[astc_pattern] = (int8_t)part_index;
+ 		}
+
+ 		g_astc_hdr_core_initialized = true;
+ 	}
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Sign-extends the low num_src_bits bits of src to a full int.
+ // src: value whose bit (num_src_bits - 1) is treated as the sign bit.
+ // num_src_bits: width of the source field, must be in [2, 31].
+ // Returns src with all bits above the field set (negative case) or cleared (non-negative case).
+ // The mask arithmetic is done in uint32_t: the previous int form evaluated (1 << 31) - 1 when
+ // num_src_bits == 31, which overflows a signed int.
+ static inline int astc_hdr_sign_extend(int src, int num_src_bits)
+ {
+ 	assert((num_src_bits >= 2) && (num_src_bits <= 31));
+
+ 	const uint32_t usrc = static_cast<uint32_t>(src);
+ 	const uint32_t field_mask = (1u << num_src_bits) - 1u;
+ 	const uint32_t sign_bit = 1u << (num_src_bits - 1);
+
+ 	const uint32_t extended = (usrc & sign_bit) ? (usrc | ~field_mask) : (usrc & field_mask);
+ 	return static_cast<int>(extended);
+ }
+
+ // ORs a single bit of src_val (bit index src_bit, default 0) into dst at bit index dst_bit.
+ // Bits already set in dst are left as they are; dst_bit must be in [0, 31].
+ static inline void astc_hdr_pack_bit(
+ 	int& dst, int dst_bit,
+ 	int src_val, int src_bit = 0)
+ {
+ 	assert((dst_bit >= 0) && (dst_bit <= 31));
+
+ 	const int extracted = basisu::get_bit(src_val, src_bit);
+ 	dst = dst | (extracted << dst_bit);
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Decodes one color endpoint mode 7 endpoint set (r, g, b plus a shared scale), supplied as
+ // already-dequantized 8-bit values, into two qlog12 RGB endpoints.
+ // pEndpoints: the decode reads pEndpoints[0..3]; the assert loop touches NUM_MODE7_ENDPOINTS entries.
+ // e: output. e[1][0..2] receives the clamped (red, green, blue); e[0][0..2] receives each of those minus scale, clamped.
+ // pScale: optional; when non-null it receives the shifted scale value.
+ // Outputs are clamped to [0, 0xFFF] only; callers that need a tighter limit must check it themselves.
+ void decode_mode7_to_qlog12_ise20(
+ const uint8_t* pEndpoints,
+ int e[2][3],
+ int* pScale)
+ {
+ assert(g_astc_hdr_core_initialized);
+
+ // NOTE(review): pEndpoints is uint8_t, so this comparison can never fail; the loop is effectively a no-op.
+ for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
+ {
+ assert(pEndpoints[i] <= 255);
+ }
+
+ const int v0 = pEndpoints[0], v1 = pEndpoints[1], v2 = pEndpoints[2], v3 = pEndpoints[3];
+
+ // Extract mode bits and unpack to major component and mode.
+ // modeval is 4 bits: v0[7:6] -> bits 0..1, v1[7] -> bit 2, v2[7] -> bit 3.
+ const int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
+
+ int majcomp, mode;
+ if ((modeval & 0xC) != 0xC)
+ {
+ // Top two bits select the major component (0..2), low two bits the mode (0..3).
+ majcomp = modeval >> 2;
+ mode = modeval & 3;
+ }
+ else if (modeval != 0xF)
+ {
+ // Top bits are 11: low two bits select the major component, mode is fixed at 4.
+ majcomp = modeval & 3;
+ mode = 4;
+ }
+ else
+ {
+ // All four bits set: mode 5, which skips the difference step below.
+ majcomp = 0;
+ mode = 5;
+ }
+
+ // Extract low-order bits of r, g, b, and s.
+ int red = v0 & 0x3f;
+ int green = v1 & 0x1f;
+ int blue = v2 & 0x1f;
+ int scale = v3 & 0x1f;
+
+ // Extract high-order bits, which may be assigned depending on mode
+ int x0 = (v1 >> 6) & 1;
+ int x1 = (v1 >> 5) & 1;
+ int x2 = (v2 >> 6) & 1;
+ int x3 = (v2 >> 5) & 1;
+ int x4 = (v3 >> 7) & 1;
+ int x5 = (v3 >> 6) & 1;
+ int x6 = (v3 >> 5) & 1;
+
+ // Now move the high-order xs into the right place.
+ // ohm is a one-hot mask of the mode, so each test below reads as "mode is one of the set bits".
+ const int ohm = 1 << mode;
+ if (ohm & 0x30) green |= x0 << 6;
+ if (ohm & 0x3A) green |= x1 << 5;
+ if (ohm & 0x30) blue |= x2 << 6;
+ if (ohm & 0x3A) blue |= x3 << 5;
+ if (ohm & 0x3D) scale |= x6 << 5;
+ if (ohm & 0x2D) scale |= x5 << 6;
+ if (ohm & 0x04) scale |= x4 << 7;
+ if (ohm & 0x3B) red |= x4 << 6;
+ if (ohm & 0x04) red |= x3 << 6;
+ if (ohm & 0x10) red |= x5 << 7;
+ if (ohm & 0x0F) red |= x2 << 7;
+ if (ohm & 0x05) red |= x1 << 8;
+ if (ohm & 0x0A) red |= x0 << 8;
+ if (ohm & 0x05) red |= x0 << 9;
+ if (ohm & 0x02) red |= x6 << 9;
+ if (ohm & 0x01) red |= x3 << 10;
+ if (ohm & 0x02) red |= x5 << 10;
+
+ // Shift the bits to the top of the 12-bit result.
+ static const int s_shamts[6] = { 1,1,2,3,4,5 };
+
+ const int shamt = s_shamts[mode];
+ red <<= shamt;
+ green <<= shamt;
+ blue <<= shamt;
+ scale <<= shamt;
+
+ // Minor components are stored as differences
+ if (mode != 5)
+ {
+ green = red - green;
+ blue = red - blue;
+ }
+
+ // Swizzle major component into place
+ if (majcomp == 1)
+ std::swap(red, green);
+
+ if (majcomp == 2)
+ std::swap(red, blue);
+
+ // Clamp output values to [0, 0xFFF]. No alpha is produced; only RGB is written.
+ e[1][0] = basisu::clamp(red, 0, 0xFFF);
+ e[1][1] = basisu::clamp(green, 0, 0xFFF);
+ e[1][2] = basisu::clamp(blue, 0, 0xFFF);
+
+ // The low endpoint is the high endpoint with scale subtracted from every channel.
+ e[0][0] = basisu::clamp(red - scale, 0, 0xFFF);
+ e[0][1] = basisu::clamp(green - scale, 0, 0xFFF);
+ e[0][2] = basisu::clamp(blue - scale, 0, 0xFFF);
+
+ if (pScale)
+ *pScale = scale;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Decodes a mode 7 endpoint set quantized at ise_endpoint_range into two qlog12 RGB endpoints.
+ // When the range is not BISE_256_LEVELS the values are first mapped through the dequantization
+ // table (m_ISE_to_val) for that range; otherwise pEndpoints is decoded directly.
+ // e / pScale: filled in by decode_mode7_to_qlog12_ise20().
+ // Returns false if any decoded component exceeds MAX_QLOG12 (e[] has still been written), true otherwise.
+ bool decode_mode7_to_qlog12(
+ 	const uint8_t* pEndpoints,
+ 	int e[2][3],
+ 	int* pScale,
+ 	uint32_t ise_endpoint_range)
+ {
+ 	assert(g_astc_hdr_core_initialized);
+
+ 	if (ise_endpoint_range != astc_helpers::BISE_256_LEVELS)
+ 	{
+ 		uint8_t full_range_vals[NUM_MODE7_ENDPOINTS];
+
+ 		for (uint32_t k = 0; k < NUM_MODE7_ENDPOINTS; k++)
+ 			full_range_vals[k] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[k]];
+
+ 		decode_mode7_to_qlog12_ise20(full_range_vals, e, pScale);
+ 	}
+ 	else
+ 	{
+ 		decode_mode7_to_qlog12_ise20(pEndpoints, e, pScale);
+ 	}
+
+ 	// Reject the result if any of the six components is above the qlog12 limit.
+ 	for (uint32_t lh = 0; lh < 2; lh++)
+ 	{
+ 		for (uint32_t c = 0; c < 3; c++)
+ 		{
+ 			if (e[lh][c] > (int)MAX_QLOG12)
+ 				return false;
+ 		}
+ 	}
+
+ 	return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Decodes one color endpoint mode 11 endpoint set, supplied as six already-dequantized 8-bit
+ // values (pEndpoints[0..5]), into two qlog12 RGB endpoints.
+ // e: output; e[0][0..2] and e[1][0..2] are the two endpoints. In the non-direct path every
+ // component is clamped to [0, 0xFFF].
+ // Unlike the mode 7 decoder, this function does not assert that the core has been initialized.
+ void decode_mode11_to_qlog12_ise20(
+ const uint8_t* pEndpoints,
+ int e[2][3])
+ {
+#ifdef _DEBUG
+ // NOTE(review): pEndpoints is uint8_t, so this comparison can never fail.
+ for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++)
+ {
+ assert(pEndpoints[i] <= 255);
+ }
+#endif
+
+ // Major component selector: bit 7 of v4 is bit 0, bit 7 of v5 is bit 1.
+ const uint32_t maj_comp = basisu::get_bit(pEndpoints[4], 7) | (basisu::get_bit(pEndpoints[5], 7) << 1);
+
+ if (maj_comp == 3)
+ {
+ // Direct, qlog8 and qlog7
+ // First two channels are 8-bit values shifted up by 4; the third is 7 bits shifted up by 5.
+ e[0][0] = pEndpoints[0] << 4;
+ e[1][0] = pEndpoints[1] << 4;
+
+ e[0][1] = pEndpoints[2] << 4;
+ e[1][1] = pEndpoints[3] << 4;
+
+ e[0][2] = (pEndpoints[4] & 127) << 5;
+ e[1][2] = (pEndpoints[5] & 127) << 5;
+ }
+ else
+ {
+ int v0 = pEndpoints[0];
+ int v1 = pEndpoints[1];
+ int v2 = pEndpoints[2];
+ int v3 = pEndpoints[3];
+ int v4 = pEndpoints[4];
+ int v5 = pEndpoints[5];
+
+ // 3-bit mode assembled from bit 7 of v1, v2 and v3.
+ int mode = 0;
+ astc_hdr_pack_bit(mode, 0, v1, 7);
+ astc_hdr_pack_bit(mode, 1, v2, 7);
+ astc_hdr_pack_bit(mode, 2, v3, 7);
+
+ // va starts as v0 with bit 6 of v1 placed at bit 8; higher bits are added per mode below.
+ int va = v0;
+ astc_hdr_pack_bit(va, 8, v1, 6);
+
+ int vb0 = v2 & 63;
+ int vb1 = v3 & 63;
+ int vc = v1 & 63;
+
+ int vd0 = v4 & 0x7F; // this takes more bits than is sometimes needed
+ int vd1 = v5 & 0x7F; // this takes more bits than is sometimes needed
+ // Width in bits of the signed vd fields for each mode; sign extension discards the surplus bits.
+ static const int8_t dbitstab[8] = { 7,6,7,6,5,6,5,6 };
+ vd0 = astc_hdr_sign_extend(vd0, dbitstab[mode]);
+ vd1 = astc_hdr_sign_extend(vd1, dbitstab[mode]);
+
+ // Spare bits whose destination depends on the mode.
+ int x0 = basisu::get_bit(v2, 6);
+ int x1 = basisu::get_bit(v3, 6);
+ int x2 = basisu::get_bit(v4, 6);
+ int x3 = basisu::get_bit(v5, 6);
+ int x4 = basisu::get_bit(v4, 5);
+ int x5 = basisu::get_bit(v5, 5);
+
+ // ohm is a one-hot mask of the mode, so each test below reads as "mode is one of the set bits".
+ const uint32_t ohm = 1U << mode;
+ if (ohm & 0xA4) va |= (x0 << 9);
+ if (ohm & 0x08) va |= (x2 << 9);
+ if (ohm & 0x50) va |= (x4 << 9);
+ if (ohm & 0x50) va |= (x5 << 10);
+ if (ohm & 0xA0) va |= (x1 << 10);
+ if (ohm & 0xC0) va |= (x2 << 11);
+ if (ohm & 0x04) vc |= (x1 << 6);
+ if (ohm & 0xE8) vc |= (x3 << 6);
+ if (ohm & 0x20) vc |= (x2 << 7);
+ if (ohm & 0x5B) vb0 |= (x0 << 6);
+ if (ohm & 0x5B) vb1 |= (x1 << 6);
+ if (ohm & 0x12) vb0 |= (x2 << 7);
+ if (ohm & 0x12) vb1 |= (x3 << 7);
+
+ // Shift amount is 3, 2, 1 or 0 for mode pairs (0,1), (2,3), (4,5), (6,7).
+ const int shamt = (mode >> 1) ^ 3;
+
+ // Shifts go through uint32_t so that shifting the (possibly negative) vd values is well defined.
+ va = (uint32_t)va << shamt;
+ vb0 = (uint32_t)vb0 << shamt;
+ vb1 = (uint32_t)vb1 << shamt;
+ vc = (uint32_t)vc << shamt;
+ vd0 = (uint32_t)vd0 << shamt;
+ vd1 = (uint32_t)vd1 << shamt;
+
+ // qlog12
+ // High endpoint: va and va minus each vb. Low endpoint: the same minus vc (and minus vd for the last two).
+ e[1][0] = basisu::clamp<int>(va, 0, 0xFFF);
+ e[1][1] = basisu::clamp<int>(va - vb0, 0, 0xFFF);
+ e[1][2] = basisu::clamp<int>(va - vb1, 0, 0xFFF);
+
+ e[0][0] = basisu::clamp<int>(va - vc, 0, 0xFFF);
+ e[0][1] = basisu::clamp<int>(va - vb0 - vc - vd0, 0, 0xFFF);
+ e[0][2] = basisu::clamp<int>(va - vb1 - vc - vd1, 0, 0xFFF);
+
+ // Move the major component (decoded into slot 0) to its real channel.
+ if (maj_comp)
+ {
+ std::swap(e[0][0], e[0][maj_comp]);
+ std::swap(e[1][0], e[1][maj_comp]);
+ }
+ }
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Decodes a mode 11 endpoint set quantized at ise_endpoint_range into two qlog12 RGB endpoints.
+ // When the range is not BISE_256_LEVELS the values are first mapped through the dequantization
+ // table (m_ISE_to_val) for that range; otherwise pEndpoints is decoded directly.
+ // ise_endpoint_range must lie within [FIRST_VALID_ENDPOINT_ISE_RANGE, LAST_VALID_ENDPOINT_ISE_RANGE].
+ // Returns false if any decoded component exceeds MAX_QLOG12 (e[] has still been written), true otherwise.
+ bool decode_mode11_to_qlog12(
+ 	const uint8_t* pEndpoints,
+ 	int e[2][3],
+ 	uint32_t ise_endpoint_range)
+ {
+ 	assert(g_astc_hdr_core_initialized);
+ 	assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
+
+ 	if (ise_endpoint_range != astc_helpers::BISE_256_LEVELS)
+ 	{
+ 		uint8_t full_range_vals[NUM_MODE11_ENDPOINTS];
+
+ 		for (uint32_t k = 0; k < NUM_MODE11_ENDPOINTS; k++)
+ 			full_range_vals[k] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[k]];
+
+ 		decode_mode11_to_qlog12_ise20(full_range_vals, e);
+ 	}
+ 	else
+ 	{
+ 		decode_mode11_to_qlog12_ise20(pEndpoints, e);
+ 	}
+
+ 	// Reject the result if any of the six components is above the qlog12 limit.
+ 	for (uint32_t lh = 0; lh < 2; lh++)
+ 	{
+ 		for (uint32_t c = 0; c < 3; c++)
+ 		{
+ 			if (e[lh][c] > (int)MAX_QLOG12)
+ 				return false;
+ 		}
+ 	}
+
+ 	return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Encodes a single-subset BC6H block from half-float endpoints and the ASTC block's weights.
+ // h_e: endpoints indexed [component][low/high].
+ // best_blk: supplies m_weight_ise_range (expected 1..8) and the 16 weights in m_weights.
+ // Range 5 goes to the 3-bit-weight encoder unchanged; every other supported range is remapped
+ // (or, for range 8, copied) to 4-bit weights and goes to the 4-bit-weight encoder.
+ // Returns false only for a weight range outside 1..8.
+ bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
+ {
+ 	assert(g_astc_hdr_core_initialized);
+ 	assert((best_blk.m_weight_ise_range >= 1) && (best_blk.m_weight_ise_range <= 8));
+
+ 	// ASTC weight index -> 4-bit BC6H weight, one table per ASTC weight ISE range.
+ 	static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 8, 15 };                                   // ISE 1: 3 levels
+ 	static const uint8_t s_astc2_to_bc6h_4[4] = { 0, 5, 10, 15 };                               // ISE 2: 4 levels
+ 	static const uint8_t s_astc3_to_bc6h_4[5] = { 0, 4, 7, 11, 15 };                            // ISE 3: 5 levels
+ 	static const uint8_t s_astc4_to_bc6h_4[6] = { 0, 15, 3, 12, 6, 9 };                         // ISE 4: 6 levels
+ 	static const uint8_t s_astc6_to_bc6h_4[10] = { 0, 15, 2, 13, 3, 12, 5, 10, 6, 9 };          // ISE 6: 10 levels
+ 	static const uint8_t s_astc7_to_bc6h_4[12] = { 0, 15, 4, 11, 1, 14, 5, 10, 2, 13, 6, 9 };   // ISE 7: 12 levels
+
+ 	const uint8_t* pRemap = nullptr;
+
+ 	switch (best_blk.m_weight_ise_range)
+ 	{
+ 	case 5:
+ 		// Use 3-bit BC6H weights which are a perfect match for 3-bit ASTC weights, but encode 1-subset as 2 equal subsets
+ 		bc6h_enc_block_1subset_3bit_weights(&transcoded_bc6h_blk, h_e, best_blk.m_weights);
+ 		return true;
+ 	case 1: pRemap = s_astc1_to_bc6h_3; break;
+ 	case 2: pRemap = s_astc2_to_bc6h_4; break;
+ 	case 3: pRemap = s_astc3_to_bc6h_4; break;
+ 	case 4: pRemap = s_astc4_to_bc6h_4; break;
+ 	case 6: pRemap = s_astc6_to_bc6h_4; break;
+ 	case 7: pRemap = s_astc7_to_bc6h_4; break;
+ 	case 8:
+ 		// 16 levels: already 4-bit, copied as-is below.
+ 		break;
+ 	default:
+ 		assert(0);
+ 		return false;
+ 	}
+
+ 	uint8_t bc6h_weights[16];
+
+ 	if (pRemap)
+ 	{
+ 		for (uint32_t i = 0; i < 16; i++)
+ 			bc6h_weights[i] = pRemap[best_blk.m_weights[i]];
+ 	}
+ 	else
+ 	{
+ 		memcpy(bc6h_weights, best_blk.m_weights, 16);
+ 	}
+
+ 	bc6h_enc_block_1subset_4bit_weights(&transcoded_bc6h_blk, h_e, bc6h_weights);
+
+ 	return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+
+ // Encodes a two-subset BC6H block from a two-partition UASTC HDR block.
+ // common_part_index: index into the shared ASTC/BC6H partition table (< TOTAL_ASTC_BC6H_COMMON_PARTITIONS2).
+ // best_blk: the unpacked ASTC block; both subsets must use the same color endpoint mode (7 or 11)
+ // and one of the weight/endpoint ISE range pairs accepted below.
+ // Returns false if the configuration is not accepted, an endpoint fails to decode, or a decoded
+ // endpoint converts to a half-float Inf/NaN; returns true once the block has been encoded.
+ bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
+ {
+ 	assert(g_astc_hdr_core_initialized);
+ 	assert(best_blk.m_num_partitions == 2);
+ 	assert(common_part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+
+ 	// UASTC HDR checks
+ 	// Both CEM's must be equal in 2-subset UASTC HDR.
+ 	if (best_blk.m_color_endpoint_modes[0] != best_blk.m_color_endpoint_modes[1])
+ 		return false;
+
+ 	const bool is_mode7 = (best_blk.m_color_endpoint_modes[0] == 7);
+ 	if (!is_mode7 && (best_blk.m_color_endpoint_modes[0] != 11))
+ 		return false;
+
+ 	// Each accepted weight range pairs with exactly one endpoint range.
+ 	uint32_t required_endpoint_range = 0;
+ 	if (is_mode7)
+ 	{
+ 		switch (best_blk.m_weight_ise_range)
+ 		{
+ 		case 1:
+ 		case 2: required_endpoint_range = 20; break;
+ 		case 3: required_endpoint_range = 19; break;
+ 		case 4: required_endpoint_range = 17; break;
+ 		case 5: required_endpoint_range = 15; break;
+ 		default: return false;
+ 		}
+ 	}
+ 	else
+ 	{
+ 		switch (best_blk.m_weight_ise_range)
+ 		{
+ 		case 1: required_endpoint_range = 14; break;
+ 		case 2: required_endpoint_range = 12; break;
+ 		default: return false;
+ 		}
+ 	}
+
+ 	if (best_blk.m_endpoint_ise_range != required_endpoint_range)
+ 		return false;
+
+ 	half_float bc6h_endpoints[2][3][2]; // [subset][comp][lh_index]
+
+ 	for (uint32_t s = 0; s < 2; s++)
+ 	{
+ 		int e[2][3];
+
+ 		// Each subset's endpoint values are stored back to back in m_endpoints.
+ 		const bool decoded = is_mode7 ?
+ 			decode_mode7_to_qlog12(best_blk.m_endpoints + s * NUM_MODE7_ENDPOINTS, e, nullptr, best_blk.m_endpoint_ise_range) :
+ 			decode_mode11_to_qlog12(best_blk.m_endpoints + s * NUM_MODE11_ENDPOINTS, e, best_blk.m_endpoint_ise_range);
+ 		if (!decoded)
+ 			return false;
+
+ 		for (uint32_t c = 0; c < 3; c++)
+ 		{
+ 			for (uint32_t lh = 0; lh < 2; lh++)
+ 			{
+ 				const half_float h = qlog_to_half_slow(e[lh][c], 12);
+ 				if (is_half_inf_or_nan(h))
+ 					return false;
+
+ 				bc6h_endpoints[s][c][lh] = h;
+ 			}
+ 		}
+ 	}
+
+ 	// ASTC weight index -> 3-bit BC6H weight, one table per ASTC weight ISE range.
+ 	static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 4, 7 };
+ 	static const uint8_t s_astc2_to_bc6h_3[4] = { 0, 2, 5, 7 };
+ 	static const uint8_t s_astc3_to_bc6h_3[5] = { 0, 2, 4, 5, 7 };
+ 	static const uint8_t s_astc4_to_bc6h_3[6] = { 0, 7, 1, 6, 3, 4 };
+
+ 	const uint8_t* pRemap = nullptr;
+
+ 	switch (best_blk.m_weight_ise_range)
+ 	{
+ 	case 1: pRemap = s_astc1_to_bc6h_3; break;
+ 	case 2: pRemap = s_astc2_to_bc6h_3; break;
+ 	case 3: pRemap = s_astc3_to_bc6h_3; break;
+ 	case 4: pRemap = s_astc4_to_bc6h_3; break;
+ 	case 5:
+ 		// Already 3-bit weights, copied as-is below.
+ 		break;
+ 	default:
+ 		assert(0);
+ 		return false;
+ 	}
+
+ 	uint8_t bc6h_weights[16];
+
+ 	if (pRemap)
+ 	{
+ 		for (uint32_t i = 0; i < 16; i++)
+ 			bc6h_weights[i] = pRemap[best_blk.m_weights[i]];
+ 	}
+ 	else
+ 	{
+ 		memcpy(bc6h_weights, best_blk.m_weights, 16);
+ 	}
+
+ 	bc6h_enc_block_2subset_3bit_weights(&transcoded_bc6h_blk, common_part_index, bc6h_endpoints, bc6h_weights);
+
+ 	return true;
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+ // Transcodes a packed UASTC HDR block to BC6H. The input must have been encoded as UASTC HDR, or this fails.
+ // Unpacks src_blk as a 4x4 ASTC block and forwards the result to the log_astc_block overload.
+ // Returns false if the core has not been initialized, the block fails to unpack, or the overload rejects it.
+ bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk)
+ {
+ 	assert(g_astc_hdr_core_initialized);
+ 	if (!g_astc_hdr_core_initialized)
+ 	{
+ 		assert(0);
+ 		return false;
+ 	}
+
+ 	astc_helpers::log_astc_block log_blk;
+
+ 	// A failed unpack means the ASTC data itself is invalid.
+ 	const bool unpacked = astc_helpers::unpack_block(&src_blk, log_blk, 4, 4);
+
+ 	return unpacked && astc_hdr_transcode_to_bc6h(log_blk, dst_blk);
+ }
+
+ //--------------------------------------------------------------------------------------------------------------------------
+ // Transcodes an unpacked (logical) UASTC HDR block to BC6H. The block must have been encoded as UASTC HDR, or this fails.
+ // log_blk: the unpacked ASTC block. Accepted forms are an HDR solid-color block, or a 4x4-grid,
+ // single-plane block with 1 or 2 partitions using color endpoint mode 7 or 11 and one of the
+ // weight/endpoint ISE range combinations checked below.
+ // dst_blk: receives the BC6H block on success.
+ // Returns false if the core is uninitialized, the block is not one of the accepted forms, an
+ // endpoint fails to decode or converts to Inf/NaN, or the partition ID is out of range or has
+ // no common BC6H partition.
+ bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk)
+ {
+ 	assert(g_astc_hdr_core_initialized);
+ 	if (!g_astc_hdr_core_initialized)
+ 	{
+ 		assert(0);
+ 		return false;
+ 	}
+
+ 	if (log_blk.m_solid_color_flag_ldr)
+ 	{
+ 		// Don't support LDR solid colors.
+ 		return false;
+ 	}
+
+ 	if (log_blk.m_solid_color_flag_hdr)
+ 	{
+ 		// Solid color HDR block
+ 		return bc6h_enc_block_solid_color(&dst_blk, log_blk.m_solid_color);
+ 	}
+
+ 	// Only support 4x4 grid sizes
+ 	if ((log_blk.m_grid_width != 4) || (log_blk.m_grid_height != 4))
+ 		return false;
+
+ 	// Don't support dual plane encoding
+ 	if (log_blk.m_dual_plane)
+ 		return false;
+
+ 	if (log_blk.m_num_partitions == 1)
+ 	{
+ 		// Handle 1 partition (or subset)
+
+ 		// UASTC HDR checks
+ 		if ((log_blk.m_weight_ise_range < 1) || (log_blk.m_weight_ise_range > 8))
+ 			return false;
+
+ 		int e[2][3];
+ 		bool success;
+
+ 		if (log_blk.m_color_endpoint_modes[0] == 7)
+ 		{
+ 			// Mode 7 is only accepted at endpoint range 20.
+ 			if (log_blk.m_endpoint_ise_range != 20)
+ 				return false;
+
+ 			success = decode_mode7_to_qlog12(log_blk.m_endpoints, e, nullptr, log_blk.m_endpoint_ise_range);
+ 		}
+ 		else if (log_blk.m_color_endpoint_modes[0] == 11)
+ 		{
+ 			// UASTC HDR checks
+ 			// Weight ranges up to 7 pair with endpoint range 20; weight range 8 pairs with endpoint range 19.
+ 			if (log_blk.m_weight_ise_range <= 7)
+ 			{
+ 				if (log_blk.m_endpoint_ise_range != 20)
+ 					return false;
+ 			}
+ 			else if (log_blk.m_endpoint_ise_range != 19)
+ 			{
+ 				return false;
+ 			}
+
+ 			success = decode_mode11_to_qlog12(log_blk.m_endpoints, e, log_blk.m_endpoint_ise_range);
+ 		}
+ 		else
+ 		{
+ 			return false;
+ 		}
+
+ 		if (!success)
+ 			return false;
+
+ 		// Transform endpoints to half float
+ 		half_float h_e[3][2] =
+ 		{
+ 			{ qlog_to_half_slow(e[0][0], 12), qlog_to_half_slow(e[1][0], 12) },
+ 			{ qlog_to_half_slow(e[0][1], 12), qlog_to_half_slow(e[1][1], 12) },
+ 			{ qlog_to_half_slow(e[0][2], 12), qlog_to_half_slow(e[1][2], 12) }
+ 		};
+
+ 		// Sanity check for NaN/Inf
+ 		for (uint32_t i = 0; i < 2; i++)
+ 			if (is_half_inf_or_nan(h_e[0][i]) || is_half_inf_or_nan(h_e[1][i]) || is_half_inf_or_nan(h_e[2][i]))
+ 				return false;
+
+ 		// Transcode to bc6h
+ 		if (!transcode_bc6h_1subset(h_e, log_blk, dst_blk))
+ 			return false;
+ 	}
+ 	else if (log_blk.m_num_partitions == 2)
+ 	{
+ 		// Handle 2 partition (or subset)
+
+ 		// m_partition_id indexes a fixed-size table and this overload accepts caller-built blocks,
+ 		// so reject out-of-range IDs instead of reading past the end of the table.
+ 		const uint32_t num_table_entries = static_cast<uint32_t>(sizeof(g_astc_partition_id_to_common_bc7_pat_index) / sizeof(g_astc_partition_id_to_common_bc7_pat_index[0]));
+ 		const uint32_t partition_id = static_cast<uint32_t>(log_blk.m_partition_id);
+ 		if (partition_id >= num_table_entries)
+ 			return false;
+
+ 		// A negative entry means this ASTC partition has no BC6H equivalent in the common table.
+ 		int common_bc7_pat_index = g_astc_partition_id_to_common_bc7_pat_index[partition_id];
+ 		if (common_bc7_pat_index < 0)
+ 			return false;
+
+ 		assert(common_bc7_pat_index < (int)basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+
+ 		if (!transcode_bc6h_2subsets(common_bc7_pat_index, log_blk, dst_blk))
+ 			return false;
+ 	}
+ 	else
+ 	{
+ 		// Only supports 1 or 2 partitions (or subsets)
+ 		return false;
+ 	}
+
+ 	return true;
+ }
+#endif // BASISD_SUPPORT_UASTC_HDR
+
} // namespace basist
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder.h b/thirdparty/basis_universal/transcoder/basisu_transcoder.h
index 3327e8ddb7..8324e99698 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder.h
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.h
@@ -1,5 +1,5 @@
// basisu_transcoder.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -29,6 +29,7 @@
// Set BASISU_FORCE_DEVEL_MESSAGES to 1 to enable debug printf()'s whenever an error occurs, for easier debugging during development.
#ifndef BASISU_FORCE_DEVEL_MESSAGES
+ // TODO - disable before checking in
#define BASISU_FORCE_DEVEL_MESSAGES 0
#endif
@@ -55,7 +56,7 @@ namespace basist
cTFETC2_RGBA = 1, // Opaque+alpha, ETC2_EAC_A8 block followed by a ETC1 block, alpha channel will be opaque for opaque .basis files
// BC1-5, BC7 (desktop, some mobile devices)
- cTFBC1_RGB = 2, // Opaque only, no punchthrough alpha support yet, transcodes alpha slice if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified
+ cTFBC1_RGB = 2, // Opaque only, no punchthrough alpha support yet, transcodes alpha slice if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified
cTFBC3_RGBA = 3, // Opaque+alpha, BC4 followed by a BC1 block, alpha channel will be opaque for opaque .basis files
cTFBC4_R = 4, // Red only, alpha slice is transcoded to output if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified
cTFBC5_RG = 5, // XY: Two BC4 blocks, X=R and Y=Alpha, .basis file should have alpha data (if not Y will be all 255's)
@@ -63,10 +64,11 @@ namespace basist
// PVRTC1 4bpp (mobile, PowerVR devices)
cTFPVRTC1_4_RGB = 8, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified, nearly lowest quality of any texture format.
- cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format.
+ cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format.
// ASTC (mobile, Intel devices, hopefully all desktop GPU's one day)
- cTFASTC_4x4_RGBA = 10, // Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions.
+ cTFASTC_4x4_RGBA = 10, // LDR. Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files.
+ // LDR: Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions.
// ATC (mobile, Adreno devices, this is a niche format)
cTFATC_RGB = 11, // Opaque, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. ATI ATC (GL_ATC_RGB_AMD)
@@ -74,8 +76,8 @@ namespace basist
// FXT1 (desktop, Intel devices, this is a super obscure format)
cTFFXT1_RGB = 17, // Opaque only, uses exclusively CC_MIXED blocks. Notable for having a 8x4 block size. GL_3DFX_texture_compression_FXT1 is supported on Intel integrated GPU's (such as HD 630).
- // Punch-through alpha is relatively easy to support, but full alpha is harder. This format is only here for completeness so opaque-only is fine for now.
- // See the BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING macro in basisu_transcoder_internal.h.
+ // Punch-through alpha is relatively easy to support, but full alpha is harder. This format is only here for completeness so opaque-only is fine for now.
+ // See the BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING macro in basisu_transcoder_internal.h.
cTFPVRTC2_4_RGB = 18, // Opaque-only, almost BC1 quality, much faster to transcode and supports arbitrary texture dimensions (unlike PVRTC1 RGB).
cTFPVRTC2_4_RGBA = 19, // Opaque+alpha, slower to encode than cTFPVRTC2_4_RGB. Premultiplied alpha is highly recommended, otherwise the color channel can leak into the alpha channel on transparent blocks.
@@ -83,13 +85,22 @@ namespace basist
cTFETC2_EAC_R11 = 20, // R only (ETC2 EAC R11 unsigned)
cTFETC2_EAC_RG11 = 21, // RG only (ETC2 EAC RG11 unsigned), R=opaque.r, G=alpha - for tangent space normal maps
+ cTFBC6H = 22, // HDR, RGB only, unsigned
+ cTFASTC_HDR_4x4_RGBA = 23, // HDR, RGBA (currently UASTC HDR is only RGB), unsigned
+
// Uncompressed (raw pixel) formats
+ // Note these uncompressed formats (RGBA32, 565, and 4444) can only be transcoded to from LDR input files (ETC1S or UASTC LDR).
cTFRGBA32 = 13, // 32bpp RGBA image stored in raster (not block) order in memory, R is first byte, A is last byte.
cTFRGB565 = 14, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 11
cTFBGR565 = 15, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 0
- cTFRGBA4444 = 16, // 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0
+ cTFRGBA4444 = 16, // 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0
+
+ // Note these uncompressed formats (HALF and 9E5) can only be transcoded to from HDR input files (UASTC HDR).
+ cTFRGB_HALF = 24, // 48bpp RGB half (16-bits/component, 3 components)
+ cTFRGBA_HALF = 25, // 64bpp RGBA half (16-bits/component, 4 components) (A will always currently 1.0, UASTC_HDR doesn't support alpha)
+ cTFRGB_9E5 = 26, // 32bpp RGB 9E5 (shared exponent, positive only, see GL_EXT_texture_shared_exponent)
- cTFTotalTextureFormats = 22,
+ cTFTotalTextureFormats = 27,
// Old enums for compatibility with code compiled against previous versions
cTFETC1 = cTFETC1_RGB,
@@ -124,6 +135,9 @@ namespace basist
// Returns true if the format supports an alpha channel.
bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt);
+ // Returns true if the format is HDR.
+ bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt);
+
// Returns the basisu::texture_format corresponding to the specified transcoder_texture_format.
basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt);
@@ -142,7 +156,7 @@ namespace basist
// Returns the block height for the specified texture format, which is currently always 4.
uint32_t basis_get_block_height(transcoder_texture_format tex_type);
- // Returns true if the specified format was enabled at compile time.
+ // Returns true if the specified format was enabled at compile time, and is supported for the specific basis/ktx2 texture format (ETC1S, UASTC, or UASTC HDR).
bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt = basis_tex_format::cETC1S);
// Validates that the output buffer is large enough to hold the entire transcoded texture.
@@ -317,6 +331,42 @@ namespace basist
int channel0 = -1, int channel1 = -1);
};
+ // Low-level transcoder interface for UASTC HDR data. Only declarations are visible here; the
+ // constructor and the non-inline methods are defined elsewhere.
+ class basisu_lowlevel_uastc_hdr_transcoder
+ {
+ friend class basisu_transcoder;
+
+ public:
+ basisu_lowlevel_uastc_hdr_transcoder();
+
+ // Transcodes one slice, with the alpha flag and original dimensions passed explicitly.
+ // NOTE(review): parameter meanings are presumably the same as the LDR UASTC transcoder's transcode_slice() - confirm against the implementation.
+ bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+ uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+ basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0);
+
+ // Convenience overload: takes has_alpha from header.m_flags (cBASISHeaderFlagHasAlphaSlices) and the
+ // original width/height from slice_desc, then forwards every other argument unchanged to the overload above.
+ bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+ uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+ basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0)
+ {
+ return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt,
+ output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels,
+ pState, output_rows_in_pixels, channel0, channel1, decode_flags);
+ }
+
+ // Container independent transcoding
+ // Takes the target as a transcoder_texture_format and the compressed data as a buffer plus slice offset/length,
+ // rather than a parsed file header and slice descriptor.
+ bool transcode_image(
+ transcoder_texture_format target_format,
+ void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
+ const uint8_t* pCompressed_data, uint32_t compressed_data_length,
+ uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
+ uint32_t slice_offset, uint32_t slice_length,
+ uint32_t decode_flags = 0,
+ bool has_alpha = false,
+ bool is_video = false,
+ uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+ basisu_transcoder_state* pState = nullptr,
+ uint32_t output_rows_in_pixels = 0,
+ int channel0 = -1, int channel1 = -1);
+ };
+
struct basisu_slice_info
{
uint32_t m_orig_width;
@@ -530,6 +580,7 @@ namespace basist
private:
mutable basisu_lowlevel_etc1s_transcoder m_lowlevel_etc1s_decoder;
mutable basisu_lowlevel_uastc_transcoder m_lowlevel_uastc_decoder;
+ mutable basisu_lowlevel_uastc_hdr_transcoder m_lowlevel_uastc_hdr_decoder;
bool m_ready_to_transcode;
@@ -612,10 +663,12 @@ namespace basist
#pragma pack(pop)
const uint32_t KTX2_VK_FORMAT_UNDEFINED = 0;
+ const uint32_t KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK = 1000066000; // TODO, is this correct?
const uint32_t KTX2_KDF_DF_MODEL_UASTC = 166;
+ const uint32_t KTX2_KDF_DF_MODEL_UASTC_HDR = 167;
const uint32_t KTX2_KDF_DF_MODEL_ETC1S = 163;
const uint32_t KTX2_IMAGE_IS_P_FRAME = 2;
- const uint32_t KTX2_UASTC_BLOCK_SIZE = 16;
+ const uint32_t KTX2_UASTC_BLOCK_SIZE = 16; // also the block size for UASTC_HDR
const uint32_t KTX2_MAX_SUPPORTED_LEVEL_COUNT = 16; // this is an implementation specific constraint and can be increased
// The KTX2 transfer functions supported by KTX2
@@ -800,13 +853,15 @@ namespace basist
// Returns 0 or the number of layers in the texture array or texture video. Valid after init().
uint32_t get_layers() const { return m_header.m_layer_count; }
- // Returns cETC1S or cUASTC4x4. Valid after init().
+ // Returns cETC1S, cUASTC4x4, or cUASTC_HDR_4x4. Valid after init().
basist::basis_tex_format get_format() const { return m_format; }
-
+
bool is_etc1s() const { return get_format() == basist::basis_tex_format::cETC1S; }
bool is_uastc() const { return get_format() == basist::basis_tex_format::cUASTC4x4; }
+ bool is_hdr() const { return get_format() == basist::basis_tex_format::cUASTC_HDR_4x4; }
+
// Returns true if the ETC1S file has two planes (typically RGBA, or RRRG), or true if the UASTC file has alpha data. Valid after init().
uint32_t get_has_alpha() const { return m_has_alpha; }
@@ -913,6 +968,7 @@ namespace basist
basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder;
basist::basisu_lowlevel_uastc_transcoder m_uastc_transcoder;
+ basist::basisu_lowlevel_uastc_hdr_transcoder m_uastc_hdr_transcoder;
ktx2_transcoder_state m_def_transcoder_state;
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h b/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h
index 0505df6ea6..17c9dc7c8c 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h
@@ -1,5 +1,5 @@
// basisu_transcoder_internal.h - Universal texture format transcoder library.
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
//
@@ -20,8 +20,9 @@
#pragma warning (disable: 4127) // conditional expression is constant
#endif
-#define BASISD_LIB_VERSION 116
-#define BASISD_VERSION_STRING "01.16"
+// v1.50: Added UASTC HDR support
+#define BASISD_LIB_VERSION 150
+#define BASISD_VERSION_STRING "01.50"
#ifdef _DEBUG
#define BASISD_BUILD_DEBUG
@@ -82,9 +83,15 @@ namespace basist
cRGBA4444_ALPHA,
cRGBA4444_COLOR_OPAQUE,
cRGBA4444,
-
- cUASTC_4x4,
-
+ cRGBA_HALF,
+ cRGB_HALF,
+ cRGB_9E5,
+
+ cUASTC_4x4, // LDR, universal
+ cUASTC_HDR_4x4, // HDR, transcodes only to 4x4 HDR ASTC, BC6H, or uncompressed
+ cBC6H,
+ cASTC_HDR_4x4,
+
cTotalBlockFormats
};
@@ -264,8 +271,8 @@ namespace basist
}
const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; }
- const basisu::int_vec get_lookup() const { return m_lookup; }
- const basisu::int16_vec get_tree() const { return m_tree; }
+ const basisu::int_vec &get_lookup() const { return m_lookup; }
+ const basisu::int16_vec &get_tree() const { return m_tree; }
bool is_valid() const { return m_code_sizes.size() > 0; }
@@ -789,7 +796,198 @@ namespace basist
};
bool basis_block_format_is_uncompressed(block_format tex_type);
-
+
+ //------------------------------------
+
+ typedef uint16_t half_float;
+
+ const double MIN_DENORM_HALF_FLOAT = 0.000000059604645; // smallest positive subnormal number
+ const double MIN_HALF_FLOAT = 0.00006103515625; // smallest positive normal number
+ const double MAX_HALF_FLOAT = 65504.0; // largest normal number
+
+ inline uint32_t get_bits(uint32_t val, int low, int high) // returns bits [low, high] (inclusive) of val, shifted down to bit 0
+ {
+ const int num_bits = (high - low) + 1;
+ assert((num_bits >= 1) && (num_bits <= 32));
+
+ val >>= low;
+ if (num_bits != 32) // the full-width case skips the mask, since (1u << 32) is not a valid shift
+ val &= ((1u << num_bits) - 1);
+
+ return val;
+ }
+
+ inline bool is_half_inf_or_nan(half_float v)
+ {
+ return get_bits(v, 10, 14) == 31; // exponent field (bits 10-14) is all ones
+ }
+
+ inline bool is_half_denorm(half_float v)
+ {
+ int e = (v >> 10) & 31; // 5-bit exponent field
+ return !e; // true whenever the exponent field is zero, which also covers +/-0
+ }
+
+ inline int get_half_exp(half_float v)
+ {
+ int e = ((v >> 10) & 31); // 5-bit exponent field
+ return e ? (e - 15) : -14; // remove the bias of 15; a zero exponent field maps to -14
+ }
+
+ inline int get_half_mantissa(half_float v)
+ {
+ if (is_half_denorm(v))
+ return v & 0x3FF; // zero exponent field: the 10 fraction bits only, no leading one
+ return (v & 0x3FF) | 0x400; // otherwise OR in the implicit leading one (bit 10)
+ }
+
+ inline float get_half_mantissaf(half_float v)
+ {
+ return ((float)get_half_mantissa(v)) / 1024.0f; // scale so the leading-one bit (0x400) maps to 1.0f
+ }
+
+ inline int get_half_sign(half_float v)
+ {
+ return v ? ((v & 0x8000) ? -1 : 1) : 0; // 0 only when all bits are clear; 0x8000 (negative zero) yields -1
+ }
+
+ inline bool half_is_signed(half_float v)
+ {
+ return (v & 0x8000) != 0; // tests the sign bit (bit 15)
+ }
+
+#if 0
+ int hexp = get_half_exp(Cf);
+ float hman = get_half_mantissaf(Cf);
+ int hsign = get_half_sign(Cf);
+ float k = powf(2.0f, hexp) * hman * hsign;
+ if (is_half_inf_or_nan(Cf))
+ k = std::numeric_limits<float>::quiet_NaN();
+#endif
+
+ half_float float_to_half(float val);
+
+ inline float half_to_float(half_float hval) // expands 16-bit half-float bits into a 32-bit float, handling zero, denormals, INF and NaN
+ {
+ union { float f; uint32_t u; } x = { 0 }; // x.u receives the assembled bit pattern, x.f is what gets returned
+
+ uint32_t s = ((uint32_t)hval >> 15) & 1; // sign bit
+ uint32_t e = ((uint32_t)hval >> 10) & 0x1F; // 5-bit exponent field
+ uint32_t m = (uint32_t)hval & 0x3FF; // 10-bit fraction
+
+ if (!e)
+ {
+ if (!m)
+ {
+ // +- 0
+ x.u = s << 31;
+ return x.f;
+ }
+ else
+ {
+ // denormalized
+ while (!(m & 0x00000400)) // shift the fraction up until its top set bit reaches bit 10
+ {
+ m <<= 1;
+ --e; // e is unsigned and wraps below zero here; adding the bias further down brings it back into range
+ }
+
+ ++e;
+ m &= ~0x00000400; // clear bit 10, leaving the 10 fraction bits
+ }
+ }
+ else if (e == 31)
+ {
+ if (m == 0)
+ {
+ // +/- INF
+ x.u = (s << 31) | 0x7f800000;
+ return x.f;
+ }
+ else
+ {
+ // +/- NaN
+ x.u = (s << 31) | 0x7f800000 | (m << 13); // the half fraction bits are carried into the float's fraction
+ return x.f;
+ }
+ }
+
+ e = e + (127 - 15); // rebias the exponent from 15 to 127
+ m = m << 13; // move the 10-bit fraction into the top of the 23-bit fraction
+
+ assert(s <= 1);
+ assert(m <= 0x7FFFFF);
+ assert(e <= 255);
+
+ x.u = m | (e << 23) | (s << 31);
+ return x.f;
+ }
+
+ // Originally from bc6h_enc.h
+
+ void bc6h_enc_init();
+
+ const uint32_t MAX_BLOG16_VAL = 0xFFFF;
+
+ // BC6H internals
+ const uint32_t NUM_BC6H_MODES = 14;
+ const uint32_t BC6H_LAST_MODE_INDEX = 13;
+ const uint32_t BC6H_FIRST_1SUBSET_MODE_INDEX = 10; // in the MS docs, this is "mode 11" (where the first mode is 1), 60 bits for endpoints (10.10, 10.10, 10.10), 63 bits for weights
+ const uint32_t TOTAL_BC6H_PARTITION_PATTERNS = 32;
+
+ extern const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4]; // base, r, g, b
+
+ struct bc6h_bit_layout // one entry of g_bc6h_bit_layouts; presumably describes a single bit field of a BC6H mode - confirm against the table definition
+ {
+ int8_t m_comp; // R=0,G=1,B=2,D=3 (D=partition index)
+ int8_t m_index; // 0-3, 0-1 Low/High subset 1, 2-3 Low/High subset 2, -1=partition index (d)
+ int8_t m_last_bit;
+ int8_t m_first_bit; // may be -1 if a single bit, may be >m_last_bit if reversed
+ };
+
+ const uint32_t MAX_BC6H_LAYOUT_INDEX = 25;
+ extern const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX];
+
+ extern const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4]; // [y][x]
+
+ extern const uint8_t g_bc6h_weight3[8];
+ extern const uint8_t g_bc6h_weight4[16];
+
+ extern const int8_t g_bc6h_mode_lookup[32];
+
+ // Converts a blog16 value (comp <= 0xFFFF) to half float bits
+ inline half_float bc6h_blog16_to_half(uint32_t comp)
+ {
+ assert(comp <= 0xFFFF);
+
+ // scale the magnitude by 31/64
+ comp = (comp * 31u) >> 6u;
+ return (half_float)comp;
+ }
+
+ const uint32_t MAX_BC6H_HALF_FLOAT_AS_UINT = 0x7BFF;
+
+ // Inverts bc6h_blog16_to_half().
+ // Returns the nearest blog16 given a half value.
+ inline uint32_t bc6h_half_to_blog16(half_float h)
+ {
+ assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
+ return (h * 64 + 30) / 31; // ceil(h * 64 / 31), so (result * 31) >> 6 gives back h
+ }
+
+ struct bc6h_block // raw storage for one packed block, as written by the bc6h_enc_block_* functions
+ {
+ uint8_t m_bytes[16]; // 16 bytes = 128 bits
+ };
+
+ void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
+ void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
+ void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
+ void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
+ void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
+ void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
+ bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]);
+
} // namespace basist
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc
index 8244550959..205758b3d7 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc
@@ -1,4 +1,4 @@
-// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2017-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc
index fad45fe22d..f2d324fcc3 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc
@@ -1,4 +1,4 @@
-// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2017-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h b/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h
index f91314f4ff..457bd51e30 100644
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h
@@ -13,6 +13,7 @@ namespace basist
const uint32_t UASTC_MODE_INDEX_SOLID_COLOR = 8;
const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS2 = 30;
+ const uint32_t TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 = 27; // BC6H supports only 5-bit pattern indices, BC7 supports 4-bit or 6-bit
const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS3 = 11;
const uint32_t TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS = 19;