summary | refs | log | tree | commit | diff | stats
path: root/modules
diff options
context:
space:
mode:
Diffstat (limited to 'modules')
-rw-r--r-- modules/betsy/CrossPlatformSettings_piece_all.glsl 76
-rw-r--r-- modules/betsy/SCsub 24
-rw-r--r-- modules/betsy/UavCrossPlatform_piece_all.glsl 17
-rw-r--r-- modules/betsy/bc6h.glsl 653
-rw-r--r-- modules/betsy/config.py 6
-rw-r--r-- modules/betsy/image_compress_betsy.cpp 354
-rw-r--r-- modules/betsy/image_compress_betsy.h 44
-rw-r--r-- modules/betsy/register_types.cpp 47
-rw-r--r-- modules/betsy/register_types.h 39
-rw-r--r-- modules/cvtt/image_compress_cvtt.cpp 5
10 files changed, 1265 insertions, 0 deletions
diff --git a/modules/betsy/CrossPlatformSettings_piece_all.glsl b/modules/betsy/CrossPlatformSettings_piece_all.glsl
new file mode 100644
index 0000000000..b7abac7fcc
--- /dev/null
+++ b/modules/betsy/CrossPlatformSettings_piece_all.glsl
@@ -0,0 +1,76 @@
+
+// Compatibility macros: alternate type, intrinsic and texture-access names are
+// mapped onto their GLSL spellings so shader code can be written against the
+// macro names below.
+
+// Three-operand min/max built from the two-operand GLSL built-ins.
+#define min3(a, b, c) min(a, min(b, c))
+#define max3(a, b, c) max(a, max(b, c))
+
+// Float vector aliases.
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+// Signed integer vector aliases.
+#define int2 ivec2
+#define int3 ivec3
+#define int4 ivec4
+
+// Unsigned integer vector aliases.
+#define uint2 uvec2
+#define uint3 uvec3
+#define uint4 uvec4
+
+// Matrix aliases.
+#define float2x2 mat2
+#define float3x3 mat3
+#define float4x4 mat4
+#define ogre_float4x3 mat3x4
+
+// "ushort" names expand to the 32-bit unsigned GLSL types.
+#define ushort uint
+#define ushort3 uint3
+#define ushort4 uint4
+
+// Short type used for read operations: an int in GLSL and HLSL, a ushort in Metal.
+#define rshort int
+#define rshort2 int2
+#define rint int
+// Short type used for write operations: an int in GLSL, a ushort in HLSL and Metal.
+#define wshort2 int2
+#define wshort3 int3
+
+// 3x3 matrix construction helpers.
+#define toFloat3x3(x) mat3(x)
+#define buildFloat3x3(row0, row1, row2) mat3(row0, row1, row2)
+
+// Intrinsic aliases. mul() expands to the GLSL `*` operator with both operands
+// parenthesized.
+#define mul(x, y) ((x) * (y))
+#define saturate(x) clamp((x), 0.0, 1.0)
+#define lerp mix
+#define rsqrt inversesqrt
+// INLINE and NO_INTERPOLATION_SUFFIX expand to nothing; the prefix becomes `flat`.
+#define INLINE
+#define NO_INTERPOLATION_PREFIX flat
+#define NO_INTERPOLATION_SUFFIX
+
+// Both expand to nothing.
+#define PARAMS_ARG_DECL
+#define PARAMS_ARG
+
+#define reversebits bitfieldReverse
+
+// Texture sampling wrappers. The `sampler` argument is accepted but does not
+// appear in any expansion.
+#define OGRE_Sample(tex, sampler, uv) texture(tex, uv)
+#define OGRE_SampleLevel(tex, sampler, uv, lod) textureLod(tex, uv, lod)
+#define OGRE_SampleArray2D(tex, sampler, uv, arrayIdx) texture(tex, vec3(uv, arrayIdx))
+#define OGRE_SampleArray2DLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec3(uv, arrayIdx), lod)
+#define OGRE_SampleArrayCubeLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec4(uv, arrayIdx), lod)
+#define OGRE_SampleGrad(tex, sampler, uv, ddx, ddy) textureGrad(tex, uv, ddx, ddy)
+#define OGRE_SampleArray2DGrad(tex, sampler, uv, arrayIdx, ddx, ddy) textureGrad(tex, vec3(uv, arrayIdx), ddx, ddy)
+// Screen-space derivative wrappers.
+#define OGRE_ddx(val) dFdx(val)
+#define OGRE_ddy(val) dFdy(val)
+// Integer-coordinate texel fetch wrappers.
+#define OGRE_Load2D(tex, iuv, lod) texelFetch(tex, iuv, lod)
+#define OGRE_LoadArray2D(tex, iuv, arrayIdx, lod) texelFetch(tex, ivec3(iuv, arrayIdx), lod)
+#define OGRE_Load2DMS(tex, iuv, subsample) texelFetch(tex, iuv, subsample)
+
+#define OGRE_Load3D(tex, iuv, lod) texelFetch(tex, ivec3(iuv), lod)
+
+// Single-channel gathers; the trailing 0/1/2 selects the gathered component.
+#define OGRE_GatherRed(tex, sampler, uv) textureGather(tex, uv, 0)
+#define OGRE_GatherGreen(tex, sampler, uv) textureGather(tex, uv, 1)
+#define OGRE_GatherBlue(tex, sampler, uv) textureGather(tex, uv, 2)
+
+// Fetches one element and keeps only its first component.
+#define bufferFetch1(buffer, idx) texelFetch(buffer, idx).x
+
+// Sampler argument helpers; both expand to nothing.
+#define OGRE_SAMPLER_ARG_DECL(samplerName)
+#define OGRE_SAMPLER_ARG(samplerName)
+
+#define OGRE_Texture3D_float4 sampler3D
+// Parameter declarations using the GLSL `out` / `inout` qualifiers.
+#define OGRE_OUT_REF(declType, variableName) out declType variableName
+#define OGRE_INOUT_REF(declType, variableName) inout declType variableName
diff --git a/modules/betsy/SCsub b/modules/betsy/SCsub
new file mode 100644
index 0000000000..9930e1f4cf
--- /dev/null
+++ b/modules/betsy/SCsub
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+# SCons build script for the betsy module.
+Import("env")
+Import("env_modules")
+
+# Build environment for this module, cloned from the shared modules environment.
+env_betsy = env_modules.Clone()
+# Register the compute shader with the GLSL_HEADER builder (presumably this
+# produces the *.glsl.gen.h header named in the dependency below - confirm).
+env_betsy.GLSL_HEADER("bc6h.glsl")
+# Make the generated shader headers depend on the builder script itself.
+env_betsy.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"])
+
+# Thirdparty source files
+# NOTE: thirdparty_obj stays empty in this script; only the include path is added.
+thirdparty_obj = []
+thirdparty_dir = "#thirdparty/betsy/"
+env_betsy.Prepend(CPPPATH=[thirdparty_dir])
+
+# Clone with warnings disabled; no sources are added to it in this script.
+env_thirdparty = env_betsy.Clone()
+env_thirdparty.disable_warnings()
+env.modules_sources += thirdparty_obj
+
+# Godot source files
+module_obj = []
+env_betsy.add_source_files(module_obj, "*.cpp")
+env.modules_sources += module_obj
+
+# Needed to force rebuilding the module files when the thirdparty library is updated.
+env.Depends(module_obj, thirdparty_obj)
diff --git a/modules/betsy/UavCrossPlatform_piece_all.glsl b/modules/betsy/UavCrossPlatform_piece_all.glsl
new file mode 100644
index 0000000000..30854df637
--- /dev/null
+++ b/modules/betsy/UavCrossPlatform_piece_all.glsl
@@ -0,0 +1,17 @@
+
+// Image load/store wrappers: coordinates are converted to the integer vector
+// type the GLSL imageLoad/imageStore built-ins take.
+
+// Loads from 2D and 2D-array images.
+#define OGRE_imageLoad2D(inImage, iuv) imageLoad(inImage, int2(iuv))
+#define OGRE_imageLoad2DArray(inImage, iuvw) imageLoad(inImage, int3(iuvw))
+
+// 2D writes. The 1- and 2-component variants pad `value` with zeros to a float4;
+// the 4-component variant stores `value` as given.
+#define OGRE_imageWrite2D1(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0, 0))
+#define OGRE_imageWrite2D2(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0))
+#define OGRE_imageWrite2D4(outImage, iuv, value) imageStore(outImage, int2(iuv), value)
+
+#define OGRE_imageLoad3D(inImage, iuv) imageLoad(inImage, int3(iuv))
+
+// 3D writes. Unlike OGRE_imageWrite2D1, the "1" variant here passes `value`
+// through without padding it.
+#define OGRE_imageWrite3D1(outImage, iuv, value) imageStore(outImage, int3(iuv), value)
+#define OGRE_imageWrite3D4(outImage, iuv, value) imageStore(outImage, int3(iuv), value)
+
+// 2D-array writes. The "1" variant also passes `value` through unpadded.
+#define OGRE_imageWrite2DArray1(outImage, iuvw, value) imageStore(outImage, int3(iuvw), value)
+#define OGRE_imageWrite2DArray4(outImage, iuvw, value) imageStore(outImage, int3(iuvw), value)
+
+// Disabled: combined shared-memory barrier macro.
+//#define sharedOnlyBarrier memoryBarrierShared();barrier();
diff --git a/modules/betsy/bc6h.glsl b/modules/betsy/bc6h.glsl
new file mode 100644
index 0000000000..0d10d378fd
--- /dev/null
+++ b/modules/betsy/bc6h.glsl
@@ -0,0 +1,653 @@
+#[versions]
+
+signed = "#define SIGNED";
+unsigned = "";
+
+#[compute]
+#version 450
+
+#include "CrossPlatformSettings_piece_all.glsl"
+#include "UavCrossPlatform_piece_all.glsl"
+
+#VERSION_DEFINES
+#define QUALITY
+
+//SIGNED macro is WIP
+//#define SIGNED
+
+// Converts each channel of `value` to a half float with packHalf2x16 (the second
+// packed slot is fed 0.0) and returns the packed integers converted to float.
+float3 f32tof16(float3 value) {
+	uint packedX = packHalf2x16(float2(value.x, 0.0));
+	uint packedY = packHalf2x16(float2(value.y, 0.0));
+	uint packedZ = packHalf2x16(float2(value.z, 0.0));
+	return float3(packedX, packedY, packedZ);
+}
+
+// Unpacks each channel of `value` with unpackHalf2x16 and keeps the first
+// unpacked component, yielding one float per channel.
+float3 f16tof32(uint3 value) {
+	float3 unpacked;
+	unpacked.x = unpackHalf2x16(value.x).x;
+	unpacked.y = unpackHalf2x16(value.y).x;
+	unpacked.z = unpackHalf2x16(value.z).x;
+	return unpacked;
+}
+
+// Scalar overload: packs `value` (with 0.0 in the second slot) via packHalf2x16
+// and returns the packed integer converted to float.
+float f32tof16(float value) {
+	uint halfBits = packHalf2x16(float2(value, 0.0));
+	return float(halfBits);
+}
+
+// Scalar overload: unpacks `value` with unpackHalf2x16 and returns the first
+// unpacked component.
+float f16tof32(uint value) {
+	float2 unpacked = unpackHalf2x16(value);
+	return unpacked.x;
+}
+
+// Source image; main() reads it through the OGRE_Gather* macros.
+layout(binding = 0) uniform sampler2D srcTexture;
+// Destination image; main() stores one uint4 per invocation into it.
+layout(binding = 1, rgba32ui) uniform restrict writeonly uimage2D dstTexture;
+
+// Push constants.
+layout(push_constant, std430) uniform Params {
+ // Per-axis UV step used by main() to address texels (presumably
+ // 1.0 / source texture size - confirm against the caller).
+ float2 p_textureSizeRcp;
+ // Not read by the visible shader code.
+ uint padding0;
+ uint padding1;
+}
+params;
+
+// Used as the initial per-channel minimum when the two-subset encoders build
+// their bounding boxes.
+const float HALF_MAX = 65504.0f;
+// Not referenced by the visible shader code; main() loops up to the literal 32u.
+const uint PATTERN_NUM = 32u;
+
+// Sums, over the three channels, the squared log2 of (b + 1) / (a + 1).
+float CalcMSLE(float3 a, float3 b) {
+	float3 logRatio = log2((b + 1.0f) / (a + 1.0f));
+	float3 squared = logRatio * logRatio;
+	return squared.x + squared.y + squared.z;
+}
+
+// Returns 8, 2 or 15 depending on which of two bit masks has bit `i` set.
+// The 8-mask is tested first because the original applied it last, so it wins
+// if both masks had the bit set.
+uint PatternFixupID(uint i) {
+	if (((845414400u >> i) & 0x1u) != 0) {
+		return 8u;
+	}
+	if (((3441033216u >> i) & 0x1u) != 0) {
+		return 2u;
+	}
+	return 15u;
+}
+
+// Returns bit `i` of the 16-bit mask selected by `p`. Masks are stored two per
+// 32-bit word: even `p` uses the low half of word p / 2, odd `p` the high half.
+// Words beyond the table read as 0, as in the original select chain.
+uint Pattern(uint p, uint i) {
+	const uint packedMasks[16] = uint[16](
+			2290666700u, 3972591342u, 4276930688u, 3967876808u,
+			4293707776u, 3892379264u, 4278255592u, 4026597360u,
+			9369360u, 147747072u, 1930428556u, 2362323200u,
+			823134348u, 913073766u, 267393000u, 966553998u);
+
+	uint wordIdx = p / 2u;
+	uint enc = wordIdx < 16u ? packedMasks[wordIdx] : 0u;
+	if ((p - wordIdx * 2u) != 0u) {
+		enc = enc >> 16u;
+	}
+	return (enc >> i) & 0x1u;
+}
+
+#ifndef SIGNED
+//UF
+// Unsigned variant: scales the f32tof16() value of each channel by
+// 128 / (0x7bff + 1).
+float3 Quantize7(float3 x) {
+	float3 halfBits = f32tof16(x);
+	float3 scaled = halfBits * 128.0f;
+	return scaled / (0x7bff + 1.0f);
+}
+
+// Unsigned variant: scales the f32tof16() value of each channel by
+// 512 / (0x7bff + 1).
+float3 Quantize9(float3 x) {
+	float3 halfBits = f32tof16(x);
+	float3 scaled = halfBits * 512.0f;
+	return scaled / (0x7bff + 1.0f);
+}
+
+// Unsigned variant: scales the f32tof16() value of each channel by
+// 1024 / (0x7bff + 1).
+float3 Quantize10(float3 x) {
+	float3 halfBits = f32tof16(x);
+	float3 scaled = halfBits * 1024.0f;
+	return scaled / (0x7bff + 1.0f);
+}
+
+// Unsigned variant: maps each channel to (x * 65536 + 0x8000) / 128.
+float3 Unquantize7(float3 x) {
+	float3 expanded = x * 65536.0f + 0x8000;
+	return expanded / 128.0f;
+}
+
+// Unsigned variant: maps each channel to (x * 65536 + 0x8000) / 512.
+float3 Unquantize9(float3 x) {
+	float3 expanded = x * 65536.0f + 0x8000;
+	return expanded / 512.0f;
+}
+
+// Unsigned variant: maps each channel to (x * 65536 + 0x8000) / 1024.
+float3 Unquantize10(float3 x) {
+	float3 expanded = x * 65536.0f + 0x8000;
+	return expanded / 1024.0f;
+}
+
+// Unsigned variant: blends the two unquantized endpoints with `weight` (out of
+// 64, plus a rounding term of 32), scales by 31 / 4096, truncates to unsigned
+// integers and decodes them with f16tof32().
+float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) {
+	float3 blended = endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f;
+	float3 comp = blended * (31.0f / 4096.0f);
+	uint3 halfBits = uint3(comp);
+	return f16tof32(halfBits);
+}
+#else
+//SF
+
+// Per-channel sign selector: 1.0 where the channel is >= 0, otherwise -1.0.
+// Unlike sign(), it never returns 0.
+float3 cmpSign(float3 value) {
+	float3 signVal;
+	for (int c = 0; c < 3; ++c) {
+		signVal[c] = value[c] >= 0.0f ? 1.0f : -1.0f;
+	}
+	return signVal;
+}
+
+// Signed variant: quantizes the magnitude with a factor of 64 / (0x7bff + 1)
+// and re-applies the per-channel sign from cmpSign().
+float3 Quantize7(float3 x) {
+	float3 signVal = cmpSign(x);
+	float3 scaledMagnitude = f32tof16(abs(x)) * 64.0f;
+	return signVal * scaledMagnitude / (0x7bff + 1.0f);
+}
+
+// Signed variant: quantizes the magnitude with a factor of 256 / (0x7bff + 1)
+// and re-applies the per-channel sign from cmpSign().
+float3 Quantize9(float3 x) {
+	float3 signVal = cmpSign(x);
+	float3 scaledMagnitude = f32tof16(abs(x)) * 256.0f;
+	return signVal * scaledMagnitude / (0x7bff + 1.0f);
+}
+
+// Signed variant: quantizes the magnitude with a factor of 512 / (0x7bff + 1)
+// and re-applies the per-channel sign from cmpSign().
+float3 Quantize10(float3 x) {
+	float3 signVal = cmpSign(x);
+	float3 scaledMagnitude = f32tof16(abs(x)) * 512.0f;
+	return signVal * scaledMagnitude / (0x7bff + 1.0f);
+}
+
+// Signed variant: maps each channel to sign(x) * (|x| * 32768 + 0x4000) / 64;
+// channels with |x| >= 64 are replaced by 32767.
+// NOTE(review): the replacement value is written without the sign, so negative
+// inputs above the threshold become +32767 - confirm this is intended.
+float3 Unquantize7(float3 x) {
+	float3 signVal = sign(x);
+	float3 magnitude = abs(x);
+	float3 result = signVal * (magnitude * 32768.0f + 0x4000) / 64.0f;
+	for (int c = 0; c < 3; ++c) {
+		if (magnitude[c] >= 64.0f) {
+			result[c] = 32767.0;
+		}
+	}
+	return result;
+}
+
+// Signed variant: maps each channel to sign(x) * (|x| * 32768 + 0x4000) / 256;
+// channels with |x| >= 256 are replaced by 32767.
+// NOTE(review): the replacement value is written without the sign, so negative
+// inputs above the threshold become +32767 - confirm this is intended.
+float3 Unquantize9(float3 x) {
+	float3 signVal = sign(x);
+	float3 magnitude = abs(x);
+	float3 result = signVal * (magnitude * 32768.0f + 0x4000) / 256.0f;
+	for (int c = 0; c < 3; ++c) {
+		if (magnitude[c] >= 256.0f) {
+			result[c] = 32767.0;
+		}
+	}
+	return result;
+}
+
+// Signed variant: maps each channel to sign(x) * (|x| * 32768 + 0x4000) / 512;
+// channels with |x| >= 512 are replaced by 32767.
+// NOTE(review): the replacement value is written without the sign, so negative
+// inputs above the threshold become +32767 - confirm this is intended.
+float3 Unquantize10(float3 x) {
+	float3 signVal = sign(x);
+	float3 magnitude = abs(x);
+	float3 result = signVal * (magnitude * 32768.0f + 0x4000) / 512.0f;
+	for (int c = 0; c < 3; ++c) {
+		if (magnitude[c] >= 512.0f) {
+			result[c] = 32767.0;
+		}
+	}
+	return result;
+}
+
+// Signed variant: blends the two unquantized endpoints with `weight` (out of
+// 64, plus a rounding term of 32), scales by 31 / 2048, converts to unsigned
+// integers and decodes them with f16tof32().
+float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) {
+	float3 blended = endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f;
+	float3 comp = blended * (31.0f / 2048.0f);
+	// Disabled sign handling kept from the original for reference:
+	/*float3 signVal;
+	signVal.x = comp.x >= 0.0f ? 0.0f : 0x8000;
+	signVal.y = comp.y >= 0.0f ? 0.0f : 0x8000;
+	signVal.z = comp.z >= 0.0f ? 0.0f : 0x8000;*/
+	//return f16tof32( uint3( signVal + abs( comp ) ) );
+	uint3 halfBits = uint3(comp);
+	return f16tof32(halfBits);
+}
+#endif
+
+// Exchanges the values of two float3 variables.
+void Swap(inout float3 a, inout float3 b) {
+	float3 previousB = b;
+	b = a;
+	a = previousB;
+}
+
+// Exchanges the values of two float variables.
+void Swap(inout float a, inout float b) {
+	float previousB = b;
+	b = a;
+	a = previousB;
+}
+
+// Maps a texel's position between two endpoint positions to a 3-bit index
+// (0..7).
+//
+// texelPos      Projected position of the texel.
+// endPoint0Pos  Projected position of endpoint 0 (index 0).
+// endPoint1Pos  Projected position of endpoint 1 (index 7).
+//
+// When both endpoints project to the same position (e.g. a flat block) the
+// ratio would be a division by zero, and the resulting NaN/Inf would reach
+// clamp() and the float -> uint conversion, whose results are undefined for
+// such values. Every index decodes between the same two positions in that
+// case, so index 0 is returned.
+uint ComputeIndex3(float texelPos, float endPoint0Pos, float endPoint1Pos) {
+	float span = endPoint1Pos - endPoint0Pos;
+	if (span == 0.0f) {
+		return 0u;
+	}
+	float r = (texelPos - endPoint0Pos) / span;
+	return uint(clamp(r * 6.98182f + 0.00909f + 0.5f, 0.0f, 7.0f));
+}
+
+// Maps a texel's position between two endpoint positions to a 4-bit index
+// (0..15).
+//
+// texelPos      Projected position of the texel.
+// endPoint0Pos  Projected position of endpoint 0 (index 0).
+// endPoint1Pos  Projected position of endpoint 1 (index 15).
+//
+// When both endpoints project to the same position (e.g. a flat block) the
+// ratio would be a division by zero, and the resulting NaN/Inf would reach
+// clamp() and the float -> uint conversion, whose results are undefined for
+// such values. Every index decodes between the same two positions in that
+// case, so index 0 is returned.
+uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos) {
+	float span = endPoint1Pos - endPoint0Pos;
+	if (span == 0.0f) {
+		return 0u;
+	}
+	float r = (texelPos - endPoint0Pos) / span;
+	return uint(clamp(r * 14.93333f + 0.03333f + 0.5f, 0.0f, 15.0f));
+}
+
+// Rewrites each channel of `v1` in place: the value is converted to int, its
+// bits are masked with `mask`, and `signFlag` is OR-ed in when the integer was
+// negative. The result is stored back as float.
+void SignExtend(inout float3 v1, uint mask, uint signFlag) {
+	int3 bits = int3(v1);
+	for (int c = 0; c < 3; ++c) {
+		int signBits = bits[c] < 0 ? int(signFlag) : 0;
+		bits[c] = (bits[c] & int(mask)) | signBits;
+	}
+	v1 = float3(bits);
+}
+
+// Encodes the 16 texels as a single-subset block ("mode 11", per the comment
+// below) with 10-bit endpoints and 4-bit indices.
+//
+// block      Receives the encoded bits. block.x is overwritten; block.y/z/w are
+//            only OR-ed into, so the caller must pass them zeroed.
+// blockMSLE  Overwritten with the error of this encoding.
+// texels     The 16 input colors.
+void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) {
+ // compute endpoints (min/max RGB bbox)
+ float3 blockMin = texels[0];
+ float3 blockMax = texels[0];
+ for (uint i = 1u; i < 16u; ++i) {
+ blockMin = min(blockMin, texels[i]);
+ blockMax = max(blockMax, texels[i]);
+ }
+
+ // refine endpoints in log2 RGB space
+ // Texels equal to the current bbox corner are skipped while searching for the
+ // second-smallest / second-largest colors.
+ float3 refinedBlockMin = blockMax;
+ float3 refinedBlockMax = blockMin;
+ for (uint i = 0u; i < 16u; ++i) {
+ refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]);
+ refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]);
+ }
+
+ // Pull each corner inwards by at most 1/32 of the log-space extent, and never
+ // past the refined corner.
+ float3 logBlockMax = log2(blockMax + 1.0f);
+ float3 logBlockMin = log2(blockMin + 1.0f);
+ float3 logRefinedBlockMax = log2(refinedBlockMax + 1.0f);
+ float3 logRefinedBlockMin = log2(refinedBlockMin + 1.0f);
+ float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f);
+ logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt);
+ logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt);
+ blockMin = exp2(logBlockMin) - 1.0f;
+ blockMax = exp2(logBlockMax) - 1.0f;
+
+ // Projection axis, scaled so its components sum to 1.
+ // NOTE(review): the sum is zero when blockMin == blockMax, making this a
+ // division by zero - confirm how flat blocks are expected to behave.
+ float3 blockDir = blockMax - blockMin;
+ blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z);
+
+ float3 endpoint0 = Quantize10(blockMin);
+ float3 endpoint1 = Quantize10(blockMax);
+ float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
+ float endPoint1Pos = f32tof16(dot(blockMax, blockDir));
+
+ // check if endpoint swap is required
+ // Texel 0's index is stored with one bit less than the others (see the shifts
+ // below), so the endpoints are swapped when its index would exceed 7.
+ float fixupTexelPos = f32tof16(dot(texels[0], blockDir));
+ uint fixupIndex = ComputeIndex4(fixupTexelPos, endPoint0Pos, endPoint1Pos);
+ if (fixupIndex > 7) {
+ Swap(endPoint0Pos, endPoint1Pos);
+ Swap(endpoint0, endpoint1);
+ }
+
+ // compute indices
+ uint indices[16] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u };
+ for (uint i = 0u; i < 16u; ++i) {
+ float texelPos = f32tof16(dot(texels[i], blockDir));
+ indices[i] = ComputeIndex4(texelPos, endPoint0Pos, endPoint1Pos);
+ }
+
+ // compute compression error (MSLE)
+ float3 endpoint0Unq = Unquantize10(endpoint0);
+ float3 endpoint1Unq = Unquantize10(endpoint1);
+ float msle = 0.0f;
+ for (uint i = 0u; i < 16u; ++i) {
+ // Turn the 4-bit index into an interpolation weight out of 64.
+ float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f);
+ float3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight);
+
+ msle += CalcMSLE(texels[i], texelUnc);
+ }
+
+ // encode block for mode 11
+ blockMSLE = msle;
+ block.x = 0x03;
+
+ // endpoints
+ // Six 10-bit values packed back to back starting at bit 5; values that
+ // straddle a 32-bit word are written in two parts.
+ block.x |= uint(endpoint0.x) << 5u;
+ block.x |= uint(endpoint0.y) << 15u;
+ block.x |= uint(endpoint0.z) << 25u;
+ block.y |= uint(endpoint0.z) >> 7u;
+ block.y |= uint(endpoint1.x) << 3u;
+ block.y |= uint(endpoint1.y) << 13u;
+ block.y |= uint(endpoint1.z) << 23u;
+ block.z |= uint(endpoint1.z) >> 9u;
+
+ // indices
+ // Index 0 occupies 3 bits (bits 1..3 of block.z); all others occupy 4 bits.
+ block.z |= indices[0] << 1u;
+ block.z |= indices[1] << 4u;
+ block.z |= indices[2] << 8u;
+ block.z |= indices[3] << 12u;
+ block.z |= indices[4] << 16u;
+ block.z |= indices[5] << 20u;
+ block.z |= indices[6] << 24u;
+ block.z |= indices[7] << 28u;
+ block.w |= indices[8] << 0u;
+ block.w |= indices[9] << 4u;
+ block.w |= indices[10] << 8u;
+ block.w |= indices[11] << 12u;
+ block.w |= indices[12] << 16u;
+ block.w |= indices[13] << 20u;
+ block.w |= indices[14] << 24u;
+ block.w |= indices[15] << 28u;
+}
+
+// Returns the squared length of what remains of (Point - PointOnLine) after
+// subtracting its component along LineDirection.
+float DistToLineSq(float3 PointOnLine, float3 LineDirection, float3 Point) {
+	float3 offset = Point - PointOnLine;
+	float along = dot(offset, LineDirection);
+	float3 residual = offset - along * LineDirection;
+	return dot(residual, residual);
+}
+
+// Returns `v` scaled to unit length, or a zero vector when `v` has zero length.
+// normalize() divides by the vector length, so a zero-length input would
+// otherwise produce NaN/undefined components.
+float3 SafeNormalizeP2(float3 v) {
+	float lengthSq = dot(v, v);
+	return lengthSq > 0.0f ? normalize(v) : float3(0.0f, 0.0f, 0.0f);
+}
+
+// Scores a two-subset partition: each texel is assigned to subset 0 or 1 by
+// Pattern(pattern, i), a line is fitted through each subset's bounding-box
+// diagonal, and the summed squared distance of all texels to their subset's
+// line is returned. Lower is better.
+//
+// pattern  Partition index passed to Pattern().
+// texels   The 16 input colors.
+//
+// A subset whose texels all share one color has a zero-length diagonal. Its
+// direction is then a zero vector, so the distance degenerates to the distance
+// to the subset's minimum corner (zero for such a subset) instead of NaN. A NaN
+// score would make every `score < bestScore` comparison in the caller false.
+float EvaluateP2Pattern(uint pattern, float3 texels[16]) {
+	float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
+	float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f);
+	float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
+	float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f);
+
+	// Per-subset bounding boxes.
+	for (uint i = 0u; i < 16u; ++i) {
+		uint paletteID = Pattern(pattern, i);
+		if (paletteID == 0u) {
+			p0BlockMin = min(p0BlockMin, texels[i]);
+			p0BlockMax = max(p0BlockMax, texels[i]);
+		} else {
+			p1BlockMin = min(p1BlockMin, texels[i]);
+			p1BlockMax = max(p1BlockMax, texels[i]);
+		}
+	}
+
+	float3 p0BlockDir = SafeNormalizeP2(p0BlockMax - p0BlockMin);
+	float3 p1BlockDir = SafeNormalizeP2(p1BlockMax - p1BlockMin);
+
+	float sqDistanceFromLine = 0.0f;
+
+	// Accumulate each texel's squared distance to its own subset's line.
+	for (uint i = 0u; i < 16u; ++i) {
+		uint paletteID = Pattern(pattern, i);
+		if (paletteID == 0u) {
+			sqDistanceFromLine += DistToLineSq(p0BlockMin, p0BlockDir, texels[i]);
+		} else {
+			sqDistanceFromLine += DistToLineSq(p1BlockMin, p1BlockDir, texels[i]);
+		}
+	}
+
+	return sqDistanceFromLine;
+}
+
+// Encodes the 16 texels as a two-subset block for partition `pattern`, trying
+// both the "7.6" and "9.5" endpoint layouts (named in the comments below), and
+// replaces `block` only if the better of the two beats the incoming error.
+//
+// block      Current best encoding; overwritten when this one is better.
+// blockMSLE  Error of the current best encoding; updated together with block.
+// pattern    Partition index passed to Pattern() and PatternFixupID().
+// texels     The 16 input colors.
+void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, float3 texels[16]) {
+ float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
+ float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f);
+ float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
+ float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f);
+
+ // Per-subset bounding boxes; Pattern() assigns each texel to subset 0 or 1.
+ for (uint i = 0u; i < 16u; ++i) {
+ uint paletteID = Pattern(pattern, i);
+ if (paletteID == 0) {
+ p0BlockMin = min(p0BlockMin, texels[i]);
+ p0BlockMax = max(p0BlockMax, texels[i]);
+ } else {
+ p1BlockMin = min(p1BlockMin, texels[i]);
+ p1BlockMax = max(p1BlockMax, texels[i]);
+ }
+ }
+
+ // Projection axes, scaled so their components sum to 1.
+ // NOTE(review): each sum is zero when a subset's min equals its max, making
+ // this a division by zero - confirm how flat subsets are expected to behave.
+ float3 p0BlockDir = p0BlockMax - p0BlockMin;
+ float3 p1BlockDir = p1BlockMax - p1BlockMin;
+ p0BlockDir = p0BlockDir / (p0BlockDir.x + p0BlockDir.y + p0BlockDir.z);
+ p1BlockDir = p1BlockDir / (p1BlockDir.x + p1BlockDir.y + p1BlockDir.z);
+
+ float p0Endpoint0Pos = f32tof16(dot(p0BlockMin, p0BlockDir));
+ float p0Endpoint1Pos = f32tof16(dot(p0BlockMax, p0BlockDir));
+ float p1Endpoint0Pos = f32tof16(dot(p1BlockMin, p1BlockDir));
+ float p1Endpoint1Pos = f32tof16(dot(p1BlockMax, p1BlockDir));
+
+ // The fixup texels (texel 0 for subset 0, texel fixupID for subset 1) are
+ // stored with one bit less, so a subset's endpoints are swapped when its
+ // fixup index would exceed 3.
+ uint fixupID = PatternFixupID(pattern);
+ float p0FixupTexelPos = f32tof16(dot(texels[0], p0BlockDir));
+ float p1FixupTexelPos = f32tof16(dot(texels[fixupID], p1BlockDir));
+ uint p0FixupIndex = ComputeIndex3(p0FixupTexelPos, p0Endpoint0Pos, p0Endpoint1Pos);
+ uint p1FixupIndex = ComputeIndex3(p1FixupTexelPos, p1Endpoint0Pos, p1Endpoint1Pos);
+ if (p0FixupIndex > 3u) {
+ Swap(p0Endpoint0Pos, p0Endpoint1Pos);
+ Swap(p0BlockMin, p0BlockMax);
+ }
+ if (p1FixupIndex > 3u) {
+ Swap(p1Endpoint0Pos, p1Endpoint1Pos);
+ Swap(p1BlockMin, p1BlockMax);
+ }
+
+ // 3-bit index per texel, taken from the subset the texel belongs to.
+ uint indices[16] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u };
+ for (uint i = 0u; i < 16u; ++i) {
+ float p0TexelPos = f32tof16(dot(texels[i], p0BlockDir));
+ float p1TexelPos = f32tof16(dot(texels[i], p1BlockDir));
+ uint p0Index = ComputeIndex3(p0TexelPos, p0Endpoint0Pos, p0Endpoint1Pos);
+ uint p1Index = ComputeIndex3(p1TexelPos, p1Endpoint0Pos, p1Endpoint1Pos);
+
+ uint paletteID = Pattern(pattern, i);
+ indices[i] = paletteID == 0u ? p0Index : p1Index;
+ }
+
+ // Naming: endpoint76N / endpoint95N hold the 7.6 / 9.5 candidates, with
+ // N = 0: subset 0 first, 1: subset 0 second, 2: subset 1 first, 3: subset 1 second.
+ float3 endpoint760 = floor(Quantize7(p0BlockMin));
+ float3 endpoint761 = floor(Quantize7(p0BlockMax));
+ float3 endpoint762 = floor(Quantize7(p1BlockMin));
+ float3 endpoint763 = floor(Quantize7(p1BlockMax));
+
+ float3 endpoint950 = floor(Quantize9(p0BlockMin));
+ float3 endpoint951 = floor(Quantize9(p0BlockMax));
+ float3 endpoint952 = floor(Quantize9(p1BlockMin));
+ float3 endpoint953 = floor(Quantize9(p1BlockMax));
+
+ // Endpoints 1..3 are stored as deltas from endpoint 0.
+ endpoint761 = endpoint761 - endpoint760;
+ endpoint762 = endpoint762 - endpoint760;
+ endpoint763 = endpoint763 - endpoint760;
+
+ endpoint951 = endpoint951 - endpoint950;
+ endpoint952 = endpoint952 - endpoint950;
+ endpoint953 = endpoint953 - endpoint950;
+
+ // Clamp the deltas to the range that fits the delta field of each layout.
+ int maxVal76 = 0x1F;
+ endpoint761 = clamp(endpoint761, -maxVal76, maxVal76);
+ endpoint762 = clamp(endpoint762, -maxVal76, maxVal76);
+ endpoint763 = clamp(endpoint763, -maxVal76, maxVal76);
+
+ int maxVal95 = 0xF;
+ endpoint951 = clamp(endpoint951, -maxVal95, maxVal95);
+ endpoint952 = clamp(endpoint952, -maxVal95, maxVal95);
+ endpoint953 = clamp(endpoint953, -maxVal95, maxVal95);
+
+ // Reconstruct the absolute endpoints from the (clamped) deltas to measure the
+ // error of each layout.
+ float3 endpoint760Unq = Unquantize7(endpoint760);
+ float3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761);
+ float3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762);
+ float3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763);
+ float3 endpoint950Unq = Unquantize9(endpoint950);
+ float3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951);
+ float3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952);
+ float3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953);
+
+ float msle76 = 0.0f;
+ float msle95 = 0.0f;
+ for (uint i = 0u; i < 16u; ++i) {
+ uint paletteID = Pattern(pattern, i);
+
+ // Pick the endpoint pair of the subset this texel belongs to.
+ float3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq;
+ float3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq;
+ float3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq;
+ float3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq;
+
+ // Turn the 3-bit index into an interpolation weight out of 64.
+ float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f);
+ float3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight);
+ float3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight);
+
+ msle76 += CalcMSLE(texels[i], texelUnc76);
+ msle95 += CalcMSLE(texels[i], texelUnc95);
+ }
+
+ // Convert the signed deltas to their masked bit patterns (5 value bits plus
+ // sign flag 0x20 for 7.6; 4 value bits plus sign flag 0x10 for 9.5).
+ SignExtend(endpoint761, 0x1F, 0x20);
+ SignExtend(endpoint762, 0x1F, 0x20);
+ SignExtend(endpoint763, 0x1F, 0x20);
+
+ SignExtend(endpoint951, 0xF, 0x10);
+ SignExtend(endpoint952, 0xF, 0x10);
+ SignExtend(endpoint953, 0xF, 0x10);
+
+ // encode block
+ // Only replace the incoming block when this two-subset encoding is better.
+ float p2MSLE = min(msle76, msle95);
+ if (p2MSLE < blockMSLE) {
+ blockMSLE = p2MSLE;
+ block = uint4(0u, 0u, 0u, 0u);
+
+ if (p2MSLE == msle76) {
+ // 7.6
+ // Mode bits 0x1, then the endpoint fields scattered bit by bit.
+ block.x = 0x1u;
+ block.x |= (uint(endpoint762.y) & 0x20u) >> 3u;
+ block.x |= (uint(endpoint763.y) & 0x10u) >> 1u;
+ block.x |= (uint(endpoint763.y) & 0x20u) >> 1u;
+ block.x |= uint(endpoint760.x) << 5u;
+ block.x |= (uint(endpoint763.z) & 0x01u) << 12u;
+ block.x |= (uint(endpoint763.z) & 0x02u) << 12u;
+ block.x |= (uint(endpoint762.z) & 0x10u) << 10u;
+ block.x |= uint(endpoint760.y) << 15u;
+ block.x |= (uint(endpoint762.z) & 0x20u) << 17u;
+ block.x |= (uint(endpoint763.z) & 0x04u) << 21u;
+ block.x |= (uint(endpoint762.y) & 0x10u) << 20u;
+ block.x |= uint(endpoint760.z) << 25u;
+ block.y |= (uint(endpoint763.z) & 0x08u) >> 3u;
+ block.y |= (uint(endpoint763.z) & 0x20u) >> 4u;
+ block.y |= (uint(endpoint763.z) & 0x10u) >> 2u;
+ block.y |= uint(endpoint761.x) << 3u;
+ block.y |= (uint(endpoint762.y) & 0x0Fu) << 9u;
+ block.y |= uint(endpoint761.y) << 13u;
+ block.y |= (uint(endpoint763.y) & 0x0Fu) << 19u;
+ block.y |= uint(endpoint761.z) << 23u;
+ block.y |= (uint(endpoint762.z) & 0x07u) << 29u;
+ block.z |= (uint(endpoint762.z) & 0x08u) >> 3u;
+ block.z |= uint(endpoint762.x) << 1u;
+ block.z |= uint(endpoint763.x) << 7u;
+ } else {
+ // 9.5
+ // Mode bits 0xE, then the endpoint fields scattered bit by bit.
+ block.x = 0xEu;
+ block.x |= uint(endpoint950.x) << 5u;
+ block.x |= (uint(endpoint952.z) & 0x10u) << 10u;
+ block.x |= uint(endpoint950.y) << 15u;
+ block.x |= (uint(endpoint952.y) & 0x10u) << 20u;
+ block.x |= uint(endpoint950.z) << 25u;
+ block.y |= uint(endpoint950.z) >> 7u;
+ block.y |= (uint(endpoint953.z) & 0x10u) >> 2u;
+ block.y |= uint(endpoint951.x) << 3u;
+ block.y |= (uint(endpoint953.y) & 0x10u) << 4u;
+ block.y |= (uint(endpoint952.y) & 0x0Fu) << 9u;
+ block.y |= uint(endpoint951.y) << 13u;
+ block.y |= (uint(endpoint953.z) & 0x01u) << 18u;
+ block.y |= (uint(endpoint953.y) & 0x0Fu) << 19u;
+ block.y |= uint(endpoint951.z) << 23u;
+ block.y |= (uint(endpoint953.z) & 0x02u) << 27u;
+ // Bits shifted past bit 31 are discarded; bit 3 is written to block.z below.
+ block.y |= uint(endpoint952.z) << 29u;
+ block.z |= (uint(endpoint952.z) & 0x08u) >> 3u;
+ block.z |= uint(endpoint952.x) << 1u;
+ block.z |= (uint(endpoint953.z) & 0x04u) << 4u;
+ block.z |= uint(endpoint953.x) << 7u;
+ block.z |= (uint(endpoint953.z) & 0x08u) << 9u;
+ }
+
+ // Partition index, then the indices. Texel 0 and the subset-1 fixup texel
+ // (15, 2 or 8) take 2 bits each; every other texel takes 3 bits, which is
+ // why the shift amounts differ between the three branches.
+ block.z |= pattern << 13u;
+ uint blockFixupID = PatternFixupID(pattern);
+ if (blockFixupID == 15u) {
+ block.z |= indices[0] << 18u;
+ block.z |= indices[1] << 20u;
+ block.z |= indices[2] << 23u;
+ block.z |= indices[3] << 26u;
+ block.z |= indices[4] << 29u;
+ block.w |= indices[5] << 0u;
+ block.w |= indices[6] << 3u;
+ block.w |= indices[7] << 6u;
+ block.w |= indices[8] << 9u;
+ block.w |= indices[9] << 12u;
+ block.w |= indices[10] << 15u;
+ block.w |= indices[11] << 18u;
+ block.w |= indices[12] << 21u;
+ block.w |= indices[13] << 24u;
+ block.w |= indices[14] << 27u;
+ block.w |= indices[15] << 30u;
+ } else if (blockFixupID == 2u) {
+ block.z |= indices[0] << 18u;
+ block.z |= indices[1] << 20u;
+ block.z |= indices[2] << 23u;
+ block.z |= indices[3] << 25u;
+ block.z |= indices[4] << 28u;
+ // Index 5 straddles block.z (lowest bit) and block.w (upper two bits).
+ block.z |= indices[5] << 31u;
+ block.w |= indices[5] >> 1u;
+ block.w |= indices[6] << 2u;
+ block.w |= indices[7] << 5u;
+ block.w |= indices[8] << 8u;
+ block.w |= indices[9] << 11u;
+ block.w |= indices[10] << 14u;
+ block.w |= indices[11] << 17u;
+ block.w |= indices[12] << 20u;
+ block.w |= indices[13] << 23u;
+ block.w |= indices[14] << 26u;
+ block.w |= indices[15] << 29u;
+ } else {
+ block.z |= indices[0] << 18u;
+ block.z |= indices[1] << 20u;
+ block.z |= indices[2] << 23u;
+ block.z |= indices[3] << 26u;
+ block.z |= indices[4] << 29u;
+ block.w |= indices[5] << 0u;
+ block.w |= indices[6] << 3u;
+ block.w |= indices[7] << 6u;
+ block.w |= indices[8] << 9u;
+ block.w |= indices[9] << 11u;
+ block.w |= indices[10] << 14u;
+ block.w |= indices[11] << 17u;
+ block.w |= indices[12] << 20u;
+ block.w |= indices[13] << 23u;
+ block.w |= indices[14] << 26u;
+ block.w |= indices[15] << 29u;
+ }
+ }
+}
+
+// Workgroup size: 8 x 8 x 1 invocations.
+layout(local_size_x = 8,
+ local_size_y = 8,
+ local_size_z = 1) in;
+
+// Entry point: each invocation gathers one 4x4 block of texels from srcTexture,
+// encodes it, and stores the resulting uint4 into dstTexture at the
+// invocation's global XY coordinate.
+// NOTE(review): no bounds check is made before the gathers or the store;
+// presumably the dispatch size / image dimensions guarantee validity - confirm.
+void main() {
+ // gather texels for current 4x4 block
+ // 0 1 2 3
+ // 4 5 6 7
+ // 8 9 10 11
+ // 12 13 14 15
+ // uv addresses the first 2x2 quad of the block; the other three quads are two
+ // texel steps away along x and/or y.
+ float2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp;
+ float2 block0UV = uv;
+ float2 block1UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 0.0f);
+ float2 block2UV = uv + float2(0.0f, 2.0f * params.p_textureSizeRcp.y);
+ float2 block3UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 2.0f * params.p_textureSizeRcp.y);
+ // One gather per quad and per channel. `pointSampler` is only a macro argument:
+ // the OGRE_Gather* macros do not use it in their expansion.
+ float4 block0X = OGRE_GatherRed(srcTexture, pointSampler, block0UV);
+ float4 block1X = OGRE_GatherRed(srcTexture, pointSampler, block1UV);
+ float4 block2X = OGRE_GatherRed(srcTexture, pointSampler, block2UV);
+ float4 block3X = OGRE_GatherRed(srcTexture, pointSampler, block3UV);
+ float4 block0Y = OGRE_GatherGreen(srcTexture, pointSampler, block0UV);
+ float4 block1Y = OGRE_GatherGreen(srcTexture, pointSampler, block1UV);
+ float4 block2Y = OGRE_GatherGreen(srcTexture, pointSampler, block2UV);
+ float4 block3Y = OGRE_GatherGreen(srcTexture, pointSampler, block3UV);
+ float4 block0Z = OGRE_GatherBlue(srcTexture, pointSampler, block0UV);
+ float4 block1Z = OGRE_GatherBlue(srcTexture, pointSampler, block1UV);
+ float4 block2Z = OGRE_GatherBlue(srcTexture, pointSampler, block2UV);
+ float4 block3Z = OGRE_GatherBlue(srcTexture, pointSampler, block3UV);
+
+ // Reassemble RGB texels: within each quad the gathered components are taken in
+ // the order w, z (first row) then x, y (second row).
+ float3 texels[16];
+ texels[0] = float3(block0X.w, block0Y.w, block0Z.w);
+ texels[1] = float3(block0X.z, block0Y.z, block0Z.z);
+ texels[2] = float3(block1X.w, block1Y.w, block1Z.w);
+ texels[3] = float3(block1X.z, block1Y.z, block1Z.z);
+ texels[4] = float3(block0X.x, block0Y.x, block0Z.x);
+ texels[5] = float3(block0X.y, block0Y.y, block0Z.y);
+ texels[6] = float3(block1X.x, block1Y.x, block1Z.x);
+ texels[7] = float3(block1X.y, block1Y.y, block1Z.y);
+ texels[8] = float3(block2X.w, block2Y.w, block2Z.w);
+ texels[9] = float3(block2X.z, block2Y.z, block2Z.z);
+ texels[10] = float3(block3X.w, block3Y.w, block3Z.w);
+ texels[11] = float3(block3X.z, block3Y.z, block3Z.z);
+ texels[12] = float3(block2X.x, block2Y.x, block2Z.x);
+ texels[13] = float3(block2X.y, block2Y.y, block2Z.y);
+ texels[14] = float3(block3X.x, block3Y.x, block3Z.x);
+ texels[15] = float3(block3X.y, block3Y.y, block3Z.y);
+
+ // EncodeP1 ORs into block.y/z/w, so the block must start zeroed.
+ uint4 block = uint4(0u, 0u, 0u, 0u);
+ float blockMSLE = 0.0f;
+
+ // Single-subset encoding first; it also sets the error to beat.
+ EncodeP1(block, blockMSLE, texels);
+
+#ifdef QUALITY
+ // Pick the partition with the lowest score among all 32 and try a two-subset
+ // encoding with it; EncodeP2Pattern keeps the earlier block unless it is better.
+ float bestScore = EvaluateP2Pattern(0, texels);
+ uint bestPattern = 0;
+
+ for (uint i = 1u; i < 32u; ++i) {
+ float score = EvaluateP2Pattern(i, texels);
+
+ if (score < bestScore) {
+ bestPattern = i;
+ bestScore = score;
+ }
+ }
+
+ EncodeP2Pattern(block, blockMSLE, bestPattern, texels);
+#endif
+
+ imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), block);
+}
diff --git a/modules/betsy/config.py b/modules/betsy/config.py
new file mode 100644
index 0000000000..eb565b85b9
--- /dev/null
+++ b/modules/betsy/config.py
@@ -0,0 +1,6 @@
+def can_build(env, platform):
+ return env.editor_build
+
+
+def configure(env):
+ pass
diff --git a/modules/betsy/image_compress_betsy.cpp b/modules/betsy/image_compress_betsy.cpp
new file mode 100644
index 0000000000..6a0862e729
--- /dev/null
+++ b/modules/betsy/image_compress_betsy.cpp
@@ -0,0 +1,354 @@
+/**************************************************************************/
+/* image_compress_betsy.cpp */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#include "image_compress_betsy.h"
+
+#include "servers/rendering/rendering_device_binds.h"
+#include "servers/rendering/rendering_server_default.h"
+
+#if defined(VULKAN_ENABLED)
+#include "drivers/vulkan/rendering_context_driver_vulkan.h"
+#endif
+
+#include "bc6h.glsl.gen.h"
+
+// Push-constant block handed to the BC6H compute shader by _compress_betsy().
+// sizeX / sizeY are filled with 1.0f / width and 1.0f / height of the mip level
+// being encoded; the two padding words are always written as 0.
+// NOTE(review): the member order and padding presumably mirror the push-constant
+// layout declared in bc6h.glsl -- confirm against the shader before changing.
+struct BC6PushConstant {
+ float sizeX;
+ float sizeY;
+ uint32_t padding[2];
+};
+
+// Rounds `n` up to the nearest multiple of `m`.
+//
+// A value that is already a multiple of `m` is returned unchanged, so dividing
+// the result by `m` yields ceil(n / m) without an extra, empty unit of work.
+// `n` is expected to be non-negative and `m` strictly positive.
+static int get_next_multiple(int n, int m) {
+	const int remainder = n % m;
+	return remainder == 0 ? n : n + (m - remainder);
+}
+
+// Scans the raw pixel words of a half-float or full-float image and reports
+// whether any of them has its sign bit set together with a non-zero remainder
+// (so a negative zero does not count). Every other format yields false.
+static bool is_image_signed(const Image *r_img) {
+	const bool is_half_float = r_img->get_format() >= Image::FORMAT_RH && r_img->get_format() <= Image::FORMAT_RGBAH;
+
+	if (is_half_float) {
+		// 16-bit words: bit 15 is the sign, the low 15 bits are everything else.
+		const uint16_t *words = reinterpret_cast<const uint16_t *>(r_img->ptr());
+		const uint64_t word_count = r_img->get_data_size() / 2;
+
+		for (uint64_t idx = 0; idx < word_count; idx++) {
+			const uint16_t bits = words[idx];
+			if ((bits & 0x8000) == 0) {
+				continue;
+			}
+			if ((bits & 0x7fff) != 0) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+
+	const bool is_full_float = r_img->get_format() >= Image::FORMAT_RF && r_img->get_format() <= Image::FORMAT_RGBAF;
+
+	if (is_full_float) {
+		// 32-bit words: bit 31 is the sign, the low 31 bits are everything else.
+		const uint32_t *words = reinterpret_cast<const uint32_t *>(r_img->ptr());
+		const uint64_t word_count = r_img->get_data_size() / 4;
+
+		for (uint64_t idx = 0; idx < word_count; idx++) {
+			const uint32_t bits = words[idx];
+			if ((bits & 0x80000000) == 0) {
+				continue;
+			}
+			if ((bits & 0x7fffffff) != 0) {
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+// Compresses `r_img` in place into the block format selected by `p_format`, running
+// the encoder as a compute shader on a local RenderingDevice.
+//
+// Only BETSY_FORMAT_BC6 is handled. The destination is Image::FORMAT_BPTC_RGBF when
+// is_image_signed() reports negative values and Image::FORMAT_BPTC_RGBFU otherwise.
+// RGB half/float images are first converted to their RGBA counterparts.
+//
+// Returns:
+// - OK once the image data has been replaced by the compressed data.
+// - ERR_INVALID_DATA if the image is already compressed, is not an HDR format, or
+//   has no matching GPU source format.
+// - ERR_INVALID_PARAMETER if `p_format` is not a known BetsyFormat.
+// - ERR_CANT_CREATE (or the driver's own error) if no local RenderingDevice could
+//   be created, or if the shader could not be created.
+//
+// Every failure path returns a non-OK code so that a caller can tell the image was
+// left uncompressed.
+Error _compress_betsy(BetsyFormat p_format, Image *r_img) {
+	uint64_t start_time = OS::get_singleton()->get_ticks_msec();
+
+	if (r_img->is_compressed()) {
+		return ERR_INVALID_DATA;
+	}
+
+	ERR_FAIL_COND_V_MSG(r_img->get_format() < Image::FORMAT_RF || r_img->get_format() > Image::FORMAT_RGBE9995, ERR_INVALID_DATA, "Image is not an HDR image.");
+
+	Error err = OK;
+
+	// Create local RD.
+	RenderingContextDriver *rcd = nullptr;
+	RenderingDevice *rd = RenderingServer::get_singleton()->create_local_rendering_device();
+
+	if (rd == nullptr) {
+		// The rendering server could not provide a device; try to bring up our own
+		// context driver and device. `rcd` stays null when the server's device is used.
+#if defined(RD_ENABLED)
+#if defined(VULKAN_ENABLED)
+		rcd = memnew(RenderingContextDriverVulkan);
+		rd = memnew(RenderingDevice);
+#endif
+#endif
+		if (rcd != nullptr && rd != nullptr) {
+			err = rcd->initialize();
+			if (err == OK) {
+				err = rd->initialize(rcd);
+			}
+
+			if (err != OK) {
+				memdelete(rd);
+				memdelete(rcd);
+				rd = nullptr;
+				rcd = nullptr;
+			}
+		}
+	}
+
+	// `err` is still OK when no driver was compiled in, so substitute a real error
+	// code instead of reporting success without a device.
+	ERR_FAIL_NULL_V_MSG(rd, err != OK ? err : ERR_CANT_CREATE, "Unable to create a local RenderingDevice.");
+
+	Ref<RDShaderFile> compute_shader;
+	compute_shader.instantiate();
+
+	// Destination format.
+	Image::Format dest_format = Image::FORMAT_MAX;
+
+	String version = "";
+
+	switch (p_format) {
+		case BETSY_FORMAT_BC6: {
+			err = compute_shader->parse_versions_from_text(bc6h_shader_glsl);
+
+			// The shader version name selects between the signed and unsigned encoder.
+			if (is_image_signed(r_img)) {
+				dest_format = Image::FORMAT_BPTC_RGBF;
+				version = "signed";
+			} else {
+				dest_format = Image::FORMAT_BPTC_RGBFU;
+				version = "unsigned";
+			}
+
+		} break;
+
+		default:
+			err = ERR_INVALID_PARAMETER;
+			break;
+	}
+
+	if (err != OK) {
+		memdelete(rd);
+		if (rcd != nullptr) {
+			memdelete(rcd);
+		}
+
+		return err;
+	}
+
+	// Compile the shader, return early if invalid.
+	RID shader = rd->shader_create_from_spirv(compute_shader->get_spirv_stages(version));
+
+	if (shader.is_null()) {
+		memdelete(rd);
+		if (rcd != nullptr) {
+			memdelete(rcd);
+		}
+
+		// `err` is OK at this point; report the failure explicitly.
+		return ERR_CANT_CREATE;
+	}
+
+	RID pipeline = rd->compute_pipeline_create(shader);
+
+	// src_texture format information.
+	RD::TextureFormat src_texture_format;
+	{
+		src_texture_format.array_layers = 1;
+		src_texture_format.depth = 1;
+		src_texture_format.mipmaps = 1;
+		src_texture_format.texture_type = RD::TEXTURE_TYPE_2D;
+		src_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
+	}
+
+	// Map the image format to the GPU data format used for the source texture.
+	switch (r_img->get_format()) {
+		case Image::FORMAT_RH:
+			src_texture_format.format = RD::DATA_FORMAT_R16_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGH:
+			src_texture_format.format = RD::DATA_FORMAT_R16G16_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGBH:
+			// Three-channel data is expanded to four channels before upload.
+			r_img->convert(Image::FORMAT_RGBAH);
+			src_texture_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGBAH:
+			src_texture_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
+			break;
+
+		case Image::FORMAT_RF:
+			src_texture_format.format = RD::DATA_FORMAT_R32_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGF:
+			src_texture_format.format = RD::DATA_FORMAT_R32G32_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGBF:
+			// Three-channel data is expanded to four channels before upload.
+			r_img->convert(Image::FORMAT_RGBAF);
+			src_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGBAF:
+			src_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGBE9995:
+			src_texture_format.format = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32;
+			break;
+
+		default: {
+			rd->free(shader);
+
+			memdelete(rd);
+			if (rcd != nullptr) {
+				memdelete(rcd);
+			}
+
+			// `err` is OK at this point; an unsupported source format is a failure.
+			return ERR_INVALID_DATA;
+		}
+	}
+
+	// Create the sampler state.
+	RD::SamplerState src_sampler_state;
+	{
+		src_sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
+		src_sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
+		src_sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST;
+		src_sampler_state.min_filter = RD::SAMPLER_FILTER_NEAREST;
+		src_sampler_state.mip_filter = RD::SAMPLER_FILTER_NEAREST;
+	}
+
+	RID src_sampler = rd->sampler_create(src_sampler_state);
+
+	// For the destination format just copy the source format and change the usage bits.
+	RD::TextureFormat dst_texture_format = src_texture_format;
+	dst_texture_format.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
+	dst_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_UINT;
+
+	const int mip_count = r_img->get_mipmap_count() + 1;
+
+	// Container for the compressed data.
+	Vector<uint8_t> dst_data;
+	dst_data.resize(Image::get_image_data_size(r_img->get_width(), r_img->get_height(), dest_format, r_img->has_mipmaps()));
+	uint8_t *dst_data_ptr = dst_data.ptrw();
+
+	// Single-entry list reused for every mip level's source data.
+	Vector<Vector<uint8_t>> src_images;
+	src_images.push_back(Vector<uint8_t>());
+	Vector<uint8_t> *src_image_ptr = src_images.ptrw();
+
+	// Compress each mipmap.
+	for (int i = 0; i < mip_count; i++) {
+		int64_t ofs, size;
+		int width, height;
+		r_img->get_mipmap_offset_size_and_dimensions(i, ofs, size, width, height);
+
+		// Set the source texture width and height.
+		src_texture_format.height = height;
+		src_texture_format.width = width;
+
+		// Set the destination texture width and height: one texel per 4x4 block, rounded up.
+		dst_texture_format.height = (height + 3) >> 2;
+		dst_texture_format.width = (width + 3) >> 2;
+
+		// Create a buffer filled with the source mip layer data.
+		src_image_ptr[0].resize(size);
+		memcpy(src_image_ptr[0].ptrw(), r_img->ptr() + ofs, size);
+
+		// Create the textures on the GPU.
+		RID src_texture = rd->texture_create(src_texture_format, RD::TextureView(), src_images);
+		RID dst_texture = rd->texture_create(dst_texture_format, RD::TextureView());
+
+		if (dest_format == Image::FORMAT_BPTC_RGBFU || dest_format == Image::FORMAT_BPTC_RGBF) {
+			// The shader receives the reciprocal of the mip level's dimensions.
+			BC6PushConstant push_constant;
+			push_constant.sizeX = 1.0f / width;
+			push_constant.sizeY = 1.0f / height;
+			push_constant.padding[0] = 0;
+			push_constant.padding[1] = 0;
+
+			Vector<RD::Uniform> uniforms;
+			{
+				{
+					// Binding 0: source texture with its sampler.
+					RD::Uniform u;
+					u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
+					u.binding = 0;
+					u.append_id(src_sampler);
+					u.append_id(src_texture);
+					uniforms.push_back(u);
+				}
+				{
+					// Binding 1: destination image receiving the encoded blocks.
+					RD::Uniform u;
+					u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+					u.binding = 1;
+					u.append_id(dst_texture);
+					uniforms.push_back(u);
+				}
+			}
+
+			RID uniform_set = rd->uniform_set_create(uniforms, shader, 0);
+			RD::ComputeListID compute_list = rd->compute_list_begin();
+
+			rd->compute_list_bind_compute_pipeline(compute_list, pipeline);
+			rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
+			rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant));
+			rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
+			rd->compute_list_end();
+		}
+
+		rd->submit();
+		rd->sync();
+
+		// Copy data from the GPU to the buffer.
+		const Vector<uint8_t> texture_data = rd->texture_get_data(dst_texture, 0);
+		int64_t dst_ofs = Image::get_image_mipmap_offset(r_img->get_width(), r_img->get_height(), dest_format, i);
+
+		memcpy(dst_data_ptr + dst_ofs, texture_data.ptr(), texture_data.size());
+
+		// Free the source and dest texture.
+		rd->free(dst_texture);
+		rd->free(src_texture);
+	}
+
+	src_images.clear();
+
+	// Set the compressed data to the image.
+	r_img->set_data(r_img->get_width(), r_img->get_height(), r_img->has_mipmaps(), dest_format, dst_data);
+
+	// Free the shader (dependencies will be cleared automatically).
+	rd->free(src_sampler);
+	rd->free(shader);
+
+	memdelete(rd);
+	if (rcd != nullptr) {
+		memdelete(rcd);
+	}
+
+	print_verbose(vformat("Betsy: Encoding took %d ms.", OS::get_singleton()->get_ticks_msec() - start_time));
+
+	return OK;
+}
+
+// BPTC entry point: images whose format lies between FORMAT_RF and FORMAT_RGBE9995
+// are forwarded to the BC6 encoder; anything else is reported as ERR_UNAVAILABLE.
+// `p_channels` is accepted but not consulted.
+Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels) {
+	const Image::Format source_format = r_img->get_format();
+	const bool is_hdr = source_format >= Image::FORMAT_RF && source_format <= Image::FORMAT_RGBE9995;
+
+	if (!is_hdr) {
+		return ERR_UNAVAILABLE;
+	}
+
+	return _compress_betsy(BETSY_FORMAT_BC6, r_img);
+}
diff --git a/modules/betsy/image_compress_betsy.h b/modules/betsy/image_compress_betsy.h
new file mode 100644
index 0000000000..a64e586c76
--- /dev/null
+++ b/modules/betsy/image_compress_betsy.h
@@ -0,0 +1,44 @@
+/**************************************************************************/
+/* image_compress_betsy.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#ifndef IMAGE_COMPRESS_BETSY_H
+#define IMAGE_COMPRESS_BETSY_H
+
+#include "core/io/image.h"
+
+// Target block-compression formats that can be requested from _compress_betsy().
+enum BetsyFormat {
+ BETSY_FORMAT_BC6, // Encoded with the BC6H compute shader.
+};
+
+// Compresses r_img in place into p_format on a local RenderingDevice.
+// Returns OK once the image data has been replaced with the compressed data.
+Error _compress_betsy(BetsyFormat p_format, Image *r_img);
+
+// BPTC compression hook: forwards HDR images to _compress_betsy() as BC6 and
+// returns ERR_UNAVAILABLE for any other image format.
+Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels);
+
+#endif // IMAGE_COMPRESS_BETSY_H
diff --git a/modules/betsy/register_types.cpp b/modules/betsy/register_types.cpp
new file mode 100644
index 0000000000..019099e67c
--- /dev/null
+++ b/modules/betsy/register_types.cpp
@@ -0,0 +1,47 @@
+/**************************************************************************/
+/* register_types.cpp */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#include "register_types.h"
+
+#include "image_compress_betsy.h"
+
+// Installs the Betsy BPTC compressor as Image's RD-based BPTC compression hook.
+// Only the scene initialization level does anything; all other levels are no-ops.
+void initialize_betsy_module(ModuleInitializationLevel p_level) {
+	if (p_level == MODULE_INITIALIZATION_LEVEL_SCENE) {
+		Image::_image_compress_bptc_rd_func = _betsy_compress_bptc;
+	}
+}
+
+// Module teardown hook. Nothing is released or unregistered at any level.
+void uninitialize_betsy_module(ModuleInitializationLevel p_level) {
+	if (p_level == MODULE_INITIALIZATION_LEVEL_SCENE) {
+		// Intentionally empty: the module has no scene-level teardown work.
+	}
+}
diff --git a/modules/betsy/register_types.h b/modules/betsy/register_types.h
new file mode 100644
index 0000000000..0ce6c553b6
--- /dev/null
+++ b/modules/betsy/register_types.h
@@ -0,0 +1,39 @@
+/**************************************************************************/
+/* register_types.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#ifndef BETSY_REGISTER_TYPES_H
+#define BETSY_REGISTER_TYPES_H
+
+#include "modules/register_module_types.h"
+
+// Module entry points, called with each module initialization level.
+// initialize_betsy_module() registers the Betsy BPTC compressor at the scene level.
+void initialize_betsy_module(ModuleInitializationLevel p_level);
+// uninitialize_betsy_module() currently performs no work.
+void uninitialize_betsy_module(ModuleInitializationLevel p_level);
+
+#endif // BETSY_REGISTER_TYPES_H
diff --git a/modules/cvtt/image_compress_cvtt.cpp b/modules/cvtt/image_compress_cvtt.cpp
index 4938d8bff5..ccc7dfd7ce 100644
--- a/modules/cvtt/image_compress_cvtt.cpp
+++ b/modules/cvtt/image_compress_cvtt.cpp
@@ -142,9 +142,12 @@ static void _digest_job_queue(void *p_job_queue, uint32_t p_index) {
}
void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) {
+ uint64_t start_time = OS::get_singleton()->get_ticks_msec();
+
if (p_image->is_compressed()) {
return; //do not compress, already compressed
}
+
int w = p_image->get_width();
int h = p_image->get_height();
@@ -250,6 +253,8 @@ void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) {
WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
p_image->set_data(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
+
+ print_verbose(vformat("CVTT: Encoding took %d ms.", OS::get_singleton()->get_ticks_msec() - start_time));
}
void image_decompress_cvtt(Image *p_image) {