diff options
Diffstat (limited to 'modules')
-rw-r--r-- | modules/betsy/CrossPlatformSettings_piece_all.glsl | 76 | ||||
-rw-r--r-- | modules/betsy/SCsub | 24 | ||||
-rw-r--r-- | modules/betsy/UavCrossPlatform_piece_all.glsl | 17 | ||||
-rw-r--r-- | modules/betsy/bc6h.glsl | 653 | ||||
-rw-r--r-- | modules/betsy/config.py | 6 | ||||
-rw-r--r-- | modules/betsy/image_compress_betsy.cpp | 354 | ||||
-rw-r--r-- | modules/betsy/image_compress_betsy.h | 44 | ||||
-rw-r--r-- | modules/betsy/register_types.cpp | 47 | ||||
-rw-r--r-- | modules/betsy/register_types.h | 39 | ||||
-rw-r--r-- | modules/cvtt/image_compress_cvtt.cpp | 5 |
10 files changed, 1265 insertions, 0 deletions
diff --git a/modules/betsy/CrossPlatformSettings_piece_all.glsl b/modules/betsy/CrossPlatformSettings_piece_all.glsl new file mode 100644 index 0000000000..b7abac7fcc --- /dev/null +++ b/modules/betsy/CrossPlatformSettings_piece_all.glsl @@ -0,0 +1,76 @@ + +#define min3(a, b, c) min(a, min(b, c)) +#define max3(a, b, c) max(a, max(b, c)) + +#define float2 vec2 +#define float3 vec3 +#define float4 vec4 + +#define int2 ivec2 +#define int3 ivec3 +#define int4 ivec4 + +#define uint2 uvec2 +#define uint3 uvec3 +#define uint4 uvec4 + +#define float2x2 mat2 +#define float3x3 mat3 +#define float4x4 mat4 +#define ogre_float4x3 mat3x4 + +#define ushort uint +#define ushort3 uint3 +#define ushort4 uint4 + +//Short used for read operations. It's an int in GLSL & HLSL. An ushort in Metal +#define rshort int +#define rshort2 int2 +#define rint int +//Short used for write operations. It's an int in GLSL. An ushort in HLSL & Metal +#define wshort2 int2 +#define wshort3 int3 + +#define toFloat3x3(x) mat3(x) +#define buildFloat3x3(row0, row1, row2) mat3(row0, row1, row2) + +#define mul(x, y) ((x) * (y)) +#define saturate(x) clamp((x), 0.0, 1.0) +#define lerp mix +#define rsqrt inversesqrt +#define INLINE +#define NO_INTERPOLATION_PREFIX flat +#define NO_INTERPOLATION_SUFFIX + +#define PARAMS_ARG_DECL +#define PARAMS_ARG + +#define reversebits bitfieldReverse + +#define OGRE_Sample(tex, sampler, uv) texture(tex, uv) +#define OGRE_SampleLevel(tex, sampler, uv, lod) textureLod(tex, uv, lod) +#define OGRE_SampleArray2D(tex, sampler, uv, arrayIdx) texture(tex, vec3(uv, arrayIdx)) +#define OGRE_SampleArray2DLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec3(uv, arrayIdx), lod) +#define OGRE_SampleArrayCubeLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec4(uv, arrayIdx), lod) +#define OGRE_SampleGrad(tex, sampler, uv, ddx, ddy) textureGrad(tex, uv, ddx, ddy) +#define OGRE_SampleArray2DGrad(tex, sampler, uv, arrayIdx, ddx, ddy) textureGrad(tex, vec3(uv, arrayIdx), ddx, ddy) +#define OGRE_ddx(val) dFdx(val) +#define OGRE_ddy(val) dFdy(val) +#define OGRE_Load2D(tex, iuv, lod) texelFetch(tex, iuv, lod) +#define OGRE_LoadArray2D(tex, iuv, arrayIdx, lod) texelFetch(tex, ivec3(iuv, arrayIdx), lod) +#define OGRE_Load2DMS(tex, iuv, subsample) texelFetch(tex, iuv, subsample) + +#define OGRE_Load3D(tex, iuv, lod) texelFetch(tex, ivec3(iuv), lod) + +#define OGRE_GatherRed(tex, sampler, uv) textureGather(tex, uv, 0) +#define OGRE_GatherGreen(tex, sampler, uv) textureGather(tex, uv, 1) +#define OGRE_GatherBlue(tex, sampler, uv) textureGather(tex, uv, 2) + +#define bufferFetch1(buffer, idx) texelFetch(buffer, idx).x + +#define OGRE_SAMPLER_ARG_DECL(samplerName) +#define OGRE_SAMPLER_ARG(samplerName) + +#define OGRE_Texture3D_float4 sampler3D +#define OGRE_OUT_REF(declType, variableName) out declType variableName +#define OGRE_INOUT_REF(declType, variableName) inout declType variableName diff --git a/modules/betsy/SCsub b/modules/betsy/SCsub new file mode 100644 index 0000000000..9930e1f4cf --- /dev/null +++ b/modules/betsy/SCsub @@ -0,0 +1,24 @@ +# !/ usr / bin / env python +Import("env") +Import("env_modules") + +env_betsy = env_modules.Clone() +env_betsy.GLSL_HEADER("bc6h.glsl") +env_betsy.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"]) + +# Thirdparty source files +thirdparty_obj = [] +thirdparty_dir = "#thirdparty/betsy/" +env_betsy.Prepend(CPPPATH=[thirdparty_dir]) + +env_thirdparty = env_betsy.Clone() +env_thirdparty.disable_warnings() +env.modules_sources += thirdparty_obj + +# Godot source files +module_obj = [] +env_betsy.add_source_files(module_obj, "*.cpp") +env.modules_sources += module_obj + +# Needed to force rebuilding the module files when the thirdparty library is updated. +env.Depends(module_obj, thirdparty_obj) diff --git a/modules/betsy/UavCrossPlatform_piece_all.glsl b/modules/betsy/UavCrossPlatform_piece_all.glsl new file mode 100644 index 0000000000..30854df637 --- /dev/null +++ b/modules/betsy/UavCrossPlatform_piece_all.glsl @@ -0,0 +1,17 @@ + +#define OGRE_imageLoad2D(inImage, iuv) imageLoad(inImage, int2(iuv)) +#define OGRE_imageLoad2DArray(inImage, iuvw) imageLoad(inImage, int3(iuvw)) + +#define OGRE_imageWrite2D1(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0, 0)) +#define OGRE_imageWrite2D2(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0)) +#define OGRE_imageWrite2D4(outImage, iuv, value) imageStore(outImage, int2(iuv), value) + +#define OGRE_imageLoad3D(inImage, iuv) imageLoad(inImage, int3(iuv)) + +#define OGRE_imageWrite3D1(outImage, iuv, value) imageStore(outImage, int3(iuv), value) +#define OGRE_imageWrite3D4(outImage, iuv, value) imageStore(outImage, int3(iuv), value) + +#define OGRE_imageWrite2DArray1(outImage, iuvw, value) imageStore(outImage, int3(iuvw), value) +#define OGRE_imageWrite2DArray4(outImage, iuvw, value) imageStore(outImage, int3(iuvw), value) + +//#define sharedOnlyBarrier memoryBarrierShared();barrier(); diff --git a/modules/betsy/bc6h.glsl b/modules/betsy/bc6h.glsl new file mode 100644 index 0000000000..0d10d378fd --- /dev/null +++ b/modules/betsy/bc6h.glsl @@ -0,0 +1,653 @@ +#[versions] + +signed = "#define SIGNED"; +unsigned = ""; + +#[compute] +#version 450 + +#include "CrossPlatformSettings_piece_all.glsl" +#include "UavCrossPlatform_piece_all.glsl" + +#VERSION_DEFINES +#define QUALITY + +//SIGNED macro is WIP +//#define SIGNED + +float3 f32tof16(float3 value) { + return float3(packHalf2x16(float2(value.x, 0.0)), + packHalf2x16(float2(value.y, 0.0)), + packHalf2x16(float2(value.z, 0.0))); +} + +float3 f16tof32(uint3 value) { + return float3(unpackHalf2x16(value.x).x, + unpackHalf2x16(value.y).x, + unpackHalf2x16(value.z).x); +} + +float f32tof16(float value) { + return packHalf2x16(float2(value.x, 0.0)); +} + +float f16tof32(uint value) { + return unpackHalf2x16(value.x).x; +} + +layout(binding = 0) uniform sampler2D srcTexture; +layout(binding = 1, rgba32ui) uniform restrict writeonly uimage2D dstTexture; + +layout(push_constant, std430) uniform Params { + float2 p_textureSizeRcp; + uint padding0; + uint padding1; +} +params; + +const float HALF_MAX = 65504.0f; +const uint PATTERN_NUM = 32u; + +float CalcMSLE(float3 a, float3 b) { + float3 err = log2((b + 1.0f) / (a + 1.0f)); + err = err * err; + return err.x + err.y + err.z; +} + +uint PatternFixupID(uint i) { + uint ret = 15u; + ret = ((3441033216u >> i) & 0x1u) != 0 ? 2u : ret; + ret = ((845414400u >> i) & 0x1u) != 0 ? 8u : ret; + return ret; +} + +uint Pattern(uint p, uint i) { + uint p2 = p / 2u; + uint p3 = p - p2 * 2u; + + uint enc = 0u; + enc = p2 == 0u ? 2290666700u : enc; + enc = p2 == 1u ? 3972591342u : enc; + enc = p2 == 2u ? 4276930688u : enc; + enc = p2 == 3u ? 3967876808u : enc; + enc = p2 == 4u ? 4293707776u : enc; + enc = p2 == 5u ? 3892379264u : enc; + enc = p2 == 6u ? 4278255592u : enc; + enc = p2 == 7u ? 4026597360u : enc; + enc = p2 == 8u ? 9369360u : enc; + enc = p2 == 9u ? 147747072u : enc; + enc = p2 == 10u ? 1930428556u : enc; + enc = p2 == 11u ? 2362323200u : enc; + enc = p2 == 12u ? 823134348u : enc; + enc = p2 == 13u ? 913073766u : enc; + enc = p2 == 14u ? 267393000u : enc; + enc = p2 == 15u ? 966553998u : enc; + + enc = p3 != 0u ? enc >> 16u : enc; + uint ret = (enc >> i) & 0x1u; + return ret; +} + +#ifndef SIGNED +//UF +float3 Quantize7(float3 x) { + return (f32tof16(x) * 128.0f) / (0x7bff + 1.0f); +} + +float3 Quantize9(float3 x) { + return (f32tof16(x) * 512.0f) / (0x7bff + 1.0f); +} + +float3 Quantize10(float3 x) { + return (f32tof16(x) * 1024.0f) / (0x7bff + 1.0f); +} + +float3 Unquantize7(float3 x) { + return (x * 65536.0f + 0x8000) / 128.0f; +} + +float3 Unquantize9(float3 x) { + return (x * 65536.0f + 0x8000) / 512.0f; +} + +float3 Unquantize10(float3 x) { + return (x * 65536.0f + 0x8000) / 1024.0f; +} + +float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) { + float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f); + return f16tof32(uint3(comp)); +} +#else +//SF + +float3 cmpSign(float3 value) { + float3 signVal; + signVal.x = value.x >= 0.0f ? 1.0f : -1.0f; + signVal.y = value.y >= 0.0f ? 1.0f : -1.0f; + signVal.z = value.z >= 0.0f ? 1.0f : -1.0f; + return signVal; +} + +float3 Quantize7(float3 x) { + float3 signVal = cmpSign(x); + return signVal * (f32tof16(abs(x)) * 64.0f) / (0x7bff + 1.0f); +} + +float3 Quantize9(float3 x) { + float3 signVal = cmpSign(x); + return signVal * (f32tof16(abs(x)) * 256.0f) / (0x7bff + 1.0f); +} + +float3 Quantize10(float3 x) { + float3 signVal = cmpSign(x); + return signVal * (f32tof16(abs(x)) * 512.0f) / (0x7bff + 1.0f); +} + +float3 Unquantize7(float3 x) { + float3 signVal = sign(x); + x = abs(x); + float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 64.0f; + finalVal.x = x.x >= 64.0f ? 32767.0 : finalVal.x; + finalVal.y = x.y >= 64.0f ? 32767.0 : finalVal.y; + finalVal.z = x.z >= 64.0f ? 32767.0 : finalVal.z; + return finalVal; +} + +float3 Unquantize9(float3 x) { + float3 signVal = sign(x); + x = abs(x); + float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 256.0f; + finalVal.x = x.x >= 256.0f ? 32767.0 : finalVal.x; + finalVal.y = x.y >= 256.0f ? 32767.0 : finalVal.y; + finalVal.z = x.z >= 256.0f ? 32767.0 : finalVal.z; + return finalVal; +} + +float3 Unquantize10(float3 x) { + float3 signVal = sign(x); + x = abs(x); + float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 512.0f; + finalVal.x = x.x >= 512.0f ? 32767.0 : finalVal.x; + finalVal.y = x.y >= 512.0f ? 32767.0 : finalVal.y; + finalVal.z = x.z >= 512.0f ? 32767.0 : finalVal.z; + return finalVal; +} + +float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) { + float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 2048.0f); + /*float3 signVal; + signVal.x = comp.x >= 0.0f ? 0.0f : 0x8000; + signVal.y = comp.y >= 0.0f ? 0.0f : 0x8000; + signVal.z = comp.z >= 0.0f ? 0.0f : 0x8000;*/ + //return f16tof32( uint3( signVal + abs( comp ) ) ); + return f16tof32(uint3(comp)); +} +#endif + +void Swap(inout float3 a, inout float3 b) { + float3 tmp = a; + a = b; + b = tmp; +} + +void Swap(inout float a, inout float b) { + float tmp = a; + a = b; + b = tmp; +} + +uint ComputeIndex3(float texelPos, float endPoint0Pos, float endPoint1Pos) { + float r = (texelPos - endPoint0Pos) / (endPoint1Pos - endPoint0Pos); + return uint(clamp(r * 6.98182f + 0.00909f + 0.5f, 0.0f, 7.0f)); +} + +uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos) { + float r = (texelPos - endPoint0Pos) / (endPoint1Pos - endPoint0Pos); + return uint(clamp(r * 14.93333f + 0.03333f + 0.5f, 0.0f, 15.0f)); +} + +void SignExtend(inout float3 v1, uint mask, uint signFlag) { + int3 v = int3(v1); + v.x = (v.x & int(mask)) | (v.x < 0 ? int(signFlag) : 0); + v.y = (v.y & int(mask)) | (v.y < 0 ? int(signFlag) : 0); + v.z = (v.z & int(mask)) | (v.z < 0 ? int(signFlag) : 0); + v1 = v; +} + +void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) { + // compute endpoints (min/max RGB bbox) + float3 blockMin = texels[0]; + float3 blockMax = texels[0]; + for (uint i = 1u; i < 16u; ++i) { + blockMin = min(blockMin, texels[i]); + blockMax = max(blockMax, texels[i]); + } + + // refine endpoints in log2 RGB space + float3 refinedBlockMin = blockMax; + float3 refinedBlockMax = blockMin; + for (uint i = 0u; i < 16u; ++i) { + refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]); + refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]); + } + + float3 logBlockMax = log2(blockMax + 1.0f); + float3 logBlockMin = log2(blockMin + 1.0f); + float3 logRefinedBlockMax = log2(refinedBlockMax + 1.0f); + float3 logRefinedBlockMin = log2(refinedBlockMin + 1.0f); + float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f); + logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt); + logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt); + blockMin = exp2(logBlockMin) - 1.0f; + blockMax = exp2(logBlockMax) - 1.0f; + + float3 blockDir = blockMax - blockMin; + blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z); + + float3 endpoint0 = Quantize10(blockMin); + float3 endpoint1 = Quantize10(blockMax); + float endPoint0Pos = f32tof16(dot(blockMin, blockDir)); + float endPoint1Pos = f32tof16(dot(blockMax, blockDir)); + + // check if endpoint swap is required + float fixupTexelPos = f32tof16(dot(texels[0], blockDir)); + uint fixupIndex = ComputeIndex4(fixupTexelPos, endPoint0Pos, endPoint1Pos); + if (fixupIndex > 7) { + Swap(endPoint0Pos, endPoint1Pos); + Swap(endpoint0, endpoint1); + } + + // compute indices + uint indices[16] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u }; + for (uint i = 0u; i < 16u; ++i) { + float texelPos = f32tof16(dot(texels[i], blockDir)); + indices[i] = ComputeIndex4(texelPos, endPoint0Pos, endPoint1Pos); + } + + // compute compression error (MSLE) + float3 endpoint0Unq = Unquantize10(endpoint0); + float3 endpoint1Unq = Unquantize10(endpoint1); + float msle = 0.0f; + for (uint i = 0u; i < 16u; ++i) { + float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f); + float3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight); + + msle += CalcMSLE(texels[i], texelUnc); + } + + // encode block for mode 11 + blockMSLE = msle; + block.x = 0x03; + + // endpoints + block.x |= uint(endpoint0.x) << 5u; + block.x |= uint(endpoint0.y) << 15u; + block.x |= uint(endpoint0.z) << 25u; + block.y |= uint(endpoint0.z) >> 7u; + block.y |= uint(endpoint1.x) << 3u; + block.y |= uint(endpoint1.y) << 13u; + block.y |= uint(endpoint1.z) << 23u; + block.z |= uint(endpoint1.z) >> 9u; + + // indices + block.z |= indices[0] << 1u; + block.z |= indices[1] << 4u; + block.z |= indices[2] << 8u; + block.z |= indices[3] << 12u; + block.z |= indices[4] << 16u; + block.z |= indices[5] << 20u; + block.z |= indices[6] << 24u; + block.z |= indices[7] << 28u; + block.w |= indices[8] << 0u; + block.w |= indices[9] << 4u; + block.w |= indices[10] << 8u; + block.w |= indices[11] << 12u; + block.w |= indices[12] << 16u; + block.w |= indices[13] << 20u; + block.w |= indices[14] << 24u; + block.w |= indices[15] << 28u; +} + +float DistToLineSq(float3 PointOnLine, float3 LineDirection, float3 Point) { + float3 w = Point - PointOnLine; + float3 x = w - dot(w, LineDirection) * LineDirection; + + return dot(x, x); +} + +float EvaluateP2Pattern(uint pattern, float3 texels[16]) { + float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); + float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f); + float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); + float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f); + + for (uint i = 0; i < 16; ++i) { + uint paletteID = Pattern(pattern, i); + if (paletteID == 0) { + p0BlockMin = min(p0BlockMin, texels[i]); + p0BlockMax = max(p0BlockMax, texels[i]); + } else { + p1BlockMin = min(p1BlockMin, texels[i]); + p1BlockMax = max(p1BlockMax, texels[i]); + } + } + + float3 p0BlockDir = normalize(p0BlockMax - p0BlockMin); + float3 p1BlockDir = normalize(p1BlockMax - p1BlockMin); + + float sqDistanceFromLine = 0.0f; + + for (uint i = 0; i < 16; ++i) { + uint paletteID = Pattern(pattern, i); + if (paletteID == 0) { + sqDistanceFromLine += DistToLineSq(p0BlockMin, p0BlockDir, texels[i]); + } else { + sqDistanceFromLine += DistToLineSq(p1BlockMin, p1BlockDir, texels[i]); + } + } + + return sqDistanceFromLine; +} + +void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, float3 texels[16]) { + float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); + float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f); + float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); + float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f); + + for (uint i = 0u; i < 16u; ++i) { + uint paletteID = Pattern(pattern, i); + if (paletteID == 0) { + p0BlockMin = min(p0BlockMin, texels[i]); + p0BlockMax = max(p0BlockMax, texels[i]); + } else { + p1BlockMin = min(p1BlockMin, texels[i]); + p1BlockMax = max(p1BlockMax, texels[i]); + } + } + + float3 p0BlockDir = p0BlockMax - p0BlockMin; + float3 p1BlockDir = p1BlockMax - p1BlockMin; + p0BlockDir = p0BlockDir / (p0BlockDir.x + p0BlockDir.y + p0BlockDir.z); + p1BlockDir = p1BlockDir / (p1BlockDir.x + p1BlockDir.y + p1BlockDir.z); + + float p0Endpoint0Pos = f32tof16(dot(p0BlockMin, p0BlockDir)); + float p0Endpoint1Pos = f32tof16(dot(p0BlockMax, p0BlockDir)); + float p1Endpoint0Pos = f32tof16(dot(p1BlockMin, p1BlockDir)); + float p1Endpoint1Pos = f32tof16(dot(p1BlockMax, p1BlockDir)); + + uint fixupID = PatternFixupID(pattern); + float p0FixupTexelPos = f32tof16(dot(texels[0], p0BlockDir)); + float p1FixupTexelPos = f32tof16(dot(texels[fixupID], p1BlockDir)); + uint p0FixupIndex = ComputeIndex3(p0FixupTexelPos, p0Endpoint0Pos, p0Endpoint1Pos); + uint p1FixupIndex = ComputeIndex3(p1FixupTexelPos, p1Endpoint0Pos, p1Endpoint1Pos); + if (p0FixupIndex > 3u) { + Swap(p0Endpoint0Pos, p0Endpoint1Pos); + Swap(p0BlockMin, p0BlockMax); + } + if (p1FixupIndex > 3u) { + Swap(p1Endpoint0Pos, p1Endpoint1Pos); + Swap(p1BlockMin, p1BlockMax); + } + + uint indices[16] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u }; + for (uint i = 0u; i < 16u; ++i) { + float p0TexelPos = f32tof16(dot(texels[i], p0BlockDir)); + float p1TexelPos = f32tof16(dot(texels[i], p1BlockDir)); + uint p0Index = ComputeIndex3(p0TexelPos, p0Endpoint0Pos, p0Endpoint1Pos); + uint p1Index = ComputeIndex3(p1TexelPos, p1Endpoint0Pos, p1Endpoint1Pos); + + uint paletteID = Pattern(pattern, i); + indices[i] = paletteID == 0u ? p0Index : p1Index; + } + + float3 endpoint760 = floor(Quantize7(p0BlockMin)); + float3 endpoint761 = floor(Quantize7(p0BlockMax)); + float3 endpoint762 = floor(Quantize7(p1BlockMin)); + float3 endpoint763 = floor(Quantize7(p1BlockMax)); + + float3 endpoint950 = floor(Quantize9(p0BlockMin)); + float3 endpoint951 = floor(Quantize9(p0BlockMax)); + float3 endpoint952 = floor(Quantize9(p1BlockMin)); + float3 endpoint953 = floor(Quantize9(p1BlockMax)); + + endpoint761 = endpoint761 - endpoint760; + endpoint762 = endpoint762 - endpoint760; + endpoint763 = endpoint763 - endpoint760; + + endpoint951 = endpoint951 - endpoint950; + endpoint952 = endpoint952 - endpoint950; + endpoint953 = endpoint953 - endpoint950; + + int maxVal76 = 0x1F; + endpoint761 = clamp(endpoint761, -maxVal76, maxVal76); + endpoint762 = clamp(endpoint762, -maxVal76, maxVal76); + endpoint763 = clamp(endpoint763, -maxVal76, maxVal76); + + int maxVal95 = 0xF; + endpoint951 = clamp(endpoint951, -maxVal95, maxVal95); + endpoint952 = clamp(endpoint952, -maxVal95, maxVal95); + endpoint953 = clamp(endpoint953, -maxVal95, maxVal95); + + float3 endpoint760Unq = Unquantize7(endpoint760); + float3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761); + float3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762); + float3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763); + float3 endpoint950Unq = Unquantize9(endpoint950); + float3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951); + float3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952); + float3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953); + + float msle76 = 0.0f; + float msle95 = 0.0f; + for (uint i = 0u; i < 16u; ++i) { + uint paletteID = Pattern(pattern, i); + + float3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq; + float3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq; + float3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq; + float3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq; + + float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f); + float3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight); + float3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight); + + msle76 += CalcMSLE(texels[i], texelUnc76); + msle95 += CalcMSLE(texels[i], texelUnc95); + } + + SignExtend(endpoint761, 0x1F, 0x20); + SignExtend(endpoint762, 0x1F, 0x20); + SignExtend(endpoint763, 0x1F, 0x20); + + SignExtend(endpoint951, 0xF, 0x10); + SignExtend(endpoint952, 0xF, 0x10); + SignExtend(endpoint953, 0xF, 0x10); + + // encode block + float p2MSLE = min(msle76, msle95); + if (p2MSLE < blockMSLE) { + blockMSLE = p2MSLE; + block = uint4(0u, 0u, 0u, 0u); + + if (p2MSLE == msle76) { + // 7.6 + block.x = 0x1u; + block.x |= (uint(endpoint762.y) & 0x20u) >> 3u; + block.x |= (uint(endpoint763.y) & 0x10u) >> 1u; + block.x |= (uint(endpoint763.y) & 0x20u) >> 1u; + block.x |= uint(endpoint760.x) << 5u; + block.x |= (uint(endpoint763.z) & 0x01u) << 12u; + block.x |= (uint(endpoint763.z) & 0x02u) << 12u; + block.x |= (uint(endpoint762.z) & 0x10u) << 10u; + block.x |= uint(endpoint760.y) << 15u; + block.x |= (uint(endpoint762.z) & 0x20u) << 17u; + block.x |= (uint(endpoint763.z) & 0x04u) << 21u; + block.x |= (uint(endpoint762.y) & 0x10u) << 20u; + block.x |= uint(endpoint760.z) << 25u; + block.y |= (uint(endpoint763.z) & 0x08u) >> 3u; + block.y |= (uint(endpoint763.z) & 0x20u) >> 4u; + block.y |= (uint(endpoint763.z) & 0x10u) >> 2u; + block.y |= uint(endpoint761.x) << 3u; + block.y |= (uint(endpoint762.y) & 0x0Fu) << 9u; + block.y |= uint(endpoint761.y) << 13u; + block.y |= (uint(endpoint763.y) & 0x0Fu) << 19u; + block.y |= uint(endpoint761.z) << 23u; + block.y |= (uint(endpoint762.z) & 0x07u) << 29u; + block.z |= (uint(endpoint762.z) & 0x08u) >> 3u; + block.z |= uint(endpoint762.x) << 1u; + block.z |= uint(endpoint763.x) << 7u; + } else { + // 9.5 + block.x = 0xEu; + block.x |= uint(endpoint950.x) << 5u; + block.x |= (uint(endpoint952.z) & 0x10u) << 10u; + block.x |= uint(endpoint950.y) << 15u; + block.x |= (uint(endpoint952.y) & 0x10u) << 20u; + block.x |= uint(endpoint950.z) << 25u; + block.y |= uint(endpoint950.z) >> 7u; + block.y |= (uint(endpoint953.z) & 0x10u) >> 2u; + block.y |= uint(endpoint951.x) << 3u; + block.y |= (uint(endpoint953.y) & 0x10u) << 4u; + block.y |= (uint(endpoint952.y) & 0x0Fu) << 9u; + block.y |= uint(endpoint951.y) << 13u; + block.y |= (uint(endpoint953.z) & 0x01u) << 18u; + block.y |= (uint(endpoint953.y) & 0x0Fu) << 19u; + block.y |= uint(endpoint951.z) << 23u; + block.y |= (uint(endpoint953.z) & 0x02u) << 27u; + block.y |= uint(endpoint952.z) << 29u; + block.z |= (uint(endpoint952.z) & 0x08u) >> 3u; + block.z |= uint(endpoint952.x) << 1u; + block.z |= (uint(endpoint953.z) & 0x04u) << 4u; + block.z |= uint(endpoint953.x) << 7u; + block.z |= (uint(endpoint953.z) & 0x08u) << 9u; + } + + block.z |= pattern << 13u; + uint blockFixupID = PatternFixupID(pattern); + if (blockFixupID == 15u) { + block.z |= indices[0] << 18u; + block.z |= indices[1] << 20u; + block.z |= indices[2] << 23u; + block.z |= indices[3] << 26u; + block.z |= indices[4] << 29u; + block.w |= indices[5] << 0u; + block.w |= indices[6] << 3u; + block.w |= indices[7] << 6u; + block.w |= indices[8] << 9u; + block.w |= indices[9] << 12u; + block.w |= indices[10] << 15u; + block.w |= indices[11] << 18u; + block.w |= indices[12] << 21u; + block.w |= indices[13] << 24u; + block.w |= indices[14] << 27u; + block.w |= indices[15] << 30u; + } else if (blockFixupID == 2u) { + block.z |= indices[0] << 18u; + block.z |= indices[1] << 20u; + block.z |= indices[2] << 23u; + block.z |= indices[3] << 25u; + block.z |= indices[4] << 28u; + block.z |= indices[5] << 31u; + block.w |= indices[5] >> 1u; + block.w |= indices[6] << 2u; + block.w |= indices[7] << 5u; + block.w |= indices[8] << 8u; + block.w |= indices[9] << 11u; + block.w |= indices[10] << 14u; + block.w |= indices[11] << 17u; + block.w |= indices[12] << 20u; + block.w |= indices[13] << 23u; + block.w |= indices[14] << 26u; + block.w |= indices[15] << 29u; + } else { + block.z |= indices[0] << 18u; + block.z |= indices[1] << 20u; + block.z |= indices[2] << 23u; + block.z |= indices[3] << 26u; + block.z |= indices[4] << 29u; + block.w |= indices[5] << 0u; + block.w |= indices[6] << 3u; + block.w |= indices[7] << 6u; + block.w |= indices[8] << 9u; + block.w |= indices[9] << 11u; + block.w |= indices[10] << 14u; + block.w |= indices[11] << 17u; + block.w |= indices[12] << 20u; + block.w |= indices[13] << 23u; + block.w |= indices[14] << 26u; + block.w |= indices[15] << 29u; + } + } +} + +layout(local_size_x = 8, + local_size_y = 8, + local_size_z = 1) in; + +void main() { + // gather texels for current 4x4 block + // 0 1 2 3 + // 4 5 6 7 + // 8 9 10 11 + // 12 13 14 15 + float2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp; + float2 block0UV = uv; + float2 block1UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 0.0f); + float2 block2UV = uv + float2(0.0f, 2.0f * params.p_textureSizeRcp.y); + float2 block3UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 2.0f * params.p_textureSizeRcp.y); + float4 block0X = OGRE_GatherRed(srcTexture, pointSampler, block0UV); + float4 block1X = OGRE_GatherRed(srcTexture, pointSampler, block1UV); + float4 block2X = OGRE_GatherRed(srcTexture, pointSampler, block2UV); + float4 block3X = OGRE_GatherRed(srcTexture, pointSampler, block3UV); + float4 block0Y = OGRE_GatherGreen(srcTexture, pointSampler, block0UV); + float4 block1Y = OGRE_GatherGreen(srcTexture, pointSampler, block1UV); + float4 block2Y = OGRE_GatherGreen(srcTexture, pointSampler, block2UV); + float4 block3Y = OGRE_GatherGreen(srcTexture, pointSampler, block3UV); + float4 block0Z = OGRE_GatherBlue(srcTexture, pointSampler, block0UV); + float4 block1Z = OGRE_GatherBlue(srcTexture, pointSampler, block1UV); + float4 block2Z = OGRE_GatherBlue(srcTexture, pointSampler, block2UV); + float4 block3Z = OGRE_GatherBlue(srcTexture, pointSampler, block3UV); + + float3 texels[16]; + texels[0] = float3(block0X.w, block0Y.w, block0Z.w); + texels[1] = float3(block0X.z, block0Y.z, block0Z.z); + texels[2] = float3(block1X.w, block1Y.w, block1Z.w); + texels[3] = float3(block1X.z, block1Y.z, block1Z.z); + texels[4] = float3(block0X.x, block0Y.x, block0Z.x); + texels[5] = float3(block0X.y, block0Y.y, block0Z.y); + texels[6] = float3(block1X.x, block1Y.x, block1Z.x); + texels[7] = float3(block1X.y, block1Y.y, block1Z.y); + texels[8] = float3(block2X.w, block2Y.w, block2Z.w); + texels[9] = float3(block2X.z, block2Y.z, block2Z.z); + texels[10] = float3(block3X.w, block3Y.w, block3Z.w); + texels[11] = float3(block3X.z, block3Y.z, block3Z.z); + texels[12] = float3(block2X.x, block2Y.x, block2Z.x); + texels[13] = float3(block2X.y, block2Y.y, block2Z.y); + texels[14] = float3(block3X.x, block3Y.x, block3Z.x); + texels[15] = float3(block3X.y, block3Y.y, block3Z.y); + + uint4 block = uint4(0u, 0u, 0u, 0u); + float blockMSLE = 0.0f; + + EncodeP1(block, blockMSLE, texels); + +#ifdef QUALITY + float bestScore = EvaluateP2Pattern(0, texels); + uint bestPattern = 0; + + for (uint i = 1u; i < 32u; ++i) { + float score = EvaluateP2Pattern(i, texels); + + if (score < bestScore) { + bestPattern = i; + bestScore = score; + } + } + + EncodeP2Pattern(block, blockMSLE, bestPattern, texels); +#endif + + imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), block); +} diff --git a/modules/betsy/config.py b/modules/betsy/config.py new file mode 100644 index 0000000000..eb565b85b9 --- /dev/null +++ b/modules/betsy/config.py @@ -0,0 +1,6 @@ +def can_build(env, platform): + return env.editor_build + + +def configure(env): + pass diff --git a/modules/betsy/image_compress_betsy.cpp b/modules/betsy/image_compress_betsy.cpp new file mode 100644 index 0000000000..6a0862e729 --- /dev/null +++ b/modules/betsy/image_compress_betsy.cpp @@ -0,0 +1,354 @@ +/**************************************************************************/ +/* image_compress_betsy.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "image_compress_betsy.h" + +#include "servers/rendering/rendering_device_binds.h" +#include "servers/rendering/rendering_server_default.h" + +#if defined(VULKAN_ENABLED) +#include "drivers/vulkan/rendering_context_driver_vulkan.h" +#endif + +#include "bc6h.glsl.gen.h" + +struct BC6PushConstant { + float sizeX; + float sizeY; + uint32_t padding[2]; +}; + +static int get_next_multiple(int n, int m) { + return n + (m - (n % m)); +} + +static bool is_image_signed(const Image *r_img) { + if (r_img->get_format() >= Image::FORMAT_RH && r_img->get_format() <= Image::FORMAT_RGBAH) { + const uint16_t *img_data = reinterpret_cast<const uint16_t *>(r_img->ptr()); + const uint64_t img_size = r_img->get_data_size() / 2; + + for (uint64_t i = 0; i < img_size; i++) { + if ((img_data[i] & 0x8000) != 0 && (img_data[i] & 0x7fff) != 0) { + return true; + } + } + + } else if (r_img->get_format() >= Image::FORMAT_RF && r_img->get_format() <= Image::FORMAT_RGBAF) { + const uint32_t *img_data = reinterpret_cast<const uint32_t *>(r_img->ptr()); + const uint64_t img_size = r_img->get_data_size() / 4; + + for (uint64_t i = 0; i < img_size; i++) { + if ((img_data[i] & 0x80000000) != 0 && (img_data[i] & 0x7fffffff) != 0) { + return true; + } + } + } + + return false; +} + +Error _compress_betsy(BetsyFormat p_format, Image *r_img) { + uint64_t start_time = OS::get_singleton()->get_ticks_msec(); + + if (r_img->is_compressed()) { + return ERR_INVALID_DATA; + } + + ERR_FAIL_COND_V_MSG(r_img->get_format() < Image::FORMAT_RF || r_img->get_format() > Image::FORMAT_RGBE9995, ERR_INVALID_DATA, "Image is not an HDR image."); + + Error err = OK; + + // Create local RD. + RenderingContextDriver *rcd = nullptr; + RenderingDevice *rd = RenderingServer::get_singleton()->create_local_rendering_device(); + + if (rd == nullptr) { +#if defined(RD_ENABLED) +#if defined(VULKAN_ENABLED) + rcd = memnew(RenderingContextDriverVulkan); + rd = memnew(RenderingDevice); +#endif +#endif + if (rcd != nullptr && rd != nullptr) { + err = rcd->initialize(); + if (err == OK) { + err = rd->initialize(rcd); + } + + if (err != OK) { + memdelete(rd); + memdelete(rcd); + rd = nullptr; + rcd = nullptr; + } + } + } + + ERR_FAIL_NULL_V_MSG(rd, err, "Unable to create a local RenderingDevice."); + + Ref<RDShaderFile> compute_shader; + compute_shader.instantiate(); + + // Destination format. + Image::Format dest_format = Image::FORMAT_MAX; + + String version = ""; + + switch (p_format) { + case BETSY_FORMAT_BC6: { + err = compute_shader->parse_versions_from_text(bc6h_shader_glsl); + + if (is_image_signed(r_img)) { + dest_format = Image::FORMAT_BPTC_RGBF; + version = "signed"; + } else { + dest_format = Image::FORMAT_BPTC_RGBFU; + version = "unsigned"; + } + + } break; + + default: + err = ERR_INVALID_PARAMETER; + break; + } + + if (err != OK) { + memdelete(rd); + if (rcd != nullptr) { + memdelete(rcd); + } + + return err; + } + + // Compile the shader, return early if invalid. + RID shader = rd->shader_create_from_spirv(compute_shader->get_spirv_stages(version)); + + if (shader.is_null()) { + memdelete(rd); + if (rcd != nullptr) { + memdelete(rcd); + } + + return err; + } + + RID pipeline = rd->compute_pipeline_create(shader); + + // src_texture format information. + RD::TextureFormat src_texture_format; + { + src_texture_format.array_layers = 1; + src_texture_format.depth = 1; + src_texture_format.mipmaps = 1; + src_texture_format.texture_type = RD::TEXTURE_TYPE_2D; + src_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + } + + switch (r_img->get_format()) { + case Image::FORMAT_RH: + src_texture_format.format = RD::DATA_FORMAT_R16_SFLOAT; + break; + + case Image::FORMAT_RGH: + src_texture_format.format = RD::DATA_FORMAT_R16G16_SFLOAT; + break; + + case Image::FORMAT_RGBH: + r_img->convert(Image::FORMAT_RGBAH); + src_texture_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + break; + + case Image::FORMAT_RGBAH: + src_texture_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + break; + + case Image::FORMAT_RF: + src_texture_format.format = RD::DATA_FORMAT_R32_SFLOAT; + break; + + case Image::FORMAT_RGF: + src_texture_format.format = RD::DATA_FORMAT_R32G32_SFLOAT; + break; + + case Image::FORMAT_RGBF: + r_img->convert(Image::FORMAT_RGBAF); + src_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + break; + + case Image::FORMAT_RGBAF: + src_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + break; + + case Image::FORMAT_RGBE9995: + src_texture_format.format = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32; + break; + + default: { + rd->free(shader); + + memdelete(rd); + if (rcd != nullptr) { + memdelete(rcd); + } + + return err; + } + } + + // Create the sampler state. + RD::SamplerState src_sampler_state; + { + src_sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; + src_sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; + src_sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST; + src_sampler_state.min_filter = RD::SAMPLER_FILTER_NEAREST; + src_sampler_state.mip_filter = RD::SAMPLER_FILTER_NEAREST; + } + + RID src_sampler = rd->sampler_create(src_sampler_state); + + // For the destination format just copy the source format and change the usage bits. + RD::TextureFormat dst_texture_format = src_texture_format; + dst_texture_format.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; + dst_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_UINT; + + const int mip_count = r_img->get_mipmap_count() + 1; + + // Container for the compressed data. + Vector<uint8_t> dst_data; + dst_data.resize(Image::get_image_data_size(r_img->get_width(), r_img->get_height(), dest_format, r_img->has_mipmaps())); + uint8_t *dst_data_ptr = dst_data.ptrw(); + + Vector<Vector<uint8_t>> src_images; + src_images.push_back(Vector<uint8_t>()); + Vector<uint8_t> *src_image_ptr = src_images.ptrw(); + + // Compress each mipmap. + for (int i = 0; i < mip_count; i++) { + int64_t ofs, size; + int width, height; + r_img->get_mipmap_offset_size_and_dimensions(i, ofs, size, width, height); + + // Set the source texture width and size. + src_texture_format.height = height; + src_texture_format.width = width; + + // Set the destination texture width and size. + dst_texture_format.height = (height + 3) >> 2; + dst_texture_format.width = (width + 3) >> 2; + + // Create a buffer filled with the source mip layer data. + src_image_ptr[0].resize(size); + memcpy(src_image_ptr[0].ptrw(), r_img->ptr() + ofs, size); + + // Create the textures on the GPU. + RID src_texture = rd->texture_create(src_texture_format, RD::TextureView(), src_images); + RID dst_texture = rd->texture_create(dst_texture_format, RD::TextureView()); + + if (dest_format == Image::FORMAT_BPTC_RGBFU || dest_format == Image::FORMAT_BPTC_RGBF) { + BC6PushConstant push_constant; + push_constant.sizeX = 1.0f / width; + push_constant.sizeY = 1.0f / height; + push_constant.padding[0] = 0; + push_constant.padding[1] = 0; + + Vector<RD::Uniform> uniforms; + { + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 0; + u.append_id(src_sampler); + u.append_id(src_texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.append_id(dst_texture); + uniforms.push_back(u); + } + } + + RID uniform_set = rd->uniform_set_create(uniforms, shader, 0); + RD::ComputeListID compute_list = rd->compute_list_begin(); + + rd->compute_list_bind_compute_pipeline(compute_list, pipeline); + rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0); + rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant)); + rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1); + rd->compute_list_end(); + } + + rd->submit(); + rd->sync(); + + // Copy data from the GPU to the buffer. + const Vector<uint8_t> texture_data = rd->texture_get_data(dst_texture, 0); + int64_t dst_ofs = Image::get_image_mipmap_offset(r_img->get_width(), r_img->get_height(), dest_format, i); + + memcpy(dst_data_ptr + dst_ofs, texture_data.ptr(), texture_data.size()); + + // Free the source and dest texture. + rd->free(dst_texture); + rd->free(src_texture); + } + + src_images.clear(); + + // Set the compressed data to the image. + r_img->set_data(r_img->get_width(), r_img->get_height(), r_img->has_mipmaps(), dest_format, dst_data); + + // Free the shader (dependencies will be cleared automatically). + rd->free(src_sampler); + rd->free(shader); + + memdelete(rd); + if (rcd != nullptr) { + memdelete(rcd); + } + + print_verbose(vformat("Betsy: Encoding took %d ms.", OS::get_singleton()->get_ticks_msec() - start_time)); + + return OK; +} + +Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels) { + Image::Format format = r_img->get_format(); + + if (format >= Image::FORMAT_RF && format <= Image::FORMAT_RGBE9995) { + return _compress_betsy(BETSY_FORMAT_BC6, r_img); + } + + return ERR_UNAVAILABLE; +} diff --git a/modules/betsy/image_compress_betsy.h b/modules/betsy/image_compress_betsy.h new file mode 100644 index 0000000000..a64e586c76 --- /dev/null +++ b/modules/betsy/image_compress_betsy.h @@ -0,0 +1,44 @@ +/**************************************************************************/ +/* image_compress_betsy.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef IMAGE_COMPRESS_BETSY_H +#define IMAGE_COMPRESS_BETSY_H + +#include "core/io/image.h" + +enum BetsyFormat { + BETSY_FORMAT_BC6, +}; + +Error _compress_betsy(BetsyFormat p_format, Image *r_img); + +Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels); + +#endif // IMAGE_COMPRESS_BETSY_H diff --git a/modules/betsy/register_types.cpp b/modules/betsy/register_types.cpp new file mode 100644 index 0000000000..019099e67c --- /dev/null +++ b/modules/betsy/register_types.cpp @@ -0,0 +1,47 @@ +/**************************************************************************/ +/* register_types.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "register_types.h" + +#include "image_compress_betsy.h" + +void initialize_betsy_module(ModuleInitializationLevel p_level) { + if (p_level != MODULE_INITIALIZATION_LEVEL_SCENE) { + return; + } + + Image::_image_compress_bptc_rd_func = _betsy_compress_bptc; +} + +void uninitialize_betsy_module(ModuleInitializationLevel p_level) { + if (p_level != MODULE_INITIALIZATION_LEVEL_SCENE) { + return; + } +} diff --git a/modules/betsy/register_types.h b/modules/betsy/register_types.h new file mode 100644 index 0000000000..0ce6c553b6 --- /dev/null +++ b/modules/betsy/register_types.h @@ -0,0 +1,39 @@ +/**************************************************************************/ +/* register_types.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef BETSY_REGISTER_TYPES_H +#define BETSY_REGISTER_TYPES_H + +#include "modules/register_module_types.h" + +void initialize_betsy_module(ModuleInitializationLevel p_level); +void uninitialize_betsy_module(ModuleInitializationLevel p_level); + +#endif // BETSY_REGISTER_TYPES_H diff --git a/modules/cvtt/image_compress_cvtt.cpp b/modules/cvtt/image_compress_cvtt.cpp index 4938d8bff5..ccc7dfd7ce 100644 --- a/modules/cvtt/image_compress_cvtt.cpp +++ b/modules/cvtt/image_compress_cvtt.cpp @@ -142,9 +142,12 @@ static void _digest_job_queue(void *p_job_queue, uint32_t p_index) { } void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) { + uint64_t start_time = OS::get_singleton()->get_ticks_msec(); + if (p_image->is_compressed()) { return; //do not compress, already compressed } + int w = p_image->get_width(); int h = p_image->get_height(); @@ -250,6 +253,8 @@ void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) { WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); p_image->set_data(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data); + + print_verbose(vformat("CVTT: Encoding took %d ms.", OS::get_singleton()->get_ticks_msec() - start_time)); } void image_decompress_cvtt(Image *p_image) { |