summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Rémi Verschelde <rverschelde@gmail.com> 2024-08-16 10:35:32 +0200
committer Rémi Verschelde <rverschelde@gmail.com> 2024-08-16 10:35:32 +0200
commit 886d5865a43543d93721e437d69bf38e5ee932c1 (patch)
tree eff56d6199e5a81aec19ea4f68d2a941b0d0a985
parent b1c624beb55f053dca9f7397b9309f1d1567f38d (diff)
parent 80cf6cbfe9c0611ac649eb0e9cb92d0f5765d220 (diff)
download redot-engine-886d5865a43543d93721e437d69bf38e5ee932c1.tar.gz
Merge pull request #95291 from BlueCube3310/hdr-optimizations
Optimize .hdr loading and RGB9E5 conversion
-rw-r--r-- core/math/color.h 67
-rw-r--r-- modules/hdr/image_loader_hdr.cpp 32
-rw-r--r-- modules/hdr/image_loader_hdr.h 1
3 files changed, 62 insertions, 38 deletions
diff --git a/core/math/color.h b/core/math/color.h
index e17b8c9fd7..70fad78acb 100644
--- a/core/math/color.h
+++ b/core/math/color.h
@@ -129,33 +129,46 @@ struct [[nodiscard]] Color {
}
_FORCE_INLINE_ uint32_t to_rgbe9995() const {
- const float pow2to9 = 512.0f;
- const float B = 15.0f;
- const float N = 9.0f;
-
- float sharedexp = 65408.000f; // Result of: ((pow2to9 - 1.0f) / pow2to9) * powf(2.0f, 31.0f - 15.0f)
-
- float cRed = MAX(0.0f, MIN(sharedexp, r));
- float cGreen = MAX(0.0f, MIN(sharedexp, g));
- float cBlue = MAX(0.0f, MIN(sharedexp, b));
-
- float cMax = MAX(cRed, MAX(cGreen, cBlue));
-
- float expp = MAX(-B - 1.0f, floor(Math::log(cMax) / (real_t)Math_LN2)) + 1.0f + B;
-
- float sMax = (float)floor((cMax / Math::pow(2.0f, expp - B - N)) + 0.5f);
-
- float exps = expp + 1.0f;
-
- if (0.0f <= sMax && sMax < pow2to9) {
- exps = expp;
- }
-
- float sRed = Math::floor((cRed / pow(2.0f, exps - B - N)) + 0.5f);
- float sGreen = Math::floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f);
- float sBlue = Math::floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f);
-
- return (uint32_t(Math::fast_ftoi(sRed)) & 0x1FF) | ((uint32_t(Math::fast_ftoi(sGreen)) & 0x1FF) << 9) | ((uint32_t(Math::fast_ftoi(sBlue)) & 0x1FF) << 18) | ((uint32_t(Math::fast_ftoi(exps)) & 0x1F) << 27);
+ // https://github.com/microsoft/DirectX-Graphics-Samples/blob/v10.0.19041.0/MiniEngine/Core/Color.cpp
+ static const float kMaxVal = float(0x1FF << 7);
+ static const float kMinVal = float(1.f / (1 << 16));
+
+ // Clamp RGB to [0, 1.FF*2^15] (kMaxVal = 0x1FF << 7 = 65408)
+ const float _r = CLAMP(r, 0.0f, kMaxVal);
+ const float _g = CLAMP(g, 0.0f, kMaxVal);
+ const float _b = CLAMP(b, 0.0f, kMaxVal);
+
+ // Compute the maximum channel, no less than 1.0*2^-16 (kMinVal)
+ const float MaxChannel = MAX(MAX(_r, _g), MAX(_b, kMinVal));
+
+ // Take the exponent of the maximum channel (rounding up the 9th bit) and
+ // add 15 to it. When added to the channels, it causes the implicit '1.0'
+ // bit and the first 8 mantissa bits to be shifted down to the low 9 bits
+ // of the mantissa, rounding the truncated bits.
+ union {
+ float f;
+ int32_t i;
+ } R, G, B, E;
+
+ E.f = MaxChannel;
+ E.i += 0x07804000; // Add 15 to the exponent and 0x4000 to the mantissa
+ E.i &= 0x7F800000; // Zero the mantissa
+
+ // This shifts the 9-bit values we need into the lowest bits, rounding as
+ // needed. Note that if the channel has a smaller exponent than the max
+ // channel, it will shift even more. This is intentional.
+ R.f = _r + E.f;
+ G.f = _g + E.f;
+ B.f = _b + E.f;
+
+ // Convert the Bias to the correct exponent in the upper 5 bits.
+ E.i <<= 4;
+ E.i += 0x10000000;
+
+ // Combine the fields. RGB floats have unwanted data in the upper 9
+ // bits. Only red needs to mask them off because green and blue shift
+ // it out to the left.
+ return E.i | (B.i << 18) | (G.i << 9) | (R.i & 511);
}
_FORCE_INLINE_ Color blend(const Color &p_over) const {
diff --git a/modules/hdr/image_loader_hdr.cpp b/modules/hdr/image_loader_hdr.cpp
index c49c62a08b..ba59bb25ee 100644
--- a/modules/hdr/image_loader_hdr.cpp
+++ b/modules/hdr/image_loader_hdr.cpp
@@ -68,9 +68,11 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
imgdata.resize(height * width * (int)sizeof(uint32_t));
{
- uint8_t *w = imgdata.ptrw();
+ uint8_t *ptr = imgdata.ptrw();
- uint8_t *ptr = (uint8_t *)w;
+ Vector<uint8_t> temp_read_data;
+ temp_read_data.resize(128);
+ uint8_t *temp_read_ptr = temp_read_data.ptrw();
if (width < 8 || width >= 32768) {
// Read flat data
@@ -113,8 +115,9 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
}
} else {
// Dump
+ f->get_buffer(temp_read_ptr, count);
for (int z = 0; z < count; ++z) {
- ptr[(j * width + i++) * 4 + k] = f->get_8();
+ ptr[(j * width + i++) * 4 + k] = temp_read_ptr[z];
}
}
}
@@ -122,20 +125,27 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
}
}
+ const bool force_linear = p_flags & FLAG_FORCE_LINEAR;
+
//convert
for (int i = 0; i < width * height; i++) {
- float exp = pow(2.0f, ptr[3] - 128.0f);
+ int e = ptr[3] - 128;
+
+ if (force_linear || (e < -15 || e > 15)) {
+ float exp = pow(2.0f, e);
+ Color c(ptr[0] * exp / 255.0, ptr[1] * exp / 255.0, ptr[2] * exp / 255.0);
- Color c(
- ptr[0] * exp / 255.0,
- ptr[1] * exp / 255.0,
- ptr[2] * exp / 255.0);
+ if (force_linear) {
+ c = c.srgb_to_linear();
+ }
- if (p_flags & FLAG_FORCE_LINEAR) {
- c = c.srgb_to_linear();
+ *(uint32_t *)ptr = c.to_rgbe9995();
+ } else {
+ // https://github.com/george-steel/rgbe-rs/blob/e7cc33b7f42b4eb3272c166dac75385e48687c92/src/types.rs#L123-L129
+ uint32_t e5 = (uint32_t)(e + 15);
+ *(uint32_t *)ptr = ((e5 << 27) | ((uint32_t)ptr[2] << 19) | ((uint32_t)ptr[1] << 10) | ((uint32_t)ptr[0] << 1));
}
- *(uint32_t *)ptr = c.to_rgbe9995();
ptr += 4;
}
}
diff --git a/modules/hdr/image_loader_hdr.h b/modules/hdr/image_loader_hdr.h
index 9821db059e..0a8e91fb9e 100644
--- a/modules/hdr/image_loader_hdr.h
+++ b/modules/hdr/image_loader_hdr.h
@@ -37,6 +37,7 @@ class ImageLoaderHDR : public ImageFormatLoader {
public:
virtual Error load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField<ImageFormatLoader::LoaderFlags> p_flags, float p_scale);
virtual void get_recognized_extensions(List<String> *p_extensions) const;
+
ImageLoaderHDR();
};