summary refs log tree commit diff stats
path: root/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h')
-rw-r--r-- thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h 605
1 files changed, 605 insertions, 0 deletions
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
new file mode 100644
index 0000000000..f94f40aa79
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
@@ -0,0 +1,605 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FSR2_SAMPLE_H
+#define FFX_FSR2_SAMPLE_H
+
+// suppress warnings
+#ifdef FFX_HLSL
+#pragma warning(disable: 4008) // potentially divide by zero
+#endif //FFX_HLSL
+
+// 2x2 texel footprint consumed by Bilinear(). Members are named fColor<x><y>,
+// where x and y are the texel offsets from the base texel (see
+// DeclareCustomFetchBilinearSamplesWithType).
+struct FetchedBilinearSamples {
+
+ FfxFloat32x4 fColor00;
+ FfxFloat32x4 fColor10;
+
+ FfxFloat32x4 fColor01;
+ FfxFloat32x4 fColor11;
+};
+
+// 4x4 texel footprint consumed by the Lanczos samplers. Members are named
+// fColor<x><y>; DeclareCustomFetchBicubicSamplesWithType fills fColor<x><y>
+// with the texel at offset (x - 1, y - 1) from the base texel, so fColor11 is
+// the base texel itself.
+struct FetchedBicubicSamples {
+
+ FfxFloat32x4 fColor00;
+ FfxFloat32x4 fColor10;
+ FfxFloat32x4 fColor20;
+ FfxFloat32x4 fColor30;
+
+ FfxFloat32x4 fColor01;
+ FfxFloat32x4 fColor11;
+ FfxFloat32x4 fColor21;
+ FfxFloat32x4 fColor31;
+
+ FfxFloat32x4 fColor02;
+ FfxFloat32x4 fColor12;
+ FfxFloat32x4 fColor22;
+ FfxFloat32x4 fColor32;
+
+ FfxFloat32x4 fColor03;
+ FfxFloat32x4 fColor13;
+ FfxFloat32x4 fColor23;
+ FfxFloat32x4 fColor33;
+};
+
+#if FFX_HALF
+// Same 2x2 layout as FetchedBilinearSamples, with FFX_MIN16_F4 members.
+// Only declared as a distinct type when FFX_HALF is enabled.
+struct FetchedBilinearSamplesMin16 {
+
+ FFX_MIN16_F4 fColor00;
+ FFX_MIN16_F4 fColor10;
+
+ FFX_MIN16_F4 fColor01;
+ FFX_MIN16_F4 fColor11;
+};
+
+// Same 4x4 layout as FetchedBicubicSamples, with FFX_MIN16_F4 members.
+// Only declared as a distinct type when FFX_HALF is enabled.
+struct FetchedBicubicSamplesMin16 {
+
+ FFX_MIN16_F4 fColor00;
+ FFX_MIN16_F4 fColor10;
+ FFX_MIN16_F4 fColor20;
+ FFX_MIN16_F4 fColor30;
+
+ FFX_MIN16_F4 fColor01;
+ FFX_MIN16_F4 fColor11;
+ FFX_MIN16_F4 fColor21;
+ FFX_MIN16_F4 fColor31;
+
+ FFX_MIN16_F4 fColor02;
+ FFX_MIN16_F4 fColor12;
+ FFX_MIN16_F4 fColor22;
+ FFX_MIN16_F4 fColor32;
+
+ FFX_MIN16_F4 fColor03;
+ FFX_MIN16_F4 fColor13;
+ FFX_MIN16_F4 fColor23;
+ FFX_MIN16_F4 fColor33;
+};
+#else //FFX_HALF
+// Without FFX_HALF the Min16 sample types are plain aliases of the
+// full-precision structs.
+#define FetchedBicubicSamplesMin16 FetchedBicubicSamples
+#define FetchedBilinearSamplesMin16 FetchedBilinearSamples
+#endif //FFX_HALF
+
+// Linear interpolation: returns A for t = 0 and A + (B - A) for t = 1.
+FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t)
+{
+ FfxFloat32x4 fDelta = B - A;
+ return A + fDelta * t;
+}
+
+// Bilinear blend of a 2x2 footprint: interpolate along x inside each row
+// using fPxFrac.x, then along y between the two rows using fPxFrac.y.
+FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac)
+{
+ FfxFloat32x4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
+ FfxFloat32x4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
+ return Linear(fRow0, fRow1, fPxFrac.y);
+}
+
+#if FFX_HALF
+// Min16 overload of Linear(): returns A for t = 0 and A + (B - A) for t = 1.
+FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t)
+{
+ FFX_MIN16_F4 fDelta = B - A;
+ return A + fDelta * t;
+}
+
+// Min16 overload of Bilinear(): interpolate along x inside each row, then
+// along y between the two rows.
+FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac)
+{
+ FFX_MIN16_F4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
+ FFX_MIN16_F4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
+ return Linear(fRow0, fRow1, fPxFrac.y);
+}
+#endif
+
+// Evaluates (sin(PI * x) / (PI * x)) * (sin(0.5 * PI * x) / (0.5 * PI * x))
+// without restricting x to any range. Inputs whose magnitude is below
+// FSR2_EPSILON yield 1 so the quotient is never used at the origin.
+FfxFloat32 Lanczos2NoClamp(FfxFloat32 x)
+{
+ const FfxFloat32 fPi = 3.141592653589793f; // TODO: share SDK constants
+
+ if (abs(x) < FSR2_EPSILON) {
+ return 1.f;
+ }
+
+ FfxFloat32 fPiX = fPi * x;
+ FfxFloat32 fHalfPiX = 0.5f * fPi * x;
+ return (sin(fPiX) / fPiX) * (sin(fHalfPiX) / fHalfPiX);
+}
+
+// Lanczos weight for a tap at distance x: the magnitude of x is limited
+// to 2 before the kernel is evaluated.
+FfxFloat32 Lanczos2(FfxFloat32 x)
+{
+ FfxFloat32 fDistance = ffxMin(abs(x), 2.0f);
+ return Lanczos2NoClamp(fDistance);
+}
+
+#if FFX_HALF
+
+// Disabled min16 overload of Lanczos2NoClamp(). While it stays compiled out,
+// the min16 Lanczos2() below passes its min16 argument to whichever
+// Lanczos2NoClamp() overload is available (presumably the FfxFloat32 one)
+// and converts the result back to FFX_MIN16_F.
+#if 0
+FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x)
+{
+ const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants
+ return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x));
+}
+#endif
+
+// Min16 overload of Lanczos2(): limits the magnitude of x to 2, evaluates
+// Lanczos2NoClamp() and converts the result to FFX_MIN16_F.
+FFX_MIN16_F Lanczos2(FFX_MIN16_F x)
+{
+ FFX_MIN16_F fDistance = ffxMin(abs(x), FFX_MIN16_F(2.0f));
+ return FFX_MIN16_F(Lanczos2NoClamp(fDistance));
+}
+#endif //FFX_HALF
+
+// FSR1 polynomial approximation of the Lanczos kernel.
+// The argument is the squared distance (x * x) and must be <= 4; no clamping
+// is performed here.
+FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2)
+{
+ FfxFloat32 fTermA = (2.0f / 5.0f) * x2 - 1;
+ FfxFloat32 fTermB = (1.0f / 4.0f) * x2 - 1;
+
+ FfxFloat32 fFactorA = (25.0f / 16.0f) * fTermA * fTermA - (25.0f / 16.0f - 1);
+ FfxFloat32 fFactorB = fTermB * fTermB;
+ return fFactorA * fFactorB;
+}
+
+#if FFX_HALF
+// Min16 overload of the FSR1 polynomial Lanczos approximation.
+// The argument is the squared distance (x * x); no clamping is performed here.
+FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2)
+{
+ FFX_MIN16_F fTermA = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1);
+ FFX_MIN16_F fTermB = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1);
+
+ FFX_MIN16_F fFactorA = FFX_MIN16_F(25.0f / 16.0f) * fTermA * fTermA - FFX_MIN16_F(25.0f / 16.0f - 1);
+ FFX_MIN16_F fFactorB = fTermB * fTermB;
+ return fFactorA * fFactorB;
+}
+#endif //FFX_HALF
+
+// Approximate Lanczos weight for a squared distance; the input is limited
+// to 4 before the polynomial is evaluated.
+FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
+{
+ FfxFloat32 fClampedSq = ffxMin(x2, 4.0f);
+ return Lanczos2ApproxSqNoClamp(fClampedSq);
+}
+
+#if FFX_HALF
+// Min16 overload of Lanczos2ApproxSq(): limits the squared distance to 4
+// before evaluating the polynomial.
+FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2)
+{
+ FFX_MIN16_F fClampedSq = ffxMin(x2, FFX_MIN16_F(4.0f));
+ return Lanczos2ApproxSqNoClamp(fClampedSq);
+}
+#endif //FFX_HALF
+
+// Approximate Lanczos weight for a distance x: squares x and evaluates the
+// polynomial without clamping.
+FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x)
+{
+ FfxFloat32 fDistanceSq = x * x;
+ return Lanczos2ApproxSqNoClamp(fDistanceSq);
+}
+
+#if FFX_HALF
+// Min16 overload of Lanczos2ApproxNoClamp(): squares x and evaluates the
+// polynomial without clamping.
+FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x)
+{
+ FFX_MIN16_F fDistanceSq = x * x;
+ return Lanczos2ApproxSqNoClamp(fDistanceSq);
+}
+#endif //FFX_HALF
+
+// Approximate Lanczos weight for a distance x: squares x and hands it to
+// the clamping variant Lanczos2ApproxSq().
+FfxFloat32 Lanczos2Approx(FfxFloat32 x)
+{
+ FfxFloat32 fDistanceSq = x * x;
+ return Lanczos2ApproxSq(fDistanceSq);
+}
+
+#if FFX_HALF
+// Min16 overload of Lanczos2Approx(): squares x and hands it to the
+// clamping variant Lanczos2ApproxSq().
+FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x)
+{
+ FFX_MIN16_F fDistanceSq = x * x;
+ return Lanczos2ApproxSq(fDistanceSq);
+}
+#endif //FFX_HALF
+
+// Lanczos weight obtained from SampleLanczos2Weight(), which is queried with
+// the magnitude of x.
+FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x)
+{
+ FfxFloat32 fDistance = abs(x);
+ return SampleLanczos2Weight(fDistance);
+}
+
+#if FFX_HALF
+// Min16 overload: queries SampleLanczos2Weight() with the magnitude of x and
+// converts the result to FFX_MIN16_F.
+FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x)
+{
+ FFX_MIN16_F fDistance = abs(x);
+ return FFX_MIN16_F(SampleLanczos2Weight(fDistance));
+}
+#endif //FFX_HALF
+
+// Filters four consecutive taps with LUT-based Lanczos weights. The taps are
+// treated as sitting at positions -1, 0, +1 and +2 and t is the sample
+// position measured from the tap at 0. The weighted sum is divided by the
+// sum of the weights.
+FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
+{
+ FfxFloat32 fW0 = Lanczos2_UseLUT(-1.f - t);
+ FfxFloat32 fW1 = Lanczos2_UseLUT(-0.f - t);
+ FfxFloat32 fW2 = Lanczos2_UseLUT(+1.f - t);
+ FfxFloat32 fW3 = Lanczos2_UseLUT(+2.f - t);
+
+ FfxFloat32x4 fWeightedColor = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
+ FfxFloat32 fWeightSum = fW0 + fW1 + fW2 + fW3;
+ return fWeightedColor / fWeightSum;
+}
+#if FFX_HALF
+// Min16 overload of the four-tap LUT-based Lanczos filter. Taps are treated
+// as sitting at positions -1, 0, +1 and +2; t is measured from the tap at 0.
+// The weighted sum is divided by the sum of the weights.
+FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
+{
+ FFX_MIN16_F fW0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t);
+ FFX_MIN16_F fW1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t);
+ FFX_MIN16_F fW2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t);
+ FFX_MIN16_F fW3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t);
+
+ FFX_MIN16_F4 fWeightedColor = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
+ FFX_MIN16_F fWeightSum = fW0 + fW1 + fW2 + fW3;
+ return fWeightedColor / fWeightSum;
+}
+#endif
+
+// Filters four consecutive taps with analytically evaluated Lanczos weights.
+// The taps are treated as sitting at positions -1, 0, +1 and +2 and t is the
+// sample position measured from the tap at 0. The weighted sum is divided by
+// the sum of the weights.
+FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
+{
+ FfxFloat32 fW0 = Lanczos2(-1.f - t);
+ FfxFloat32 fW1 = Lanczos2(-0.f - t);
+ FfxFloat32 fW2 = Lanczos2(+1.f - t);
+ FfxFloat32 fW3 = Lanczos2(+2.f - t);
+
+ FfxFloat32x4 fWeightedColor = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
+ FfxFloat32 fWeightSum = fW0 + fW1 + fW2 + fW3;
+ return fWeightedColor / fWeightSum;
+}
+
+// Separable Lanczos resample of a 4x4 footprint followed by deringing.
+// Each row is filtered along x with fPxFrac.x, the four row results are then
+// filtered along y with fPxFrac.y, and the outcome is clamped to the min/max
+// of the four central samples (fColor11, fColor21, fColor12, fColor22).
+FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
+{
+ // Horizontal pass: one filtered value per row of the footprint.
+ FfxFloat32x4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+ FfxFloat32x4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+ FfxFloat32x4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+ FfxFloat32x4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+
+ // Vertical pass across the four row results.
+ FfxFloat32x4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+ // Deringing bounds, folded in the order 11, 21, 12, 22.
+ FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
+ FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
+
+ return clamp(fFiltered, fLowerBound, fUpperBound);
+}
+
+#if FFX_HALF
+// Min16 overload of the four-tap Lanczos filter. Taps are treated as sitting
+// at positions -1, 0, +1 and +2; t is measured from the tap at 0. The
+// weighted sum is divided by the sum of the weights.
+FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
+{
+ FFX_MIN16_F fW0 = Lanczos2(FFX_MIN16_F(-1.f) - t);
+ FFX_MIN16_F fW1 = Lanczos2(FFX_MIN16_F(-0.f) - t);
+ FFX_MIN16_F fW2 = Lanczos2(FFX_MIN16_F(+1.f) - t);
+ FFX_MIN16_F fW3 = Lanczos2(FFX_MIN16_F(+2.f) - t);
+
+ FFX_MIN16_F4 fWeightedColor = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
+ FFX_MIN16_F fWeightSum = fW0 + fW1 + fW2 + fW3;
+ return fWeightedColor / fWeightSum;
+}
+
+// Min16 overload of the separable 4x4 Lanczos resample with deringing.
+// Rows are filtered along x, the row results along y, and the outcome is
+// clamped to the min/max of the four central samples
+// (fColor11, fColor21, fColor12, fColor22).
+FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
+{
+ // Horizontal pass: one filtered value per row of the footprint.
+ FFX_MIN16_F4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+ FFX_MIN16_F4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+ FFX_MIN16_F4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+ FFX_MIN16_F4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+
+ // Vertical pass across the four row results.
+ FFX_MIN16_F4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+ // Deringing bounds, folded in the order 11, 21, 12, 22.
+ FFX_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
+ FFX_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
+
+ return clamp(fFiltered, fLowerBound, fUpperBound);
+}
+#endif //FFX_HALF
+
+
+// Separable 4x4 Lanczos resample using LUT-based weights (Lanczos2_UseLUT),
+// followed by deringing. Rows are filtered along x, the row results along y,
+// and the outcome is clamped to the min/max of the four central samples
+// (fColor11, fColor21, fColor12, fColor22).
+FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
+{
+ // Horizontal pass: one filtered value per row of the footprint.
+ FfxFloat32x4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+ FfxFloat32x4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+ FfxFloat32x4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+ FfxFloat32x4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+
+ // Vertical pass across the four row results.
+ FfxFloat32x4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+ // Deringing bounds, folded in the order 11, 21, 12, 22.
+ FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
+ FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
+
+ return clamp(fFiltered, fLowerBound, fUpperBound);
+}
+
+#if FFX_HALF
+// Min16 overload of the LUT-based separable 4x4 Lanczos resample with
+// deringing. Rows are filtered along x, the row results along y, and the
+// outcome is clamped to the min/max of the four central samples
+// (fColor11, fColor21, fColor12, fColor22).
+FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
+{
+ // Horizontal pass: one filtered value per row of the footprint.
+ FFX_MIN16_F4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+ FFX_MIN16_F4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+ FFX_MIN16_F4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+ FFX_MIN16_F4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+
+ // Vertical pass across the four row results.
+ FFX_MIN16_F4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+ // Deringing bounds, folded in the order 11, 21, 12, 22.
+ FFX_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
+ FFX_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
+
+ return clamp(fFiltered, fLowerBound, fUpperBound);
+}
+#endif //FFX_HALF
+
+
+
+// Filters four consecutive taps with the polynomial Lanczos approximation
+// (unclamped variant). The taps are treated as sitting at positions -1, 0,
+// +1 and +2 and t is the sample position measured from the tap at 0. The
+// weighted sum is divided by the sum of the weights.
+FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
+{
+ FfxFloat32 fW0 = Lanczos2ApproxNoClamp(-1.f - t);
+ FfxFloat32 fW1 = Lanczos2ApproxNoClamp(-0.f - t);
+ FfxFloat32 fW2 = Lanczos2ApproxNoClamp(+1.f - t);
+ FfxFloat32 fW3 = Lanczos2ApproxNoClamp(+2.f - t);
+
+ FfxFloat32x4 fWeightedColor = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
+ FfxFloat32 fWeightSum = fW0 + fW1 + fW2 + fW3;
+ return fWeightedColor / fWeightSum;
+}
+
+#if FFX_HALF
+// Min16 overload of the four-tap filter using the polynomial Lanczos
+// approximation (unclamped variant). Taps are treated as sitting at
+// positions -1, 0, +1 and +2; t is measured from the tap at 0. The weighted
+// sum is divided by the sum of the weights.
+FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
+{
+ FFX_MIN16_F fW0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t);
+ FFX_MIN16_F fW1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t);
+ FFX_MIN16_F fW2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t);
+ FFX_MIN16_F fW3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t);
+
+ FFX_MIN16_F4 fWeightedColor = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
+ FFX_MIN16_F fWeightSum = fW0 + fW1 + fW2 + fW3;
+ return fWeightedColor / fWeightSum;
+}
+#endif //FFX_HALF
+
+// Separable 4x4 resample using the polynomial Lanczos approximation,
+// followed by deringing. Rows are filtered along x, the row results along y,
+// and the outcome is clamped to the min/max of the four central samples
+// (fColor11, fColor21, fColor12, fColor22).
+FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
+{
+ // Horizontal pass: one filtered value per row of the footprint.
+ FfxFloat32x4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+ FfxFloat32x4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+ FfxFloat32x4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+ FfxFloat32x4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+
+ // Vertical pass across the four row results.
+ FfxFloat32x4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+ // Deringing bounds, folded in the order 11, 21, 12, 22.
+ FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
+ FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
+
+ return clamp(fFiltered, fLowerBound, fUpperBound);
+}
+
+#if FFX_HALF
+// Min16 overload of the separable 4x4 resample using the polynomial Lanczos
+// approximation, followed by deringing. Rows are filtered along x, the row
+// results along y, and the outcome is clamped to the min/max of the four
+// central samples (fColor11, fColor21, fColor12, fColor22).
+FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
+{
+ // Horizontal pass: one filtered value per row of the footprint.
+ FFX_MIN16_F4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+ FFX_MIN16_F4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+ FFX_MIN16_F4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+ FFX_MIN16_F4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+
+ // Vertical pass across the four row results.
+ FFX_MIN16_F4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+ // Deringing bounds, folded in the order 11, 21, 12, 22.
+ FFX_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
+ FFX_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
+
+ return clamp(fFiltered, fLowerBound, fUpperBound);
+}
+#endif
+
+// Adds iPxOffset to iPxSample and clamps only in the direction the offset
+// moves: a negative offset is clamped at 0, a positive offset at
+// iTextureSize - 1, and a zero offset is left untouched.
+// Assumes iPxSample is already in range and iPxOffset is a compile time constant.
+FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
+{
+ FfxInt32x2 iCoord = iPxSample + iPxOffset;
+
+ if (iPxOffset.x < 0) {
+ iCoord.x = ffxMax(iCoord.x, 0);
+ }
+ if (iPxOffset.x > 0) {
+ iCoord.x = ffxMin(iCoord.x, iTextureSize.x - 1);
+ }
+ if (iPxOffset.y < 0) {
+ iCoord.y = ffxMax(iCoord.y, 0);
+ }
+ if (iPxOffset.y > 0) {
+ iCoord.y = ffxMin(iCoord.y, iTextureSize.y - 1);
+ }
+
+ return iCoord;
+}
+#if FFX_HALF
+// Min16 integer overload of ClampCoord(): adds iPxOffset to iPxSample and
+// clamps only in the direction the offset moves (negative at 0, positive at
+// iTextureSize - 1).
+FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
+{
+ FFX_MIN16_I2 iCoord = iPxSample + iPxOffset;
+
+ if (iPxOffset.x < FFX_MIN16_I(0)) {
+ iCoord.x = ffxMax(iCoord.x, FFX_MIN16_I(0));
+ }
+ if (iPxOffset.x > FFX_MIN16_I(0)) {
+ iCoord.x = ffxMin(iCoord.x, iTextureSize.x - FFX_MIN16_I(1));
+ }
+ if (iPxOffset.y < FFX_MIN16_I(0)) {
+ iCoord.y = ffxMax(iCoord.y, FFX_MIN16_I(0));
+ }
+ if (iPxOffset.y > FFX_MIN16_I(0)) {
+ iCoord.y = ffxMin(iCoord.y, iTextureSize.y - FFX_MIN16_I(1));
+ }
+
+ return iCoord;
+}
+#endif //FFX_HALF
+
+
+// Declares `SampleType Name(AddrType iPxSample, AddrType iTextureSize)`, which
+// loads the 4x4 footprint around iPxSample through LoadTexture.
+// fColor<x><y> receives the texel at offset (x - 1, y - 1); every coordinate
+// goes through ClampCoord and every loaded value is converted with TextureType.
+#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \
+ SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
+ { \
+ SampleType Samples; \
+ \
+ Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \
+ Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \
+ Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \
+ Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \
+ \
+ Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \
+ Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
+ Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
+ Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \
+ \
+ Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \
+ Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
+ Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
+ Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \
+ \
+ Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \
+ Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \
+ Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \
+ Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \
+ \
+ return Samples; \
+ }
+
+// Full-precision 4x4 fetcher: FetchedBicubicSamples / FfxFloat32x4 with
+// FfxInt32x2 addressing.
+#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \
+ DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)
+
+// Min16 4x4 fetcher: FetchedBicubicSamplesMin16 / FFX_MIN16_F4, still using
+// FfxInt32x2 addressing.
+#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \
+ DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
+
+// Declares `SampleType Name(AddrType iPxSample, AddrType iTextureSize)`, which
+// loads the 2x2 footprint starting at iPxSample through LoadTexture.
+// fColor<x><y> receives the texel at offset (x, y); every coordinate goes
+// through ClampCoord and every loaded value is converted with TextureType.
+#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType,AddrType, Name, LoadTexture) \
+ SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
+ { \
+ SampleType Samples; \
+ Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
+ Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
+ Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
+ Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
+ return Samples; \
+ }
+
+// Full-precision 2x2 fetcher: FetchedBilinearSamples / FfxFloat32x4 with
+// FfxInt32x2 addressing.
+#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \
+ DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)
+
+// Min16 2x2 fetcher: FetchedBilinearSamplesMin16 / FFX_MIN16_F4, still using
+// FfxInt32x2 addressing.
+#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) \
+ DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
+
+// BE CAREFUL: there are some precision issues, and (3253, 125) leading to (3252.9989778, 125.001102)
+// is common, so iPxSample can "jitter"
+// Declares `FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`.
+// The UV is converted to texel space (scaled by iTextureSize, minus half a
+// texel), clamped, and split into a base texel (floor) and a fractional part.
+// FetchSamples loads the footprint at the base texel and InterpolateSamples
+// filters it with the fractional part.
+// The float clamp admits fPxSample == iTextureSize, which floors to a base
+// texel one past the last valid index. ClampCoord does not clamp zero-offset
+// taps, so the base texel is additionally limited to iTextureSize - 1.
+#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \
+ FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
+ { \
+ FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
+ /* Clamp base coords */ \
+ fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
+ fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
+ /* */ \
+ FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
+ /* Keep the base texel inside the texture (ClampCoord assumes it is in range) */ \
+ iPxSample.x = ffxMin(iPxSample.x, iTextureSize.x - 1); \
+ iPxSample.y = ffxMin(iPxSample.y, iTextureSize.y - 1); \
+ FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \
+ FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
+ return fColorXY; \
+ }
+
+// Declares `FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`.
+// Texel-space coordinates are computed in full precision; only the fractional
+// part handed to InterpolateSamples and the returned colour are converted to
+// the min16 types.
+// The float clamp admits fPxSample == iTextureSize, which floors to a base
+// texel one past the last valid index. ClampCoord does not clamp zero-offset
+// taps, so the base texel is additionally limited to iTextureSize - 1.
+#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \
+ FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
+ { \
+ FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
+ /* Clamp base coords */ \
+ fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
+ fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
+ /* */ \
+ FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
+ /* Keep the base texel inside the texture (ClampCoord assumes it is in range) */ \
+ iPxSample.x = ffxMin(iPxSample.x, iTextureSize.x - 1); \
+ iPxSample.y = ffxMin(iPxSample.y, iTextureSize.y - 1); \
+ FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \
+ FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
+ return fColorXY; \
+ }
+
+// Two-level token pasting so that macro arguments are expanded before being
+// concatenated.
+#define FFX_FSR2_CONCAT_ID(x, y) x ## y
+#define FFX_FSR2_CONCAT(x, y) FFX_FSR2_CONCAT_ID(x, y)
+// Sampler selection table: index 0 = Lanczos2, 1 = Lanczos2LUT, 2 = Lanczos2Approx.
+#define FFX_FSR2_SAMPLER_1D_0 Lanczos2
+#define FFX_FSR2_SAMPLER_1D_1 Lanczos2LUT
+#define FFX_FSR2_SAMPLER_1D_2 Lanczos2Approx
+
+// Expands to the sampler name registered for index x in the table above.
+#define FFX_FSR2_GET_LANCZOS_SAMPLER1D(x) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, x)
+
+#endif //!defined( FFX_FSR2_SAMPLE_H )