summaryrefslogtreecommitdiffstats
path: root/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h
blob: fa4c975a23fddcabba65eb93820a05e48882ea31 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#ifndef FFX_FSR2_DEPTH_CLIP_H
#define FFX_FSR2_DEPTH_CLIP_H

// Scalar constant named as a base scale for depth clipping.
// NOTE(review): no function visible in this header reads it; presumably it is
// consumed by another FSR2 pass or kept for compatibility — confirm before removing.
FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f;

// Computes a depth-clip factor by comparing the current depth against the
// reconstructed previous-frame depth around a UV position.
//
// fUvSample            UV position whose four bilinear taps (as described by
//                      GetBilinearSamplingData) are examined.
// fCurrentDepthSample  Depth value for the current sample, in the encoding accepted
//                      by GetViewSpaceDepth().
//
// For every on-screen tap whose bilinear weight exceeds
// fReconstructedDepthBilinearWeightThreshold and whose view-space depth is smaller
// than the current view-space depth, the ratio between a required depth separation
// and the actual depth difference is raised to a resolution-dependent power and
// accumulated with the tap's weight.
//
// Returns saturate(1 - weightedAverage) when at least one tap contributed, and 0
// otherwise. The result approaches 1 as the depth difference grows beyond the
// required separation.
FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample)
{
    FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample);
    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize());

    // Loop-invariant terms, hoisted out of the tap loop. They depend only on
    // RenderSize(), which is assumed to return the same value for the whole call.
    const FfxFloat32 Ksep = 1.37e-05f;
    const FfxFloat32 fRenderSizeLength = length(FfxFloat32x2(RenderSize()));
    const FfxFloat32 fResolutionFactor = ffxSaturate(fRenderSizeLength / length(FfxFloat32x2(1920.0f, 1080.0f)));
    // Exponent goes from 1 to 3 as the render size approaches 1920x1080 (and stays at 3 beyond it).
    const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor);

    FfxFloat32 fDepth = 0.0f;
    FfxFloat32 fWeightSum = 0.0f;
    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {

        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
        const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;

        if (IsOnScreen(iSamplePos, RenderSize())) {
            const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
            if (fWeight > fReconstructedDepthBilinearWeightThreshold) {

                const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos);
                const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample);

                const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;

                // Only taps with a strictly positive difference contribute; this also
                // keeps the division by fDepthDiff below away from zero.
                if (fDepthDiff > 0.0f) {

#if FFX_FSR2_OPTION_INVERTED_DEPTH
                    const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample);
#else
                    const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample);
#endif

                    // View-space positions of the screen centre and the (0, 0) corner at the chosen depth.
                    const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth);
                    const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth);

                    const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);

                    // Ratio of corner distance to centre distance.
                    const FfxFloat32 Kfov = length(fCorner) / length(fCenter);
                    const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fRenderSizeLength * fDepthThreshold;

                    fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight;
                    fWeightSum += fWeight;
                }
            }
        }
    }

    return (fWeightSum > 0) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f;
}

// Measures how much the input motion vectors in the 3x3 neighbourhood of iPxPos
// disagree with the motion vector at iPxPos itself.
//
// iPxPos                    Position at which the centre motion vector is loaded.
// iPxInputMotionVectorSize  Size passed to ClampLoad() to keep neighbour taps in range.
//
// The neighbourhood is only inspected when the centre motion, scaled by
// RenderSize(), is longer than a small epsilon. Each tap and the centre vector are
// divided by the largest motion-vector length seen so far (a running maximum, so
// the traversal order matters), and the smallest dot product is kept.
//
// Returns saturate(1 - smallestDot) * saturate(largestLength / 0.01).
FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize)
{
    const FfxFloat32 fVelocityEpsilonLr = 1e-02f;

    FfxFloat32x2 fCenterMv = LoadInputMotionVector(iPxPos);
    FfxFloat32 fCenterSpeedLr = length(fCenterMv * RenderSize());

    FfxFloat32 fLargestSpeedUv = length(fCenterMv);
    FfxFloat32 fLowestAlignment = 1.0f;

    if (fCenterSpeedLr > fVelocityEpsilonLr) {
        for (FfxInt32 iRow = -1; iRow < 2; iRow++) {
            for (FfxInt32 iCol = -1; iCol < 2; iCol++) {

                FfxInt32x2 iTapPos = ClampLoad(iPxPos, FfxInt32x2(iCol, iRow), iPxInputMotionVectorSize);
                FfxFloat32x2 fTapMv = LoadInputMotionVector(iTapPos);
                FfxFloat32 fTapSpeedUv = length(fTapMv);

                // Update the running maximum first, then normalise by it.
                fLargestSpeedUv = ffxMax(fTapSpeedUv, fLargestSpeedUv);
                FfxFloat32 fNormalizer = ffxMax(fTapSpeedUv, fLargestSpeedUv);

                FfxFloat32 fAlignment = dot(fTapMv / fNormalizer, fCenterMv / fNormalizer);
                fLowestAlignment = ffxMin(fLowestAlignment, fAlignment);
            }
        }
    }

    FfxFloat32 fDisagreement = ffxSaturate(1.0f - fLowestAlignment);
    FfxFloat32 fSpeedGate = ffxSaturate(fLargestSpeedUv / 0.01f);
    return fDisagreement * fSpeedGate;
}

// Measures the relative spread of dilated depth (converted to metres) over the
// 3x3 neighbourhood of iPxPos.
//
// Off-screen taps are multiplied by zero and therefore take part in the min/max
// with a depth of 0. If any tap's depth equals GetMaxDistanceInMeters() exactly,
// the result is forced to 0.
//
// Returns (1 - minDepth / maxDepth), or 0 when the maximum distance was hit.
FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos)
{
    const FfxFloat32 fFarLimitMeters = GetMaxDistanceInMeters();

    FfxFloat32 fNearest = fFarLimitMeters;
    FfxFloat32 fFarthest = 0.0f;
    FfxInt32 iHitFarLimit = 0;

    for (FfxInt32 iRow = -1; iRow <= 1; ++iRow) {
        for (FfxInt32 iCol = -1; iCol <= 1; ++iCol) {

            const FfxInt32x2 iTapPos = iPxPos + FfxInt32x2(iCol, iRow);

            // 1 for taps inside the render area, 0 otherwise.
            const FfxFloat32 fTapMask = IsOnScreen(iTapPos, RenderSize()) ? 1.0f : 0.0f;
            FfxFloat32 fTapDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iTapPos)) * fTapMask;

            iHitFarLimit |= FfxInt32(fFarLimitMeters == fTapDepth);

            fNearest = ffxMin(fNearest, fTapDepth);
            fFarthest = ffxMax(fFarthest, fTapDepth);
        }
    }

    const FfxFloat32 fSpread = 1.0f - fNearest / fFarthest;
    const FfxFloat32 fFarLimitGate = FfxBoolean(iHitFarLimit) ? 0.0f : 1.0f;
    return fSpread * fFarLimitGate;
}

// Compares the dilated motion vector at iPxPos with the previous frame's dilated
// motion vector sampled at the reprojected UV.
//
// iPxPos  Pixel position; its centre (iPxPos + 0.5) divided by RenderSize() is the UV.
//
// Returns 0 when the motion, scaled by DisplaySize(), is at most 1 in length.
// Otherwise returns 1 - saturate(|previous| / |current|), blended in from 0 by
// saturate((distance / 20)^3).
FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos)
{
    const FfxFloat32x2 fUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize();

    FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
    FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
    fReprojectedUv = ClampUv(fReprojectedUv, RenderSize(), MaxRenderSize());
    FfxFloat32x2 fPrevMotionVector = SamplePreviousDilatedMotionVector(fReprojectedUv);

    // Use the file's portable scalar type and a float literal in the ternary so both
    // branches have the same type.
    FfxFloat32 fPxDistance = length(fMotionVector * DisplaySize());

    // The fPxDistance > 1 guard also ensures length(fMotionVector) is non-zero
    // in the selected branch.
    return fPxDistance > 1.0f ? ffxLerp(0.0f, 1.0f - ffxSaturate(length(fPrevMotionVector) / length(fMotionVector)), ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f))) : 0.0f;
}

// Builds the dilated reactive masks for one pixel and stores them through
// StoreDilatedReactiveMasks().
//
// iPxLrPos           Pixel position whose 3x3 neighbourhood is examined.
// fMotionDivergence  Initial value for the second (transparency-and-composition)
//                    component of the stored result.
//
// The stored value is a 2-component maximum over the neighbourhood:
//   x: reactive mask samples, y: transparency-and-composition mask samples,
// where every sample is raised to a power between 1 and 7 that grows as the
// sample's colour differs more from the centre colour. The neighbourhood pass is
// skipped entirely when all mask samples sum to zero.
void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence)
{
    // Compensate for bilinear sampling in accumulation pass

    FfxFloat32x3 fReferenceColor = LoadInputColor(iPxLrPos).xyz;
    FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence);

    FfxFloat32 fMasksSum = 0.0f;

    FfxFloat32x3 fColorSamples[9];
    FfxFloat32 fReactiveSamples[9];
    FfxFloat32 fTransparencyAndCompositionSamples[9];

    // First pass: gather colour and mask samples (clamped to the render area).
    FFX_UNROLL
    for (FfxInt32 y = -1; y < 2; y++) {
        FFX_UNROLL
        for (FfxInt32 x = -1; x < 2; x++) {

            const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));

            FfxInt32 sampleIdx = (y + 1) * 3 + x + 1;

            FfxFloat32x3 fColorSample = LoadInputColor(sampleCoord).xyz;
            FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord);
            FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord);

            fColorSamples[sampleIdx] = fColorSample;
            fReactiveSamples[sampleIdx] = fReactiveSample;
            fTransparencyAndCompositionSamples[sampleIdx] = fTransparencyAndCompositionSample;

            fMasksSum += (fReactiveSample + fTransparencyAndCompositionSample);
        }
    }

    // Second pass: only needed when at least one mask sample is non-zero.
    if (fMasksSum > 0)
    {
        for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++)
        {
            FfxFloat32x3 fColorSample = fColorSamples[sampleIdx];
            FfxFloat32 fReactiveSample = fReactiveSamples[sampleIdx];
            FfxFloat32 fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx];

            const FfxFloat32 fMaxLenSq = ffxMax(dot(fReferenceColor, fReferenceColor), dot(fColorSample, fColorSample));

            // When both colours are exactly black fMaxLenSq is zero and the division
            // would yield 0/0 (NaN), which then flows through ffxPow and ffxMax.
            // Two identical (black) colours are treated as fully similar instead.
            const FfxFloat32 fSimilarity = (fMaxLenSq > 0.0f) ? (dot(fReferenceColor, fColorSample) / fMaxLenSq) : 1.0f;

            // Increase power for non-similar samples
            const FfxFloat32 fPowerBiasMax = 6.0f;
            const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax);
            const FfxFloat32 fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower);
            const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower);

            fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample));
        }
    }

    StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor);
}

// Loads the input colour at iPxLrPos, clamps negative components to zero, applies
// PrepareRgb() with the current exposure and pre-exposure, and returns the result
// converted by RGBToYCoCg().
//
// The input is assumed to be linear. Non-linear input (sRGB, ...) should be
// converted to linear beforehand and back again on output.
FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos)
{
    FfxFloat32x3 fLinearRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));
    fLinearRgb = PrepareRgb(fLinearRgb, Exposure(), PreExposure());

    return RGBToYCoCg(fLinearRgb);
}

// Inspects the reconstructed previous depth (in view space) at iPxPos and at its
// two vertical neighbours (y - 1 and y + 1).
//
// Returns 0 when the depth drops by more than 1% of the later sample at both steps
// (y-1 -> y and y -> y+1), and 1 otherwise.
//
// fMotionVector is accepted for interface compatibility but is not used here.
FfxFloat32 EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector)
{
    FfxFloat32 fDepthRowBefore = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1)));
    FfxFloat32 fDepthRowCenter = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0)));
    FfxFloat32 fDepthRowAfter = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1)));

    if (((fDepthRowBefore - fDepthRowCenter) > (fDepthRowCenter * 0.01f)) && ((fDepthRowCenter - fDepthRowAfter) > (fDepthRowAfter * 0.01f))) {
        return 0.0f;
    }

    return 1.0f;
}

// Per-pixel entry point of this header.
//
// iPxPos  Pixel position to process.
//
// Performs two stores:
//   1. StorePreparedInputColor(): the prepared YCoCg colour in xyz and the
//      depth-clip factor (ComputeDepthClip * EvaluateSurface) in w.
//   2. StoreDilatedReactiveMasks() via PreProcessReactiveMasks(), seeded with the
//      larger of the spatial motion divergence and the temporal motion divergence
//      reduced by the depth divergence.
void DepthClip(FfxInt32x2 iPxPos)
{
    FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize();
    FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);

    // Discard tiny mvs
    fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f);

    const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector;
    const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos);
    // NOTE(review): the previous unused GetViewSpaceDepth(LoadInputDepth(iPxPos))
    // local was removed as dead code; this assumes both helpers are side-effect free.

    // Compute prepared input color and depth clip
    FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector);
    FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos);
    StorePreparedInputColor(iPxPos, FfxFloat32x4(fPreparedYCoCg, fDepthClip));

    // Compute dilated reactive mask
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
    FfxInt32x2 iSamplePos = iPxPos;
#else
    // Input motion vectors are addressed through ComputeHrPosFromLrPos() in this configuration.
    FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos);
#endif

    FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize());
    FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos));

    PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence));
}

#endif //!defined( FFX_FSR2_DEPTH_CLIP_H )