diff options
Diffstat (limited to 'thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h')
-rw-r--r-- | thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h | 189 |
1 files changed, 189 insertions, 0 deletions
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h new file mode 100644 index 0000000000..c63f1820e0 --- /dev/null +++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h @@ -0,0 +1,189 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +FFX_GROUPSHARED FfxUInt32 spdCounter; + +#ifndef SPD_PACKED_ONLY +FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; + +FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice) +{ + FfxFloat32x2 fUv = (tex + 0.5f + Jitter()) / RenderSize(); + fUv = ClampUv(fUv, RenderSize(), InputColorResourceDimensions()); + FfxFloat32x3 fRgb = SampleInputColor(fUv); + + fRgb /= PreExposure(); + + //compute log luma + const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, RGBToLuma(fRgb))); + + // Make sure out of screen pixels contribute no value to the end result + const FfxFloat32 result = all(FFX_LESS_THAN(tex, RenderSize())) ? fLogLuma : 0.0f; + + return FfxFloat32x4(result, 0, 0, 0); +} + +FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) +{ + return SPD_LoadMipmap5(tex); +} + +void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) +{ + if (index == LumaMipLevelToUse() || index == 5) + { + SPD_SetMipmap(pix, index, outValue.r); + } + + if (index == MipCount() - 1) { //accumulate on 1x1 level + + if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0)))) + { + FfxFloat32 prev = SPD_LoadExposureBuffer().y; + FfxFloat32 result = outValue.r; + + if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values + { + FfxFloat32 rate = 1.0f; + result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate)); + } + FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result); + SPD_SetExposureBuffer(spdOutput); + } + } +} + +void SpdIncreaseAtomicCounter(FfxUInt32 slice) +{ + SPD_IncreaseAtomicCounter(spdCounter); +} + +FfxUInt32 SpdGetAtomicCounter() +{ + return spdCounter; +} + +void SpdResetAtomicCounter(FfxUInt32 slice) +{ + SPD_ResetAtomicCounter(); +} + +FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat32x4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) +{ + return (v0 + v1 + v2 + v3) * 0.25f; +} +#endif + +// define fetch and store functions Packed +#if FFX_HALF +#error Callback must be implemented + +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16]; +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16]; + +FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice) +{ + return FfxFloat16x4(imgDst[0][FfxFloat32x3(tex, slice)]); +} +FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat16x4(imgDst6[FfxUInt32x3(p, slice)]); +} +void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice) +{ + if (index == LumaMipLevelToUse() || index == 5) + { + imgDst6[FfxUInt32x3(p, slice)] = FfxFloat32x4(value); + return; + } + imgDst[mip + 1][FfxUInt32x3(p, slice)] = FfxFloat32x4(value); +} +void SpdIncreaseAtomicCounter(FfxUInt32 slice) +{ + InterlockedAdd(rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice], 1, spdCounter); +} +FfxUInt32 SpdGetAtomicCounter() +{ + return spdCounter; +} +void SpdResetAtomicCounter(FfxUInt32 slice) +{ + rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice] = 0; +} +FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat16x4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} +void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} +FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) +{ + return (v0 + v1 + v2 + v3) * FfxFloat16(0.25); +} +#endif + +#include "ffx_spd.h" + +void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex) +{ +#if FFX_HALF + SpdDownsampleH( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#else + SpdDownsample( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#endif +}
\ No newline at end of file |