Diffstat (limited to 'servers/rendering/renderer_rd/effects')
 servers/rendering/renderer_rd/effects/SCsub             |  31
 servers/rendering/renderer_rd/effects/copy_effects.cpp  |   4
 servers/rendering/renderer_rd/effects/debug_effects.cpp |  23
 servers/rendering/renderer_rd/effects/debug_effects.h   |   8
 servers/rendering/renderer_rd/effects/fsr2.cpp          | 889
 servers/rendering/renderer_rd/effects/fsr2.h            | 199
 servers/rendering/renderer_rd/effects/taa.cpp           |  14
 servers/rendering/renderer_rd/effects/taa.h              |   1
 8 files changed, 1143 insertions(+), 26 deletions(-)
diff --git a/servers/rendering/renderer_rd/effects/SCsub b/servers/rendering/renderer_rd/effects/SCsub
index 86681f9c74..8e13715447 100644
--- a/servers/rendering/renderer_rd/effects/SCsub
+++ b/servers/rendering/renderer_rd/effects/SCsub
@@ -2,4 +2,33 @@
 
 Import("env")
 
-env.add_source_files(env.servers_sources, "*.cpp")
+env_effects = env.Clone()
+
+# Thirdparty source files
+
+thirdparty_obj = []
+
+thirdparty_dir = "#thirdparty/amd-fsr2/"
+thirdparty_sources = ["ffx_assert.cpp", "ffx_fsr2.cpp"]
+thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
+
+env_effects.Prepend(CPPPATH=[thirdparty_dir])
+
+# This flag doesn't actually control anything GCC specific in FSR2. It determines
+# if symbols should be exported, which is not required for Godot.
+env_effects.Append(CPPDEFINES=["FFX_GCC"])
+
+env_thirdparty = env_effects.Clone()
+env_thirdparty.disable_warnings()
+env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
+env.servers_sources += thirdparty_obj
+
+# Godot source files
+
+module_obj = []
+
+env_effects.add_source_files(module_obj, "*.cpp")
+env.servers_sources += module_obj
+
+# Needed to force rebuilding the module files when the thirdparty library is updated.
+env.Depends(module_obj, thirdparty_obj)
diff --git a/servers/rendering/renderer_rd/effects/copy_effects.cpp b/servers/rendering/renderer_rd/effects/copy_effects.cpp
index 834653f5c2..aa1a87cdd7 100644
--- a/servers/rendering/renderer_rd/effects/copy_effects.cpp
+++ b/servers/rendering/renderer_rd/effects/copy_effects.cpp
@@ -281,8 +281,8 @@ CopyEffects::CopyEffects(bool p_prefer_raster_effects) {
 		ba.enable_blend = true;
 		ba.src_color_blend_factor = RD::BLEND_FACTOR_ONE;
 		ba.dst_color_blend_factor = RD::BLEND_FACTOR_ONE;
-		ba.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE;
-		ba.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE;
+		ba.src_alpha_blend_factor = RD::BLEND_FACTOR_ZERO;
+		ba.dst_alpha_blend_factor = RD::BLEND_FACTOR_ZERO;
 		ba.color_blend_op = RD::BLEND_OP_ADD;
 		ba.alpha_blend_op = RD::BLEND_OP_ADD;
 
diff --git a/servers/rendering/renderer_rd/effects/debug_effects.cpp b/servers/rendering/renderer_rd/effects/debug_effects.cpp
index 357d035ae9..abcd9bbfae 100644
--- a/servers/rendering/renderer_rd/effects/debug_effects.cpp
+++ b/servers/rendering/renderer_rd/effects/debug_effects.cpp
@@ -340,25 +340,38 @@ void DebugEffects::draw_shadow_frustum(RID p_light, const Projection &p_cam_proj
 	}
 }
 
-void DebugEffects::draw_motion_vectors(RID p_velocity, RID p_dest_fb, Size2i p_velocity_size) {
+void DebugEffects::draw_motion_vectors(RID p_velocity, RID p_depth, RID p_dest_fb, const Projection &p_current_projection, const Transform3D &p_current_transform, const Projection &p_previous_projection, const Transform3D &p_previous_transform, Size2i p_resolution) {
 	MaterialStorage *material_storage = MaterialStorage::get_singleton();
 	ERR_FAIL_NULL(material_storage);
 
 	UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
 	ERR_FAIL_NULL(uniform_set_cache);
 
-	RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED);
+	RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED);
 	RD::Uniform u_source_velocity(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector<RID>({ default_sampler, p_velocity }));
+	RD::Uniform u_source_depth(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 1, Vector<RID>({ default_sampler, p_depth }));
 
 	RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_fb, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD);
 	RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, motion_vectors.pipeline.get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_fb), false, RD::get_singleton()->draw_list_get_current_pass()));
 
-	motion_vectors.push_constant.velocity_resolution[0] = p_velocity_size.width;
-	motion_vectors.push_constant.velocity_resolution[1] = p_velocity_size.height;
+	Projection reprojection = p_previous_projection.flipped_y() * p_previous_transform.affine_inverse() * p_current_transform * p_current_projection.flipped_y().inverse();
+	RendererRD::MaterialStorage::store_camera(reprojection, motion_vectors.push_constant.reprojection_matrix);
+
+	motion_vectors.push_constant.resolution[0] = p_resolution.width;
+	motion_vectors.push_constant.resolution[1] = p_resolution.height;
+	motion_vectors.push_constant.force_derive_from_depth = false;
 
 	RID shader = motion_vectors.shader.version_get_shader(motion_vectors.shader_version, 0);
-	RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_velocity), 0);
+	RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_velocity, u_source_depth), 0);
+	RD::get_singleton()->draw_list_set_push_constant(draw_list, &motion_vectors.push_constant, sizeof(MotionVectorsPushConstant));
+	RD::get_singleton()->draw_list_draw(draw_list, false, 1u, 3u);
+
+#ifdef DRAW_DERIVATION_FROM_DEPTH_ON_TOP
+	motion_vectors.push_constant.force_derive_from_depth = true;
+	RD::get_singleton()->draw_list_set_push_constant(draw_list, &motion_vectors.push_constant, sizeof(MotionVectorsPushConstant));
 	RD::get_singleton()->draw_list_draw(draw_list, false, 1u, 3u);
+#endif
+
 	RD::get_singleton()->draw_list_end();
 }
 
diff --git a/servers/rendering/renderer_rd/effects/debug_effects.h b/servers/rendering/renderer_rd/effects/debug_effects.h
index ae32d94912..b813d577e4 100644
--- a/servers/rendering/renderer_rd/effects/debug_effects.h
+++ b/servers/rendering/renderer_rd/effects/debug_effects.h
@@ -72,8 +72,10 @@ private:
 	} shadow_frustum;
 
 	struct MotionVectorsPushConstant {
-		float velocity_resolution[2];
-		float pad[2];
+		float reprojection_matrix[16];
+		float resolution[2];
+		uint32_t force_derive_from_depth;
+		float pad;
 	};
 
 	struct {
@@ -91,7 +93,7 @@ public:
 	~DebugEffects();
 
 	void draw_shadow_frustum(RID p_light, const Projection &p_cam_projection, const Transform3D &p_cam_transform, RID p_dest_fb, const Rect2 p_rect);
-	void draw_motion_vectors(RID p_velocity, RID p_dest_fb, Size2i p_velocity_size);
+	void draw_motion_vectors(RID p_velocity, RID p_depth, RID p_dest_fb, const Projection &p_current_projection, const Transform3D &p_current_transform, const Projection &p_previous_projection, const Transform3D &p_previous_transform, Size2i p_resolution);
 };
 
 } // namespace RendererRD
diff --git a/servers/rendering/renderer_rd/effects/fsr2.cpp b/servers/rendering/renderer_rd/effects/fsr2.cpp
new file mode 100644
index 0000000000..1fea1f9716
--- /dev/null
+++ b/servers/rendering/renderer_rd/effects/fsr2.cpp
@@ -0,0 +1,889 @@
+/**************************************************************************/
+/*  fsr2.cpp                                                              */
+/**************************************************************************/
+/* This file is part of:                                                  */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "fsr2.h" + +#include "../storage_rd/material_storage.h" +#include "../uniform_set_cache_rd.h" + +using namespace RendererRD; + +#ifndef _MSC_VER +#include <wchar.h> +#define wcscpy_s wcscpy +#endif + +static RD::TextureType ffx_resource_type_to_rd_texture_type(FfxResourceType p_type) { + switch (p_type) { + case FFX_RESOURCE_TYPE_TEXTURE1D: + return RD::TEXTURE_TYPE_1D; + case FFX_RESOURCE_TYPE_TEXTURE2D: + return RD::TEXTURE_TYPE_2D; + case FFX_RESOURCE_TYPE_TEXTURE3D: + return RD::TEXTURE_TYPE_3D; + default: + return RD::TEXTURE_TYPE_MAX; + } +} + +static FfxResourceType rd_texture_type_to_ffx_resource_type(RD::TextureType p_type) { + switch (p_type) { + case RD::TEXTURE_TYPE_1D: + return FFX_RESOURCE_TYPE_TEXTURE1D; + case RD::TEXTURE_TYPE_2D: + return FFX_RESOURCE_TYPE_TEXTURE2D; + case RD::TEXTURE_TYPE_3D: + return FFX_RESOURCE_TYPE_TEXTURE3D; + default: + return FFX_RESOURCE_TYPE_BUFFER; + } +} + +static RD::DataFormat ffx_surface_format_to_rd_format(FfxSurfaceFormat p_format) { + switch (p_format) { + case FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS: + return RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + case FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT: + return RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + case FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT: + return RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + case FFX_SURFACE_FORMAT_R16G16B16A16_UNORM: + return RD::DATA_FORMAT_R16G16B16A16_UNORM; + case FFX_SURFACE_FORMAT_R32G32_FLOAT: + return RD::DATA_FORMAT_R32G32_SFLOAT; + case FFX_SURFACE_FORMAT_R32_UINT: + return RD::DATA_FORMAT_R32_UINT; + case FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS: + return RD::DATA_FORMAT_R8G8B8A8_UNORM; + case FFX_SURFACE_FORMAT_R8G8B8A8_UNORM: + return RD::DATA_FORMAT_R8G8B8A8_UNORM; + case FFX_SURFACE_FORMAT_R11G11B10_FLOAT: + return RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32; + case FFX_SURFACE_FORMAT_R16G16_FLOAT: + return RD::DATA_FORMAT_R16G16_SFLOAT; + case FFX_SURFACE_FORMAT_R16G16_UINT: + return RD::DATA_FORMAT_R16G16_UINT; + case FFX_SURFACE_FORMAT_R16_FLOAT: + return RD::DATA_FORMAT_R16_SFLOAT; + case FFX_SURFACE_FORMAT_R16_UINT: + return 
RD::DATA_FORMAT_R16_UINT; + case FFX_SURFACE_FORMAT_R16_UNORM: + return RD::DATA_FORMAT_R16_UNORM; + case FFX_SURFACE_FORMAT_R16_SNORM: + return RD::DATA_FORMAT_R16_SNORM; + case FFX_SURFACE_FORMAT_R8_UNORM: + return RD::DATA_FORMAT_R8_UNORM; + case FFX_SURFACE_FORMAT_R8_UINT: + return RD::DATA_FORMAT_R8_UINT; + case FFX_SURFACE_FORMAT_R8G8_UNORM: + return RD::DATA_FORMAT_R8G8_UNORM; + case FFX_SURFACE_FORMAT_R32_FLOAT: + return RD::DATA_FORMAT_R32_SFLOAT; + default: + return RD::DATA_FORMAT_MAX; + } +} + +static FfxSurfaceFormat rd_format_to_ffx_surface_format(RD::DataFormat p_format) { + switch (p_format) { + case RD::DATA_FORMAT_R32G32B32A32_SFLOAT: + return FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT; + case RD::DATA_FORMAT_R16G16B16A16_SFLOAT: + return FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT; + case RD::DATA_FORMAT_R16G16B16A16_UNORM: + return FFX_SURFACE_FORMAT_R16G16B16A16_UNORM; + case RD::DATA_FORMAT_R32G32_SFLOAT: + return FFX_SURFACE_FORMAT_R32G32_FLOAT; + case RD::DATA_FORMAT_R32_UINT: + return FFX_SURFACE_FORMAT_R32_UINT; + case RD::DATA_FORMAT_R8G8B8A8_UNORM: + return FFX_SURFACE_FORMAT_R8G8B8A8_UNORM; + case RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32: + return FFX_SURFACE_FORMAT_R11G11B10_FLOAT; + case RD::DATA_FORMAT_R16G16_SFLOAT: + return FFX_SURFACE_FORMAT_R16G16_FLOAT; + case RD::DATA_FORMAT_R16G16_UINT: + return FFX_SURFACE_FORMAT_R16G16_UINT; + case RD::DATA_FORMAT_R16_SFLOAT: + return FFX_SURFACE_FORMAT_R16_FLOAT; + case RD::DATA_FORMAT_R16_UINT: + return FFX_SURFACE_FORMAT_R16_UINT; + case RD::DATA_FORMAT_R16_UNORM: + return FFX_SURFACE_FORMAT_R16_UNORM; + case RD::DATA_FORMAT_R16_SNORM: + return FFX_SURFACE_FORMAT_R16_SNORM; + case RD::DATA_FORMAT_R8_UNORM: + return FFX_SURFACE_FORMAT_R8_UNORM; + case RD::DATA_FORMAT_R8_UINT: + return FFX_SURFACE_FORMAT_R8_UINT; + case RD::DATA_FORMAT_R8G8_UNORM: + return FFX_SURFACE_FORMAT_R8G8_UNORM; + case RD::DATA_FORMAT_R32_SFLOAT: + return FFX_SURFACE_FORMAT_R32_FLOAT; + default: + return FFX_SURFACE_FORMAT_UNKNOWN; + } +} + +static uint32_t ffx_usage_to_rd_usage_flags(uint32_t p_flags) { + uint32_t ret = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; + + if (p_flags & FFX_RESOURCE_USAGE_RENDERTARGET) { + ret |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + } + + if (p_flags & FFX_RESOURCE_USAGE_UAV) { + ret |= RD::TEXTURE_USAGE_STORAGE_BIT; + ret |= RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + ret |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + } + + return ret; +} + +static FfxErrorCode create_backend_context_rd(FfxFsr2Interface *p_backend_interface, FfxDevice p_device) { + FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer); + + // Store pointer to the device common to all contexts. + scratch.device = p_device; + + // Create a ring buffer of uniform buffers. + // FIXME: This could be optimized to be a single memory block if it was possible for RD to create views into a particular memory range of a UBO. 
+ for (uint32_t i = 0; i < FSR2_UBO_RING_BUFFER_SIZE; i++) { + scratch.ubo_ring_buffer[i] = RD::get_singleton()->uniform_buffer_create(FFX_MAX_CONST_SIZE * sizeof(uint32_t)); + ERR_FAIL_COND_V(scratch.ubo_ring_buffer[i].is_null(), FFX_ERROR_BACKEND_API_ERROR); + } + + return FFX_OK; +} + +static FfxErrorCode get_device_capabilities_rd(FfxFsr2Interface *p_backend_interface, FfxDeviceCapabilities *p_out_device_capabilities, FfxDevice p_device) { + FSR2Effect::Device &effect_device = *reinterpret_cast<FSR2Effect::Device *>(p_device); + + *p_out_device_capabilities = effect_device.capabilities; + + return FFX_OK; +} + +static FfxErrorCode destroy_backend_context_rd(FfxFsr2Interface *p_backend_interface) { + FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer); + + for (uint32_t i = 0; i < FSR2_UBO_RING_BUFFER_SIZE; i++) { + RD::get_singleton()->free(scratch.ubo_ring_buffer[i]); + } + + return FFX_OK; +} + +static FfxErrorCode create_resource_rd(FfxFsr2Interface *p_backend_interface, const FfxCreateResourceDescription *p_create_resource_description, FfxResourceInternal *p_out_resource) { + // FSR2's base implementation won't issue a call to create a heap type that isn't just default on its own, + // so we can safely ignore it as RD does not expose this concept. + ERR_FAIL_COND_V(p_create_resource_description->heapType != FFX_HEAP_TYPE_DEFAULT, FFX_ERROR_INVALID_ARGUMENT); + + RenderingDevice *rd = RD::get_singleton(); + FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer); + FfxResourceDescription res_desc = p_create_resource_description->resourceDescription; + + // FSR2's base implementation never requests buffer creation. + ERR_FAIL_COND_V(res_desc.type != FFX_RESOURCE_TYPE_TEXTURE1D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE2D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE3D, FFX_ERROR_INVALID_ARGUMENT); + + if (res_desc.mipCount == 0) { + // Mipmap count must be derived from the resource's dimensions. + res_desc.mipCount = uint32_t(1 + floor(log2(MAX(MAX(res_desc.width, res_desc.height), res_desc.depth)))); + } + + Vector<PackedByteArray> initial_data; + if (p_create_resource_description->initDataSize) { + PackedByteArray byte_array; + byte_array.resize(p_create_resource_description->initDataSize); + memcpy(byte_array.ptrw(), p_create_resource_description->initData, p_create_resource_description->initDataSize); + initial_data.push_back(byte_array); + } + + RD::TextureFormat texture_format; + texture_format.texture_type = ffx_resource_type_to_rd_texture_type(res_desc.type); + texture_format.format = ffx_surface_format_to_rd_format(res_desc.format); + texture_format.usage_bits = ffx_usage_to_rd_usage_flags(p_create_resource_description->usage); + texture_format.width = res_desc.width; + texture_format.height = res_desc.height; + texture_format.depth = res_desc.depth; + texture_format.mipmaps = res_desc.mipCount; + + RID texture = rd->texture_create(texture_format, RD::TextureView(), initial_data); + ERR_FAIL_COND_V(texture.is_null(), FFX_ERROR_BACKEND_API_ERROR); + + rd->set_resource_name(texture, String(p_create_resource_description->name)); + + // Add the resource to the storage and use the internal index to reference it. 
+ p_out_resource->internalIndex = scratch.resources.add(texture, false, p_create_resource_description->id, res_desc); + + return FFX_OK; +} + +static FfxErrorCode register_resource_rd(FfxFsr2Interface *p_backend_interface, const FfxResource *p_in_resource, FfxResourceInternal *p_out_resource) { + if (p_in_resource->resource == nullptr) { + // Null resource case. + p_out_resource->internalIndex = -1; + return FFX_OK; + } + + FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer); + const RID &rid = *reinterpret_cast<const RID *>(p_in_resource->resource); + ERR_FAIL_COND_V(rid.is_null(), FFX_ERROR_INVALID_ARGUMENT); + + // Add the resource to the storage and use the internal index to reference it. + p_out_resource->internalIndex = scratch.resources.add(rid, true, FSR2Context::RESOURCE_ID_DYNAMIC, p_in_resource->description); + + return FFX_OK; +} + +static FfxErrorCode unregister_resources_rd(FfxFsr2Interface *p_backend_interface) { + FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer); + LocalVector<uint32_t> dynamic_list_copy = scratch.resources.dynamic_list; + for (uint32_t i : dynamic_list_copy) { + scratch.resources.remove(i); + } + + return FFX_OK; +} + +static FfxResourceDescription get_resource_description_rd(FfxFsr2Interface *p_backend_interface, FfxResourceInternal p_resource) { + if (p_resource.internalIndex != -1) { + FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer); + return scratch.resources.descriptions[p_resource.internalIndex]; + } else { + return {}; + } +} + +static FfxErrorCode destroy_resource_rd(FfxFsr2Interface *p_backend_interface, FfxResourceInternal p_resource) { + if (p_resource.internalIndex != -1) { + FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer); + if (scratch.resources.rids[p_resource.internalIndex].is_valid()) { + RD::get_singleton()->free(scratch.resources.rids[p_resource.internalIndex]); + scratch.resources.remove(p_resource.internalIndex); + } + } + + return FFX_OK; +} + +static FfxErrorCode create_pipeline_rd(FfxFsr2Interface *p_backend_interface, FfxFsr2Pass p_pass, const FfxPipelineDescription *p_pipeline_description, FfxPipelineState *p_out_pipeline) { + FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer); + FSR2Effect::Device &device = *reinterpret_cast<FSR2Effect::Device *>(scratch.device); + FSR2Effect::Pass &effect_pass = device.passes[p_pass]; + + if (effect_pass.pipeline.pipeline_rid.is_null()) { + // Create pipeline for the device if it hasn't been created yet. + effect_pass.root_signature.shader_rid = effect_pass.shader->version_get_shader(effect_pass.shader_version, effect_pass.shader_variant); + ERR_FAIL_COND_V(effect_pass.root_signature.shader_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR); + + effect_pass.pipeline.pipeline_rid = RD::get_singleton()->compute_pipeline_create(effect_pass.root_signature.shader_rid); + ERR_FAIL_COND_V(effect_pass.pipeline.pipeline_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR); + } + + // While this is not their intended use, we use the pipeline and root signature pointers to store the + // RIDs to the pipeline and shader that RD needs for the compute pipeline. 
+ p_out_pipeline->pipeline = reinterpret_cast<FfxPipeline>(&effect_pass.pipeline); + p_out_pipeline->rootSignature = reinterpret_cast<FfxRootSignature>(&effect_pass.root_signature); + + p_out_pipeline->srvCount = effect_pass.sampled_bindings.size(); + ERR_FAIL_COND_V(p_out_pipeline->srvCount > FFX_MAX_NUM_SRVS, FFX_ERROR_OUT_OF_RANGE); + memcpy(p_out_pipeline->srvResourceBindings, effect_pass.sampled_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->srvCount); + + p_out_pipeline->uavCount = effect_pass.storage_bindings.size(); + ERR_FAIL_COND_V(p_out_pipeline->uavCount > FFX_MAX_NUM_UAVS, FFX_ERROR_OUT_OF_RANGE); + memcpy(p_out_pipeline->uavResourceBindings, effect_pass.storage_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->uavCount); + + p_out_pipeline->constCount = effect_pass.uniform_bindings.size(); + ERR_FAIL_COND_V(p_out_pipeline->constCount > FFX_MAX_NUM_CONST_BUFFERS, FFX_ERROR_OUT_OF_RANGE); + memcpy(p_out_pipeline->cbResourceBindings, effect_pass.uniform_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->constCount); + + bool low_resolution_mvs = (p_pipeline_description->contextFlags & FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) == 0; + + if (p_pass == FFX_FSR2_PASS_ACCUMULATE || p_pass == FFX_FSR2_PASS_ACCUMULATE_SHARPEN) { + // Change the binding for motion vectors in this particular pass if low resolution MVs are used. + if (low_resolution_mvs) { + FfxResourceBinding &binding = p_out_pipeline->srvResourceBindings[2]; + wcscpy_s(binding.name, L"r_dilated_motion_vectors"); + } + } + + return FFX_OK; +} + +static FfxErrorCode destroy_pipeline_rd(FfxFsr2Interface *p_backend_interface, FfxPipelineState *p_pipeline) { + // We don't want to destroy pipelines when the FSR2 API deems it necessary as it'll do so whenever the context is destroyed. 
+ + return FFX_OK; +} + +static FfxErrorCode schedule_gpu_job_rd(FfxFsr2Interface *p_backend_interface, const FfxGpuJobDescription *p_job) { + ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_ARGUMENT); + ERR_FAIL_NULL_V(p_job, FFX_ERROR_INVALID_ARGUMENT); + + FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer); + scratch.gpu_jobs.push_back(*p_job); + + return FFX_OK; +} + +static FfxErrorCode execute_gpu_job_clear_float_rd(FSR2Context::Scratch &p_scratch, const FfxClearFloatJobDescription &p_job) { + RID resource = p_scratch.resources.rids[p_job.target.internalIndex]; + FfxResourceDescription &desc = p_scratch.resources.descriptions[p_job.target.internalIndex]; + + ERR_FAIL_COND_V(desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT); + + Color color(p_job.color[0], p_job.color[1], p_job.color[2], p_job.color[3]); + RD::get_singleton()->texture_clear(resource, color, 0, desc.mipCount, 0, 1); + + return FFX_OK; +} + +static FfxErrorCode execute_gpu_job_copy_rd(FSR2Context::Scratch &p_scratch, const FfxCopyJobDescription &p_job) { + RID src = p_scratch.resources.rids[p_job.src.internalIndex]; + RID dst = p_scratch.resources.rids[p_job.dst.internalIndex]; + FfxResourceDescription &src_desc = p_scratch.resources.descriptions[p_job.src.internalIndex]; + FfxResourceDescription &dst_desc = p_scratch.resources.descriptions[p_job.dst.internalIndex]; + + ERR_FAIL_COND_V(src_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT); + ERR_FAIL_COND_V(dst_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT); + + for (uint32_t mip_level = 0; mip_level < src_desc.mipCount; mip_level++) { + // Only push the barriers on the last copy. + // FIXME: This could be optimized if RenderingDevice was able to copy multiple mip levels in a single command. + BitField<RD::BarrierMask> post_barrier = (mip_level == (src_desc.mipCount - 1)) ? 
RD::BARRIER_MASK_ALL_BARRIERS : RD::BARRIER_MASK_NO_BARRIER; + RD::get_singleton()->texture_copy(src, dst, Vector3(0, 0, 0), Vector3(0, 0, 0), Vector3(src_desc.width, src_desc.height, src_desc.depth), mip_level, mip_level, 0, 0, post_barrier); + } + + return FFX_OK; +} + +static FfxErrorCode execute_gpu_job_compute_rd(FSR2Context::Scratch &p_scratch, const FfxComputeJobDescription &p_job) { + UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); + ERR_FAIL_NULL_V(uniform_set_cache, FFX_ERROR_BACKEND_API_ERROR); + + FSR2Effect::RootSignature &root_signature = *reinterpret_cast<FSR2Effect::RootSignature *>(p_job.pipeline.rootSignature); + ERR_FAIL_COND_V(root_signature.shader_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT); + + FSR2Effect::Pipeline &backend_pipeline = *reinterpret_cast<FSR2Effect::Pipeline *>(p_job.pipeline.pipeline); + ERR_FAIL_COND_V(backend_pipeline.pipeline_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT); + + Vector<RD::Uniform> compute_uniforms; + for (uint32_t i = 0; i < p_job.pipeline.srvCount; i++) { + RID texture_rid = p_scratch.resources.rids[p_job.srvs[i].internalIndex]; + RD::Uniform texture_uniform(RD::UNIFORM_TYPE_TEXTURE, p_job.pipeline.srvResourceBindings[i].slotIndex, texture_rid); + compute_uniforms.push_back(texture_uniform); + } + + for (uint32_t i = 0; i < p_job.pipeline.uavCount; i++) { + RID image_rid = p_scratch.resources.rids[p_job.uavs[i].internalIndex]; + RD::Uniform storage_uniform; + storage_uniform.uniform_type = RD::UNIFORM_TYPE_IMAGE; + storage_uniform.binding = p_job.pipeline.uavResourceBindings[i].slotIndex; + + if (p_job.uavMip[i] > 0) { + LocalVector<RID> &mip_slice_rids = p_scratch.resources.mip_slice_rids[p_job.uavs[i].internalIndex]; + if (mip_slice_rids.is_empty()) { + mip_slice_rids.resize(p_scratch.resources.descriptions[p_job.uavs[i].internalIndex].mipCount); + } + + ERR_FAIL_COND_V(p_job.uavMip[i] >= mip_slice_rids.size(), FFX_ERROR_INVALID_ARGUMENT); + + if (mip_slice_rids[p_job.uavMip[i]].is_null()) { + mip_slice_rids[p_job.uavMip[i]] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), image_rid, 0, p_job.uavMip[i]); + } + + ERR_FAIL_COND_V(mip_slice_rids[p_job.uavMip[i]].is_null(), FFX_ERROR_BACKEND_API_ERROR); + + storage_uniform.append_id(mip_slice_rids[p_job.uavMip[i]]); + } else { + storage_uniform.append_id(image_rid); + } + + compute_uniforms.push_back(storage_uniform); + } + + for (uint32_t i = 0; i < p_job.pipeline.constCount; i++) { + RID buffer_rid = p_scratch.ubo_ring_buffer[p_scratch.ubo_ring_buffer_index]; + p_scratch.ubo_ring_buffer_index = (p_scratch.ubo_ring_buffer_index + 1) % FSR2_UBO_RING_BUFFER_SIZE; + + BitField<RD::BarrierMask> post_barrier = (i == (p_job.pipeline.constCount - 1)) ? 
RD::BARRIER_MASK_ALL_BARRIERS : RD::BARRIER_MASK_NO_BARRIER; + RD::get_singleton()->buffer_update(buffer_rid, 0, p_job.cbs[i].uint32Size * sizeof(uint32_t), p_job.cbs[i].data, post_barrier); + + RD::Uniform buffer_uniform(RD::UNIFORM_TYPE_UNIFORM_BUFFER, p_job.pipeline.cbResourceBindings[i].slotIndex, buffer_rid); + compute_uniforms.push_back(buffer_uniform); + } + + FSR2Effect::Device &device = *reinterpret_cast<FSR2Effect::Device *>(p_scratch.device); + RD::Uniform u_point_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 0, device.point_clamp_sampler); + RD::Uniform u_linear_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 1, device.linear_clamp_sampler); + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, backend_pipeline.pipeline_rid); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(root_signature.shader_rid, 0, u_point_clamp_sampler, u_linear_clamp_sampler), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache_vec(root_signature.shader_rid, 1, compute_uniforms), 1); + RD::get_singleton()->compute_list_dispatch(compute_list, p_job.dimensions[0], p_job.dimensions[1], p_job.dimensions[2]); + RD::get_singleton()->compute_list_end(); + + return FFX_OK; +} + +static FfxErrorCode execute_gpu_jobs_rd(FfxFsr2Interface *p_backend_interface, FfxCommandList p_command_list) { + ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_ARGUMENT); + + FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer); + FfxErrorCode error_code = FFX_OK; + for (const FfxGpuJobDescription &job : scratch.gpu_jobs) { + switch (job.jobType) { + case FFX_GPU_JOB_CLEAR_FLOAT: { + error_code = execute_gpu_job_clear_float_rd(scratch, job.clearJobDescriptor); + } break; + case FFX_GPU_JOB_COPY: { + error_code = execute_gpu_job_copy_rd(scratch, job.copyJobDescriptor); + } break; + case FFX_GPU_JOB_COMPUTE: { + error_code = execute_gpu_job_compute_rd(scratch, job.computeJobDescriptor); + } break; + default: { + error_code = FFX_ERROR_INVALID_ARGUMENT; + } break; + } + + if (error_code != FFX_OK) { + scratch.gpu_jobs.clear(); + return error_code; + } + } + + scratch.gpu_jobs.clear(); + + return FFX_OK; +} + +static FfxResource get_resource_rd(RID *p_rid, const wchar_t *p_name) { + FfxResource res = {}; + if (p_rid->is_null()) { + return res; + } + + wcscpy_s(res.name, p_name); + + RD::TextureFormat texture_format = RD::get_singleton()->texture_get_format(*p_rid); + res.description.type = rd_texture_type_to_ffx_resource_type(texture_format.texture_type); + res.description.format = rd_format_to_ffx_surface_format(texture_format.format); + res.description.width = texture_format.width; + res.description.height = texture_format.height; + res.description.depth = texture_format.depth; + res.description.mipCount = texture_format.mipmaps; + res.description.flags = FFX_RESOURCE_FLAGS_NONE; + res.resource = reinterpret_cast<void *>(p_rid); + res.isDepth = texture_format.usage_bits & RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + return res; +} + +FSR2Context::~FSR2Context() { + ffxFsr2ContextDestroy(&fsr_context); +} + +FSR2Effect::FSR2Effect() { + FfxDeviceCapabilities &capabilities = device.capabilities; + uint64_t default_subgroup_size = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_SIZE); + capabilities.minimumSupportedShaderModel = FFX_SHADER_MODEL_5_1; + 
capabilities.waveLaneCountMin = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_MIN_SIZE); + capabilities.waveLaneCountMax = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_MAX_SIZE); + capabilities.fp16Supported = RD::get_singleton()->has_feature(RD::Features::SUPPORTS_FSR_HALF_FLOAT); + capabilities.raytracingSupported = false; + + bool force_wave_64 = default_subgroup_size == 32 && capabilities.waveLaneCountMax == 64; + bool use_lut = force_wave_64 || default_subgroup_size == 64; + + String general_defines_base = + "\n#define FFX_GPU\n" + "\n#define FFX_GLSL 1\n" + "\n#define FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS 1\n" + "\n#define FFX_FSR2_OPTION_HDR_COLOR_INPUT 1\n" + "\n#define FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP 1\n" + "\n#define FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS 1\n"; + + if (use_lut) { + general_defines_base += "\n#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 1\n"; + } + + String general_defines = general_defines_base; + if (capabilities.fp16Supported) { + general_defines += "\n#define FFX_HALF 1\n"; + } + + Vector<String> modes; + modes.push_back(""); + + // Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and + // there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL + // files included in FSR2 and mapping the macro bindings (#define FSR2_BIND_*) to their respective implementation names. + // + // It is not guaranteed these will remain consistent at all between versions of FSR2, so it'll be necessary to keep these + // bindings up to date whenever the library is updated. In such cases, it is very likely the validation layer will throw an + // error if the bindings do not match. + + { + Pass &pass = device.passes[FFX_FSR2_PASS_DEPTH_CLIP]; + pass.shader = &shaders.depth_clip; + pass.shader->initialize(modes, general_defines); + pass.shader_version = pass.shader->version_create(); + + pass.sampled_bindings = { + FfxResourceBinding{ 0, 0, L"r_reconstructed_previous_nearest_depth" }, + FfxResourceBinding{ 1, 0, L"r_dilated_motion_vectors" }, + FfxResourceBinding{ 2, 0, L"r_dilatedDepth" }, + FfxResourceBinding{ 3, 0, L"r_reactive_mask" }, + FfxResourceBinding{ 4, 0, L"r_transparency_and_composition_mask" }, + FfxResourceBinding{ 5, 0, L"r_prepared_input_color" }, + FfxResourceBinding{ 6, 0, L"r_previous_dilated_motion_vectors" }, + FfxResourceBinding{ 7, 0, L"r_input_motion_vectors" }, + FfxResourceBinding{ 8, 0, L"r_input_color_jittered" }, + FfxResourceBinding{ 9, 0, L"r_input_depth" }, + FfxResourceBinding{ 10, 0, L"r_input_exposure" } + }; + + pass.storage_bindings = { + // FSR2_BIND_UAV_DEPTH_CLIP (11) does not point to anything. 
+ FfxResourceBinding{ 12, 0, L"rw_dilated_reactive_masks" }, + FfxResourceBinding{ 13, 0, L"rw_prepared_input_color" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 14, 0, L"cbFSR2" } + }; + } + + { + Pass &pass = device.passes[FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH]; + pass.shader = &shaders.reconstruct_previous_depth; + pass.shader->initialize(modes, general_defines); + pass.shader_version = pass.shader->version_create(); + + pass.sampled_bindings = { + FfxResourceBinding{ 0, 0, L"r_input_motion_vectors" }, + FfxResourceBinding{ 1, 0, L"r_input_depth" }, + FfxResourceBinding{ 2, 0, L"r_input_color_jittered" }, + FfxResourceBinding{ 3, 0, L"r_input_exposure" }, + FfxResourceBinding{ 4, 0, L"r_luma_history" } + }; + + pass.storage_bindings = { + FfxResourceBinding{ 5, 0, L"rw_reconstructed_previous_nearest_depth" }, + FfxResourceBinding{ 6, 0, L"rw_dilated_motion_vectors" }, + FfxResourceBinding{ 7, 0, L"rw_dilatedDepth" }, + FfxResourceBinding{ 8, 0, L"rw_prepared_input_color" }, + FfxResourceBinding{ 9, 0, L"rw_luma_history" }, + // FSR2_BIND_UAV_LUMA_INSTABILITY (10) does not point to anything. + FfxResourceBinding{ 11, 0, L"rw_lock_input_luma" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 12, 0, L"cbFSR2" } + }; + } + + { + Pass &pass = device.passes[FFX_FSR2_PASS_LOCK]; + pass.shader = &shaders.lock; + pass.shader->initialize(modes, general_defines); + pass.shader_version = pass.shader->version_create(); + + pass.sampled_bindings = { + FfxResourceBinding{ 0, 0, L"r_lock_input_luma" } + }; + + pass.storage_bindings = { + FfxResourceBinding{ 1, 0, L"rw_new_locks" }, + FfxResourceBinding{ 2, 0, L"rw_reconstructed_previous_nearest_depth" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 3, 0, L"cbFSR2" } + }; + } + + { + Vector<String> accumulate_modes; + accumulate_modes.push_back("\n"); + accumulate_modes.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n"); + + String general_defines_accumulate; + if (RD::get_singleton()->get_device_vendor_name() == "NVIDIA") { + // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput. + general_defines_accumulate = general_defines_base; + } else { + general_defines_accumulate = general_defines; + } + + Pass &pass = device.passes[FFX_FSR2_PASS_ACCUMULATE]; + pass.shader = &shaders.accumulate; + pass.shader->initialize(accumulate_modes, general_defines_accumulate); + pass.shader_version = pass.shader->version_create(); + + pass.sampled_bindings = { + FfxResourceBinding{ 0, 0, L"r_input_exposure" }, + FfxResourceBinding{ 1, 0, L"r_dilated_reactive_masks" }, + FfxResourceBinding{ 2, 0, L"r_input_motion_vectors" }, + FfxResourceBinding{ 3, 0, L"r_internal_upscaled_color" }, + FfxResourceBinding{ 4, 0, L"r_lock_status" }, + FfxResourceBinding{ 5, 0, L"r_input_depth" }, + FfxResourceBinding{ 6, 0, L"r_prepared_input_color" }, + // FSR2_BIND_SRV_LUMA_INSTABILITY(7) does not point to anything. 
+ FfxResourceBinding{ 8, 0, L"r_lanczos_lut" }, + FfxResourceBinding{ 9, 0, L"r_upsample_maximum_bias_lut" }, + FfxResourceBinding{ 10, 0, L"r_imgMips" }, + FfxResourceBinding{ 11, 0, L"r_auto_exposure" }, + FfxResourceBinding{ 12, 0, L"r_luma_history" } + }; + + pass.storage_bindings = { + FfxResourceBinding{ 13, 0, L"rw_internal_upscaled_color" }, + FfxResourceBinding{ 14, 0, L"rw_lock_status" }, + FfxResourceBinding{ 15, 0, L"rw_upscaled_output" }, + FfxResourceBinding{ 16, 0, L"rw_new_locks" }, + FfxResourceBinding{ 17, 0, L"rw_luma_history" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 18, 0, L"cbFSR2" } + }; + + // Sharpen pass is a clone of the accumulate pass. + Pass &sharpen_pass = device.passes[FFX_FSR2_PASS_ACCUMULATE_SHARPEN]; + sharpen_pass = pass; + sharpen_pass.shader_variant = 1; + } + + { + Pass &pass = device.passes[FFX_FSR2_PASS_RCAS]; + pass.shader = &shaders.rcas; + pass.shader->initialize(modes, general_defines_base); + pass.shader_version = pass.shader->version_create(); + + pass.sampled_bindings = { + FfxResourceBinding{ 0, 0, L"r_input_exposure" }, + FfxResourceBinding{ 1, 0, L"r_rcas_input" } + }; + + pass.storage_bindings = { + FfxResourceBinding{ 2, 0, L"rw_upscaled_output" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 3, 0, L"cbFSR2" }, + FfxResourceBinding{ 4, 0, L"cbRCAS" } + }; + } + + { + Pass &pass = device.passes[FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID]; + pass.shader = &shaders.compute_luminance_pyramid; + pass.shader->initialize(modes, general_defines_base); + pass.shader_version = pass.shader->version_create(); + + pass.sampled_bindings = { + FfxResourceBinding{ 0, 0, L"r_input_color_jittered" } + }; + + pass.storage_bindings = { + FfxResourceBinding{ 1, 0, L"rw_spd_global_atomic" }, + FfxResourceBinding{ 2, 0, L"rw_img_mip_shading_change" }, + FfxResourceBinding{ 3, 0, L"rw_img_mip_5" }, + FfxResourceBinding{ 4, 0, L"rw_auto_exposure" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 5, 0, L"cbFSR2" }, + FfxResourceBinding{ 6, 0, L"cbSPD" } + }; + } + + { + Pass &pass = device.passes[FFX_FSR2_PASS_GENERATE_REACTIVE]; + pass.shader = &shaders.autogen_reactive; + pass.shader->initialize(modes, general_defines); + pass.shader_version = pass.shader->version_create(); + + pass.sampled_bindings = { + FfxResourceBinding{ 0, 0, L"r_input_opaque_only" }, + FfxResourceBinding{ 1, 0, L"r_input_color_jittered" } + }; + + pass.storage_bindings = { + FfxResourceBinding{ 2, 0, L"rw_output_autoreactive" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 3, 0, L"cbGenerateReactive" }, + FfxResourceBinding{ 4, 0, L"cbFSR2" } + }; + } + + { + Pass &pass = device.passes[FFX_FSR2_PASS_TCR_AUTOGENERATE]; + pass.shader = &shaders.tcr_autogen; + pass.shader->initialize(modes, general_defines); + pass.shader_version = pass.shader->version_create(); + + pass.sampled_bindings = { + FfxResourceBinding{ 0, 0, L"r_input_opaque_only" }, + FfxResourceBinding{ 1, 0, L"r_input_color_jittered" }, + FfxResourceBinding{ 2, 0, L"r_input_motion_vectors" }, + FfxResourceBinding{ 3, 0, L"r_input_prev_color_pre_alpha" }, + FfxResourceBinding{ 4, 0, L"r_input_prev_color_post_alpha" }, + FfxResourceBinding{ 5, 0, L"r_reactive_mask" }, + FfxResourceBinding{ 6, 0, L"r_transparency_and_composition_mask" }, + FfxResourceBinding{ 13, 0, L"r_input_depth" } + }; + + pass.storage_bindings = { + FfxResourceBinding{ 7, 0, L"rw_output_autoreactive" }, + FfxResourceBinding{ 8, 0, L"rw_output_autocomposition" }, + FfxResourceBinding{ 9, 0, 
L"rw_output_prev_color_pre_alpha" }, + FfxResourceBinding{ 10, 0, L"rw_output_prev_color_post_alpha" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 11, 0, L"cbFSR2" }, + FfxResourceBinding{ 12, 0, L"cbGenerateReactive" } + }; + } + + RD::SamplerState state; + state.mag_filter = RD::SAMPLER_FILTER_NEAREST; + state.min_filter = RD::SAMPLER_FILTER_NEAREST; + state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; + state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; + state.repeat_w = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; + state.min_lod = -1000.0f; + state.max_lod = 1000.0f; + state.anisotropy_max = 1.0; + device.point_clamp_sampler = RD::get_singleton()->sampler_create(state); + ERR_FAIL_COND(device.point_clamp_sampler.is_null()); + + state.mag_filter = RD::SAMPLER_FILTER_LINEAR; + state.min_filter = RD::SAMPLER_FILTER_LINEAR; + device.linear_clamp_sampler = RD::get_singleton()->sampler_create(state); + ERR_FAIL_COND(device.linear_clamp_sampler.is_null()); +} + +FSR2Effect::~FSR2Effect() { + RD::get_singleton()->free(device.point_clamp_sampler); + RD::get_singleton()->free(device.linear_clamp_sampler); + + for (uint32_t i = 0; i < FFX_FSR2_PASS_COUNT; i++) { + RD::get_singleton()->free(device.passes[i].pipeline.pipeline_rid); + device.passes[i].shader->version_free(device.passes[i].shader_version); + } +} + +FSR2Context *FSR2Effect::create_context(Size2i p_internal_size, Size2i p_target_size) { + FSR2Context *context = memnew(RendererRD::FSR2Context); + context->fsr_desc.flags = FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE; + context->fsr_desc.maxRenderSize.width = p_internal_size.x; + context->fsr_desc.maxRenderSize.height = p_internal_size.y; + context->fsr_desc.displaySize.width = p_target_size.x; + context->fsr_desc.displaySize.height = p_target_size.y; + context->fsr_desc.device = &device; + + FfxFsr2Interface &functions = context->fsr_desc.callbacks; + functions.fpCreateBackendContext = create_backend_context_rd; + functions.fpGetDeviceCapabilities = get_device_capabilities_rd; + functions.fpDestroyBackendContext = destroy_backend_context_rd; + functions.fpCreateResource = create_resource_rd; + functions.fpRegisterResource = register_resource_rd; + functions.fpUnregisterResources = unregister_resources_rd; + functions.fpGetResourceDescription = get_resource_description_rd; + functions.fpDestroyResource = destroy_resource_rd; + functions.fpCreatePipeline = create_pipeline_rd; + functions.fpDestroyPipeline = destroy_pipeline_rd; + functions.fpScheduleGpuJob = schedule_gpu_job_rd; + functions.fpExecuteGpuJobs = execute_gpu_jobs_rd; + functions.scratchBuffer = &context->scratch; + functions.scratchBufferSize = sizeof(context->scratch); + + FfxErrorCode result = ffxFsr2ContextCreate(&context->fsr_context, &context->fsr_desc); + if (result == FFX_OK) { + return context; + } else { + memdelete(context); + return nullptr; + } +} + +void FSR2Effect::upscale(const Parameters &p_params) { + // TODO: Transparency & Composition mask is not implemented. 
+ FfxFsr2DispatchDescription dispatch_desc = {}; + RID color = p_params.color; + RID depth = p_params.depth; + RID velocity = p_params.velocity; + RID reactive = p_params.reactive; + RID exposure = p_params.exposure; + RID output = p_params.output; + dispatch_desc.commandList = nullptr; + dispatch_desc.color = get_resource_rd(&color, L"color"); + dispatch_desc.depth = get_resource_rd(&depth, L"depth"); + dispatch_desc.motionVectors = get_resource_rd(&velocity, L"velocity"); + dispatch_desc.reactive = get_resource_rd(&reactive, L"reactive"); + dispatch_desc.exposure = get_resource_rd(&exposure, L"exposure"); + dispatch_desc.transparencyAndComposition = {}; + dispatch_desc.output = get_resource_rd(&output, L"output"); + dispatch_desc.colorOpaqueOnly = {}; + dispatch_desc.jitterOffset.x = p_params.jitter.x; + dispatch_desc.jitterOffset.y = p_params.jitter.y; + dispatch_desc.motionVectorScale.x = float(p_params.internal_size.width); + dispatch_desc.motionVectorScale.y = float(p_params.internal_size.height); + dispatch_desc.reset = p_params.reset_accumulation; + dispatch_desc.renderSize.width = p_params.internal_size.width; + dispatch_desc.renderSize.height = p_params.internal_size.height; + dispatch_desc.enableSharpening = (p_params.sharpness > 1e-6f); + dispatch_desc.sharpness = p_params.sharpness; + dispatch_desc.frameTimeDelta = p_params.delta_time; + dispatch_desc.preExposure = 1.0f; + dispatch_desc.cameraNear = p_params.z_near; + dispatch_desc.cameraFar = p_params.z_far; + dispatch_desc.cameraFovAngleVertical = p_params.fovy; + dispatch_desc.viewSpaceToMetersFactor = 1.0f; + dispatch_desc.enableAutoReactive = false; + dispatch_desc.autoTcThreshold = 1.0f; + dispatch_desc.autoTcScale = 1.0f; + dispatch_desc.autoReactiveScale = 1.0f; + dispatch_desc.autoReactiveMax = 1.0f; + + RendererRD::MaterialStorage::store_camera(p_params.reprojection, dispatch_desc.reprojectionMatrix); + + FfxErrorCode result = ffxFsr2ContextDispatch(&p_params.context->fsr_context, &dispatch_desc); + ERR_FAIL_COND(result != FFX_OK); +} diff --git a/servers/rendering/renderer_rd/effects/fsr2.h b/servers/rendering/renderer_rd/effects/fsr2.h new file mode 100644 index 0000000000..789714cc77 --- /dev/null +++ b/servers/rendering/renderer_rd/effects/fsr2.h @@ -0,0 +1,199 @@ +/**************************************************************************/ +/* fsr2.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef FSR2_RD_H +#define FSR2_RD_H + +#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_accumulate_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_autogen_reactive_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_compute_luminance_pyramid_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_depth_clip_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_lock_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_rcas_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_reconstruct_previous_depth_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_tcr_autogen_pass.glsl.gen.h" + +// This flag doesn't actually control anything GCC specific in FSR2. It determines +// if symbols should be exported, which is not required for Godot. +#ifndef FFX_GCC +#define FFX_GCC +#endif + +#include "thirdparty/amd-fsr2/ffx_fsr2.h" + +#define FSR2_MAX_QUEUED_FRAMES (4) +#define FSR2_MAX_UNIFORM_BUFFERS (4) +#define FSR2_MAX_BUFFERED_DESCRIPTORS (FFX_FSR2_PASS_COUNT * FSR2_MAX_QUEUED_FRAMES) +#define FSR2_UBO_RING_BUFFER_SIZE (FSR2_MAX_BUFFERED_DESCRIPTORS * FSR2_MAX_UNIFORM_BUFFERS) + +namespace RendererRD { +class FSR2Context { +public: + enum ResourceID : uint32_t { + RESOURCE_ID_DYNAMIC = 0xFFFFFFFF + }; + + struct Resources { + LocalVector<RID> rids; + LocalVector<LocalVector<RID>> mip_slice_rids; + LocalVector<uint32_t> ids; + LocalVector<FfxResourceDescription> descriptions; + LocalVector<uint32_t> dynamic_list; + LocalVector<uint32_t> free_list; + + uint32_t add(RID p_rid, bool p_dynamic, uint32_t p_id, FfxResourceDescription p_description) { + uint32_t ret_index; + if (free_list.is_empty()) { + ret_index = rids.size(); + uint32_t new_size = ret_index + 1; + rids.resize(new_size); + mip_slice_rids.resize(new_size); + ids.resize(new_size); + descriptions.resize(new_size); + } else { + uint32_t end_index = free_list.size() - 1; + ret_index = free_list[end_index]; + free_list.resize(end_index); + } + + rids[ret_index] = p_rid; + mip_slice_rids[ret_index].clear(); + ids[ret_index] = p_id; + descriptions[ret_index] = p_description; + + if (p_dynamic) { + dynamic_list.push_back(ret_index); + } + + return ret_index; + } + + void remove(uint32_t p_index) { + DEV_ASSERT(p_index < rids.size()); + free_list.push_back(p_index); + rids[p_index] = RID(); + mip_slice_rids[p_index].clear(); + ids[p_index] = 0; + descriptions[p_index] = {}; + dynamic_list.erase(p_index); + } + + uint32_t size() const { + return rids.size(); + } + }; + + struct Scratch { + Resources resources; + LocalVector<FfxGpuJobDescription> gpu_jobs; + RID ubo_ring_buffer[FSR2_UBO_RING_BUFFER_SIZE]; + uint32_t ubo_ring_buffer_index = 0; + FfxDevice device = nullptr; + }; + + Scratch scratch; + FfxFsr2Context fsr_context; 
+	FfxFsr2ContextDescription fsr_desc;
+
+	~FSR2Context();
+};
+
+class FSR2Effect {
+public:
+	struct RootSignature {
+		// Proxy structure to store the shader required by RD that uses the terminology used by the FSR2 API.
+		RID shader_rid;
+	};
+
+	struct Pipeline {
+		RID pipeline_rid;
+	};
+
+	struct Pass {
+		ShaderRD *shader;
+		RID shader_version;
+		RootSignature root_signature;
+		uint32_t shader_variant = 0;
+		Pipeline pipeline;
+		Vector<FfxResourceBinding> sampled_bindings;
+		Vector<FfxResourceBinding> storage_bindings;
+		Vector<FfxResourceBinding> uniform_bindings;
+	};
+
+	struct Device {
+		Pass passes[FFX_FSR2_PASS_COUNT];
+		FfxDeviceCapabilities capabilities;
+		RID point_clamp_sampler;
+		RID linear_clamp_sampler;
+	};
+
+	struct Parameters {
+		FSR2Context *context;
+		Size2i internal_size;
+		RID color;
+		RID depth;
+		RID velocity;
+		RID reactive;
+		RID exposure;
+		RID output;
+		float z_near = 0.0f;
+		float z_far = 0.0f;
+		float fovy = 0.0f;
+		Vector2 jitter;
+		float delta_time = 0.0f;
+		float sharpness = 0.0f;
+		bool reset_accumulation = false;
+		Projection reprojection;
+	};
+
+	FSR2Effect();
+	~FSR2Effect();
+	FSR2Context *create_context(Size2i p_internal_size, Size2i p_target_size);
+	void upscale(const Parameters &p_params);
+
+private:
+	struct {
+		Fsr2DepthClipPassShaderRD depth_clip;
+		Fsr2ReconstructPreviousDepthPassShaderRD reconstruct_previous_depth;
+		Fsr2LockPassShaderRD lock;
+		Fsr2AccumulatePassShaderRD accumulate;
+		Fsr2AccumulatePassShaderRD accumulate_sharpen;
+		Fsr2RcasPassShaderRD rcas;
+		Fsr2ComputeLuminancePyramidPassShaderRD compute_luminance_pyramid;
+		Fsr2AutogenReactivePassShaderRD autogen_reactive;
+		Fsr2TcrAutogenPassShaderRD tcr_autogen;
+	} shaders;
+
+	Device device;
+};
+
+} // namespace RendererRD
+
+#endif // FSR2_RD_H
diff --git a/servers/rendering/renderer_rd/effects/taa.cpp b/servers/rendering/renderer_rd/effects/taa.cpp
index 61e0d3866c..c1037ec11a 100644
--- a/servers/rendering/renderer_rd/effects/taa.cpp
+++ b/servers/rendering/renderer_rd/effects/taa.cpp
@@ -47,20 +47,6 @@ TAA::~TAA() {
 	taa_shader.version_free(shader_version);
 }
 
-void TAA::msaa_resolve(Ref<RenderSceneBuffersRD> p_render_buffers) {
-	if (!p_render_buffers->has_velocity_buffer(true)) {
-		// nothing to resolve
-		return;
-	}
-
-	for (uint32_t v = 0; v < p_render_buffers->get_view_count(); v++) {
-		RID velocity_buffer_msaa = p_render_buffers->get_velocity_buffer(true, v);
-		RID velocity_buffer = p_render_buffers->get_velocity_buffer(false, v);
-
-		RD::get_singleton()->texture_resolve_multisample(velocity_buffer_msaa, velocity_buffer);
-	}
-}
-
 void TAA::resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_prev_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far) {
 	UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
 	ERR_FAIL_NULL(uniform_set_cache);
diff --git a/servers/rendering/renderer_rd/effects/taa.h b/servers/rendering/renderer_rd/effects/taa.h
index 9e7ad76fb8..f50e297fe5 100644
--- a/servers/rendering/renderer_rd/effects/taa.h
+++ b/servers/rendering/renderer_rd/effects/taa.h
@@ -45,7 +45,6 @@ public:
 	TAA();
 	~TAA();
 
-	void msaa_resolve(Ref<RenderSceneBuffersRD> p_render_buffers);
 	void process(Ref<RenderSceneBuffersRD> p_render_buffers, RD::DataFormat p_format, float p_z_near, float p_z_far);
 
 private: