diff options
Diffstat (limited to 'drivers/d3d12/rendering_device_driver_d3d12.cpp')
-rw-r--r-- | drivers/d3d12/rendering_device_driver_d3d12.cpp | 199 |
1 files changed, 106 insertions, 93 deletions
diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index fb278a4d56..a445006058 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -36,6 +36,7 @@ #include "thirdparty/zlib/zlib.h" #include "d3d12_godot_nir_bridge.h" +#include "dxil_hash.h" #include "rendering_context_driver_d3d12.h" // No point in fighting warnings in Mesa. @@ -51,9 +52,14 @@ #pragma GCC diagnostic ignored "-Wshadow" #pragma GCC diagnostic ignored "-Wswitch" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#elif defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#pragma clang diagnostic ignored "-Wstring-plus-int" +#pragma clang diagnostic ignored "-Wswitch" +#pragma clang diagnostic ignored "-Wmissing-field-initializers" #endif -#include "dxil_validator.h" #include "nir_spirv.h" #include "nir_to_dxil.h" #include "spirv_to_dxil.h" @@ -63,6 +69,8 @@ extern "C" { #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop +#elif defined(__clang__) +#pragma clang diagnostic pop #endif #if defined(_MSC_VER) @@ -96,11 +104,6 @@ static const D3D12_RANGE VOID_RANGE = {}; static const uint32_t ROOT_CONSTANT_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RDD::MAX_UNIFORM_SETS + 1); static const uint32_t RUNTIME_DATA_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RDD::MAX_UNIFORM_SETS + 2); -#ifdef DEV_ENABLED -//#define DEBUG_COUNT_BARRIERS -#define CUSTOM_INFO_QUEUE_ENABLED 0 -#endif - /*****************/ /**** GENERIC ****/ /*****************/ @@ -869,6 +872,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel D3D12MA::ALLOCATION_DESC allocation_desc = {}; allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; + D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COMMON; switch (p_allocation_type) { case MEMORY_ALLOCATION_TYPE_CPU: { bool is_src = p_usage.has_flag(BUFFER_USAGE_TRANSFER_FROM_BIT); @@ -876,10 +880,12 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel if (is_src && !is_dst) { // Looks like a staging buffer: CPU maps, writes sequentially, then GPU copies to VRAM. allocation_desc.HeapType = D3D12_HEAP_TYPE_UPLOAD; + initial_state = D3D12_RESOURCE_STATE_GENERIC_READ; } if (is_dst && !is_src) { // Looks like a readback buffer: GPU copies from VRAM, then CPU maps and reads. allocation_desc.HeapType = D3D12_HEAP_TYPE_READBACK; + initial_state = D3D12_RESOURCE_STATE_COPY_DEST; } } break; case MEMORY_ALLOCATION_TYPE_GPU: { @@ -908,7 +914,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel res = allocator->CreateResource( &allocation_desc, reinterpret_cast<const D3D12_RESOURCE_DESC *>(&resource_desc), - D3D12_RESOURCE_STATE_COMMON, + initial_state, nullptr, allocation.GetAddressOf(), IID_PPV_ARGS(buffer.GetAddressOf())); @@ -922,7 +928,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel buf_info->resource = buffer.Get(); buf_info->owner_info.resource = buffer; buf_info->owner_info.allocation = allocation; - buf_info->owner_info.states.subresource_states.push_back(D3D12_RESOURCE_STATE_COMMON); + buf_info->owner_info.states.subresource_states.push_back(initial_state); buf_info->states_ptr = &buf_info->owner_info.states; buf_info->size = p_size; buf_info->flags.usable_as_uav = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); @@ -1462,7 +1468,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex uav_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format; } - if (p_slice_type != -1) { + if (p_slice_type != (TextureSliceType)-1) { // Complete description with slicing. switch (p_slice_type) { @@ -1560,7 +1566,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex tex_info->states_ptr = owner_tex_info->states_ptr; tex_info->format = p_view.format; tex_info->desc = new_tex_resource_desc; - if (p_slice_type == -1) { + if (p_slice_type == (TextureSliceType)-1) { tex_info->base_layer = owner_tex_info->base_layer; tex_info->layers = owner_tex_info->layers; tex_info->base_mip = owner_tex_info->base_mip; @@ -1741,7 +1747,7 @@ RDD::SamplerID RenderingDeviceDriverD3D12::sampler_create(const SamplerState &p_ slot = 1; } else { for (uint32_t i = 1; i < samplers.size(); i++) { - if (samplers[i].Filter == INT_MAX) { + if ((int)samplers[i].Filter == INT_MAX) { slot = i; break; } @@ -2137,33 +2143,59 @@ void RenderingDeviceDriverD3D12::command_pipeline_barrier(CommandBufferID p_cmd_ for (uint32_t i = 0; i < p_texture_barriers.size(); i++) { const TextureBarrier &texture_barrier_rd = p_texture_barriers[i]; const TextureInfo *texture_info = (const TextureInfo *)(texture_barrier_rd.texture.id); + if (texture_info->main_texture) { + texture_info = texture_info->main_texture; + } _rd_stages_and_access_to_d3d12(p_src_stages, texture_barrier_rd.prev_layout, texture_barrier_rd.src_access, texture_barrier_d3d12.SyncBefore, texture_barrier_d3d12.AccessBefore); _rd_stages_and_access_to_d3d12(p_dst_stages, texture_barrier_rd.next_layout, texture_barrier_rd.dst_access, texture_barrier_d3d12.SyncAfter, texture_barrier_d3d12.AccessAfter); texture_barrier_d3d12.LayoutBefore = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.prev_layout); texture_barrier_d3d12.LayoutAfter = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.next_layout); texture_barrier_d3d12.pResource = texture_info->resource; - texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = texture_barrier_rd.subresources.base_mipmap; - texture_barrier_d3d12.Subresources.NumMipLevels = texture_barrier_rd.subresources.mipmap_count; - texture_barrier_d3d12.Subresources.FirstArraySlice = texture_barrier_rd.subresources.base_layer; - texture_barrier_d3d12.Subresources.NumArraySlices = texture_barrier_rd.subresources.layer_count; - texture_barrier_d3d12.Subresources.FirstPlane = _compute_plane_slice(texture_info->format, texture_barrier_rd.subresources.aspect); - texture_barrier_d3d12.Subresources.NumPlanes = format_get_plane_count(texture_info->format); + if (texture_barrier_rd.subresources.mipmap_count == texture_info->mipmaps && texture_barrier_rd.subresources.layer_count == texture_info->layers) { + // So, all resources. Then, let's be explicit about it so D3D12 doesn't think + // we are dealing with a subset of subresources. + texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = 0xffffffff; + texture_barrier_d3d12.Subresources.NumMipLevels = 0; + // Because NumMipLevels == 0, all the other fields are ignored by D3D12. + } else { + texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = texture_barrier_rd.subresources.base_mipmap; + texture_barrier_d3d12.Subresources.NumMipLevels = texture_barrier_rd.subresources.mipmap_count; + texture_barrier_d3d12.Subresources.FirstArraySlice = texture_barrier_rd.subresources.base_layer; + texture_barrier_d3d12.Subresources.NumArraySlices = texture_barrier_rd.subresources.layer_count; + texture_barrier_d3d12.Subresources.FirstPlane = _compute_plane_slice(texture_info->format, texture_barrier_rd.subresources.aspect); + texture_barrier_d3d12.Subresources.NumPlanes = format_get_plane_count(texture_info->format); + } texture_barrier_d3d12.Flags = (texture_barrier_rd.prev_layout == RDD::TEXTURE_LAYOUT_UNDEFINED) ? D3D12_TEXTURE_BARRIER_FLAG_DISCARD : D3D12_TEXTURE_BARRIER_FLAG_NONE; texture_barriers.push_back(texture_barrier_d3d12); } // Define the barrier groups and execute. + D3D12_BARRIER_GROUP barrier_groups[3] = {}; - barrier_groups[0].Type = D3D12_BARRIER_TYPE_GLOBAL; - barrier_groups[1].Type = D3D12_BARRIER_TYPE_BUFFER; - barrier_groups[2].Type = D3D12_BARRIER_TYPE_TEXTURE; - barrier_groups[0].NumBarriers = global_barriers.size(); - barrier_groups[1].NumBarriers = buffer_barriers.size(); - barrier_groups[2].NumBarriers = texture_barriers.size(); - barrier_groups[0].pGlobalBarriers = global_barriers.ptr(); - barrier_groups[1].pBufferBarriers = buffer_barriers.ptr(); - barrier_groups[2].pTextureBarriers = texture_barriers.ptr(); - cmd_list_7->Barrier(ARRAY_SIZE(barrier_groups), barrier_groups); + uint32_t barrier_groups_count = 0; + + if (!global_barriers.is_empty()) { + D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_groups_count++]; + barrier_group.Type = D3D12_BARRIER_TYPE_GLOBAL; + barrier_group.NumBarriers = global_barriers.size(); + barrier_group.pGlobalBarriers = global_barriers.ptr(); + } + + if (!buffer_barriers.is_empty()) { + D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_groups_count++]; + barrier_group.Type = D3D12_BARRIER_TYPE_BUFFER; + barrier_group.NumBarriers = buffer_barriers.size(); + barrier_group.pBufferBarriers = buffer_barriers.ptr(); + } + + if (!texture_barriers.is_empty()) { + D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_groups_count++]; + barrier_group.Type = D3D12_BARRIER_TYPE_TEXTURE; + barrier_group.NumBarriers = texture_barriers.size(); + barrier_group.pTextureBarriers = texture_barriers.ptr(); + } + + cmd_list_7->Barrier(barrier_groups_count, barrier_groups); } /****************/ @@ -2703,6 +2735,8 @@ D3D12_UNORDERED_ACCESS_VIEW_DESC RenderingDeviceDriverD3D12::_make_ranged_uav_fo uav_desc.Texture3D.MipSlice = mip; uav_desc.Texture3D.WSize >>= p_mipmap_offset; } break; + default: + break; } return uav_desc; @@ -2859,23 +2893,6 @@ static uint32_t SHADER_STAGES_BIT_OFFSET_INDICES[RenderingDevice::SHADER_STAGE_M /* SHADER_STAGE_COMPUTE */ 2, }; -dxil_validator *RenderingDeviceDriverD3D12::_get_dxil_validator_for_current_thread() { - MutexLock lock(dxil_mutex); - - int thread_idx = WorkerThreadPool::get_singleton()->get_thread_index(); - if (dxil_validators.has(thread_idx)) { - return dxil_validators[thread_idx]; - } - -#ifdef DEV_ENABLED - print_verbose("Creating DXIL validator for worker thread index " + itos(thread_idx)); -#endif - - dxil_validator *dxil_validator = dxil_create_validator(nullptr); - dxil_validators.insert(thread_idx, dxil_validator); - return dxil_validator; -} - uint32_t RenderingDeviceDriverD3D12::_shader_patch_dxil_specialization_constant( PipelineSpecializationConstantType p_type, const void *p_value, @@ -2998,40 +3015,20 @@ bool RenderingDeviceDriverD3D12::_shader_apply_specialization_constants( ShaderStage stage = E.key; if ((stages_re_sign_mask & (1 << stage))) { Vector<uint8_t> &bytecode = E.value; - bool sign_ok = _shader_sign_dxil_bytecode(stage, bytecode); - ERR_FAIL_COND_V(!sign_ok, false); + _shader_sign_dxil_bytecode(stage, bytecode); } } return true; } -bool RenderingDeviceDriverD3D12::_shader_sign_dxil_bytecode(ShaderStage p_stage, Vector<uint8_t> &r_dxil_blob) { - dxil_validator *validator = _get_dxil_validator_for_current_thread(); - if (!validator) { - if (is_in_developer_mode()) { - return true; - } else { - OS::get_singleton()->alert("Shader validation failed: DXIL.dll was not found, and developer mode is disabled.\n\nClick OK to exit."); - CRASH_NOW(); - } - } - - char *err = nullptr; - bool res = dxil_validate_module(validator, r_dxil_blob.ptrw(), r_dxil_blob.size(), &err); - if (!res) { - if (err) { - ERR_FAIL_COND_V_MSG(!res, false, "Shader signing invocation at stage " + String(SHADER_STAGE_NAMES[p_stage]) + " failed:\n" + String(err)); - } else { - ERR_FAIL_COND_V_MSG(!res, false, "Shader signing invocation at stage " + String(SHADER_STAGE_NAMES[p_stage]) + " failed."); - } - } - - return true; +void RenderingDeviceDriverD3D12::_shader_sign_dxil_bytecode(ShaderStage p_stage, Vector<uint8_t> &r_dxil_blob) { + uint8_t *w = r_dxil_blob.ptrw(); + compute_dxil_hash(w + 20, r_dxil_blob.size() - 20, w + 4); } String RenderingDeviceDriverD3D12::shader_get_binary_cache_key() { - return "D3D12-SV" + uitos(ShaderBinary::VERSION) + "-" + itos(shader_capabilities.shader_model) + (is_in_developer_mode() ? "dev" : ""); + return "D3D12-SV" + uitos(ShaderBinary::VERSION) + "-" + itos(shader_capabilities.shader_model); } Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) { @@ -3299,10 +3296,7 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec nir_to_dxil_options nir_to_dxil_options = {}; nir_to_dxil_options.environment = DXIL_ENVIRONMENT_VULKAN; nir_to_dxil_options.shader_model_max = shader_model_d3d_to_dxil(shader_capabilities.shader_model); - dxil_validator *validator = _get_dxil_validator_for_current_thread(); - if (validator) { - nir_to_dxil_options.validator_version_max = dxil_get_validator_version(validator); - } + nir_to_dxil_options.validator_version_max = NO_DXIL_VALIDATION; nir_to_dxil_options.godot_nir_callbacks = &godot_nir_callbacks; dxil_logger logger = {}; @@ -3353,8 +3347,7 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec for (KeyValue<ShaderStage, Vector<uint8_t>> &E : dxil_blobs) { ShaderStage stage = E.key; Vector<uint8_t> &dxil_blob = E.value; - bool sign_ok = _shader_sign_dxil_bytecode(stage, dxil_blob); - ERR_FAIL_COND_V(!sign_ok, Vector<uint8_t>()); + _shader_sign_dxil_bytecode(stage, dxil_blob); } // Build the root signature. @@ -3822,6 +3815,11 @@ void RenderingDeviceDriverD3D12::shader_free(ShaderID p_shader) { VersatileResource::free(resources_allocator, shader_info_in); } +void RenderingDeviceDriverD3D12::shader_destroy_modules(ShaderID p_shader) { + ShaderInfo *shader_info_in = (ShaderInfo *)p_shader.id; + shader_info_in->stages_bytecode.clear(); +} + /*********************/ /**** UNIFORM SET ****/ /*********************/ @@ -4094,7 +4092,6 @@ RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<Boun { uniform_set_info->resource_states.reserve(resource_states.size()); - uint32_t i = 0; for (const KeyValue<ResourceInfo *, NeededState> &E : resource_states) { UniformSetInfo::StateRequirement sr; sr.resource = E.key; @@ -4102,7 +4099,6 @@ RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<Boun sr.states = E.value.states; sr.shader_uniform_idx_mask = E.value.shader_uniform_idx_mask; uniform_set_info->resource_states.push_back(sr); - i++; } } @@ -5092,6 +5088,7 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd if (pass_info->attachments[i].load_op == ATTACHMENT_LOAD_OP_CLEAR) { clear.aspect.set_flag(TEXTURE_ASPECT_COLOR_BIT); clear.color_attachment = i; + tex_info->pending_clear.remove_from_list(); } } else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) { if (pass_info->attachments[i].stencil_load_op == ATTACHMENT_LOAD_OP_CLEAR) { @@ -5370,10 +5367,12 @@ void RenderingDeviceDriverD3D12::command_bind_render_pipeline(CommandBufferID p_ cmd_buf_info->cmd_list->OMSetBlendFactor(pso_extra_info.dyn_params.blend_constant.components); cmd_buf_info->cmd_list->OMSetStencilRef(pso_extra_info.dyn_params.stencil_reference); - ComPtr<ID3D12GraphicsCommandList1> command_list_1; - cmd_buf_info->cmd_list->QueryInterface(command_list_1.GetAddressOf()); - if (command_list_1) { - command_list_1->OMSetDepthBounds(pso_extra_info.dyn_params.depth_bounds_min, pso_extra_info.dyn_params.depth_bounds_max); + if (misc_features_support.depth_bounds_supported) { + ComPtr<ID3D12GraphicsCommandList1> command_list_1; + cmd_buf_info->cmd_list->QueryInterface(command_list_1.GetAddressOf()); + if (command_list_1) { + command_list_1->OMSetDepthBounds(pso_extra_info.dyn_params.depth_bounds_min, pso_extra_info.dyn_params.depth_bounds_max); + } } cmd_buf_info->render_pass_state.vf_info = pso_extra_info.vf_info; @@ -5763,8 +5762,15 @@ RDD::PipelineID RenderingDeviceDriverD3D12::render_pipeline_create( (&pipeline_desc.DepthStencilState)->BackFace.StencilDepthFailOp = RD_TO_D3D12_STENCIL_OP[p_depth_stencil_state.back_op.depth_fail]; (&pipeline_desc.DepthStencilState)->BackFace.StencilFunc = RD_TO_D3D12_COMPARE_OP[p_depth_stencil_state.back_op.compare]; - pso_extra_info.dyn_params.depth_bounds_min = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_min : 0.0f; - pso_extra_info.dyn_params.depth_bounds_max = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_max : 1.0f; + if (misc_features_support.depth_bounds_supported) { + pso_extra_info.dyn_params.depth_bounds_min = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_min : 0.0f; + pso_extra_info.dyn_params.depth_bounds_max = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_max : 1.0f; + } else { + if (p_depth_stencil_state.enable_depth_range) { + WARN_PRINT_ONCE("Depth bounds test is not supported by the GPU driver."); + } + } + pso_extra_info.dyn_params.stencil_reference = p_depth_stencil_state.front_op.reference; } @@ -6035,6 +6041,10 @@ void RenderingDeviceDriverD3D12::command_end_label(CommandBufferID p_cmd_buffer) #endif } +void RenderingDeviceDriverD3D12::command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) { + // TODO: Implement via DRED. +} + /********************/ /**** SUBMISSION ****/ /********************/ @@ -6281,15 +6291,6 @@ RenderingDeviceDriverD3D12::RenderingDeviceDriverD3D12(RenderingContextDriverD3D } RenderingDeviceDriverD3D12::~RenderingDeviceDriverD3D12() { - { - MutexLock lock(dxil_mutex); - for (const KeyValue<int, dxil_validator *> &E : dxil_validators) { - if (E.value) { - dxil_destroy_validator(E.value); - } - } - } - glsl_type_singleton_decref(); } @@ -6485,6 +6486,12 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { subgroup_capabilities.wave_ops_supported = options1.WaveOps; } + D3D12_FEATURE_DATA_D3D12_OPTIONS2 options2 = {}; + res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2, &options2, sizeof(options2)); + if (SUCCEEDED(res)) { + misc_features_support.depth_bounds_supported = options2.DepthBoundsTestSupported; + } + D3D12_FEATURE_DATA_D3D12_OPTIONS3 options3 = {}; res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &options3, sizeof(options3)); if (SUCCEEDED(res)) { @@ -6570,6 +6577,12 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { print_verbose(String("- D3D12 16-bit ops supported: ") + (shader_capabilities.native_16bit_ops ? "yes" : "no")); + if (misc_features_support.depth_bounds_supported) { + print_verbose("- Depth bounds test supported"); + } else { + print_verbose("- Depth bounds test not supported"); + } + return OK; } @@ -6635,7 +6648,7 @@ Error RenderingDeviceDriverD3D12::_initialize_frames(uint32_t p_frame_count) { D3D12MA::ALLOCATION_DESC allocation_desc = {}; allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; - CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + //CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); uint32_t resource_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_resource_descriptors_per_frame"); uint32_t sampler_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame"); uint32_t misc_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_misc_descriptors_per_frame"); |