diff options
Diffstat (limited to 'drivers')
55 files changed, 6027 insertions, 5411 deletions
diff --git a/drivers/d3d12/SCsub b/drivers/d3d12/SCsub index 11ae52fd40..35227ebe08 100644 --- a/drivers/d3d12/SCsub +++ b/drivers/d3d12/SCsub @@ -5,8 +5,6 @@ from pathlib import Path Import("env") -env.Append(CPPDEFINES=["RD_ENABLED"]) - env_d3d12_rdd = env.Clone() thirdparty_obj = [] @@ -27,7 +25,7 @@ env_d3d12_rdd.Append(CPPPATH=["#thirdparty/d3d12ma"]) # Agility SDK. -if env["agility_sdk_path"] != "": +if env["agility_sdk_path"] != "" and os.path.exists(env["agility_sdk_path"]): env_d3d12_rdd.Append(CPPDEFINES=["AGILITY_SDK_ENABLED"]) if env["agility_sdk_multiarch"]: env_d3d12_rdd.Append(CPPDEFINES=["AGILITY_SDK_MULTIARCH_ENABLED"]) @@ -35,7 +33,7 @@ if env["agility_sdk_path"] != "": # PIX. -if env["pix_path"] != "": +if env["use_pix"]: env_d3d12_rdd.Append(CPPDEFINES=["PIX_ENABLED"]) env_d3d12_rdd.Append(CPPPATH=[env["pix_path"] + "/Include"]) diff --git a/drivers/d3d12/d3d12_context.cpp b/drivers/d3d12/d3d12_context.cpp deleted file mode 100644 index 36492b198b..0000000000 --- a/drivers/d3d12/d3d12_context.cpp +++ /dev/null @@ -1,1119 +0,0 @@ -/**************************************************************************/ -/* d3d12_context.cpp */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/**************************************************************************/ - -#include "d3d12_context.h" - -#include "core/config/engine.h" -#include "core/config/project_settings.h" -#include "core/string/ustring.h" -#include "core/templates/local_vector.h" -#include "core/version.h" -#include "servers/rendering/rendering_device.h" - -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" -#pragma GCC diagnostic ignored "-Wshadow" -#pragma GCC diagnostic ignored "-Wswitch" -#pragma GCC diagnostic ignored "-Wmissing-field-initializers" -#endif - -#include "dxcapi.h" - -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif - -#if !defined(_MSC_VER) -#include <guiddef.h> - -#include <dxguids.h> -#endif - -extern "C" { -char godot_nir_arch_name[32]; - -#ifdef AGILITY_SDK_ENABLED -__declspec(dllexport) extern const UINT D3D12SDKVersion = 610; -#ifdef AGILITY_SDK_MULTIARCH_ENABLED -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -__declspec(dllexport) extern const char *D3D12SDKPath = "\\.\\arm64"; -#elif defined(__arm__) || defined(_M_ARM) -__declspec(dllexport) extern const char *D3D12SDKPath = "\\.\\arm32"; -#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || defined(_M_X64) -__declspec(dllexport) extern const char *D3D12SDKPath = "\\.\\x86_64"; -#elif defined(__i386) || defined(__i386__) || defined(_M_IX86) -__declspec(dllexport) extern const char *D3D12SDKPath = "\\.\\x86_32"; -#endif -#else -__declspec(dllexport) extern const char *D3D12SDKPath = "\\."; -#endif // AGILITY_SDK_MULTIARCH -#endif // AGILITY_SDK_ENABLED -} - -#ifdef PIX_ENABLED -#if defined(__GNUC__) -#define _MSC_VER 1800 -#endif -#define USE_PIX -#include "WinPixEventRuntime/pix3.h" -#if defined(__GNUC__) -#undef _MSC_VER -#endif -#endif - -void D3D12Context::_debug_message_func( - D3D12_MESSAGE_CATEGORY p_category, - D3D12_MESSAGE_SEVERITY p_severity, - 
D3D12_MESSAGE_ID p_id, - LPCSTR p_description, - void *p_context) { - String type_string; - switch (p_category) { - case D3D12_MESSAGE_CATEGORY_APPLICATION_DEFINED: - type_string = "APPLICATION_DEFINED"; - break; - case D3D12_MESSAGE_CATEGORY_MISCELLANEOUS: - type_string = "MISCELLANEOUS"; - break; - case D3D12_MESSAGE_CATEGORY_INITIALIZATION: - type_string = "INITIALIZATION"; - break; - case D3D12_MESSAGE_CATEGORY_CLEANUP: - type_string = "CLEANUP"; - break; - case D3D12_MESSAGE_CATEGORY_COMPILATION: - type_string = "COMPILATION"; - break; - case D3D12_MESSAGE_CATEGORY_STATE_CREATION: - type_string = "STATE_CREATION"; - break; - case D3D12_MESSAGE_CATEGORY_STATE_SETTING: - type_string = "STATE_SETTING"; - break; - case D3D12_MESSAGE_CATEGORY_STATE_GETTING: - type_string = "STATE_GETTING"; - break; - case D3D12_MESSAGE_CATEGORY_RESOURCE_MANIPULATION: - type_string = "RESOURCE_MANIPULATION"; - break; - case D3D12_MESSAGE_CATEGORY_EXECUTION: - type_string = "EXECUTION"; - break; - case D3D12_MESSAGE_CATEGORY_SHADER: - type_string = "SHADER"; - break; - } - - String error_message(type_string + - " - Message Id Number: " + String::num_int64(p_id) + - "\n\t" + p_description); - - // Convert D3D12 severity to our own log macros. - switch (p_severity) { - case D3D12_MESSAGE_SEVERITY_MESSAGE: - print_verbose(error_message); - break; - case D3D12_MESSAGE_SEVERITY_INFO: - print_line(error_message); - break; - case D3D12_MESSAGE_SEVERITY_WARNING: - WARN_PRINT(error_message); - break; - case D3D12_MESSAGE_SEVERITY_ERROR: - case D3D12_MESSAGE_SEVERITY_CORRUPTION: - ERR_PRINT(error_message); - CRASH_COND_MSG(Engine::get_singleton()->is_abort_on_gpu_errors_enabled(), - "Crashing, because abort on GPU errors is enabled."); - break; - } -} - -uint32_t D3D12Context::SubgroupCapabilities::supported_stages_flags_rd() const { - // If there's a way to check exactly which are supported, I have yet to find it. 
- return ( - RenderingDevice::ShaderStage::SHADER_STAGE_FRAGMENT_BIT | - RenderingDevice::ShaderStage::SHADER_STAGE_COMPUTE_BIT); -} - -uint32_t D3D12Context::SubgroupCapabilities::supported_operations_flags_rd() const { - if (!wave_ops_supported) { - return 0; - } else { - return ( - RenderingDevice::SubgroupOperations::SUBGROUP_BASIC_BIT | - RenderingDevice::SubgroupOperations::SUBGROUP_BALLOT_BIT | - RenderingDevice::SubgroupOperations::SUBGROUP_VOTE_BIT | - RenderingDevice::SubgroupOperations::SUBGROUP_SHUFFLE_BIT | - RenderingDevice::SubgroupOperations::SUBGROUP_SHUFFLE_RELATIVE_BIT | - RenderingDevice::SubgroupOperations::SUBGROUP_QUAD_BIT | - RenderingDevice::SubgroupOperations::SUBGROUP_ARITHMETIC_BIT | - RenderingDevice::SubgroupOperations::SUBGROUP_CLUSTERED_BIT); - } -} - -Error D3D12Context::_check_capabilities() { - // Assume not supported until proven otherwise. - vrs_capabilities.draw_call_supported = false; - vrs_capabilities.primitive_supported = false; - vrs_capabilities.primitive_in_multiviewport = false; - vrs_capabilities.ss_image_supported = false; - vrs_capabilities.ss_image_tile_size = 1; - vrs_capabilities.additional_rates_supported = false; - multiview_capabilities.is_supported = false; - multiview_capabilities.geometry_shader_is_supported = false; - multiview_capabilities.tessellation_shader_is_supported = false; - multiview_capabilities.max_view_count = 0; - multiview_capabilities.max_instance_count = 0; - multiview_capabilities.is_supported = false; - subgroup_capabilities.size = 0; - subgroup_capabilities.wave_ops_supported = false; - shader_capabilities.shader_model = D3D_SHADER_MODEL_6_0; - shader_capabilities.native_16bit_ops = false; - storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = false; - format_capabilities.relaxed_casting_supported = false; - - { - D3D12_FEATURE_DATA_SHADER_MODEL shader_model = {}; - shader_model.HighestShaderModel = MIN(D3D_HIGHEST_SHADER_MODEL, D3D_SHADER_MODEL_6_6); - HRESULT res = 
md.device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model)); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - shader_capabilities.shader_model = shader_model.HighestShaderModel; - } - print_verbose("- Shader:"); - print_verbose(" model: " + itos(shader_capabilities.shader_model >> 4) + "." + itos(shader_capabilities.shader_model & 0xf)); - - D3D12_FEATURE_DATA_D3D12_OPTIONS options = {}; - HRESULT res = md.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)); - if (SUCCEEDED(res)) { - storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = options.TypedUAVLoadAdditionalFormats; - } - - D3D12_FEATURE_DATA_D3D12_OPTIONS1 options1 = {}; - res = md.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS1, &options1, sizeof(options1)); - if (SUCCEEDED(res)) { - subgroup_capabilities.size = options1.WaveLaneCountMin; - subgroup_capabilities.wave_ops_supported = options1.WaveOps; - } - - D3D12_FEATURE_DATA_D3D12_OPTIONS3 options3 = {}; - res = md.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &options3, sizeof(options3)); - if (SUCCEEDED(res)) { - // https://docs.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_view_instancing_tier - // https://microsoft.github.io/DirectX-Specs/d3d/ViewInstancing.html#sv_viewid - if (options3.ViewInstancingTier >= D3D12_VIEW_INSTANCING_TIER_1) { - multiview_capabilities.is_supported = true; - multiview_capabilities.geometry_shader_is_supported = options3.ViewInstancingTier >= D3D12_VIEW_INSTANCING_TIER_3; - multiview_capabilities.tessellation_shader_is_supported = options3.ViewInstancingTier >= D3D12_VIEW_INSTANCING_TIER_3; - multiview_capabilities.max_view_count = D3D12_MAX_VIEW_INSTANCE_COUNT; - multiview_capabilities.max_instance_count = UINT32_MAX; - } - } - - D3D12_FEATURE_DATA_D3D12_OPTIONS6 options6 = {}; - res = 
md.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS6, &options6, sizeof(options6)); - if (SUCCEEDED(res)) { - if (options6.VariableShadingRateTier >= D3D12_VARIABLE_SHADING_RATE_TIER_1) { - vrs_capabilities.draw_call_supported = true; - if (options6.VariableShadingRateTier >= D3D12_VARIABLE_SHADING_RATE_TIER_2) { - vrs_capabilities.primitive_supported = true; - vrs_capabilities.primitive_in_multiviewport = options6.PerPrimitiveShadingRateSupportedWithViewportIndexing; - vrs_capabilities.ss_image_supported = true; - vrs_capabilities.ss_image_tile_size = options6.ShadingRateImageTileSize; - vrs_capabilities.additional_rates_supported = options6.AdditionalShadingRatesSupported; - } - } - } - - D3D12_FEATURE_DATA_D3D12_OPTIONS12 options12 = {}; - res = md.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12)); - if (SUCCEEDED(res)) { - format_capabilities.relaxed_casting_supported = options12.RelaxedFormatCastingSupported; - } - - if (vrs_capabilities.draw_call_supported || vrs_capabilities.primitive_supported || vrs_capabilities.ss_image_supported) { - print_verbose("- D3D12 Variable Rate Shading supported:"); - if (vrs_capabilities.draw_call_supported) { - print_verbose(" Draw call"); - } - if (vrs_capabilities.primitive_supported) { - print_verbose(String(" Per-primitive (multi-viewport: ") + (vrs_capabilities.primitive_in_multiviewport ? "yes" : "no") + ")"); - } - if (vrs_capabilities.ss_image_supported) { - print_verbose(String(" Screen-space image (tile size: ") + itos(vrs_capabilities.ss_image_tile_size) + ")"); - } - if (vrs_capabilities.additional_rates_supported) { - print_verbose(String(" Additional rates: ") + (vrs_capabilities.additional_rates_supported ? 
"yes" : "no")); - } - } else { - print_verbose("- D3D12 Variable Rate Shading not supported"); - } - - if (multiview_capabilities.is_supported) { - print_verbose("- D3D12 multiview supported:"); - print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count)); - //print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count)); // Hardcoded; not very useful at the moment. - } else { - print_verbose("- D3D12 multiview not supported"); - } - - if (format_capabilities.relaxed_casting_supported) { - print_verbose("- Relaxed casting supported"); - } else { - print_verbose("- Relaxed casting not supported"); - } - - return OK; -} - -Error D3D12Context::_initialize_debug_layers() { - ComPtr<ID3D12Debug> debug_controller; - HRESULT res = D3D12GetDebugInterface(IID_PPV_ARGS(&debug_controller)); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_QUERY_FAILED); - debug_controller->EnableDebugLayer(); - return OK; -} - -Error D3D12Context::_select_adapter(int &r_index) { - { - UINT flags = _use_validation_layers() ? DXGI_CREATE_FACTORY_DEBUG : 0; - HRESULT res = CreateDXGIFactory2(flags, IID_PPV_ARGS(&dxgi_factory)); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - } - - ComPtr<IDXGIFactory6> factory6; - dxgi_factory.As(&factory6); - - // TODO: Use IDXCoreAdapterList, which gives more comprehensive information. 
- LocalVector<IDXGIAdapter1 *> adapters; - while (true) { - IDXGIAdapter1 *curr_adapter = nullptr; - if (factory6) { - if (factory6->EnumAdapterByGpuPreference(adapters.size(), DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, IID_PPV_ARGS(&curr_adapter)) == DXGI_ERROR_NOT_FOUND) { - break; - } - } else { - if (dxgi_factory->EnumAdapters1(adapters.size(), &curr_adapter) == DXGI_ERROR_NOT_FOUND) { - break; - } - } - adapters.push_back(curr_adapter); - } - - ERR_FAIL_COND_V_MSG(adapters.size() == 0, ERR_CANT_CREATE, "Adapters enumeration reported zero accessible devices."); - - // The device should really be a preference, but for now choosing a discrete GPU over the - // integrated one is better than the default. - - int32_t adapter_index = -1; - int type_selected = -1; - LocalVector<RenderingDevice::DeviceType> adapter_types; - print_verbose("D3D12 devices:"); - for (uint32_t i = 0; i < adapters.size(); ++i) { - DXGI_ADAPTER_DESC1 desc = {}; - adapters[i]->GetDesc1(&desc); - - String name = desc.Description; - String dev_type; - RenderingDevice::DeviceType type = {}; - if (((desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE))) { - type = RenderingDevice::DEVICE_TYPE_CPU; - } else { - type = desc.DedicatedVideoMemory ? 
RenderingDevice::DEVICE_TYPE_DISCRETE_GPU : RenderingDevice::DEVICE_TYPE_INTEGRATED_GPU; - } - adapter_types.push_back(type); - - switch (type) { - case RenderingDevice::DEVICE_TYPE_DISCRETE_GPU: { - dev_type = "Discrete"; - } break; - case RenderingDevice::DEVICE_TYPE_INTEGRATED_GPU: { - dev_type = "Integrated"; - } break; - case RenderingDevice::DEVICE_TYPE_VIRTUAL_GPU: { - dev_type = "Virtual"; - } break; - case RenderingDevice::DEVICE_TYPE_CPU: { - dev_type = "CPU"; - } break; - default: { - dev_type = "Other"; - } break; - } - print_verbose(" #" + itos(i) + ": " + name + ", " + dev_type); - - switch (type) { - case RenderingDevice::DEVICE_TYPE_DISCRETE_GPU: { - if (type_selected < 4) { - type_selected = 4; - adapter_index = i; - } - } break; - case RenderingDevice::DEVICE_TYPE_INTEGRATED_GPU: { - if (type_selected < 3) { - type_selected = 3; - adapter_index = i; - } - } break; - case RenderingDevice::DEVICE_TYPE_VIRTUAL_GPU: { - if (type_selected < 2) { - type_selected = 2; - adapter_index = i; - } - } break; - case RenderingDevice::DEVICE_TYPE_CPU: { - if (type_selected < 1) { - type_selected = 1; - adapter_index = i; - } - } break; - default: { - if (type_selected < 0) { - type_selected = 0; - adapter_index = i; - } - } break; - } - } - - int32_t user_adapter_index = Engine::get_singleton()->get_gpu_index(); // Force user selected GPU. - if (user_adapter_index >= 0 && user_adapter_index < (int32_t)adapters.size()) { - adapter_index = user_adapter_index; - } - - ERR_FAIL_COND_V_MSG(adapter_index == -1, ERR_CANT_CREATE, "None of D3D12 devices supports hardware rendering."); - - gpu = adapters[adapter_index]; - for (uint32_t i = 0; i < adapters.size(); ++i) { - adapters[i]->Release(); - } - - adapter_type = adapter_types[adapter_index]; - - ComPtr<IDXGIFactory5> factory5; - dxgi_factory.As(&factory5); - if (factory5) { - BOOL result = FALSE; // sizeof(bool) != sizeof(BOOL), in general. 
- HRESULT res = factory5->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &result, sizeof(result)); - if (SUCCEEDED(res)) { - tearing_supported = result; - } else { - ERR_PRINT("CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - } - } - - r_index = adapter_index; - - return OK; -} - -void D3D12Context::_dump_adapter_info(int p_index) { - { - const D3D_FEATURE_LEVEL FEATURE_LEVELS[] = { - D3D_FEATURE_LEVEL_11_0, - D3D_FEATURE_LEVEL_11_1, - D3D_FEATURE_LEVEL_12_0, - D3D_FEATURE_LEVEL_12_1, - D3D_FEATURE_LEVEL_12_2, - }; - - D3D12_FEATURE_DATA_FEATURE_LEVELS feat_levels = {}; - feat_levels.NumFeatureLevels = ARRAY_SIZE(FEATURE_LEVELS); - feat_levels.pFeatureLevelsRequested = FEATURE_LEVELS; - - HRESULT res = md.device->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS, &feat_levels, sizeof(feat_levels)); - ERR_FAIL_COND_MSG(!SUCCEEDED(res), "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - - // Example: D3D_FEATURE_LEVEL_12_1 = 0xc100. 
- uint32_t feat_level_major = feat_levels.MaxSupportedFeatureLevel >> 12; - uint32_t feat_level_minor = (feat_levels.MaxSupportedFeatureLevel >> 16) & 0xff; - feature_level = feat_level_major * 10 + feat_level_minor; - } - - String rendering_method; - if (OS::get_singleton()->get_current_rendering_method() == "mobile") { - rendering_method = "Forward Mobile"; - } else { - rendering_method = "Forward+"; - } - - static const struct { - uint32_t id; - const char *name; - } vendor_names[] = { - { 0x1002, "AMD" }, - { 0x1010, "ImgTec" }, - { 0x106B, "Apple" }, - { 0x10DE, "NVIDIA" }, - { 0x13B5, "ARM" }, - { 0x1414, "Microsoft" }, - { 0x5143, "Qualcomm" }, - { 0x8086, "Intel" }, - { 0, nullptr }, - }; - - DXGI_ADAPTER_DESC gpu_desc = {}; - gpu->GetDesc(&gpu_desc); - - adapter_name = gpu_desc.Description; - pipeline_cache_id = String::hex_encode_buffer((uint8_t *)&gpu_desc.AdapterLuid, sizeof(LUID)); - pipeline_cache_id += "-driver-" + itos(gpu_desc.Revision); - { - adapter_vendor = "Unknown"; - uint32_t vendor_idx = 0; - while (vendor_names[vendor_idx].name != nullptr) { - if (gpu_desc.VendorId == vendor_names[vendor_idx].id) { - adapter_vendor = vendor_names[vendor_idx].name; - break; - } - vendor_idx++; - } - } - - print_line(vformat("D3D12 feature level %s - %s - Using D3D12 Adapter #%d: %s", get_device_api_version(), rendering_method, p_index, adapter_name)); -} - -Error D3D12Context::_create_device(DeviceBasics &r_basics) { - HRESULT res = D3D12CreateDevice(gpu.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(r_basics.device.GetAddressOf())); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "D3D12CreateDevice failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - - // Create direct command queue. 
- D3D12_COMMAND_QUEUE_DESC queue_desc = {}; - queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; - queue_desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; - res = r_basics.device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(r_basics.queue.GetAddressOf())); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - - // Create sync objects. - res = r_basics.device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(r_basics.fence.GetAddressOf())); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - r_basics.fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr); - ERR_FAIL_NULL_V(r_basics.fence_event, ERR_CANT_CREATE); - - if (_use_validation_layers()) { - ComPtr<ID3D12InfoQueue> info_queue; - res = r_basics.device.As(&info_queue); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - -#if 0 // This causes crashes. Needs investigation. - ComPtr<ID3D12InfoQueue1> info_queue_1; - device.As(&info_queue_1); - if (info_queue_1) { - // Custom printing supported (added in Windows 10 Release Preview build 20236). - - info_queue_1->SetMuteDebugOutput(TRUE); - - res = info_queue_1->RegisterMessageCallback(&_debug_message_func, D3D12_MESSAGE_CALLBACK_IGNORE_FILTERS, nullptr, 0); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - } else -#endif - { - // Rely on D3D12's own debug printing. 
- - if (Engine::get_singleton()->is_abort_on_gpu_errors_enabled()) { - res = info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - res = info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - res = info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - } - } - D3D12_MESSAGE_SEVERITY severities_to_mute[] = { - D3D12_MESSAGE_SEVERITY_INFO, - }; - - D3D12_MESSAGE_ID messages_to_mute[] = { - D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, - D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE, - // These happen due to how D3D12MA manages buffers; seem bening. - D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_HAS_NO_RESOURCE, - D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_INTERSECTS_MULTIPLE_BUFFERS, - }; - - D3D12_INFO_QUEUE_FILTER filter = {}; - filter.DenyList.NumSeverities = ARRAY_SIZE(severities_to_mute); - filter.DenyList.pSeverityList = severities_to_mute; - filter.DenyList.NumIDs = ARRAY_SIZE(messages_to_mute); - filter.DenyList.pIDList = messages_to_mute; - - res = info_queue->PushStorageFilter(&filter); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - } - - return OK; -} - -Error D3D12Context::_get_device_limits() { - D3D12_FEATURE_DATA_D3D12_OPTIONS options = {}; - HRESULT res = md.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_UNAVAILABLE, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - - // https://docs.microsoft.com/en-us/windows/win32/direct3d12/hardware-support - gpu_limits.max_srvs_per_shader_stage = options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1 ? 128 : UINT64_MAX; - gpu_limits.max_cbvs_per_shader_stage = options.ResourceBindingTier <= D3D12_RESOURCE_BINDING_TIER_2 ? 
14 : UINT64_MAX; - gpu_limits.max_samplers_across_all_stages = options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1 ? 16 : 2048; - if (options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1) { - gpu_limits.max_uavs_across_all_stages = feature_level <= 110 ? 8 : 64; - } else if (options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_2) { - gpu_limits.max_uavs_across_all_stages = 64; - } else { - gpu_limits.max_uavs_across_all_stages = UINT64_MAX; - } - - md.queue->GetTimestampFrequency(&gpu_limits.timestamp_frequency); - - return OK; -} - -bool D3D12Context::_use_validation_layers() { - return Engine::get_singleton()->is_validation_layers_enabled(); -} - -Error D3D12Context::window_create(DisplayServer::WindowID p_window_id, DisplayServer::VSyncMode p_vsync_mode, int p_width, int p_height, const void *p_platform_data) { - ERR_FAIL_COND_V(windows.has(p_window_id), ERR_INVALID_PARAMETER); - - Window window; - window.hwnd = ((const WindowPlatformData *)p_platform_data)->window; - window.width = p_width; - window.height = p_height; - window.vsync_mode = p_vsync_mode; - - { - RDD::Attachment attachment; - attachment.samples = RD::TEXTURE_SAMPLES_1; - attachment.load_op = RDD::ATTACHMENT_LOAD_OP_CLEAR; - attachment.store_op = RDD::ATTACHMENT_STORE_OP_STORE; - window.render_pass.attachments.push_back(attachment); - - RDD::Subpass subpass; - { - RDD::AttachmentReference color_ref; - color_ref.attachment = 0; - color_ref.aspect.set_flag(RDD::TEXTURE_ASPECT_COLOR_BIT); - subpass.color_references.push_back(color_ref); - } - window.render_pass.subpasses.push_back(subpass); - } - - for (uint32_t i = 0; i < IMAGE_COUNT; i++) { - Error err = window.framebuffers[i].rtv_heap.allocate(md.device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1, false); - ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); - window.framebuffers[i].is_screen = true; - window.framebuffers[i].attachments_handle_inds.push_back(0); - } - - Error err = _update_swap_chain(&window); - ERR_FAIL_COND_V(err != 
OK, ERR_CANT_CREATE); - - windows[p_window_id] = window; - - return OK; -} - -void D3D12Context::window_resize(DisplayServer::WindowID p_window, int p_width, int p_height) { - ERR_FAIL_COND(!windows.has(p_window)); - windows[p_window].width = p_width; - windows[p_window].height = p_height; - _update_swap_chain(&windows[p_window]); -} - -int D3D12Context::window_get_width(DisplayServer::WindowID p_window) { - ERR_FAIL_COND_V(!windows.has(p_window), -1); - return windows[p_window].width; -} - -int D3D12Context::window_get_height(DisplayServer::WindowID p_window) { - ERR_FAIL_COND_V(!windows.has(p_window), -1); - return windows[p_window].height; -} - -bool D3D12Context::window_is_valid_swapchain(DisplayServer::WindowID p_window) { - ERR_FAIL_COND_V(!windows.has(p_window), false); - Window *w = &windows[p_window]; - return (bool)w->swapchain; -} - -RDD::RenderPassID D3D12Context::window_get_render_pass(DisplayServer::WindowID p_window) { - ERR_FAIL_COND_V(!windows.has(p_window), RDD::RenderPassID()); - Window *w = &windows[p_window]; - return RDD::RenderPassID(&w->render_pass); -} - -RDD::FramebufferID D3D12Context::window_get_framebuffer(DisplayServer::WindowID p_window) { - ERR_FAIL_COND_V(!windows.has(p_window), RDD::FramebufferID()); - ERR_FAIL_COND_V(!buffers_prepared, RDD::FramebufferID()); - Window *w = &windows[p_window]; - if (w->swapchain) { - return RDD::FramebufferID(&w->framebuffers[w->current_buffer]); - } else { - return RDD::FramebufferID(); - } -} - -void D3D12Context::window_destroy(DisplayServer::WindowID p_window_id) { - ERR_FAIL_COND(!windows.has(p_window_id)); - _wait_for_idle_queue(md.queue.Get()); - windows.erase(p_window_id); -} - -Error D3D12Context::_update_swap_chain(Window *window) { - if (window->width == 0 || window->height == 0) { - // Likely window minimized, no swapchain created. 
- return ERR_SKIP; - } - - DisplayServer::VSyncMode curr_vsync_mode = window->vsync_mode; - bool vsync_mode_available = false; - UINT swapchain_flags = 0; - do { - switch (window->vsync_mode) { - case DisplayServer::VSYNC_MAILBOX: { - window->sync_interval = 1; - window->present_flags = DXGI_PRESENT_RESTART; - swapchain_flags = 0; - vsync_mode_available = true; - } break; - case DisplayServer::VSYNC_ADAPTIVE: { - vsync_mode_available = false; // I don't know how to set this up. - } break; - case DisplayServer::VSYNC_ENABLED: { - window->sync_interval = 1; - window->present_flags = 0; - swapchain_flags = 0; - vsync_mode_available = true; - } break; - case DisplayServer::VSYNC_DISABLED: { - window->sync_interval = 0; - window->present_flags = tearing_supported ? DXGI_PRESENT_ALLOW_TEARING : 0; - swapchain_flags = tearing_supported ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0; - vsync_mode_available = true; - } break; - } - - // Set the windows swap effect if it is available, otherwise FLIP_DISCARD is used. - if (vsync_mode_available) { - if (window->vsync_mode != curr_vsync_mode || !window->swapchain) { - window->vsync_mode = curr_vsync_mode; - print_verbose("Using swapchain flags: " + itos(swapchain_flags) + ", sync interval: " + itos(window->sync_interval) + ", present flags: " + itos(window->present_flags)); - } - } else { - String present_mode_string; - switch (window->vsync_mode) { - case DisplayServer::VSYNC_MAILBOX: - present_mode_string = "Mailbox"; - break; - case DisplayServer::VSYNC_ADAPTIVE: - present_mode_string = "Adaptive"; - break; - case DisplayServer::VSYNC_ENABLED: - present_mode_string = "Enabled"; - break; - case DisplayServer::VSYNC_DISABLED: - present_mode_string = "Disabled"; - break; - } - WARN_PRINT(vformat("The requested V-Sync mode %s is not available. Falling back to V-Sync mode Enabled.", present_mode_string)); - window->vsync_mode = DisplayServer::VSYNC_ENABLED; // Set to default. 
- } - } while (!vsync_mode_available); - - if (window->swapchain) { - _wait_for_idle_queue(md.queue.Get()); - for (uint32_t i = 0; i < IMAGE_COUNT; i++) { - window->render_targets[i].Reset(); - } - - // D3D12 docs: "IDXGISwapChain::ResizeBuffers can't be used to add or remove this flag." - bool allow_tearing_flag_changed = (swapchain_flags & DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) != (window->swapchain_flags & DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING); - if (allow_tearing_flag_changed) { - window->swapchain.Reset(); - } - } - - if (!window->swapchain) { - DXGI_SWAP_CHAIN_DESC1 swapchain_desc = {}; - swapchain_desc.BufferCount = IMAGE_COUNT; - swapchain_desc.Width = 0; - swapchain_desc.Height = 0; - swapchain_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - swapchain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - swapchain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; - swapchain_desc.SampleDesc.Count = 1; - swapchain_desc.Flags = swapchain_flags; - swapchain_desc.Scaling = DXGI_SCALING_NONE; - - ComPtr<IDXGISwapChain1> swapchain; - HRESULT res = dxgi_factory->CreateSwapChainForHwnd(md.queue.Get(), window->hwnd, &swapchain_desc, nullptr, nullptr, swapchain.GetAddressOf()); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - swapchain.As(&window->swapchain); - ERR_FAIL_NULL_V(window->swapchain, ERR_CANT_CREATE); - - format = swapchain_desc.Format; - - res = dxgi_factory->MakeWindowAssociation(window->hwnd, DXGI_MWA_NO_ALT_ENTER | DXGI_MWA_NO_WINDOW_CHANGES); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - - res = window->swapchain->GetDesc1(&swapchain_desc); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - ERR_FAIL_COND_V(swapchain_desc.BufferCount != IMAGE_COUNT, ERR_BUG); - window->width = swapchain_desc.Width; - window->height = swapchain_desc.Height; - - } else { - HRESULT res = window->swapchain->ResizeBuffers(IMAGE_COUNT, window->width, window->height, DXGI_FORMAT_UNKNOWN, swapchain_flags); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_UNAVAILABLE); - } - - 
window->swapchain_flags = swapchain_flags; - window->current_buffer = window->swapchain->GetCurrentBackBufferIndex(); - - for (uint32_t i = 0; i < IMAGE_COUNT; i++) { - RenderingDeviceDriverD3D12::FramebufferInfo *fb_info = &window->framebuffers[i]; - RenderingDeviceDriverD3D12::DescriptorsHeap::Walker walker = fb_info->rtv_heap.make_walker(); - - HRESULT res = window->swapchain->GetBuffer(i, IID_PPV_ARGS(&window->render_targets[i])); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - - md.device->CreateRenderTargetView(window->render_targets[i].Get(), nullptr, walker.get_curr_cpu_handle()); - } - - return OK; -} - -Error D3D12Context::initialize() { - if (_use_validation_layers()) { - Error err = _initialize_debug_layers(); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - } - - int adapter_index = 0; - - Error err = _select_adapter(adapter_index); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - err = _create_device(md); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - _dump_adapter_info(adapter_index); - - err = _check_capabilities(); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - err = _get_device_limits(); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - { - HRESULT res = md.device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(frame_fence.GetAddressOf())); - ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); - frame_fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr); - ERR_FAIL_NULL_V(frame_fence_event, ERR_CANT_CREATE); - } - - md.driver = memnew(RenderingDeviceDriverD3D12(this, md.device.Get(), IMAGE_COUNT + 1)); - - return OK; -} - -void D3D12Context::set_setup_buffer(RDD::CommandBufferID p_command_buffer) { - const RenderingDeviceDriverD3D12::CommandBufferInfo *cmd_buf_info = (const RenderingDeviceDriverD3D12::CommandBufferInfo *)p_command_buffer.id; - command_list_queue[0] = cmd_buf_info->cmd_list.Get(); -} - -void D3D12Context::append_command_buffer(RDD::CommandBufferID p_command_buffer) { - if (command_list_queue.size() <= command_list_count) { - 
command_list_queue.resize(command_list_count + 1); - } - - const RenderingDeviceDriverD3D12::CommandBufferInfo *cmd_buf_info = (const RenderingDeviceDriverD3D12::CommandBufferInfo *)p_command_buffer.id; - command_list_queue[command_list_count] = cmd_buf_info->cmd_list.Get(); - command_list_count++; -} - -void D3D12Context::_wait_for_idle_queue(ID3D12CommandQueue *p_queue) { - md.fence_value++; - p_queue->Signal(md.fence.Get(), md.fence_value); - md.fence->SetEventOnCompletion(md.fence_value, md.fence_event); - WaitForSingleObjectEx(md.fence_event, INFINITE, FALSE); -#ifdef PIX_ENABLED - PIXNotifyWakeFromFenceSignal(md.fence_event); -#endif -} - -void D3D12Context::flush(bool p_flush_setup, bool p_flush_pending) { - if (p_flush_setup && command_list_queue[0]) { - md.queue->ExecuteCommandLists(1, command_list_queue.ptr()); - command_list_queue[0] = nullptr; - } - - if (p_flush_pending && command_list_count > 1) { - md.queue->ExecuteCommandLists(command_list_count - 1, command_list_queue.ptr() + 1); - command_list_count = 1; - } - - if (p_flush_setup || p_flush_pending) { - _wait_for_idle_queue(md.queue.Get()); - } -} - -Error D3D12Context::prepare_buffers(RDD::CommandBufferID p_command_buffer) { - // Ensure no more than FRAME_LAG renderings are outstanding. 
- if (frame >= IMAGE_COUNT) { - UINT64 min_value = frame - IMAGE_COUNT; - if (frame_fence->GetCompletedValue() < min_value) { - frame_fence->SetEventOnCompletion(min_value, frame_fence_event); - WaitForSingleObjectEx(frame_fence_event, INFINITE, FALSE); -#ifdef PIX_ENABLED - PIXNotifyWakeFromFenceSignal(frame_fence_event); -#endif - } - } - - D3D12_RESOURCE_BARRIER *barriers = (D3D12_RESOURCE_BARRIER *)alloca(windows.size() * sizeof(D3D12_RESOURCE_BARRIER)); - uint32_t n = 0; - for (KeyValue<int, Window> &E : windows) { - Window *w = &E.value; - w->current_buffer = w->swapchain->GetCurrentBackBufferIndex(); - barriers[n++] = CD3DX12_RESOURCE_BARRIER::Transition(w->render_targets[w->current_buffer].Get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET); - } - const RenderingDeviceDriverD3D12::CommandBufferInfo *cmd_buf_info = (const RenderingDeviceDriverD3D12::CommandBufferInfo *)p_command_buffer.id; - cmd_buf_info->cmd_list->ResourceBarrier(n, barriers); - - buffers_prepared = true; - - return OK; -} - -void D3D12Context::postpare_buffers(RDD::CommandBufferID p_command_buffer) { - D3D12_RESOURCE_BARRIER *barriers = (D3D12_RESOURCE_BARRIER *)alloca(windows.size() * sizeof(D3D12_RESOURCE_BARRIER)); - - uint32_t n = 0; - for (KeyValue<int, Window> &E : windows) { - Window *w = &E.value; - barriers[n++] = CD3DX12_RESOURCE_BARRIER::Transition(w->render_targets[w->current_buffer].Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT); - } - - const RenderingDeviceDriverD3D12::CommandBufferInfo *cmd_buf_info = (const RenderingDeviceDriverD3D12::CommandBufferInfo *)p_command_buffer.id; - cmd_buf_info->cmd_list->ResourceBarrier(n, barriers); -} - -Error D3D12Context::swap_buffers() { - ID3D12CommandList *const *commands_ptr = nullptr; - UINT commands_to_submit = 0; - - if (command_list_queue[0] == nullptr) { - // No setup command, but commands to submit, submit from the first and skip command. 
- if (command_list_count > 1) { - commands_ptr = command_list_queue.ptr() + 1; - commands_to_submit = command_list_count - 1; - } - } else { - commands_ptr = command_list_queue.ptr(); - commands_to_submit = command_list_count; - } - - md.queue->ExecuteCommandLists(commands_to_submit, commands_ptr); - - command_list_queue[0] = nullptr; - command_list_count = 1; - - for (KeyValue<int, Window> &E : windows) { - Window *w = &E.value; - - if (!w->swapchain) { - continue; - } - HRESULT res = w->swapchain->Present(w->sync_interval, w->present_flags); - if (!SUCCEEDED(res)) { - print_verbose("D3D12: Presenting swapchain of window " + itos(E.key) + " failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - } - } - - md.queue->Signal(frame_fence.Get(), frame); - frame++; - - buffers_prepared = false; - return OK; -} - -void D3D12Context::resize_notify() { -} - -RenderingDevice::Capabilities D3D12Context::get_device_capabilities() const { - RenderingDevice::Capabilities c; - c.device_family = RenderingDevice::DEVICE_DIRECTX; - c.version_major = feature_level / 10; - c.version_minor = feature_level % 10; - return c; -} - -ID3D12Device *D3D12Context::get_device() { - return md.device.Get(); -} - -IDXGIAdapter *D3D12Context::get_adapter() { - return gpu.Get(); -} - -int D3D12Context::get_swapchain_image_count() const { - return IMAGE_COUNT; -} - -DXGI_FORMAT D3D12Context::get_screen_format() const { - return format; -} - -const D3D12Context::DeviceLimits &D3D12Context::get_device_limits() const { - return gpu_limits; -} - -RID D3D12Context::local_device_create() { - LocalDevice ld; - _create_device(ld); - ld.driver = memnew(RenderingDeviceDriverD3D12(this, ld.device.Get(), 1)); - return local_device_owner.make_rid(ld); -} - -void D3D12Context::local_device_push_command_buffers(RID p_local_device, const RDD::CommandBufferID *p_buffers, int p_count) { - LocalDevice *ld = local_device_owner.get_or_null(p_local_device); - ERR_FAIL_COND(ld->waiting); - - 
ld->queue->ExecuteCommandLists(p_count, (ID3D12CommandList *const *)p_buffers); - - ld->waiting = true; -} - -void D3D12Context::local_device_sync(RID p_local_device) { - LocalDevice *ld = local_device_owner.get_or_null(p_local_device); - ERR_FAIL_COND(!ld->waiting); - - ld->fence_value++; - ld->queue->Signal(ld->fence.Get(), ld->fence_value); - ld->fence->SetEventOnCompletion(ld->fence_value, ld->fence_event); - WaitForSingleObjectEx(ld->fence_event, INFINITE, FALSE); -#ifdef PIX_ENABLED - PIXNotifyWakeFromFenceSignal(ld->fence_event); -#endif - - ld->waiting = false; -} - -void D3D12Context::local_device_free(RID p_local_device) { - LocalDevice *ld = local_device_owner.get_or_null(p_local_device); - memdelete(ld->driver); - CloseHandle(ld->fence_event); - local_device_owner.free(p_local_device); -} - -void D3D12Context::command_begin_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) { -#ifdef PIX_ENABLED - const RenderingDeviceDriverD3D12::CommandBufferInfo *cmd_buf_info = (const RenderingDeviceDriverD3D12::CommandBufferInfo *)p_command_buffer.id; - PIXBeginEvent(cmd_buf_info->cmd_list.Get(), p_color.to_argb32(), p_label_name.utf8().get_data()); -#endif -} - -void D3D12Context::command_insert_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) { -#ifdef PIX_ENABLED - const RenderingDeviceDriverD3D12::CommandBufferInfo *cmd_buf_info = (const RenderingDeviceDriverD3D12::CommandBufferInfo *)p_command_buffer.id; - PIXSetMarker(cmd_buf_info->cmd_list.Get(), p_color.to_argb32(), p_label_name.utf8().get_data()); -#endif -} - -void D3D12Context::command_end_label(RDD::CommandBufferID p_command_buffer) { -#ifdef PIX_ENABLED - const RenderingDeviceDriverD3D12::CommandBufferInfo *cmd_buf_info = (const RenderingDeviceDriverD3D12::CommandBufferInfo *)p_command_buffer.id; - PIXEndEvent(cmd_buf_info->cmd_list.Get()); -#endif -} - -void D3D12Context::set_object_name(ID3D12Object *p_object, String 
p_object_name) { - ERR_FAIL_NULL(p_object); - int name_len = p_object_name.size(); - WCHAR *name_w = (WCHAR *)alloca(sizeof(WCHAR) * (name_len + 1)); - MultiByteToWideChar(CP_UTF8, 0, p_object_name.utf8().get_data(), -1, name_w, name_len); - p_object->SetName(name_w); -} - -String D3D12Context::get_device_vendor_name() const { - return adapter_vendor; -} -String D3D12Context::get_device_name() const { - return adapter_name; -} - -RenderingDevice::DeviceType D3D12Context::get_device_type() const { - return adapter_type; -} - -String D3D12Context::get_device_api_version() const { - return vformat("%d_%d", feature_level / 10, feature_level % 10); -} - -String D3D12Context::get_device_pipeline_cache_uuid() const { - return pipeline_cache_id; -} - -DisplayServer::VSyncMode D3D12Context::get_vsync_mode(DisplayServer::WindowID p_window) const { - ERR_FAIL_COND_V_MSG(!windows.has(p_window), DisplayServer::VSYNC_ENABLED, "Could not get V-Sync mode for window with WindowID " + itos(p_window) + " because it does not exist."); - return windows[p_window].vsync_mode; -} - -void D3D12Context::set_vsync_mode(DisplayServer::WindowID p_window, DisplayServer::VSyncMode p_mode) { - ERR_FAIL_COND_MSG(!windows.has(p_window), "Could not set V-Sync mode for window with WindowID " + itos(p_window) + " because it does not exist."); - windows[p_window].vsync_mode = p_mode; - _update_swap_chain(&windows[p_window]); -} - -RenderingDeviceDriver *D3D12Context::get_driver(RID p_local_device) { - if (p_local_device.is_valid()) { - LocalDevice *ld = local_device_owner.get_or_null(p_local_device); - ERR_FAIL_NULL_V(ld, nullptr); - return ld->driver; - } else { - return md.driver; - } -} - -D3D12Context::D3D12Context() { - command_list_queue.resize(1); // First one is always the setup command. 
- command_list_queue[0] = nullptr; - - CharString cs = Engine::get_singleton()->get_architecture_name().ascii(); - memcpy(godot_nir_arch_name, (const char *)cs.get_data(), cs.size()); -} - -D3D12Context::~D3D12Context() { - if (md.fence_event) { - CloseHandle(md.fence_event); - } - if (frame_fence_event) { - CloseHandle(frame_fence_event); - } -} diff --git a/drivers/d3d12/d3d12_context.h b/drivers/d3d12/d3d12_context.h deleted file mode 100644 index ec4bc832b6..0000000000 --- a/drivers/d3d12/d3d12_context.h +++ /dev/null @@ -1,261 +0,0 @@ -/**************************************************************************/ -/* d3d12_context.h */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/**************************************************************************/ - -#ifndef D3D12_CONTEXT_H -#define D3D12_CONTEXT_H - -#include "core/error/error_list.h" -#include "core/os/mutex.h" -#include "core/string/ustring.h" -#include "core/templates/rid_owner.h" -#include "rendering_device_driver_d3d12.h" -#include "servers/display_server.h" -#include "servers/rendering/renderer_rd/api_context_rd.h" - -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" -#pragma GCC diagnostic ignored "-Wshadow" -#pragma GCC diagnostic ignored "-Wswitch" -#pragma GCC diagnostic ignored "-Wmissing-field-initializers" -#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" -#endif - -#if defined(AS) -#undef AS -#endif - -#include "d3dx12.h" -#include <dxgi1_6.h> - -#include <wrl/client.h> - -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif - -using Microsoft::WRL::ComPtr; - -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) - -class D3D12Context : public ApiContextRD { -public: - struct DeviceLimits { - uint64_t max_srvs_per_shader_stage; - uint64_t max_cbvs_per_shader_stage; - uint64_t max_samplers_across_all_stages; - uint64_t max_uavs_across_all_stages; - uint64_t timestamp_frequency; - }; - - struct SubgroupCapabilities { - uint32_t size; - bool wave_ops_supported; - uint32_t supported_stages_flags_rd() const; - uint32_t supported_operations_flags_rd() const; - }; - - struct VRSCapabilities { - bool draw_call_supported; // We can specify our fragment rate on a draw call level. - bool primitive_supported; // We can specify our fragment rate on each drawcall. 
- bool primitive_in_multiviewport; - bool ss_image_supported; // We can provide a density map attachment on our framebuffer. - uint32_t ss_image_tile_size; - bool additional_rates_supported; - }; - - struct ShaderCapabilities { - D3D_SHADER_MODEL shader_model; - bool native_16bit_ops; - }; - - struct StorageBufferCapabilities { - bool storage_buffer_16_bit_access_is_supported; - }; - - struct FormatCapabilities { - bool relaxed_casting_supported; - }; - -private: - enum { - FRAME_LAG = 2, - IMAGE_COUNT = FRAME_LAG + 1, - }; - - ComPtr<IDXGIFactory2> dxgi_factory; - ComPtr<IDXGIAdapter> gpu; - DeviceLimits gpu_limits = {}; - struct DeviceBasics { - ComPtr<ID3D12Device> device; - ComPtr<ID3D12CommandQueue> queue; - ComPtr<ID3D12Fence> fence; - HANDLE fence_event = nullptr; - UINT64 fence_value = 0; - RenderingDeviceDriverD3D12 *driver = nullptr; - } md; // 'Main device', as opposed to local device. - - uint32_t feature_level = 0; // Major * 10 + minor. - bool tearing_supported = false; - SubgroupCapabilities subgroup_capabilities; - RDD::MultiviewCapabilities multiview_capabilities; - VRSCapabilities vrs_capabilities; - ShaderCapabilities shader_capabilities; - StorageBufferCapabilities storage_buffer_capabilities; - FormatCapabilities format_capabilities; - - String adapter_vendor; - String adapter_name; - RenderingDevice::DeviceType adapter_type = {}; - String pipeline_cache_id; - - bool buffers_prepared = false; - - DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; - uint32_t frame = 0; - ComPtr<ID3D12Fence> frame_fence; - HANDLE frame_fence_event = nullptr; - - struct Window { - HWND hwnd = nullptr; - ComPtr<IDXGISwapChain3> swapchain; - UINT swapchain_flags = 0; - UINT sync_interval = 1; - UINT present_flags = 0; - ComPtr<ID3D12Resource> render_targets[IMAGE_COUNT]; - uint32_t current_buffer = 0; - int width = 0; - int height = 0; - DisplayServer::VSyncMode vsync_mode = DisplayServer::VSYNC_ENABLED; - RenderingDeviceDriverD3D12::RenderPassInfo render_pass; - 
RenderingDeviceDriverD3D12::FramebufferInfo framebuffers[IMAGE_COUNT]; - }; - - struct LocalDevice : public DeviceBasics { - bool waiting = false; - HANDLE fence_event = nullptr; - UINT64 fence_value = 0; - }; - - RID_Owner<LocalDevice, true> local_device_owner; - - HashMap<DisplayServer::WindowID, Window> windows; - - // Commands. - - LocalVector<ID3D12CommandList *> command_list_queue; - uint32_t command_list_count = 1; - - static void _debug_message_func( - D3D12_MESSAGE_CATEGORY p_category, - D3D12_MESSAGE_SEVERITY p_severity, - D3D12_MESSAGE_ID p_id, - LPCSTR p_description, - void *p_context); - - Error _initialize_debug_layers(); - - Error _select_adapter(int &r_index); - void _dump_adapter_info(int p_index); - Error _create_device(DeviceBasics &r_basics); - Error _get_device_limits(); - Error _check_capabilities(); - - Error _update_swap_chain(Window *window); - - void _wait_for_idle_queue(ID3D12CommandQueue *p_queue); - -protected: - virtual bool _use_validation_layers(); - -public: - virtual const char *get_api_name() const override final { return "D3D12"; }; - virtual RenderingDevice::Capabilities get_device_capabilities() const override final; - const SubgroupCapabilities &get_subgroup_capabilities() const { return subgroup_capabilities; }; - virtual const RDD::MultiviewCapabilities &get_multiview_capabilities() const override final { return multiview_capabilities; }; - const VRSCapabilities &get_vrs_capabilities() const { return vrs_capabilities; }; - const ShaderCapabilities &get_shader_capabilities() const { return shader_capabilities; }; - const StorageBufferCapabilities &get_storage_buffer_capabilities() const { return storage_buffer_capabilities; }; - const FormatCapabilities &get_format_capabilities() const { return format_capabilities; }; - - ID3D12Device *get_device(); - IDXGIAdapter *get_adapter(); - virtual int get_swapchain_image_count() const override final; - - struct WindowPlatformData { - HWND window; - }; - virtual Error 
window_create(DisplayServer::WindowID p_window_id, DisplayServer::VSyncMode p_vsync_mode, int p_width, int p_height, const void *p_platform_data) override final; - virtual void window_resize(DisplayServer::WindowID p_window_id, int p_width, int p_height) override final; - virtual int window_get_width(DisplayServer::WindowID p_window = 0) override final; - virtual int window_get_height(DisplayServer::WindowID p_window = 0) override final; - virtual bool window_is_valid_swapchain(DisplayServer::WindowID p_window = 0) override final; - virtual void window_destroy(DisplayServer::WindowID p_window_id) override final; - virtual RDD::RenderPassID window_get_render_pass(DisplayServer::WindowID p_window = 0) override final; - virtual RDD::FramebufferID window_get_framebuffer(DisplayServer::WindowID p_window = 0) override final; - - virtual RID local_device_create() override final; - virtual void local_device_push_command_buffers(RID p_local_device, const RDD::CommandBufferID *p_buffers, int p_count) override final; - virtual void local_device_sync(RID p_local_device) override final; - virtual void local_device_free(RID p_local_device) override final; - - DXGI_FORMAT get_screen_format() const; - const DeviceLimits &get_device_limits() const; - - virtual void set_setup_buffer(RDD::CommandBufferID p_command_buffer) override final; - virtual void append_command_buffer(RDD::CommandBufferID p_command_buffer) override final; - void resize_notify(); - virtual void flush(bool p_flush_setup = false, bool p_flush_pending = false) override final; - virtual Error prepare_buffers(RDD::CommandBufferID p_command_buffer) override final; - virtual void postpare_buffers(RDD::CommandBufferID p_command_buffer) override final; - virtual Error swap_buffers() override final; - virtual Error initialize() override final; - - virtual void command_begin_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) override final; - virtual void 
command_insert_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) override final; - virtual void command_end_label(RDD::CommandBufferID p_command_buffer) override final; - void set_object_name(ID3D12Object *p_object, String p_object_name); - - virtual String get_device_vendor_name() const override final; - virtual String get_device_name() const override final; - virtual RDD::DeviceType get_device_type() const override final; - virtual String get_device_api_version() const override final; - virtual String get_device_pipeline_cache_uuid() const override final; - - virtual void set_vsync_mode(DisplayServer::WindowID p_window, DisplayServer::VSyncMode p_mode) override final; - virtual DisplayServer::VSyncMode get_vsync_mode(DisplayServer::WindowID p_window = 0) const override final; - - virtual RenderingDeviceDriver *get_driver(RID p_local_device = RID()) override final; - - D3D12Context(); - virtual ~D3D12Context(); -}; - -#endif // D3D12_CONTEXT_H diff --git a/drivers/d3d12/d3d12ma.cpp b/drivers/d3d12/d3d12ma.cpp index 571ec952e7..ad7b4e570d 100644 --- a/drivers/d3d12/d3d12ma.cpp +++ b/drivers/d3d12/d3d12ma.cpp @@ -28,7 +28,7 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /**************************************************************************/ -#include "d3d12_context.h" +#include "rendering_context_driver_d3d12.h" #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push diff --git a/drivers/d3d12/rendering_context_driver_d3d12.cpp b/drivers/d3d12/rendering_context_driver_d3d12.cpp new file mode 100644 index 0000000000..ad3b793305 --- /dev/null +++ b/drivers/d3d12/rendering_context_driver_d3d12.cpp @@ -0,0 +1,313 @@ +/**************************************************************************/ +/* rendering_context_driver_d3d12.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "rendering_context_driver_d3d12.h" + +#include "core/config/engine.h" +#include "core/config/project_settings.h" +#include "core/string/ustring.h" +#include "core/templates/local_vector.h" +#include "core/version.h" +#include "servers/rendering/rendering_device.h" + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wswitch" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif + +#include "dxcapi.h" + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#if !defined(_MSC_VER) +#include <guiddef.h> + +#include <dxguids.h> +#endif + +// Note: symbols are not available in MinGW and old MSVC import libraries. 
+const CLSID CLSID_D3D12DeviceFactoryGodot = __uuidof(ID3D12DeviceFactory); +const CLSID CLSID_D3D12DebugGodot = __uuidof(ID3D12Debug); +const CLSID CLSID_D3D12SDKConfigurationGodot = __uuidof(ID3D12SDKConfiguration); + +extern "C" { +char godot_nir_arch_name[32]; +} + +#ifdef PIX_ENABLED +#if defined(__GNUC__) +#define _MSC_VER 1800 +#endif +#define USE_PIX +#include "WinPixEventRuntime/pix3.h" +#if defined(__GNUC__) +#undef _MSC_VER +#endif +#endif + +RenderingContextDriverD3D12::RenderingContextDriverD3D12() { + CharString cs = Engine::get_singleton()->get_architecture_name().ascii(); + memcpy(godot_nir_arch_name, (const char *)cs.get_data(), cs.size()); +} + +RenderingContextDriverD3D12::~RenderingContextDriverD3D12() { +} + +Error RenderingContextDriverD3D12::_init_device_factory() { + uint32_t agility_sdk_version = GLOBAL_GET("rendering/rendering_device/d3d12/agility_sdk_version"); + String agility_sdk_path = String(".\\") + Engine::get_singleton()->get_architecture_name(); + + // Note: symbol is not available in MinGW import library. + PFN_D3D12_GET_INTERFACE d3d_D3D12GetInterface = (PFN_D3D12_GET_INTERFACE)GetProcAddress(LoadLibraryW(L"D3D12.dll"), "D3D12GetInterface"); + if (d3d_D3D12GetInterface == nullptr) { + // FIXME: Is it intended for this to silently return when it fails to find the symbol? 
+ return OK; + } + + ID3D12SDKConfiguration *sdk_config = nullptr; + if (SUCCEEDED(d3d_D3D12GetInterface(CLSID_D3D12SDKConfigurationGodot, IID_PPV_ARGS(&sdk_config)))) { + ID3D12SDKConfiguration1 *sdk_config1 = nullptr; + if (SUCCEEDED(sdk_config->QueryInterface(&sdk_config1))) { + if (SUCCEEDED(sdk_config1->CreateDeviceFactory(agility_sdk_version, agility_sdk_path.ascii().get_data(), IID_PPV_ARGS(device_factory.GetAddressOf())))) { + d3d_D3D12GetInterface(CLSID_D3D12DeviceFactoryGodot, IID_PPV_ARGS(device_factory.GetAddressOf())); + } else if (SUCCEEDED(sdk_config1->CreateDeviceFactory(agility_sdk_version, ".\\", IID_PPV_ARGS(device_factory.GetAddressOf())))) { + d3d_D3D12GetInterface(CLSID_D3D12DeviceFactoryGodot, IID_PPV_ARGS(device_factory.GetAddressOf())); + } + sdk_config1->Release(); + } + sdk_config->Release(); + } + + return OK; +} + +Error RenderingContextDriverD3D12::_initialize_debug_layers() { + ComPtr<ID3D12Debug> debug_controller; + HRESULT res; + if (device_factory) { + res = device_factory->GetConfigurationInterface(CLSID_D3D12DebugGodot, IID_PPV_ARGS(&debug_controller)); + } else { + res = D3D12GetDebugInterface(IID_PPV_ARGS(&debug_controller)); + } + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_QUERY_FAILED); + debug_controller->EnableDebugLayer(); + return OK; +} + +Error RenderingContextDriverD3D12::_initialize_devices() { + const UINT dxgi_factory_flags = use_validation_layers() ? DXGI_CREATE_FACTORY_DEBUG : 0; + HRESULT res = CreateDXGIFactory2(dxgi_factory_flags, IID_PPV_ARGS(&dxgi_factory)); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); + + // Enumerate all possible adapters. 
+ LocalVector<IDXGIAdapter1 *> adapters; + IDXGIAdapter1 *adapter = nullptr; + do { + adapter = create_adapter(adapters.size()); + if (adapter != nullptr) { + adapters.push_back(adapter); + } + } while (adapter != nullptr); + + ERR_FAIL_COND_V_MSG(adapters.is_empty(), ERR_CANT_CREATE, "Adapters enumeration reported zero accessible devices."); + + // Fill the device descriptions with the adapters. + driver_devices.resize(adapters.size()); + for (uint32_t i = 0; i < adapters.size(); ++i) { + DXGI_ADAPTER_DESC1 desc = {}; + adapters[i]->GetDesc1(&desc); + + Device &device = driver_devices[i]; + device.name = desc.Description; + device.vendor = Vendor(desc.VendorId); + + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + device.type = DEVICE_TYPE_CPU; + } else { + const bool has_dedicated_vram = desc.DedicatedVideoMemory > 0; + device.type = has_dedicated_vram ? DEVICE_TYPE_DISCRETE_GPU : DEVICE_TYPE_INTEGRATED_GPU; + } + } + + // Release all created adapters. + for (uint32_t i = 0; i < adapters.size(); ++i) { + adapters[i]->Release(); + } + + ComPtr<IDXGIFactory5> factory_5; + dxgi_factory.As(&factory_5); + if (factory_5 != nullptr) { + // The type is important as in general, sizeof(bool) != sizeof(BOOL). 
+ BOOL feature_supported = FALSE; + res = factory_5->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &feature_supported, sizeof(feature_supported)); + if (SUCCEEDED(res)) { + tearing_supported = feature_supported; + } else { + ERR_PRINT("CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + } + } + + return OK; +} + +bool RenderingContextDriverD3D12::use_validation_layers() const { + return Engine::get_singleton()->is_validation_layers_enabled(); +} + +Error RenderingContextDriverD3D12::initialize() { + Error err = _init_device_factory(); + ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); + + if (use_validation_layers()) { + err = _initialize_debug_layers(); + ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); + } + + err = _initialize_devices(); + ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); + + return OK; +} + +const RenderingContextDriver::Device &RenderingContextDriverD3D12::device_get(uint32_t p_device_index) const { + DEV_ASSERT(p_device_index < driver_devices.size()); + return driver_devices[p_device_index]; +} + +uint32_t RenderingContextDriverD3D12::device_get_count() const { + return driver_devices.size(); +} + +bool RenderingContextDriverD3D12::device_supports_present(uint32_t p_device_index, SurfaceID p_surface) const { + // All devices should support presenting to any surface. 
+ return true; +} + +RenderingDeviceDriver *RenderingContextDriverD3D12::driver_create() { + return memnew(RenderingDeviceDriverD3D12(this)); +} + +void RenderingContextDriverD3D12::driver_free(RenderingDeviceDriver *p_driver) { + memdelete(p_driver); +} + +RenderingContextDriver::SurfaceID RenderingContextDriverD3D12::surface_create(const void *p_platform_data) { + const WindowPlatformData *wpd = (const WindowPlatformData *)(p_platform_data); + Surface *surface = memnew(Surface); + surface->hwnd = wpd->window; + return SurfaceID(surface); +} + +void RenderingContextDriverD3D12::surface_set_size(SurfaceID p_surface, uint32_t p_width, uint32_t p_height) { + Surface *surface = (Surface *)(p_surface); + surface->width = p_width; + surface->height = p_height; + surface->needs_resize = true; +} + +void RenderingContextDriverD3D12::surface_set_vsync_mode(SurfaceID p_surface, DisplayServer::VSyncMode p_vsync_mode) { + Surface *surface = (Surface *)(p_surface); + surface->vsync_mode = p_vsync_mode; + surface->needs_resize = true; +} + +DisplayServer::VSyncMode RenderingContextDriverD3D12::surface_get_vsync_mode(SurfaceID p_surface) const { + Surface *surface = (Surface *)(p_surface); + return surface->vsync_mode; +} + +uint32_t RenderingContextDriverD3D12::surface_get_width(SurfaceID p_surface) const { + Surface *surface = (Surface *)(p_surface); + return surface->width; +} + +uint32_t RenderingContextDriverD3D12::surface_get_height(SurfaceID p_surface) const { + Surface *surface = (Surface *)(p_surface); + return surface->height; +} + +void RenderingContextDriverD3D12::surface_set_needs_resize(SurfaceID p_surface, bool p_needs_resize) { + Surface *surface = (Surface *)(p_surface); + surface->needs_resize = p_needs_resize; +} + +bool RenderingContextDriverD3D12::surface_get_needs_resize(SurfaceID p_surface) const { + Surface *surface = (Surface *)(p_surface); + return surface->needs_resize; +} + +void RenderingContextDriverD3D12::surface_destroy(SurfaceID p_surface) { + 
Surface *surface = (Surface *)(p_surface); + memdelete(surface); +} + +bool RenderingContextDriverD3D12::is_debug_utils_enabled() const { +#ifdef PIX_ENABLED + return true; +#else + return false; +#endif +} + +IDXGIAdapter1 *RenderingContextDriverD3D12::create_adapter(uint32_t p_adapter_index) const { + ComPtr<IDXGIFactory6> factory_6; + dxgi_factory.As(&factory_6); + + // TODO: Use IDXCoreAdapterList, which gives more comprehensive information. + IDXGIAdapter1 *adapter = nullptr; + if (factory_6) { + if (factory_6->EnumAdapterByGpuPreference(p_adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, IID_PPV_ARGS(&adapter)) == DXGI_ERROR_NOT_FOUND) { + return nullptr; + } + } else { + if (dxgi_factory->EnumAdapters1(p_adapter_index, &adapter) == DXGI_ERROR_NOT_FOUND) { + return nullptr; + } + } + + return adapter; +} + +ID3D12DeviceFactory *RenderingContextDriverD3D12::device_factory_get() const { + return device_factory.Get(); +} + +IDXGIFactory2 *RenderingContextDriverD3D12::dxgi_factory_get() const { + return dxgi_factory.Get(); +} + +bool RenderingContextDriverD3D12::get_tearing_supported() const { + return tearing_supported; +} diff --git a/drivers/d3d12/rendering_context_driver_d3d12.h b/drivers/d3d12/rendering_context_driver_d3d12.h new file mode 100644 index 0000000000..694d0b3e4c --- /dev/null +++ b/drivers/d3d12/rendering_context_driver_d3d12.h @@ -0,0 +1,120 @@ +/**************************************************************************/ +/* rendering_context_driver_d3d12.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#ifndef RENDERING_CONTEXT_DRIVER_D3D12_H +#define RENDERING_CONTEXT_DRIVER_D3D12_H + +#include "core/error/error_list.h" +#include "core/os/mutex.h" +#include "core/string/ustring.h" +#include "core/templates/rid_owner.h" +#include "rendering_device_driver_d3d12.h" +#include "servers/display_server.h" +#include "servers/rendering/rendering_context_driver.h" + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wswitch" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#endif + +#if defined(AS) +#undef AS +#endif + +#include "d3dx12.h" +#include <dxgi1_6.h> + +#include <wrl/client.h> + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + +using Microsoft::WRL::ComPtr; + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +class RenderingContextDriverD3D12 : public RenderingContextDriver { + ComPtr<ID3D12DeviceFactory> device_factory; + ComPtr<IDXGIFactory2> dxgi_factory; + TightLocalVector<Device> driver_devices; + bool tearing_supported = false; + + Error _init_device_factory(); + Error _initialize_debug_layers(); + Error _initialize_devices(); + +public: + virtual Error initialize() override; + virtual const Device &device_get(uint32_t p_device_index) const override; + virtual uint32_t device_get_count() const override; + virtual bool device_supports_present(uint32_t p_device_index, SurfaceID p_surface) const override; + virtual RenderingDeviceDriver *driver_create() override; + virtual void driver_free(RenderingDeviceDriver *p_driver) override; + virtual SurfaceID surface_create(const void *p_platform_data) override; + virtual void surface_set_size(SurfaceID p_surface, uint32_t p_width, uint32_t p_height) override; + virtual void 
surface_set_vsync_mode(SurfaceID p_surface, DisplayServer::VSyncMode p_vsync_mode) override; + virtual DisplayServer::VSyncMode surface_get_vsync_mode(SurfaceID p_surface) const override; + virtual uint32_t surface_get_width(SurfaceID p_surface) const override; + virtual uint32_t surface_get_height(SurfaceID p_surface) const override; + virtual void surface_set_needs_resize(SurfaceID p_surface, bool p_needs_resize) override; + virtual bool surface_get_needs_resize(SurfaceID p_surface) const override; + virtual void surface_destroy(SurfaceID p_surface) override; + virtual bool is_debug_utils_enabled() const override; + + // Platform-specific data for the Windows embedded in this driver. + struct WindowPlatformData { + HWND window; + }; + + // D3D12-only methods. + struct Surface { + HWND hwnd = NULL; + uint32_t width = 0; + uint32_t height = 0; + DisplayServer::VSyncMode vsync_mode = DisplayServer::VSYNC_ENABLED; + bool needs_resize = false; + }; + + IDXGIAdapter1 *create_adapter(uint32_t p_adapter_index) const; + ID3D12DeviceFactory *device_factory_get() const; + IDXGIFactory2 *dxgi_factory_get() const; + bool get_tearing_supported() const; + bool use_validation_layers() const; + + RenderingContextDriverD3D12(); + virtual ~RenderingContextDriverD3D12() override; +}; + +#endif // RENDERING_CONTEXT_DRIVER_D3D12_H diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index 6a2a3c32b0..6517b4e91b 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -32,10 +32,12 @@ #include "core/config/project_settings.h" #include "core/io/marshalls.h" -#include "d3d12_context.h" -#include "d3d12_godot_nir_bridge.h" +#include "servers/rendering/rendering_device.h" #include "thirdparty/zlib/zlib.h" +#include "d3d12_godot_nir_bridge.h" +#include "rendering_context_driver_d3d12.h" + // No point in fighting warnings in Mesa. 
#if defined(_MSC_VER) #pragma warning(push) @@ -78,15 +80,25 @@ extern "C" { #undef UNUSED #endif +#ifdef PIX_ENABLED +#if defined(__GNUC__) +#define _MSC_VER 1800 +#endif +#define USE_PIX +#include "WinPixEventRuntime/pix3.h" +#if defined(__GNUC__) +#undef _MSC_VER +#endif +#endif + static const D3D12_RANGE VOID_RANGE = {}; -static const uint32_t ROOT_CONSTANT_SPACE = RDD::MAX_UNIFORM_SETS + 1; -static const uint32_t ROOT_CONSTANT_REGISTER = 0; -static const uint32_t RUNTIME_DATA_SPACE = RDD::MAX_UNIFORM_SETS + 2; -static const uint32_t RUNTIME_DATA_REGISTER = 0; +static const uint32_t ROOT_CONSTANT_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RDD::MAX_UNIFORM_SETS + 1); +static const uint32_t RUNTIME_DATA_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RDD::MAX_UNIFORM_SETS + 2); #ifdef DEV_ENABLED //#define DEBUG_COUNT_BARRIERS +#define CUSTOM_INFO_QUEUE_ENABLED 0 #endif /*****************/ @@ -381,6 +393,91 @@ static const D3D12_COMPARISON_FUNC RD_TO_D3D12_COMPARE_OP[RD::COMPARE_OP_MAX] = D3D12_COMPARISON_FUNC_ALWAYS, }; +uint32_t RenderingDeviceDriverD3D12::SubgroupCapabilities::supported_stages_flags_rd() const { + // If there's a way to check exactly which are supported, I have yet to find it. 
+ return ( + RenderingDevice::ShaderStage::SHADER_STAGE_FRAGMENT_BIT | + RenderingDevice::ShaderStage::SHADER_STAGE_COMPUTE_BIT); +} + +uint32_t RenderingDeviceDriverD3D12::SubgroupCapabilities::supported_operations_flags_rd() const { + if (!wave_ops_supported) { + return 0; + } else { + return ( + RenderingDevice::SubgroupOperations::SUBGROUP_BASIC_BIT | + RenderingDevice::SubgroupOperations::SUBGROUP_BALLOT_BIT | + RenderingDevice::SubgroupOperations::SUBGROUP_VOTE_BIT | + RenderingDevice::SubgroupOperations::SUBGROUP_SHUFFLE_BIT | + RenderingDevice::SubgroupOperations::SUBGROUP_SHUFFLE_RELATIVE_BIT | + RenderingDevice::SubgroupOperations::SUBGROUP_QUAD_BIT | + RenderingDevice::SubgroupOperations::SUBGROUP_ARITHMETIC_BIT | + RenderingDevice::SubgroupOperations::SUBGROUP_CLUSTERED_BIT); + } +} + +void RenderingDeviceDriverD3D12::_debug_message_func(D3D12_MESSAGE_CATEGORY p_category, D3D12_MESSAGE_SEVERITY p_severity, D3D12_MESSAGE_ID p_id, LPCSTR p_description, void *p_context) { + String type_string; + switch (p_category) { + case D3D12_MESSAGE_CATEGORY_APPLICATION_DEFINED: + type_string = "APPLICATION_DEFINED"; + break; + case D3D12_MESSAGE_CATEGORY_MISCELLANEOUS: + type_string = "MISCELLANEOUS"; + break; + case D3D12_MESSAGE_CATEGORY_INITIALIZATION: + type_string = "INITIALIZATION"; + break; + case D3D12_MESSAGE_CATEGORY_CLEANUP: + type_string = "CLEANUP"; + break; + case D3D12_MESSAGE_CATEGORY_COMPILATION: + type_string = "COMPILATION"; + break; + case D3D12_MESSAGE_CATEGORY_STATE_CREATION: + type_string = "STATE_CREATION"; + break; + case D3D12_MESSAGE_CATEGORY_STATE_SETTING: + type_string = "STATE_SETTING"; + break; + case D3D12_MESSAGE_CATEGORY_STATE_GETTING: + type_string = "STATE_GETTING"; + break; + case D3D12_MESSAGE_CATEGORY_RESOURCE_MANIPULATION: + type_string = "RESOURCE_MANIPULATION"; + break; + case D3D12_MESSAGE_CATEGORY_EXECUTION: + type_string = "EXECUTION"; + break; + case D3D12_MESSAGE_CATEGORY_SHADER: + type_string = "SHADER"; + break; + } + 
+ String error_message(type_string + + " - Message Id Number: " + String::num_int64(p_id) + + "\n\t" + p_description); + + // Convert D3D12 severity to our own log macros. + switch (p_severity) { + case D3D12_MESSAGE_SEVERITY_MESSAGE: + print_verbose(error_message); + break; + case D3D12_MESSAGE_SEVERITY_INFO: + print_line(error_message); + break; + case D3D12_MESSAGE_SEVERITY_WARNING: + WARN_PRINT(error_message); + break; + case D3D12_MESSAGE_SEVERITY_ERROR: + case D3D12_MESSAGE_SEVERITY_CORRUPTION: + ERR_PRINT(error_message); + CRASH_COND_MSG(Engine::get_singleton()->is_abort_on_gpu_errors_enabled(), + "Crashing, because abort on GPU errors is enabled."); + break; + } +} + /****************/ /**** MEMORY ****/ /****************/ @@ -435,7 +532,6 @@ static const D3D12_RESOURCE_DIMENSION RD_TEXTURE_TYPE_TO_D3D12_RESOURCE_DIMENSIO void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state, ID3D12Resource *p_resource_override) { DEV_ASSERT(p_subresource != UINT32_MAX); // We don't support an "all-resources" command here. - DEV_ASSERT(p_new_state != D3D12_RESOURCE_STATE_COMMON); // No need to support this for now. #ifdef DEBUG_COUNT_BARRIERS uint64_t start = OS::get_singleton()->get_ticks_usec(); @@ -446,7 +542,10 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso ID3D12Resource *res_to_transition = p_resource_override ? p_resource_override : p_resource->resource; - bool redundant_transition = ((*curr_state) & p_new_state) == p_new_state; + // Transitions can be considered redundant if the current state has all the bits of the new state. + // This check does not apply to the common state however, which must resort to checking if the state is the same (0). + bool any_state_is_common = *curr_state == D3D12_RESOURCE_STATE_COMMON || p_new_state == D3D12_RESOURCE_STATE_COMMON; + bool redundant_transition = any_state_is_common ? 
*curr_state == p_new_state : ((*curr_state) & p_new_state) == p_new_state; if (redundant_transition) { bool just_written = *curr_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS; bool needs_uav_barrier = just_written && res_states->last_batch_with_uav_barrier != res_barriers_batch; @@ -1039,7 +1138,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p // 1. If ID3DDevice10 is present and driver reports relaxed casting is, leverage its new extended resource creation API (via D3D12MA). // 2. Otherwise, fall back to an approach based on abusing aliasing, hoping for the best. [[CROSS_FAMILY_ALIASING]] if (p_format.shareable_formats.size()) { - if (context->get_format_capabilities().relaxed_casting_supported) { + if (format_capabilities.relaxed_casting_supported) { ComPtr<ID3D12Device10> device_10; device->QueryInterface(device_10.GetAddressOf()); if (device_10) { @@ -1446,24 +1545,26 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create_shared_from_slice(Text } } break; case TEXTURE_SLICE_CUBEMAP: { - if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE) { - DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY); - } else if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || p_layer == 0) { + if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || p_layer == 0) { srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + srv_desc.TextureCube.MostDetailedMip = p_mipmap; + srv_desc.TextureCube.MipLevels = 1; DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY); - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + uav_desc.Texture2DArray.MipSlice = p_mipmap; uav_desc.Texture2DArray.FirstArraySlice = 0; uav_desc.Texture2DArray.ArraySize = 6; uav_desc.Texture2DArray.PlaneSlice = 0; } else if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY || p_layer != 0) { srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + 
srv_desc.TextureCubeArray.MostDetailedMip = p_mipmap; + srv_desc.TextureCubeArray.MipLevels = 1; srv_desc.TextureCubeArray.First2DArrayFace = p_layer; srv_desc.TextureCubeArray.NumCubes = 1; srv_desc.TextureCubeArray.ResourceMinLODClamp = 0.0f; DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY); - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + uav_desc.Texture2DArray.MipSlice = p_mipmap; uav_desc.Texture2DArray.FirstArraySlice = p_layer; uav_desc.Texture2DArray.ArraySize = 6; uav_desc.Texture2DArray.PlaneSlice = 0; @@ -1486,6 +1587,8 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create_shared_from_slice(Text uav_desc.Texture2DArray.FirstArraySlice = p_layer; uav_desc.Texture2DArray.ArraySize = p_layers; } break; + default: + break; } // Bookkeep. @@ -1570,15 +1673,19 @@ void RenderingDeviceDriverD3D12::texture_unmap(TextureID p_texture) { BitField<RDD::TextureUsageBits> RenderingDeviceDriverD3D12::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) { D3D12_FEATURE_DATA_FORMAT_SUPPORT srv_rtv_support = {}; srv_rtv_support.Format = RD_TO_D3D12_FORMAT[p_format].general_format; - HRESULT res = device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &srv_rtv_support, sizeof(srv_rtv_support)); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + if (srv_rtv_support.Format != DXGI_FORMAT_UNKNOWN) { // Some implementations (i.e., vkd3d-proton) error out instead of returning empty. + HRESULT res = device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &srv_rtv_support, sizeof(srv_rtv_support)); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + } D3D12_FEATURE_DATA_FORMAT_SUPPORT &uav_support = srv_rtv_support; // Fine for now. 
D3D12_FEATURE_DATA_FORMAT_SUPPORT dsv_support = {}; dsv_support.Format = RD_TO_D3D12_FORMAT[p_format].dsv_format; - res = device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &dsv_support, sizeof(dsv_support)); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + if (dsv_support.Format != DXGI_FORMAT_UNKNOWN) { // See above. + HRESULT res = device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &dsv_support, sizeof(dsv_support)); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + } // Everything supported by default makes an all-or-nothing check easier for the caller. BitField<RDD::TextureUsageBits> supported = INT64_MAX; @@ -1761,25 +1868,173 @@ void RenderingDeviceDriverD3D12::command_pipeline_barrier( } } -/*************************/ -/**** COMMAND BUFFERS ****/ -/*************************/ +/****************/ +/**** FENCES ****/ +/****************/ + +RDD::FenceID RenderingDeviceDriverD3D12::fence_create() { + ComPtr<ID3D12Fence> d3d_fence; + HRESULT res = device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(d3d_fence.GetAddressOf())); + ERR_FAIL_COND_V(!SUCCEEDED(res), FenceID()); + + HANDLE event_handle = CreateEvent(nullptr, FALSE, FALSE, nullptr); + ERR_FAIL_NULL_V(event_handle, FenceID()); + + FenceInfo *fence = memnew(FenceInfo); + fence->d3d_fence = d3d_fence; + fence->event_handle = event_handle; + return FenceID(fence); +} + +Error RenderingDeviceDriverD3D12::fence_wait(FenceID p_fence) { + FenceInfo *fence = (FenceInfo *)(p_fence.id); + DWORD res = WaitForSingleObjectEx(fence->event_handle, INFINITE, FALSE); +#ifdef PIX_ENABLED + PIXNotifyWakeFromFenceSignal(fence->event_handle); +#endif + + return (res == WAIT_FAILED) ? 
FAILED : OK; +} + +void RenderingDeviceDriverD3D12::fence_free(FenceID p_fence) { + FenceInfo *fence = (FenceInfo *)(p_fence.id); + CloseHandle(fence->event_handle); + memdelete(fence); +} + +/********************/ +/**** SEMAPHORES ****/ +/********************/ + +RDD::SemaphoreID RenderingDeviceDriverD3D12::semaphore_create() { + ComPtr<ID3D12Fence> d3d_fence; + HRESULT res = device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(d3d_fence.GetAddressOf())); + ERR_FAIL_COND_V(!SUCCEEDED(res), SemaphoreID()); + + SemaphoreInfo *semaphore = memnew(SemaphoreInfo); + semaphore->d3d_fence = d3d_fence; + return SemaphoreID(semaphore); +} + +void RenderingDeviceDriverD3D12::semaphore_free(SemaphoreID p_semaphore) { + SemaphoreInfo *semaphore = (SemaphoreInfo *)(p_semaphore.id); + memdelete(semaphore); +} + +/******************/ +/**** COMMANDS ****/ +/******************/ + +// ----- QUEUE FAMILY ----- + +RDD::CommandQueueFamilyID RenderingDeviceDriverD3D12::command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface) { + // Return the command list type encoded plus one so zero is an invalid value. + // The only ones that support presenting to a surface are direct queues. 
+ if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_GRAPHICS_BIT) || (p_surface != 0)) { + return CommandQueueFamilyID(D3D12_COMMAND_LIST_TYPE_DIRECT + 1); + } else if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_COMPUTE_BIT)) { + return CommandQueueFamilyID(D3D12_COMMAND_LIST_TYPE_COMPUTE + 1); + } else if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_TRANSFER_BIT)) { + return CommandQueueFamilyID(D3D12_COMMAND_LIST_TYPE_COPY + 1); + } else { + return CommandQueueFamilyID(); + } +} + +// ----- QUEUE ----- + +RDD::CommandQueueID RenderingDeviceDriverD3D12::command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue) { + ComPtr<ID3D12CommandQueue> d3d_queue; + D3D12_COMMAND_QUEUE_DESC queue_desc = {}; + queue_desc.Type = (D3D12_COMMAND_LIST_TYPE)(p_cmd_queue_family.id - 1); + HRESULT res = device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(d3d_queue.GetAddressOf())); + ERR_FAIL_COND_V(!SUCCEEDED(res), CommandQueueID()); + + CommandQueueInfo *command_queue = memnew(CommandQueueInfo); + command_queue->d3d_queue = d3d_queue; + return CommandQueueID(command_queue); +} + +Error RenderingDeviceDriverD3D12::command_queue_execute(CommandQueueID p_cmd_queue, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) { + CommandQueueInfo *command_queue = (CommandQueueInfo *)(p_cmd_queue.id); + for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) { + const SemaphoreInfo *semaphore = (const SemaphoreInfo *)(p_wait_semaphores[i].id); + command_queue->d3d_queue->Wait(semaphore->d3d_fence.Get(), semaphore->fence_value); + } + + thread_local LocalVector<ID3D12CommandList *> command_lists; + command_lists.resize(p_cmd_buffers.size()); + for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) { + const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)(p_cmd_buffers[i].id); + command_lists[i] = 
cmd_buf_info->cmd_list.Get(); + } + + command_queue->d3d_queue->ExecuteCommandLists(command_lists.size(), command_lists.ptr()); + + for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) { + SemaphoreInfo *semaphore = (SemaphoreInfo *)(p_signal_semaphores[i].id); + semaphore->fence_value++; + command_queue->d3d_queue->Signal(semaphore->d3d_fence.Get(), semaphore->fence_value); + } + + if (p_signal_fence) { + FenceInfo *fence = (FenceInfo *)(p_signal_fence.id); + fence->fence_value++; + command_queue->d3d_queue->Signal(fence->d3d_fence.Get(), fence->fence_value); + fence->d3d_fence->SetEventOnCompletion(fence->fence_value, fence->event_handle); + } + + return OK; +} + +Error RenderingDeviceDriverD3D12::command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) { + // D3D12 does not require waiting for the command queue's semaphores to handle presentation. + // We just present the swap chains that were specified and ignore the command queue and the semaphores. + HRESULT res; + bool any_present_failed = false; + for (uint32_t i = 0; i < p_swap_chains.size(); i++) { + SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id); + res = swap_chain->d3d_swap_chain->Present(swap_chain->sync_interval, swap_chain->present_flags); + if (!SUCCEEDED(res)) { + print_verbose("D3D12: Presenting swapchain failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + any_present_failed = true; + } + } + + return any_present_failed ? 
FAILED : OK; +} + +void RenderingDeviceDriverD3D12::command_queue_free(CommandQueueID p_cmd_queue) { + CommandQueueInfo *command_queue = (CommandQueueInfo *)(p_cmd_queue.id); + memdelete(command_queue); +} // ----- POOL ----- -RDD::CommandPoolID RenderingDeviceDriverD3D12::command_pool_create(CommandBufferType p_cmd_buffer_type) { - last_command_pool_id.id++; - return last_command_pool_id; +RDD::CommandPoolID RenderingDeviceDriverD3D12::command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) { + CommandPoolInfo *command_pool = memnew(CommandPoolInfo); + command_pool->queue_family = p_cmd_queue_family; + command_pool->buffer_type = p_cmd_buffer_type; + return CommandPoolID(command_pool); } void RenderingDeviceDriverD3D12::command_pool_free(CommandPoolID p_cmd_pool) { - pools_command_buffers.erase(p_cmd_pool); + CommandPoolInfo *command_pool = (CommandPoolInfo *)(p_cmd_pool.id); + memdelete(command_pool); } // ----- BUFFER ----- -RDD::CommandBufferID RenderingDeviceDriverD3D12::command_buffer_create(CommandBufferType p_cmd_buffer_type, CommandPoolID p_cmd_pool) { - D3D12_COMMAND_LIST_TYPE list_type = p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY ? 
D3D12_COMMAND_LIST_TYPE_DIRECT : D3D12_COMMAND_LIST_TYPE_BUNDLE; +RDD::CommandBufferID RenderingDeviceDriverD3D12::command_buffer_create(CommandPoolID p_cmd_pool) { + DEV_ASSERT(p_cmd_pool); + + const CommandPoolInfo *command_pool = (CommandPoolInfo *)(p_cmd_pool.id); + D3D12_COMMAND_LIST_TYPE list_type; + if (command_pool->buffer_type == COMMAND_BUFFER_TYPE_SECONDARY) { + list_type = D3D12_COMMAND_LIST_TYPE_BUNDLE; + } else { + list_type = D3D12_COMMAND_LIST_TYPE(command_pool->queue_family.id - 1); + } ID3D12CommandAllocator *cmd_allocator = nullptr; { @@ -1793,9 +2048,9 @@ RDD::CommandBufferID RenderingDeviceDriverD3D12::command_buffer_create(CommandBu device->QueryInterface(device_4.GetAddressOf()); HRESULT res = E_FAIL; if (device_4) { - res = device_4->CreateCommandList1(0, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_LIST_FLAG_NONE, IID_PPV_ARGS(&cmd_list)); + res = device_4->CreateCommandList1(0, list_type, D3D12_COMMAND_LIST_FLAG_NONE, IID_PPV_ARGS(&cmd_list)); } else { - res = device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, cmd_allocator, nullptr, IID_PPV_ARGS(&cmd_list)); + res = device->CreateCommandList(0, list_type, cmd_allocator, nullptr, IID_PPV_ARGS(&cmd_list)); } ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), CommandBufferID(), "CreateCommandList failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); if (!device_4) { @@ -1814,20 +2069,18 @@ RDD::CommandBufferID RenderingDeviceDriverD3D12::command_buffer_create(CommandBu bool RenderingDeviceDriverD3D12::command_buffer_begin(CommandBufferID p_cmd_buffer) { const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; -#ifdef DEBUG_ENABLED - ERR_FAIL_COND_V(cmd_buf_info->cmd_list->GetType() != D3D12_COMMAND_LIST_TYPE_DIRECT, false); -#endif - HRESULT res = cmd_buf_info->cmd_list->Reset(cmd_buf_info->cmd_allocator.Get(), nullptr); + HRESULT res = cmd_buf_info->cmd_allocator->Reset(); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "Reset failed with error " + 
vformat("0x%08ux", (uint64_t)res) + "."); + res = cmd_buf_info->cmd_list->Reset(cmd_buf_info->cmd_allocator.Get(), nullptr); ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "Reset failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); return true; } bool RenderingDeviceDriverD3D12::command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) { const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; -#ifdef DEBUG_ENABLED - ERR_FAIL_COND_V(cmd_buf_info->cmd_list->GetType() != D3D12_COMMAND_LIST_TYPE_BUNDLE, false); -#endif - HRESULT res = cmd_buf_info->cmd_list->Reset(cmd_buf_info->cmd_allocator.Get(), nullptr); + HRESULT res = cmd_buf_info->cmd_allocator->Reset(); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "Reset failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + res = cmd_buf_info->cmd_list->Reset(cmd_buf_info->cmd_allocator.Get(), nullptr); ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "Reset failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); return true; } @@ -1841,22 +2094,223 @@ void RenderingDeviceDriverD3D12::command_buffer_end(CommandBufferID p_cmd_buffer cmd_buf_info->graphics_root_signature_crc = 0; cmd_buf_info->compute_pso = nullptr; cmd_buf_info->compute_root_signature_crc = 0; + cmd_buf_info->descriptor_heaps_set = false; } void RenderingDeviceDriverD3D12::command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) { const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; -#ifdef DEBUG_ENABLED - ERR_FAIL_COND(cmd_buf_info->cmd_list->GetType() != D3D12_COMMAND_LIST_TYPE_DIRECT); -#endif for (uint32_t i = 0; i < p_secondary_cmd_buffers.size(); i++) { const CommandBufferInfo *secondary_cb_info = (const CommandBufferInfo *)p_secondary_cmd_buffers[i].id; -#ifdef DEBUG_ENABLED - ERR_FAIL_COND(secondary_cb_info->cmd_list->GetType() != 
D3D12_COMMAND_LIST_TYPE_BUNDLE); -#endif cmd_buf_info->cmd_list->ExecuteBundle(secondary_cb_info->cmd_list.Get()); } } +/********************/ +/**** SWAP CHAIN ****/ +/********************/ + +void RenderingDeviceDriverD3D12::_swap_chain_release(SwapChain *p_swap_chain) { + _swap_chain_release_buffers(p_swap_chain); + + p_swap_chain->d3d_swap_chain.Reset(); +} + +void RenderingDeviceDriverD3D12::_swap_chain_release_buffers(SwapChain *p_swap_chain) { + for (ID3D12Resource *render_target : p_swap_chain->render_targets) { + render_target->Release(); + } + + p_swap_chain->render_targets.clear(); + p_swap_chain->render_targets_info.clear(); + + for (RDD::FramebufferID framebuffer : p_swap_chain->framebuffers) { + framebuffer_free(framebuffer); + } + + p_swap_chain->framebuffers.clear(); +} + +RDD::SwapChainID RenderingDeviceDriverD3D12::swap_chain_create(RenderingContextDriver::SurfaceID p_surface) { + // Create the render pass that will be used to draw to the swap chain's framebuffers. + RDD::Attachment attachment; + attachment.format = DATA_FORMAT_R8G8B8A8_UNORM; + attachment.samples = RDD::TEXTURE_SAMPLES_1; + attachment.load_op = RDD::ATTACHMENT_LOAD_OP_CLEAR; + attachment.store_op = RDD::ATTACHMENT_STORE_OP_STORE; + + RDD::Subpass subpass; + RDD::AttachmentReference color_ref; + color_ref.attachment = 0; + color_ref.aspect.set_flag(RDD::TEXTURE_ASPECT_COLOR_BIT); + subpass.color_references.push_back(color_ref); + + RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1); + ERR_FAIL_COND_V(!render_pass, SwapChainID()); + + // Create the empty swap chain until it is resized. 
+ SwapChain *swap_chain = memnew(SwapChain); + swap_chain->surface = p_surface; + swap_chain->data_format = attachment.format; + swap_chain->render_pass = render_pass; + return SwapChainID(swap_chain); +} + +Error RenderingDeviceDriverD3D12::swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) { + DEV_ASSERT(p_cmd_queue.id != 0); + DEV_ASSERT(p_swap_chain.id != 0); + + CommandQueueInfo *command_queue = (CommandQueueInfo *)(p_cmd_queue.id); + SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id); + RenderingContextDriverD3D12::Surface *surface = (RenderingContextDriverD3D12::Surface *)(swap_chain->surface); + if (surface->width == 0 || surface->height == 0) { + // Very likely the window is minimized, don't create a swap chain. + return ERR_SKIP; + } + + HRESULT res; + const bool is_tearing_supported = context_driver->get_tearing_supported(); + UINT sync_interval = 0; + UINT present_flags = 0; + UINT creation_flags = 0; + switch (surface->vsync_mode) { + case DisplayServer::VSYNC_MAILBOX: { + sync_interval = 1; + present_flags = DXGI_PRESENT_RESTART; + } break; + case DisplayServer::VSYNC_ENABLED: { + sync_interval = 1; + present_flags = 0; + } break; + case DisplayServer::VSYNC_DISABLED: { + sync_interval = 0; + present_flags = is_tearing_supported ? DXGI_PRESENT_ALLOW_TEARING : 0; + creation_flags = is_tearing_supported ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0; + } break; + case DisplayServer::VSYNC_ADAPTIVE: // Unsupported. + default: + sync_interval = 1; + present_flags = 0; + break; + } + + print_verbose("Using swap chain flags: " + itos(creation_flags) + ", sync interval: " + itos(sync_interval) + ", present flags: " + itos(present_flags)); + + if (swap_chain->d3d_swap_chain != nullptr && creation_flags != swap_chain->creation_flags) { + // The swap chain must be recreated if the creation flags are different. 
+ _swap_chain_release(swap_chain); + } + + DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {}; + if (swap_chain->d3d_swap_chain != nullptr) { + _swap_chain_release_buffers(swap_chain); + res = swap_chain->d3d_swap_chain->ResizeBuffers(p_desired_framebuffer_count, 0, 0, DXGI_FORMAT_UNKNOWN, creation_flags); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_UNAVAILABLE); + } else { + swap_chain_desc.BufferCount = p_desired_framebuffer_count; + swap_chain_desc.Format = RD_TO_D3D12_FORMAT[swap_chain->data_format].general_format; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + swap_chain_desc.SampleDesc.Count = 1; + swap_chain_desc.Flags = creation_flags; + swap_chain_desc.Scaling = DXGI_SCALING_NONE; + + ComPtr<IDXGISwapChain1> swap_chain_1; + res = context_driver->dxgi_factory_get()->CreateSwapChainForHwnd(command_queue->d3d_queue.Get(), surface->hwnd, &swap_chain_desc, nullptr, nullptr, swap_chain_1.GetAddressOf()); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); + + swap_chain_1.As(&swap_chain->d3d_swap_chain); + ERR_FAIL_NULL_V(swap_chain->d3d_swap_chain, ERR_CANT_CREATE); + + res = context_driver->dxgi_factory_get()->MakeWindowAssociation(surface->hwnd, DXGI_MWA_NO_ALT_ENTER | DXGI_MWA_NO_WINDOW_CHANGES); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); + } + + res = swap_chain->d3d_swap_chain->GetDesc1(&swap_chain_desc); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); + ERR_FAIL_COND_V(swap_chain_desc.BufferCount == 0, ERR_CANT_CREATE); + + surface->width = swap_chain_desc.Width; + surface->height = swap_chain_desc.Height; + + swap_chain->creation_flags = creation_flags; + swap_chain->sync_interval = sync_interval; + swap_chain->present_flags = present_flags; + + // Retrieve the render targets associated to the swap chain and recreate the framebuffers. 
The following code + // relies on the address of the elements remaining static when new elements are inserted, so the container must + // follow this restriction when reserving the right amount of elements beforehand. + swap_chain->render_targets.reserve(swap_chain_desc.BufferCount); + swap_chain->render_targets_info.reserve(swap_chain_desc.BufferCount); + swap_chain->framebuffers.reserve(swap_chain_desc.BufferCount); + + for (uint32_t i = 0; i < swap_chain_desc.BufferCount; i++) { + // Retrieve the resource corresponding to the swap chain's buffer. + ID3D12Resource *render_target = nullptr; + res = swap_chain->d3d_swap_chain->GetBuffer(i, IID_PPV_ARGS(&render_target)); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); + swap_chain->render_targets.push_back(render_target); + + // Create texture information for the framebuffer to reference the resource. Since the states pointer must + // reference an address of the element itself, we must insert it first and then modify it. + swap_chain->render_targets_info.push_back(TextureInfo()); + TextureInfo &texture_info = swap_chain->render_targets_info[i]; + texture_info.owner_info.states.subresource_states.push_back(D3D12_RESOURCE_STATE_PRESENT); + texture_info.states_ptr = &texture_info.owner_info.states; + texture_info.format = swap_chain->data_format; +#if defined(_MSC_VER) || !defined(_WIN32) + texture_info.desc = CD3DX12_RESOURCE_DESC(render_target->GetDesc()); +#else + render_target->GetDesc(&texture_info.desc); +#endif + texture_info.layers = 1; + texture_info.mipmaps = 1; + texture_info.resource = render_target; + texture_info.view_descs.srv.Format = texture_info.desc.Format; + texture_info.view_descs.srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + + // Create the framebuffer for this buffer. 
+ FramebufferID framebuffer = _framebuffer_create(swap_chain->render_pass, TextureID(&swap_chain->render_targets_info[i]), swap_chain_desc.Width, swap_chain_desc.Height, true); + ERR_FAIL_COND_V(!framebuffer, ERR_CANT_CREATE); + swap_chain->framebuffers.push_back(framebuffer); + } + + // Once everything's been created correctly, indicate the surface no longer needs to be resized. + context_driver->surface_set_needs_resize(swap_chain->surface, false); + + return OK; +} + +RDD::FramebufferID RenderingDeviceDriverD3D12::swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) { + DEV_ASSERT(p_swap_chain.id != 0); + + const SwapChain *swap_chain = (const SwapChain *)(p_swap_chain.id); + if (context_driver->surface_get_needs_resize(swap_chain->surface)) { + r_resize_required = true; + return FramebufferID(); + } + + const uint32_t buffer_index = swap_chain->d3d_swap_chain->GetCurrentBackBufferIndex(); + DEV_ASSERT(buffer_index < swap_chain->framebuffers.size()); + return swap_chain->framebuffers[buffer_index]; +} + +RDD::RenderPassID RenderingDeviceDriverD3D12::swap_chain_get_render_pass(SwapChainID p_swap_chain) { + const SwapChain *swap_chain = (const SwapChain *)(p_swap_chain.id); + return swap_chain->render_pass; +} + +RDD::DataFormat RenderingDeviceDriverD3D12::swap_chain_get_format(SwapChainID p_swap_chain) { + const SwapChain *swap_chain = (const SwapChain *)(p_swap_chain.id); + return swap_chain->data_format; +} + +void RenderingDeviceDriverD3D12::swap_chain_free(SwapChainID p_swap_chain) { + SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id); + _swap_chain_release(swap_chain); + render_pass_free(swap_chain->render_pass); + memdelete(swap_chain); +} + /*********************/ /**** FRAMEBUFFER ****/ /*********************/ @@ -1953,9 +2407,10 @@ D3D12_DEPTH_STENCIL_VIEW_DESC RenderingDeviceDriverD3D12::_make_dsv_for_texture( return dsv_desc; } -RDD::FramebufferID 
RenderingDeviceDriverD3D12::framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height) { +RDD::FramebufferID RenderingDeviceDriverD3D12::_framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height, bool p_is_screen) { // Pre-bookkeep. FramebufferInfo *fb_info = VersatileResource::allocate<FramebufferInfo>(resources_allocator); + fb_info->is_screen = p_is_screen; const RenderPassInfo *pass_info = (const RenderPassInfo *)p_render_pass.id; @@ -1978,7 +2433,7 @@ RDD::FramebufferID RenderingDeviceDriverD3D12::framebuffer_create(RenderPassID p } if (num_color) { - Error err = fb_info->rtv_heap.allocate(device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, num_color, false); + Error err = fb_info->rtv_heap.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_RTV, num_color, false); if (err) { VersatileResource::free(resources_allocator, fb_info); ERR_FAIL_V(FramebufferID()); @@ -1987,7 +2442,7 @@ RDD::FramebufferID RenderingDeviceDriverD3D12::framebuffer_create(RenderPassID p DescriptorsHeap::Walker rtv_heap_walker = fb_info->rtv_heap.make_walker(); if (num_depth_stencil) { - Error err = fb_info->dsv_heap.allocate(device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, num_depth_stencil, false); + Error err = fb_info->dsv_heap.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_DSV, num_depth_stencil, false); if (err) { VersatileResource::free(resources_allocator, fb_info); ERR_FAIL_V(FramebufferID()); @@ -2039,6 +2494,10 @@ RDD::FramebufferID RenderingDeviceDriverD3D12::framebuffer_create(RenderPassID p return FramebufferID(fb_info); } +RDD::FramebufferID RenderingDeviceDriverD3D12::framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height) { + return _framebuffer_create(p_render_pass, p_attachments, p_width, p_height, false); +} + void RenderingDeviceDriverD3D12::framebuffer_free(FramebufferID p_framebuffer) { 
FramebufferInfo *fb_info = (FramebufferInfo *)p_framebuffer.id; VersatileResource::free(resources_allocator, fb_info); @@ -2069,8 +2528,6 @@ dxil_validator *RenderingDeviceDriverD3D12::_get_dxil_validator_for_current_thre #endif dxil_validator *dxil_validator = dxil_create_validator(nullptr); - CRASH_COND(!dxil_validator); - dxil_validators.insert(thread_idx, dxil_validator); return dxil_validator; } @@ -2207,6 +2664,14 @@ bool RenderingDeviceDriverD3D12::_shader_apply_specialization_constants( bool RenderingDeviceDriverD3D12::_shader_sign_dxil_bytecode(ShaderStage p_stage, Vector<uint8_t> &r_dxil_blob) { dxil_validator *validator = _get_dxil_validator_for_current_thread(); + if (!validator) { + if (is_in_developer_mode()) { + return true; + } else { + OS::get_singleton()->alert("Shader validation failed: DXIL.dll was not found, and developer mode is disabled.\n\nClick OK to exit."); + CRASH_NOW(); + } + } char *err = nullptr; bool res = dxil_validate_module(validator, r_dxil_blob.ptrw(), r_dxil_blob.size(), &err); @@ -2222,7 +2687,7 @@ bool RenderingDeviceDriverD3D12::_shader_sign_dxil_bytecode(ShaderStage p_stage, } String RenderingDeviceDriverD3D12::shader_get_binary_cache_key() { - return "D3D12-SV" + uitos(ShaderBinary::VERSION) + "-" + itos(context->get_shader_capabilities().shader_model); + return "D3D12-SV" + uitos(ShaderBinary::VERSION) + "-" + itos(shader_capabilities.shader_model) + (is_in_developer_mode() ? 
"dev" : ""); } Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) { @@ -2293,9 +2758,7 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec nir_options.lower_base_vertex = false; dxil_spirv_runtime_conf dxil_runtime_conf = {}; - dxil_runtime_conf.runtime_data_cbv.register_space = RUNTIME_DATA_SPACE; dxil_runtime_conf.runtime_data_cbv.base_shader_register = RUNTIME_DATA_REGISTER; - dxil_runtime_conf.push_constant_cbv.register_space = ROOT_CONSTANT_SPACE; dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER; dxil_runtime_conf.zero_based_vertex_instance_id = true; dxil_runtime_conf.zero_based_compute_workgroup_id = true; @@ -2413,10 +2876,10 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec DEV_ASSERT(p_dxil_type < ARRAY_SIZE(DXIL_TYPE_TO_CLASS)); ResourceClass res_class = DXIL_TYPE_TO_CLASS[p_dxil_type]; - if (p_register == ROOT_CONSTANT_REGISTER && p_space == ROOT_CONSTANT_SPACE) { + if (p_register == ROOT_CONSTANT_REGISTER && p_space == 0) { DEV_ASSERT(res_class == RES_CLASS_CBV); shader_data_in.binary_data.dxil_push_constant_stages |= (1 << shader_data_in.stage); - } else if (p_register == RUNTIME_DATA_REGISTER && p_space == RUNTIME_DATA_SPACE) { + } else if (p_register == RUNTIME_DATA_REGISTER && p_space == 0) { DEV_ASSERT(res_class == RES_CLASS_CBV); shader_data_in.binary_data.nir_runtime_data_root_param_idx = 1; // Temporary, to be determined later. 
} else { @@ -2494,8 +2957,11 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec nir_to_dxil_options nir_to_dxil_options = {}; nir_to_dxil_options.environment = DXIL_ENVIRONMENT_VULKAN; - nir_to_dxil_options.shader_model_max = shader_model_d3d_to_dxil(context->get_shader_capabilities().shader_model); - nir_to_dxil_options.validator_version_max = dxil_get_validator_version(_get_dxil_validator_for_current_thread()); + nir_to_dxil_options.shader_model_max = shader_model_d3d_to_dxil(shader_capabilities.shader_model); + dxil_validator *validator = _get_dxil_validator_for_current_thread(); + if (validator) { + nir_to_dxil_options.validator_version_max = dxil_get_validator_version(validator); + } nir_to_dxil_options.godot_nir_callbacks = &godot_nir_callbacks; dxil_logger logger = {}; @@ -2575,7 +3041,7 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec push_constant.InitAsConstants( binary_data.push_constant_size / sizeof(uint32_t), ROOT_CONSTANT_REGISTER, - ROOT_CONSTANT_SPACE, + 0, stages_to_d3d12_visibility(binary_data.dxil_push_constant_stages)); root_params.push_back(push_constant); } @@ -2588,7 +3054,7 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec nir_runtime_data.InitAsConstants( sizeof(dxil_spirv_vertex_runtime_data) / sizeof(uint32_t), RUNTIME_DATA_REGISTER, - RUNTIME_DATA_SPACE, + 0, D3D12_SHADER_VISIBILITY_VERTEX); root_params.push_back(nir_runtime_data); } @@ -3090,14 +3556,14 @@ RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<Boun #endif if (num_resource_descs) { - Error err = uniform_set_info->desc_heaps.resources.allocate(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, num_resource_descs, false); + Error err = uniform_set_info->desc_heaps.resources.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, num_resource_descs, false); if (err) { VersatileResource::free(resources_allocator, uniform_set_info); 
ERR_FAIL_V(UniformSetID()); } } if (num_sampler_descs) { - Error err = uniform_set_info->desc_heaps.samplers.allocate(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, num_sampler_descs, false); + Error err = uniform_set_info->desc_heaps.samplers.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, num_sampler_descs, false); if (err) { VersatileResource::free(resources_allocator, uniform_set_info); ERR_FAIL_V(UniformSetID()); @@ -3466,11 +3932,24 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff } } -void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index, bool p_for_compute) { - if (!unlikely(segment_begun)) { - // Support out-of-frame rendering, like the boot splash screen. - begin_segment(p_cmd_buffer, frame_idx, frames_drawn); +void RenderingDeviceDriverD3D12::_command_check_descriptor_sets(CommandBufferID p_cmd_buffer) { + DEV_ASSERT(segment_begun && "Unable to use commands that rely on descriptors because a segment was never begun."); + + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; + if (!cmd_buf_info->descriptor_heaps_set) { + // Set descriptor heaps for the command buffer if they haven't been set yet. 
+ ID3D12DescriptorHeap *heaps[] = { + frames[frame_idx].desc_heaps.resources.get_heap(), + frames[frame_idx].desc_heaps.samplers.get_heap(), + }; + + cmd_buf_info->cmd_list->SetDescriptorHeaps(2, heaps); + cmd_buf_info->descriptor_heaps_set = true; } +} + +void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index, bool p_for_compute) { + _command_check_descriptor_sets(p_cmd_buffer); UniformSetInfo *uniform_set_info = (UniformSetInfo *)p_uniform_set.id; const ShaderInfo *shader_info_in = (const ShaderInfo *)p_shader.id; @@ -3700,6 +4179,8 @@ void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd /******************/ void RenderingDeviceDriverD3D12::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) { + _command_check_descriptor_sets(p_cmd_buffer); + const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; BufferInfo *buf_info = (BufferInfo *)p_buffer.id; @@ -3882,6 +4363,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c frames[frame_idx].desc_heap_walkers.rtv.advance(); } else { // Clear via UAV. + _command_check_descriptor_sets(p_cmd_buffer); if (frames[frame_idx].desc_heap_walkers.resources.is_at_eof()) { if (!frames[frame_idx].desc_heaps_exhausted_reported.resources) { @@ -4187,7 +4669,6 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd } }; - // This is empty if a screen framebuffer. Transition in that case happens in D3D12Context::prepare_buffers(). 
for (uint32_t i = 0; i < fb_info->attachments.size(); i++) { TextureInfo *tex_info = (TextureInfo *)fb_info->attachments[i].id; if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { @@ -4216,16 +4697,14 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd cmd_buf_info->render_pass_state.region_rect.right == fb_info->size.x && cmd_buf_info->render_pass_state.region_rect.bottom == fb_info->size.y); - if (fb_info->is_screen) { - for (uint32_t i = 0; i < pass_info->attachments.size(); i++) { - if (pass_info->attachments[i].load_op == ATTACHMENT_LOAD_OP_DONT_CARE) { - const TextureInfo *tex_info = (const TextureInfo *)fb_info->attachments[i].id; - _discard_texture_subresources(tex_info, cmd_buf_info); - } + for (uint32_t i = 0; i < pass_info->attachments.size(); i++) { + if (pass_info->attachments[i].load_op == ATTACHMENT_LOAD_OP_DONT_CARE) { + const TextureInfo *tex_info = (const TextureInfo *)fb_info->attachments[i].id; + _discard_texture_subresources(tex_info, cmd_buf_info); } } - if (fb_info->vrs_attachment && context->get_vrs_capabilities().ss_image_supported) { + if (fb_info->vrs_attachment && vrs_capabilities.ss_image_supported) { ComPtr<ID3D12GraphicsCommandList5> cmd_list_5; cmd_buf_info->cmd_list->QueryInterface(cmd_list_5.GetAddressOf()); if (cmd_list_5) { @@ -4242,41 +4721,33 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd cmd_buf_info->render_pass_state.pass_info = pass_info; command_next_render_subpass(p_cmd_buffer, p_cmd_buffer_type); - AttachmentClear *clears = ALLOCA_ARRAY(AttachmentClear, fb_info->is_screen ? 1 : pass_info->attachments.size()); - Rect2i *clear_rects = ALLOCA_ARRAY(Rect2i, fb_info->is_screen ? 
1 : pass_info->attachments.size()); + AttachmentClear *clears = ALLOCA_ARRAY(AttachmentClear, pass_info->attachments.size()); + Rect2i *clear_rects = ALLOCA_ARRAY(Rect2i, pass_info->attachments.size()); uint32_t num_clears = 0; - if (fb_info->is_screen) { - clears[0].aspect.set_flag(TEXTURE_ASPECT_COLOR_BIT); - clears[0].color_attachment = 0; - clears[0].value = p_attachment_clears[0]; - clear_rects[0] = p_rect; - num_clears++; - } else { - for (uint32_t i = 0; i < pass_info->attachments.size(); i++) { - TextureInfo *tex_info = (TextureInfo *)fb_info->attachments[i].id; - if (!tex_info) { - continue; - } + for (uint32_t i = 0; i < pass_info->attachments.size(); i++) { + TextureInfo *tex_info = (TextureInfo *)fb_info->attachments[i].id; + if (!tex_info) { + continue; + } - AttachmentClear clear; - if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { - if (pass_info->attachments[i].load_op == ATTACHMENT_LOAD_OP_CLEAR) { - clear.aspect.set_flag(TEXTURE_ASPECT_COLOR_BIT); - clear.color_attachment = i; - } - } else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) { - if (pass_info->attachments[i].stencil_load_op == ATTACHMENT_LOAD_OP_CLEAR) { - clear.aspect.set_flag(TEXTURE_ASPECT_DEPTH_BIT); - } + AttachmentClear clear; + if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { + if (pass_info->attachments[i].load_op == ATTACHMENT_LOAD_OP_CLEAR) { + clear.aspect.set_flag(TEXTURE_ASPECT_COLOR_BIT); + clear.color_attachment = i; } - if (!clear.aspect.is_empty()) { - clear.value = p_attachment_clears[i]; - clears[num_clears] = clear; - clear_rects[num_clears] = p_rect; - num_clears++; + } else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) { + if (pass_info->attachments[i].stencil_load_op == ATTACHMENT_LOAD_OP_CLEAR) { + clear.aspect.set_flag(TEXTURE_ASPECT_DEPTH_BIT); } } + if (!clear.aspect.is_empty()) { + clear.value = p_attachment_clears[i]; + clears[num_clears] = clear; + 
clear_rects[num_clears] = p_rect; + num_clears++; + } } if (num_clears) { @@ -4293,6 +4764,15 @@ void RenderingDeviceDriverD3D12::_end_render_pass(CommandBufferID p_cmd_buffer) const RenderPassInfo *pass_info = cmd_buf_info->render_pass_state.pass_info; const Subpass &subpass = pass_info->subpasses[cmd_buf_info->render_pass_state.current_subpass]; + if (fb_info->is_screen) { + // Screen framebuffers must transition back to present state when the render pass is finished. + for (uint32_t i = 0; i < fb_info->attachments.size(); i++) { + TextureInfo *src_tex_info = (TextureInfo *)(fb_info->attachments[i].id); + uint32_t src_subresource = D3D12CalcSubresource(src_tex_info->base_mip, src_tex_info->base_layer, 0, src_tex_info->desc.MipLevels, src_tex_info->desc.ArraySize()); + _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_PRESENT); + } + } + struct Resolve { ID3D12Resource *src_res = nullptr; uint32_t src_subres = 0; @@ -4343,7 +4823,7 @@ void RenderingDeviceDriverD3D12::command_end_render_pass(CommandBufferID p_cmd_b const FramebufferInfo *fb_info = cmd_buf_info->render_pass_state.fb_info; const RenderPassInfo *pass_info = cmd_buf_info->render_pass_state.pass_info; - if (context->get_vrs_capabilities().ss_image_supported) { + if (vrs_capabilities.ss_image_supported) { ComPtr<ID3D12GraphicsCommandList5> cmd_list_5; cmd_buf_info->cmd_list->QueryInterface(cmd_list_5.GetAddressOf()); if (cmd_list_5) { @@ -4351,12 +4831,10 @@ void RenderingDeviceDriverD3D12::command_end_render_pass(CommandBufferID p_cmd_b } } - if (fb_info->attachments.size()) { // Otherwise, it's screen. 
- for (uint32_t i = 0; i < pass_info->attachments.size(); i++) { - if (pass_info->attachments[i].store_op == ATTACHMENT_STORE_OP_DONT_CARE) { - const TextureInfo *tex_info = (const TextureInfo *)fb_info->attachments[i].id; - _discard_texture_subresources(tex_info, cmd_buf_info); - } + for (uint32_t i = 0; i < pass_info->attachments.size(); i++) { + if (pass_info->attachments[i].store_op == ATTACHMENT_STORE_OP_DONT_CARE) { + const TextureInfo *tex_info = (const TextureInfo *)fb_info->attachments[i].id; + _discard_texture_subresources(tex_info, cmd_buf_info); } } @@ -5148,7 +5626,7 @@ void RenderingDeviceDriverD3D12::timestamp_query_pool_get_results(QueryPoolID p_ } uint64_t RenderingDeviceDriverD3D12::timestamp_query_result_to_time(uint64_t p_result) { - return p_result / (double)context->get_device_limits().timestamp_frequency * 1000000000.0; + return p_result / (double)device_limits.timestamp_frequency * 1000000000.0; } void RenderingDeviceDriverD3D12::command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) { @@ -5159,32 +5637,28 @@ void RenderingDeviceDriverD3D12::command_timestamp_write(CommandBufferID p_cmd_b TimestampQueryPoolInfo *tqp_info = (TimestampQueryPoolInfo *)p_pool_id.id; ID3D12Resource *results_buffer = tqp_info->results_buffer_allocation->GetResource(); cmd_buf_info->cmd_list->EndQuery(tqp_info->query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, p_index); - cmd_buf_info->cmd_list->ResolveQueryData(tqp_info->query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, p_index, tqp_info->query_count, results_buffer, p_index * sizeof(uint64_t)); + cmd_buf_info->cmd_list->ResolveQueryData(tqp_info->query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, p_index, 1, results_buffer, p_index * sizeof(uint64_t)); } -/****************/ -/**** SCREEN ****/ -/****************/ +void RenderingDeviceDriverD3D12::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) { +#ifdef PIX_ENABLED + 
const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + PIXBeginEvent(cmd_buf_info->cmd_list.Get(), p_color.to_argb32(), p_label_name); +#endif +} -RDD::DataFormat RenderingDeviceDriverD3D12::screen_get_format() { - // Very hacky, but not used often per frame, so I guess ok. - DXGI_FORMAT d3d12_format = context->get_screen_format(); - DataFormat format = DATA_FORMAT_MAX; - for (int i = 0; i < DATA_FORMAT_MAX; i++) { - if (d3d12_format == RD_TO_D3D12_FORMAT[i].general_format) { - format = DataFormat(i); - break; - } - } - ERR_FAIL_COND_V(format == DATA_FORMAT_MAX, DATA_FORMAT_MAX); - return format; +void RenderingDeviceDriverD3D12::command_end_label(CommandBufferID p_cmd_buffer) { +#ifdef PIX_ENABLED + const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + PIXEndEvent(cmd_buf_info->cmd_list.Get()); +#endif } /********************/ /**** SUBMISSION ****/ /********************/ -void RenderingDeviceDriverD3D12::begin_segment(CommandBufferID p_cmd_buffer, uint32_t p_frame_index, uint32_t p_frames_drawn) { +void RenderingDeviceDriverD3D12::begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) { frame_idx = p_frame_index; frames_drawn = p_frames_drawn; @@ -5195,17 +5669,9 @@ void RenderingDeviceDriverD3D12::begin_segment(CommandBufferID p_cmd_buffer, uin frames[frame_idx].desc_heap_walkers.aux.rewind(); frames[frame_idx].desc_heap_walkers.rtv.rewind(); frames[frame_idx].desc_heaps_exhausted_reported = {}; - frames[frame_idx].null_rtv_handle = { 0 }; + frames[frame_idx].null_rtv_handle = CD3DX12_CPU_DESCRIPTOR_HANDLE{}; frames[frame_idx].segment_serial = segment_serial; - ID3D12DescriptorHeap *heaps[] = { - frames[frame_idx].desc_heaps.resources.get_heap(), - frames[frame_idx].desc_heaps.samplers.get_heap(), - }; - - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; - cmd_buf_info->cmd_list->SetDescriptorHeaps(2, heaps); - segment_begun = true; } @@ -5218,36 +5684,44 @@ 
void RenderingDeviceDriverD3D12::end_segment() { /**** MISC ****/ /**************/ +void RenderingDeviceDriverD3D12::_set_object_name(ID3D12Object *p_object, String p_object_name) { + ERR_FAIL_NULL(p_object); + int name_len = p_object_name.size(); + WCHAR *name_w = (WCHAR *)alloca(sizeof(WCHAR) * (name_len + 1)); + MultiByteToWideChar(CP_UTF8, 0, p_object_name.utf8().get_data(), -1, name_w, name_len); + p_object->SetName(name_w); +} + void RenderingDeviceDriverD3D12::set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) { switch (p_type) { case OBJECT_TYPE_TEXTURE: { const TextureInfo *tex_info = (const TextureInfo *)p_driver_id.id; if (tex_info->owner_info.allocation) { - context->set_object_name(tex_info->resource, p_name); + _set_object_name(tex_info->resource, p_name); } } break; case OBJECT_TYPE_SAMPLER: { } break; case OBJECT_TYPE_BUFFER: { const BufferInfo *buf_info = (const BufferInfo *)p_driver_id.id; - context->set_object_name(buf_info->resource, p_name); + _set_object_name(buf_info->resource, p_name); } break; case OBJECT_TYPE_SHADER: { const ShaderInfo *shader_info_in = (const ShaderInfo *)p_driver_id.id; - context->set_object_name(shader_info_in->root_signature.Get(), p_name); + _set_object_name(shader_info_in->root_signature.Get(), p_name); } break; case OBJECT_TYPE_UNIFORM_SET: { const UniformSetInfo *uniform_set_info = (const UniformSetInfo *)p_driver_id.id; if (uniform_set_info->desc_heaps.resources.get_heap()) { - context->set_object_name(uniform_set_info->desc_heaps.resources.get_heap(), p_name + " resources heap"); + _set_object_name(uniform_set_info->desc_heaps.resources.get_heap(), p_name + " resources heap"); } if (uniform_set_info->desc_heaps.samplers.get_heap()) { - context->set_object_name(uniform_set_info->desc_heaps.samplers.get_heap(), p_name + " samplers heap"); + _set_object_name(uniform_set_info->desc_heaps.samplers.get_heap(), p_name + " samplers heap"); } } break; case OBJECT_TYPE_PIPELINE: { ID3D12PipelineState 
*pso = (ID3D12PipelineState *)p_driver_id.id; - context->set_object_name(pso, p_name); + _set_object_name(pso, p_name); } break; default: { DEV_ASSERT(false); @@ -5258,10 +5732,10 @@ void RenderingDeviceDriverD3D12::set_object_name(ObjectType p_type, ID p_driver_ uint64_t RenderingDeviceDriverD3D12::get_resource_native_handle(DriverResource p_type, ID p_driver_id) { switch (p_type) { case DRIVER_RESOURCE_LOGICAL_DEVICE: { - return (uint64_t)device; + return (uint64_t)device.Get(); } case DRIVER_RESOURCE_PHYSICAL_DEVICE: { - return (uint64_t)context->get_adapter(); + return (uint64_t)adapter.Get(); } case DRIVER_RESOURCE_TOPMOST_OBJECT: { return 0; @@ -5313,7 +5787,7 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) { case LIMIT_MAX_BOUND_UNIFORM_SETS: return safe_unbounded; case LIMIT_MAX_TEXTURES_PER_SHADER_STAGE: - return context->get_device_limits().max_srvs_per_shader_stage; + return device_limits.max_srvs_per_shader_stage; case LIMIT_MAX_UNIFORM_BUFFER_SIZE: return 65536; case LIMIT_MAX_VIEWPORT_DIMENSIONS_X: @@ -5335,22 +5809,15 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) { // Note in min/max. Shader model 6.6 supports it (see https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_WaveSize.html), // but at this time I don't know the implications on the transpilation to DXIL, etc. 
case LIMIT_SUBGROUP_MIN_SIZE: - case LIMIT_SUBGROUP_MAX_SIZE: { - const D3D12Context::SubgroupCapabilities &subgroup_capabilities = context->get_subgroup_capabilities(); + case LIMIT_SUBGROUP_MAX_SIZE: return subgroup_capabilities.size; - } - case LIMIT_SUBGROUP_IN_SHADERS: { - const D3D12Context::SubgroupCapabilities &subgroup_capabilities = context->get_subgroup_capabilities(); + case LIMIT_SUBGROUP_IN_SHADERS: return subgroup_capabilities.supported_stages_flags_rd(); - } - case LIMIT_SUBGROUP_OPERATIONS: { - const D3D12Context::SubgroupCapabilities &subgroup_capabilities = context->get_subgroup_capabilities(); + case LIMIT_SUBGROUP_OPERATIONS: return subgroup_capabilities.supported_operations_flags_rd(); - } case LIMIT_VRS_TEXEL_WIDTH: - case LIMIT_VRS_TEXEL_HEIGHT: { - return context->get_vrs_capabilities().ss_image_tile_size; - } + case LIMIT_VRS_TEXEL_HEIGHT: + return vrs_capabilities.ss_image_tile_size; default: { #ifdef DEV_ENABLED WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + "."); @@ -5383,109 +5850,435 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) { bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) { switch (p_feature) { - case SUPPORTS_MULTIVIEW: { - const RDD::MultiviewCapabilities &multiview_capabilies = context->get_multiview_capabilities(); - return multiview_capabilies.is_supported && multiview_capabilies.max_view_count > 1; - } break; - case SUPPORTS_FSR_HALF_FLOAT: { - return context->get_shader_capabilities().native_16bit_ops && context->get_storage_buffer_capabilities().storage_buffer_16_bit_access_is_supported; - } break; - case SUPPORTS_ATTACHMENT_VRS: { - const D3D12Context::VRSCapabilities &vrs_capabilities = context->get_vrs_capabilities(); + case SUPPORTS_MULTIVIEW: + return multiview_capabilities.is_supported && multiview_capabilities.max_view_count > 1; + case SUPPORTS_FSR_HALF_FLOAT: + return shader_capabilities.native_16bit_ops && 
storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported; + case SUPPORTS_ATTACHMENT_VRS: return vrs_capabilities.ss_image_supported; - } break; - case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: { + case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; - } break; - default: { + default: return false; - } } } const RDD::MultiviewCapabilities &RenderingDeviceDriverD3D12::get_multiview_capabilities() { - return context->get_multiview_capabilities(); + return multiview_capabilities; +} + +String RenderingDeviceDriverD3D12::get_api_name() const { + return "D3D12"; +} + +String RenderingDeviceDriverD3D12::get_api_version() const { + return vformat("%d_%d", feature_level / 10, feature_level % 10); +} + +String RenderingDeviceDriverD3D12::get_pipeline_cache_uuid() const { + return pipeline_cache_id; +} + +const RDD::Capabilities &RenderingDeviceDriverD3D12::get_capabilities() const { + return device_capabilities; } /******************/ -RenderingDeviceDriverD3D12::RenderingDeviceDriverD3D12(D3D12Context *p_context, ID3D12Device *p_device, uint32_t p_frame_count) : - context(p_context), - device(p_device) { - D3D12MA::ALLOCATOR_DESC allocator_desc = {}; - allocator_desc.pDevice = device; - allocator_desc.pAdapter = context->get_adapter(); +RenderingDeviceDriverD3D12::RenderingDeviceDriverD3D12(RenderingContextDriverD3D12 *p_context_driver) { + DEV_ASSERT(p_context_driver != nullptr); - HRESULT res = D3D12MA::CreateAllocator(&allocator_desc, &allocator); - ERR_FAIL_COND_MSG(!SUCCEEDED(res), "D3D12MA::CreateAllocator failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + this->context_driver = p_context_driver; +} +RenderingDeviceDriverD3D12::~RenderingDeviceDriverD3D12() { { - uint32_t resource_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_resource_descriptors_per_frame"); - uint32_t sampler_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame"); - uint32_t 
misc_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_misc_descriptors_per_frame"); - - frames.resize(p_frame_count); - for (uint32_t i = 0; i < frames.size(); i++) { - Error err = frames[i].desc_heaps.resources.allocate(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, resource_descriptors_per_frame, true); - ERR_FAIL_COND_MSG(err, "Creating the frame's RESOURCE descriptors heap failed."); - err = frames[i].desc_heaps.samplers.allocate(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, sampler_descriptors_per_frame, true); - ERR_FAIL_COND_MSG(err, "Creating the frame's SAMPLER descriptors heap failed."); - err = frames[i].desc_heaps.aux.allocate(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, misc_descriptors_per_frame, false); - ERR_FAIL_COND_MSG(err, "Creating the frame's AUX descriptors heap failed."); - err = frames[i].desc_heaps.rtv.allocate(device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, misc_descriptors_per_frame, false); - ERR_FAIL_COND_MSG(err, "Creating the frame's RENDER TARGET descriptors heap failed."); - - frames[i].desc_heap_walkers.resources = frames[i].desc_heaps.resources.make_walker(); - frames[i].desc_heap_walkers.samplers = frames[i].desc_heaps.samplers.make_walker(); - frames[i].desc_heap_walkers.aux = frames[i].desc_heaps.aux.make_walker(); - frames[i].desc_heap_walkers.rtv = frames[i].desc_heaps.rtv.make_walker(); + MutexLock lock(dxil_mutex); + for (const KeyValue<int, dxil_validator *> &E : dxil_validators) { + if (E.value) { + dxil_destroy_validator(E.value); + } + } + } - { - D3D12MA::ALLOCATION_DESC allocation_desc = {}; - allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; + glsl_type_singleton_decref(); +} - CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); +bool RenderingDeviceDriverD3D12::is_in_developer_mode() { + HKEY hkey = NULL; + LSTATUS result = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\AppModelUnlock", 0, 
KEY_READ, &hkey); + if (result != ERROR_SUCCESS) { + return false; + } - ID3D12Resource *resource = nullptr; - res = allocator->CreateResource( - &allocation_desc, - &resource_desc, - D3D12_RESOURCE_STATE_COMMON, - nullptr, - &frames[frame_idx].aux_resource, - IID_PPV_ARGS(&resource)); - ERR_FAIL_COND_MSG(!SUCCEEDED(res), "D3D12MA::CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + DWORD value = 0; + DWORD dword_size = sizeof(DWORD); + result = RegQueryValueExW(hkey, L"AllowDevelopmentWithoutDevLicense", nullptr, nullptr, (PBYTE)&value, &dword_size); + RegCloseKey(hkey); + + if (result != ERROR_SUCCESS) { + return false; + } + + return (value != 0); +} + +Error RenderingDeviceDriverD3D12::_initialize_device() { + HRESULT res; + + if (is_in_developer_mode()) { + UUID experimental_features[] = { D3D12ExperimentalShaderModels }; + D3D12EnableExperimentalFeatures(1, experimental_features, nullptr, nullptr); + } + + ID3D12DeviceFactory *device_factory = context_driver->device_factory_get(); + if (device_factory != nullptr) { + res = device_factory->CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(device.GetAddressOf())); + } else { + res = D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(device.GetAddressOf())); + } + + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "D3D12CreateDevice failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + + if (context_driver->use_validation_layers()) { + ComPtr<ID3D12InfoQueue> info_queue; + res = device.As(&info_queue); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); + +#if CUSTOM_INFO_QUEUE_ENABLED + ComPtr<ID3D12InfoQueue1> info_queue_1; + device.As(&info_queue_1); + if (info_queue_1) { + // Custom printing supported (added in Windows 10 Release Preview build 20236). Even if the callback cookie is unused, it seems the + // argument is not optional and the function will fail if it's not specified. 
+ DWORD callback_cookie; + info_queue_1->SetMuteDebugOutput(TRUE); + res = info_queue_1->RegisterMessageCallback(&_debug_message_func, D3D12_MESSAGE_CALLBACK_IGNORE_FILTERS, nullptr, &callback_cookie); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); + } else +#endif + { + // Rely on D3D12's own debug printing. + if (Engine::get_singleton()->is_abort_on_gpu_errors_enabled()) { + res = info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); + res = info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); + res = info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); } } - } - { // Create command signatures for indirect commands. - auto _create_command_signature = [&](D3D12_INDIRECT_ARGUMENT_TYPE p_type, uint32_t p_stride, ComPtr<ID3D12CommandSignature> *r_cmd_sig) { - D3D12_INDIRECT_ARGUMENT_DESC iarg_desc = {}; - iarg_desc.Type = p_type; - D3D12_COMMAND_SIGNATURE_DESC cs_desc = {}; - cs_desc.ByteStride = p_stride; - cs_desc.NumArgumentDescs = 1; - cs_desc.pArgumentDescs = &iarg_desc; - cs_desc.NodeMask = 0; - res = device->CreateCommandSignature(&cs_desc, nullptr, IID_PPV_ARGS(r_cmd_sig->GetAddressOf())); - ERR_FAIL_COND_MSG(!SUCCEEDED(res), "CreateCommandSignature failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + D3D12_MESSAGE_SEVERITY severities_to_mute[] = { + D3D12_MESSAGE_SEVERITY_INFO, }; - _create_command_signature(D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, sizeof(D3D12_DRAW_ARGUMENTS), &indirect_cmd_signatures.draw); - _create_command_signature(D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED, sizeof(D3D12_DRAW_INDEXED_ARGUMENTS), &indirect_cmd_signatures.draw_indexed); - _create_command_signature(D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, sizeof(D3D12_DISPATCH_ARGUMENTS), &indirect_cmd_signatures.dispatch); + + D3D12_MESSAGE_ID messages_to_mute[] = { + 
D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, + D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE, + // These happen due to how D3D12MA manages buffers; seems benign. + D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_HAS_NO_RESOURCE, + D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_INTERSECTS_MULTIPLE_BUFFERS, + }; + + D3D12_INFO_QUEUE_FILTER filter = {}; + filter.DenyList.NumSeverities = ARRAY_SIZE(severities_to_mute); + filter.DenyList.pSeverityList = severities_to_mute; + filter.DenyList.NumIDs = ARRAY_SIZE(messages_to_mute); + filter.DenyList.pIDList = messages_to_mute; + + res = info_queue->PushStorageFilter(&filter); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); } - glsl_type_singleton_init_or_ref(); + return OK; } -RenderingDeviceDriverD3D12::~RenderingDeviceDriverD3D12() { - { - MutexLock lock(dxil_mutex); - for (const KeyValue<int, dxil_validator *> &E : dxil_validators) { - dxil_destroy_validator(E.value); +Error RenderingDeviceDriverD3D12::_check_capabilities() { + // Check feature levels. + const D3D_FEATURE_LEVEL FEATURE_LEVELS[] = { + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_2, + }; + + D3D12_FEATURE_DATA_FEATURE_LEVELS feat_levels = {}; + feat_levels.NumFeatureLevels = ARRAY_SIZE(FEATURE_LEVELS); + feat_levels.pFeatureLevelsRequested = FEATURE_LEVELS; + + HRESULT res = device->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS, &feat_levels, sizeof(feat_levels)); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_UNAVAILABLE, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + + // Example: D3D_FEATURE_LEVEL_12_1 = 0xc100. + uint32_t feat_level_major = feat_levels.MaxSupportedFeatureLevel >> 12; + uint32_t feat_level_minor = (feat_levels.MaxSupportedFeatureLevel >> 16) & 0xff; + feature_level = feat_level_major * 10 + feat_level_minor; + + // Fill device capabilities. 
+ device_capabilities.device_family = DEVICE_DIRECTX; + device_capabilities.version_major = feature_level / 10; + device_capabilities.version_minor = feature_level % 10; + + // Assume not supported until proven otherwise. + vrs_capabilities.draw_call_supported = false; + vrs_capabilities.primitive_supported = false; + vrs_capabilities.primitive_in_multiviewport = false; + vrs_capabilities.ss_image_supported = false; + vrs_capabilities.ss_image_tile_size = 1; + vrs_capabilities.additional_rates_supported = false; + multiview_capabilities.is_supported = false; + multiview_capabilities.geometry_shader_is_supported = false; + multiview_capabilities.tessellation_shader_is_supported = false; + multiview_capabilities.max_view_count = 0; + multiview_capabilities.max_instance_count = 0; + multiview_capabilities.is_supported = false; + subgroup_capabilities.size = 0; + subgroup_capabilities.wave_ops_supported = false; + shader_capabilities.shader_model = D3D_SHADER_MODEL_6_0; + shader_capabilities.native_16bit_ops = false; + storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = false; + format_capabilities.relaxed_casting_supported = false; + + // Check shader model. + D3D12_FEATURE_DATA_SHADER_MODEL shader_model = {}; + shader_model.HighestShaderModel = MIN(D3D_HIGHEST_SHADER_MODEL, D3D_SHADER_MODEL_6_6); + res = device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model)); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + + shader_capabilities.shader_model = shader_model.HighestShaderModel; + print_verbose("- Shader:"); + print_verbose(" model: " + itos(shader_capabilities.shader_model >> 4) + "." 
+ itos(shader_capabilities.shader_model & 0xf)); + + D3D12_FEATURE_DATA_D3D12_OPTIONS options = {}; + res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)); + if (SUCCEEDED(res)) { + storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = options.TypedUAVLoadAdditionalFormats; + } + + D3D12_FEATURE_DATA_D3D12_OPTIONS1 options1 = {}; + res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS1, &options1, sizeof(options1)); + if (SUCCEEDED(res)) { + subgroup_capabilities.size = options1.WaveLaneCountMin; + subgroup_capabilities.wave_ops_supported = options1.WaveOps; + } + + D3D12_FEATURE_DATA_D3D12_OPTIONS3 options3 = {}; + res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &options3, sizeof(options3)); + if (SUCCEEDED(res)) { + // https://docs.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_view_instancing_tier + // https://microsoft.github.io/DirectX-Specs/d3d/ViewInstancing.html#sv_viewid + if (options3.ViewInstancingTier >= D3D12_VIEW_INSTANCING_TIER_1) { + multiview_capabilities.is_supported = true; + multiview_capabilities.geometry_shader_is_supported = options3.ViewInstancingTier >= D3D12_VIEW_INSTANCING_TIER_3; + multiview_capabilities.tessellation_shader_is_supported = options3.ViewInstancingTier >= D3D12_VIEW_INSTANCING_TIER_3; + multiview_capabilities.max_view_count = D3D12_MAX_VIEW_INSTANCE_COUNT; + multiview_capabilities.max_instance_count = UINT32_MAX; } } - glsl_type_singleton_decref(); + D3D12_FEATURE_DATA_D3D12_OPTIONS4 options4 = {}; + res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS4, &options4, sizeof(options4)); + if (SUCCEEDED(res)) { + shader_capabilities.native_16bit_ops = options4.Native16BitShaderOpsSupported; + } + + D3D12_FEATURE_DATA_D3D12_OPTIONS6 options6 = {}; + res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS6, &options6, sizeof(options6)); + if (SUCCEEDED(res)) { + if (options6.VariableShadingRateTier >= 
D3D12_VARIABLE_SHADING_RATE_TIER_1) { + vrs_capabilities.draw_call_supported = true; + if (options6.VariableShadingRateTier >= D3D12_VARIABLE_SHADING_RATE_TIER_2) { + vrs_capabilities.primitive_supported = true; + vrs_capabilities.primitive_in_multiviewport = options6.PerPrimitiveShadingRateSupportedWithViewportIndexing; + vrs_capabilities.ss_image_supported = true; + vrs_capabilities.ss_image_tile_size = options6.ShadingRateImageTileSize; + vrs_capabilities.additional_rates_supported = options6.AdditionalShadingRatesSupported; + } + } + } + + D3D12_FEATURE_DATA_D3D12_OPTIONS12 options12 = {}; + res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12)); + if (SUCCEEDED(res)) { + format_capabilities.relaxed_casting_supported = options12.RelaxedFormatCastingSupported; + } + + if (vrs_capabilities.draw_call_supported || vrs_capabilities.primitive_supported || vrs_capabilities.ss_image_supported) { + print_verbose("- D3D12 Variable Rate Shading supported:"); + if (vrs_capabilities.draw_call_supported) { + print_verbose(" Draw call"); + } + if (vrs_capabilities.primitive_supported) { + print_verbose(String(" Per-primitive (multi-viewport: ") + (vrs_capabilities.primitive_in_multiviewport ? "yes" : "no") + ")"); + } + if (vrs_capabilities.ss_image_supported) { + print_verbose(String(" Screen-space image (tile size: ") + itos(vrs_capabilities.ss_image_tile_size) + ")"); + } + if (vrs_capabilities.additional_rates_supported) { + print_verbose(String(" Additional rates: ") + (vrs_capabilities.additional_rates_supported ? "yes" : "no")); + } + } else { + print_verbose("- D3D12 Variable Rate Shading not supported"); + } + + if (multiview_capabilities.is_supported) { + print_verbose("- D3D12 multiview supported:"); + print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count)); + //print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count)); // Hardcoded; not very useful at the moment. 
+ } else { + print_verbose("- D3D12 multiview not supported"); + } + + if (format_capabilities.relaxed_casting_supported) { + print_verbose("- Relaxed casting supported"); + } else { + print_verbose("- Relaxed casting not supported"); + } + + print_verbose(String("- D3D12 16-bit ops supported: ") + (shader_capabilities.native_16bit_ops ? "yes" : "no")); + + return OK; +} + +Error RenderingDeviceDriverD3D12::_get_device_limits() { + D3D12_FEATURE_DATA_D3D12_OPTIONS options = {}; + HRESULT res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_UNAVAILABLE, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + + // https://docs.microsoft.com/en-us/windows/win32/direct3d12/hardware-support + device_limits.max_srvs_per_shader_stage = options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1 ? 128 : UINT64_MAX; + device_limits.max_cbvs_per_shader_stage = options.ResourceBindingTier <= D3D12_RESOURCE_BINDING_TIER_2 ? 14 : UINT64_MAX; + device_limits.max_samplers_across_all_stages = options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1 ? 16 : 2048; + if (options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1) { + device_limits.max_uavs_across_all_stages = feature_level <= 110 ? 8 : 64; + } else if (options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_2) { + device_limits.max_uavs_across_all_stages = 64; + } else { + device_limits.max_uavs_across_all_stages = UINT64_MAX; + } + + // Retrieving the timestamp frequency requires creating a command queue that will be discarded immediately. 
+ ComPtr<ID3D12CommandQueue> unused_command_queue; + D3D12_COMMAND_QUEUE_DESC queue_desc = {}; + queue_desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + res = device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(unused_command_queue.GetAddressOf())); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); + + res = unused_command_queue->GetTimestampFrequency(&device_limits.timestamp_frequency); + if (!SUCCEEDED(res)) { + print_verbose("D3D12: GetTimestampFrequency failed with error " + vformat("0x%08ux", (uint64_t)res) + ". Timestamps will be inaccurate."); + } + + return OK; +} + +Error RenderingDeviceDriverD3D12::_initialize_allocator() { + D3D12MA::ALLOCATOR_DESC allocator_desc = {}; + allocator_desc.pDevice = device.Get(); + allocator_desc.pAdapter = adapter.Get(); + + HRESULT res = D3D12MA::CreateAllocator(&allocator_desc, &allocator); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "D3D12MA::CreateAllocator failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + + return OK; +} + +static Error create_command_signature(ID3D12Device *device, D3D12_INDIRECT_ARGUMENT_TYPE p_type, uint32_t p_stride, ComPtr<ID3D12CommandSignature> *r_cmd_sig) { + D3D12_INDIRECT_ARGUMENT_DESC iarg_desc = {}; + iarg_desc.Type = p_type; + D3D12_COMMAND_SIGNATURE_DESC cs_desc = {}; + cs_desc.ByteStride = p_stride; + cs_desc.NumArgumentDescs = 1; + cs_desc.pArgumentDescs = &iarg_desc; + cs_desc.NodeMask = 0; + HRESULT res = device->CreateCommandSignature(&cs_desc, nullptr, IID_PPV_ARGS(r_cmd_sig->GetAddressOf())); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "CreateCommandSignature failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + return OK; +}; + +Error RenderingDeviceDriverD3D12::_initialize_frames(uint32_t p_frame_count) { + Error err; + D3D12MA::ALLOCATION_DESC allocation_desc = {}; + allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; + + CD3DX12_RESOURCE_DESC resource_desc = 
CD3DX12_RESOURCE_DESC::Buffer(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + uint32_t resource_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_resource_descriptors_per_frame"); + uint32_t sampler_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame"); + uint32_t misc_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_misc_descriptors_per_frame"); + + frames.resize(p_frame_count); + for (uint32_t i = 0; i < frames.size(); i++) { + err = frames[i].desc_heaps.resources.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, resource_descriptors_per_frame, true); + ERR_FAIL_COND_V_MSG(err != OK, ERR_CANT_CREATE, "Creating the frame's RESOURCE descriptors heap failed."); + + err = frames[i].desc_heaps.samplers.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, sampler_descriptors_per_frame, true); + ERR_FAIL_COND_V_MSG(err != OK, ERR_CANT_CREATE, "Creating the frame's SAMPLER descriptors heap failed."); + + err = frames[i].desc_heaps.aux.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, misc_descriptors_per_frame, false); + ERR_FAIL_COND_V_MSG(err != OK, ERR_CANT_CREATE, "Creating the frame's AUX descriptors heap failed."); + + err = frames[i].desc_heaps.rtv.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_RTV, misc_descriptors_per_frame, false); + ERR_FAIL_COND_V_MSG(err != OK, ERR_CANT_CREATE, "Creating the frame's RENDER TARGET descriptors heap failed."); + + frames[i].desc_heap_walkers.resources = frames[i].desc_heaps.resources.make_walker(); + frames[i].desc_heap_walkers.samplers = frames[i].desc_heaps.samplers.make_walker(); + frames[i].desc_heap_walkers.aux = frames[i].desc_heaps.aux.make_walker(); + frames[i].desc_heap_walkers.rtv = frames[i].desc_heaps.rtv.make_walker(); + + ID3D12Resource *resource = nullptr; + HRESULT res = allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COMMON, nullptr, 
&frames[frame_idx].aux_resource, IID_PPV_ARGS(&resource)); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "D3D12MA::CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + } + + return OK; +} + +Error RenderingDeviceDriverD3D12::_initialize_command_signatures() { + Error err = create_command_signature(device.Get(), D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, sizeof(D3D12_DRAW_ARGUMENTS), &indirect_cmd_signatures.draw); + ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); + + err = create_command_signature(device.Get(), D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED, sizeof(D3D12_DRAW_INDEXED_ARGUMENTS), &indirect_cmd_signatures.draw_indexed); + ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); + + err = create_command_signature(device.Get(), D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, sizeof(D3D12_DISPATCH_ARGUMENTS), &indirect_cmd_signatures.dispatch); + ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); + + return OK; +} + +Error RenderingDeviceDriverD3D12::initialize(uint32_t p_device_index, uint32_t p_frame_count) { + context_device = context_driver->device_get(p_device_index); + adapter = context_driver->create_adapter(p_device_index); + ERR_FAIL_NULL_V(adapter, ERR_CANT_CREATE); + + HRESULT res = adapter->GetDesc(&adapter_desc); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); + + // Set the pipeline cache ID based on the adapter information. 
+ pipeline_cache_id = String::hex_encode_buffer((uint8_t *)&adapter_desc.AdapterLuid, sizeof(LUID)); + pipeline_cache_id += "-driver-" + itos(adapter_desc.Revision); + + Error err = _initialize_device(); + ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); + + err = _check_capabilities(); + ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); + + err = _get_device_limits(); + ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); + + err = _initialize_allocator(); + ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); + + err = _initialize_frames(p_frame_count); + ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); + + err = _initialize_command_signatures(); + ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); + + glsl_type_singleton_init_or_ref(); + + return OK; } diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index bd19572878..595ee30966 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -65,14 +65,11 @@ using Microsoft::WRL::ComPtr; #define D3D12_BITCODE_OFFSETS_NUM_STAGES 3 struct dxil_validator; - -class D3D12Context; +class RenderingContextDriverD3D12; // Design principles: // - D3D12 structs are zero-initialized and fields not requiring a non-zero value are omitted (except in cases where expresivity reasons apply). class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { - friend class D3D12Context; // For FramebufferInfo, RenderPassInfo and CommandBufferInfo. - /*****************/ /**** GENERIC ****/ /*****************/ @@ -86,8 +83,58 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { static const D3D12Format RD_TO_D3D12_FORMAT[RDD::DATA_FORMAT_MAX]; - D3D12Context *context = nullptr; - ID3D12Device *device = nullptr; // Owned by the context. 
+ struct DeviceLimits { + uint64_t max_srvs_per_shader_stage = 0; + uint64_t max_cbvs_per_shader_stage = 0; + uint64_t max_samplers_across_all_stages = 0; + uint64_t max_uavs_across_all_stages = 0; + uint64_t timestamp_frequency = 0; + }; + + struct SubgroupCapabilities { + uint32_t size = 0; + bool wave_ops_supported = false; + uint32_t supported_stages_flags_rd() const; + uint32_t supported_operations_flags_rd() const; + }; + + struct VRSCapabilities { + bool draw_call_supported = false; // We can specify our fragment rate on a draw call level. + bool primitive_supported = false; // We can specify our fragment rate on each drawcall. + bool primitive_in_multiviewport = false; + bool ss_image_supported = false; // We can provide a density map attachment on our framebuffer. + uint32_t ss_image_tile_size = 0; + bool additional_rates_supported = false; + }; + + struct ShaderCapabilities { + D3D_SHADER_MODEL shader_model = (D3D_SHADER_MODEL)0; + bool native_16bit_ops = false; + }; + + struct StorageBufferCapabilities { + bool storage_buffer_16_bit_access_is_supported = false; + }; + + struct FormatCapabilities { + bool relaxed_casting_supported = false; + }; + + RenderingContextDriverD3D12 *context_driver = nullptr; + RenderingContextDriver::Device context_device; + ComPtr<IDXGIAdapter> adapter; + DXGI_ADAPTER_DESC adapter_desc; + ComPtr<ID3D12Device> device; + DeviceLimits device_limits; + RDD::Capabilities device_capabilities; + uint32_t feature_level = 0; // Major * 10 + minor. 
+ SubgroupCapabilities subgroup_capabilities; + RDD::MultiviewCapabilities multiview_capabilities; + VRSCapabilities vrs_capabilities; + ShaderCapabilities shader_capabilities; + StorageBufferCapabilities storage_buffer_capabilities; + FormatCapabilities format_capabilities; + String pipeline_cache_id; class DescriptorsHeap { D3D12_DESCRIPTOR_HEAP_DESC desc = {}; @@ -127,6 +174,19 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { ComPtr<ID3D12CommandSignature> dispatch; } indirect_cmd_signatures; + static void STDMETHODCALLTYPE _debug_message_func(D3D12_MESSAGE_CATEGORY p_category, D3D12_MESSAGE_SEVERITY p_severity, D3D12_MESSAGE_ID p_id, LPCSTR p_description, void *p_context); + void _set_object_name(ID3D12Object *p_object, String p_object_name); + Error _initialize_device(); + Error _check_capabilities(); + Error _get_device_limits(); + Error _initialize_allocator(); + Error _initialize_frames(uint32_t p_frame_count); + Error _initialize_command_signatures(); + +public: + Error initialize(uint32_t p_device_index, uint32_t p_frame_count) override final; + +private: /****************/ /**** MEMORY ****/ /****************/ @@ -183,7 +243,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { uint64_t subres_mask[MAX_SUBRESOURCES / 64] = {}; } groups[MAX_GROUPS]; uint8_t groups_count = 0; - static const D3D12_RESOURCE_STATES DELETED_GROUP = D3D12_RESOURCE_STATE_COMMON; + static const D3D12_RESOURCE_STATES DELETED_GROUP = D3D12_RESOURCE_STATES(0xFFFFFFFFU); }; PagedAllocator<HashMapElement<ResourceInfo::States *, BarrierRequest>> res_barriers_requests_allocator; HashMap<ResourceInfo::States *, BarrierRequest, HashMapHasherDefault, HashMapComparatorDefault<ResourceInfo::States *>, decltype(res_barriers_requests_allocator)> res_barriers_requests; @@ -307,13 +367,65 @@ public: VectorView<RDD::BufferBarrier> p_buffer_barriers, VectorView<RDD::TextureBarrier> p_texture_barriers) override final; - /*************************/ - /**** 
COMMAND BUFFERS ****/ - /*************************/ +private: + /****************/ + /**** FENCES ****/ + /****************/ + + struct FenceInfo { + ComPtr<ID3D12Fence> d3d_fence = nullptr; + HANDLE event_handle = NULL; + UINT64 fence_value = 0; + }; + +public: + virtual FenceID fence_create() override; + virtual Error fence_wait(FenceID p_fence) override; + virtual void fence_free(FenceID p_fence) override; + +private: + /********************/ + /**** SEMAPHORES ****/ + /********************/ + struct SemaphoreInfo { + ComPtr<ID3D12Fence> d3d_fence = nullptr; + UINT64 fence_value = 0; + }; + + virtual SemaphoreID semaphore_create() override; + virtual void semaphore_free(SemaphoreID p_semaphore) override; + + /******************/ + /**** COMMANDS ****/ + /******************/ + + // ----- QUEUE FAMILY ----- + + virtual CommandQueueFamilyID command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface = 0) override; + +private: + // ----- QUEUE ----- + + struct CommandQueueInfo { + ComPtr<ID3D12CommandQueue> d3d_queue; + }; + +public: + virtual CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) override; + virtual Error command_queue_execute(CommandQueueID p_cmd_queue, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) override; + virtual Error command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) override; + virtual void command_queue_free(CommandQueueID p_cmd_queue) override; + +private: // ----- POOL ----- + struct CommandPoolInfo { + CommandQueueFamilyID queue_family; + CommandBufferType buffer_type = COMMAND_BUFFER_TYPE_PRIMARY; + }; - virtual CommandPoolID command_pool_create(CommandBufferType p_cmd_buffer_type) override final; +public: + virtual 
CommandPoolID command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) override final; virtual void command_pool_free(CommandPoolID p_cmd_pool) override final; // ----- BUFFER ----- @@ -347,17 +459,45 @@ private: uint32_t compute_root_signature_crc = 0; RenderPassState render_pass_state; + bool descriptor_heaps_set = false; }; - RBMap<CommandPoolID, LocalVector<CommandBufferInfo *>> pools_command_buffers; - CommandPoolID last_command_pool_id; public: - virtual CommandBufferID command_buffer_create(CommandBufferType p_cmd_buffer_type, CommandPoolID p_cmd_pool) override final; + virtual CommandBufferID command_buffer_create(CommandPoolID p_cmd_pool) override final; virtual bool command_buffer_begin(CommandBufferID p_cmd_buffer) override final; virtual bool command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) override final; virtual void command_buffer_end(CommandBufferID p_cmd_buffer) override final; virtual void command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) override final; +private: + /********************/ + /**** SWAP CHAIN ****/ + /********************/ + + struct SwapChain { + ComPtr<IDXGISwapChain3> d3d_swap_chain; + RenderingContextDriver::SurfaceID surface = RenderingContextDriver::SurfaceID(); + UINT present_flags = 0; + UINT sync_interval = 1; + UINT creation_flags = 0; + RenderPassID render_pass; + TightLocalVector<ID3D12Resource *> render_targets; + TightLocalVector<TextureInfo> render_targets_info; + TightLocalVector<FramebufferID> framebuffers; + RDD::DataFormat data_format = DATA_FORMAT_MAX; + }; + + void _swap_chain_release(SwapChain *p_swap_chain); + void _swap_chain_release_buffers(SwapChain *p_swap_chain); + +public: + virtual SwapChainID swap_chain_create(RenderingContextDriver::SurfaceID p_surface) override; + virtual Error 
swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) override; + virtual FramebufferID swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) override; + virtual RenderPassID swap_chain_get_render_pass(SwapChainID p_swap_chain) override; + virtual DataFormat swap_chain_get_format(SwapChainID p_swap_chain) override; + virtual void swap_chain_free(SwapChainID p_swap_chain) override; + /*********************/ /**** FRAMEBUFFER ****/ /*********************/ @@ -376,6 +516,8 @@ private: D3D12_RENDER_TARGET_VIEW_DESC _make_rtv_for_texture(const TextureInfo *p_texture_info, uint32_t p_mipmap_offset, uint32_t p_layer_offset, uint32_t p_layers, bool p_add_bases = true); D3D12_DEPTH_STENCIL_VIEW_DESC _make_dsv_for_texture(const TextureInfo *p_texture_info); + FramebufferID _framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height, bool p_is_screen); + public: virtual FramebufferID framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height) override final; virtual void framebuffer_free(FramebufferID p_framebuffer) override final; @@ -602,6 +744,7 @@ public: virtual void command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final; private: + void _command_check_descriptor_sets(CommandBufferID p_cmd_buffer); void _command_bind_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index, bool p_for_compute); public: @@ -771,10 +914,11 @@ public: virtual void command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) override final; /****************/ - /**** SCREEN ****/ + /**** LABELS ****/ /****************/ - virtual DataFormat screen_get_format() override final; + 
virtual void command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) override final; + virtual void command_end_label(CommandBufferID p_cmd_buffer) override final; /********************/ /**** SUBMISSION ****/ @@ -814,7 +958,7 @@ private: bool segment_begun = false; public: - virtual void begin_segment(CommandBufferID p_cmd_buffer, uint32_t p_frame_index, uint32_t p_frames_drawn) override final; + virtual void begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) override final; virtual void end_segment() override final; /**************/ @@ -828,6 +972,12 @@ public: virtual uint64_t api_trait_get(ApiTrait p_trait) override final; virtual bool has_feature(Features p_feature) override final; virtual const MultiviewCapabilities &get_multiview_capabilities() override final; + virtual String get_api_name() const override final; + virtual String get_api_version() const override final; + virtual String get_pipeline_cache_uuid() const override final; + virtual const Capabilities &get_capabilities() const override final; + + static bool is_in_developer_mode(); private: /*********************/ @@ -851,7 +1001,7 @@ private: /******************/ public: - RenderingDeviceDriverD3D12(D3D12Context *p_context, ID3D12Device *p_device, uint32_t p_frame_count); + RenderingDeviceDriverD3D12(RenderingContextDriverD3D12 *p_context_driver); virtual ~RenderingDeviceDriverD3D12(); }; diff --git a/drivers/egl/egl_manager.h b/drivers/egl/egl_manager.h index c3a749cd19..61d3289b2d 100644 --- a/drivers/egl/egl_manager.h +++ b/drivers/egl/egl_manager.h @@ -45,7 +45,7 @@ class EGLManager { private: - // An EGL-side rappresentation of a display with its own rendering + // An EGL-side representation of a display with its own rendering // context. 
struct GLDisplay { void *display = nullptr; diff --git a/drivers/gles3/effects/copy_effects.cpp b/drivers/gles3/effects/copy_effects.cpp index 996e7eee7f..29e7de873b 100644 --- a/drivers/gles3/effects/copy_effects.cpp +++ b/drivers/gles3/effects/copy_effects.cpp @@ -125,6 +125,36 @@ void CopyEffects::copy_to_rect(const Rect2 &p_rect) { draw_screen_quad(); } +void CopyEffects::copy_to_rect_3d(const Rect2 &p_rect, float p_layer, int p_type, float p_lod) { + ERR_FAIL_COND(p_type != Texture::TYPE_LAYERED && p_type != Texture::TYPE_3D); + + CopyShaderGLES3::ShaderVariant variant = p_type == Texture::TYPE_LAYERED + ? CopyShaderGLES3::MODE_COPY_SECTION_2D_ARRAY + : CopyShaderGLES3::MODE_COPY_SECTION_3D; + + bool success = copy.shader.version_bind_shader(copy.shader_version, variant); + if (!success) { + return; + } + + copy.shader.version_set_uniform(CopyShaderGLES3::COPY_SECTION, p_rect.position.x, p_rect.position.y, p_rect.size.x, p_rect.size.y, copy.shader_version, variant); + copy.shader.version_set_uniform(CopyShaderGLES3::LAYER, p_layer, copy.shader_version, variant); + copy.shader.version_set_uniform(CopyShaderGLES3::LOD, p_lod, copy.shader_version, variant); + draw_screen_quad(); +} + +void CopyEffects::copy_to_and_from_rect(const Rect2 &p_rect) { + bool success = copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_COPY_SECTION_SOURCE); + if (!success) { + return; + } + + copy.shader.version_set_uniform(CopyShaderGLES3::COPY_SECTION, p_rect.position.x, p_rect.position.y, p_rect.size.x, p_rect.size.y, copy.shader_version, CopyShaderGLES3::MODE_COPY_SECTION_SOURCE); + copy.shader.version_set_uniform(CopyShaderGLES3::SOURCE_SECTION, p_rect.position.x, p_rect.position.y, p_rect.size.x, p_rect.size.y, copy.shader_version, CopyShaderGLES3::MODE_COPY_SECTION_SOURCE); + + draw_screen_quad(); +} + void CopyEffects::copy_screen() { bool success = copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_DEFAULT); if (!success) { @@ 
-140,7 +170,17 @@ void CopyEffects::copy_cube_to_rect(const Rect2 &p_rect) { return; } - copy.shader.version_set_uniform(CopyShaderGLES3::COPY_SECTION, p_rect.position.x, p_rect.position.y, p_rect.size.x, p_rect.size.y, copy.shader_version, CopyShaderGLES3::MODE_COPY_SECTION); + copy.shader.version_set_uniform(CopyShaderGLES3::COPY_SECTION, p_rect.position.x, p_rect.position.y, p_rect.size.x, p_rect.size.y, copy.shader_version, CopyShaderGLES3::MODE_CUBE_TO_OCTAHEDRAL); + draw_screen_quad(); +} + +void CopyEffects::copy_cube_to_panorama(float p_mip_level) { + bool success = copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_CUBE_TO_PANORAMA); + if (!success) { + return; + } + + copy.shader.version_set_uniform(CopyShaderGLES3::MIP_LEVEL, p_mip_level, copy.shader_version, CopyShaderGLES3::MODE_CUBE_TO_PANORAMA); draw_screen_quad(); } diff --git a/drivers/gles3/effects/copy_effects.h b/drivers/gles3/effects/copy_effects.h index 6e2cb07382..e65ebbce03 100644 --- a/drivers/gles3/effects/copy_effects.h +++ b/drivers/gles3/effects/copy_effects.h @@ -62,8 +62,11 @@ public: // These functions assume that a framebuffer and texture are bound already. They only manage the shader, uniforms, and vertex array. 
void copy_to_rect(const Rect2 &p_rect); + void copy_to_rect_3d(const Rect2 &p_rect, float p_layer, int p_type, float p_lod = 0.0f); + void copy_to_and_from_rect(const Rect2 &p_rect); void copy_screen(); void copy_cube_to_rect(const Rect2 &p_rect); + void copy_cube_to_panorama(float p_mip_level); void bilinear_blur(GLuint p_source_texture, int p_mipmap_count, const Rect2i &p_region); void gaussian_blur(GLuint p_source_texture, int p_mipmap_count, const Rect2i &p_region, const Size2i &p_size); void set_color(const Color &p_color, const Rect2i &p_region); diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index 5d6cef6e05..80daa9a907 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -627,6 +627,11 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou state.canvas_instance_batches[state.current_batch_index].material = material; state.canvas_instance_batches[state.current_batch_index].material_data = material_data; + if (shader_data_cache) { + state.canvas_instance_batches[state.current_batch_index].vertex_input_mask = shader_data_cache->vertex_input_mask; + } else { + state.canvas_instance_batches[state.current_batch_index].vertex_input_mask = RS::ARRAY_FORMAT_VERTEX | RS::ARRAY_COLOR | RS::ARRAY_TEX_UV; + } } GLES3::CanvasShaderData::BlendMode blend_mode = shader_data_cache ? 
shader_data_cache->blend_mode : GLES3::CanvasShaderData::BLEND_MODE_MIX; @@ -775,6 +780,7 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou _render_batch(p_lights, i, r_render_info); } + glDisable(GL_SCISSOR_TEST); state.current_batch_index = 0; state.canvas_instance_batches.clear(); state.last_item_index += index; @@ -1412,11 +1418,12 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index, Ren GLuint vertex_array_gl = 0; GLuint index_array_gl = 0; - uint64_t input_mask = RS::ARRAY_FORMAT_VERTEX | RS::ARRAY_FORMAT_COLOR | RS::ARRAY_FORMAT_TEX_UV; // 2D meshes always use the same vertex format. + uint64_t vertex_input_mask = state.canvas_instance_batches[p_index].vertex_input_mask; + if (mesh_instance.is_valid()) { - mesh_storage->mesh_instance_surface_get_vertex_arrays_and_format(mesh_instance, j, input_mask, vertex_array_gl); + mesh_storage->mesh_instance_surface_get_vertex_arrays_and_format(mesh_instance, j, vertex_input_mask, vertex_array_gl); } else { - mesh_storage->mesh_surface_get_vertex_arrays_and_format(surface, input_mask, vertex_array_gl); + mesh_storage->mesh_surface_get_vertex_arrays_and_format(surface, vertex_input_mask, vertex_array_gl); } index_array_gl = mesh_storage->mesh_surface_get_index_buffer(surface, 0); @@ -1472,9 +1479,9 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index, Ren glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); if (use_instancing) { glDisableVertexAttribArray(5); - glDisableVertexAttribArray(6); - glDisableVertexAttribArray(7); glDisableVertexAttribArray(8); + glDisableVertexAttribArray(9); + glDisableVertexAttribArray(10); } if (r_render_info) { // Meshes, Particles, and MultiMesh are always just one object with one draw call. @@ -1539,15 +1546,15 @@ void RasterizerCanvasGLES3::_new_batch(bool &r_batch_broken) { } void RasterizerCanvasGLES3::_enable_attributes(uint32_t p_start, bool p_primitive, uint32_t p_rate) { - uint32_t split = p_primitive ? 
11 : 12; - for (uint32_t i = 6; i < split; i++) { + uint32_t split = p_primitive ? 13 : 14; + for (uint32_t i = 8; i < split; i++) { glEnableVertexAttribArray(i); - glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, sizeof(InstanceData), CAST_INT_TO_UCHAR_PTR(p_start + (i - 6) * 4 * sizeof(float))); + glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, sizeof(InstanceData), CAST_INT_TO_UCHAR_PTR(p_start + (i - 8) * 4 * sizeof(float))); glVertexAttribDivisor(i, p_rate); } - for (uint32_t i = split; i <= 13; i++) { + for (uint32_t i = split; i <= 15; i++) { glEnableVertexAttribArray(i); - glVertexAttribIPointer(i, 4, GL_UNSIGNED_INT, sizeof(InstanceData), CAST_INT_TO_UCHAR_PTR(p_start + (i - 6) * 4 * sizeof(float))); + glVertexAttribIPointer(i, 4, GL_UNSIGNED_INT, sizeof(InstanceData), CAST_INT_TO_UCHAR_PTR(p_start + (i - 8) * 4 * sizeof(float))); glVertexAttribDivisor(i, p_rate); } } @@ -2295,14 +2302,18 @@ void RasterizerCanvasGLES3::_prepare_canvas_texture(RID p_texture, RS::CanvasIte GLES3::Texture *texture = texture_storage->get_texture(ct->diffuse); Size2i size_cache; + + // Cache default white resource ID. + const RID default_texture_id = texture_storage->texture_gl_get_default(GLES3::DEFAULT_GL_TEXTURE_WHITE); + + // If no texture is assigned, assign default white. if (!texture) { - ct->diffuse = texture_storage->texture_gl_get_default(GLES3::DEFAULT_GL_TEXTURE_WHITE); - GLES3::Texture *tex = texture_storage->get_texture(ct->diffuse); - size_cache = Size2i(tex->width, tex->height); - } else { - size_cache = Size2i(texture->width, texture->height); + ct->diffuse = default_texture_id; } + // Enforce a 1x1 size if default white texture. + size_cache = ct->diffuse == default_texture_id ? 
Size2i(1, 1) : Size2i(texture->width, texture->height); + GLES3::Texture *normal_map = texture_storage->get_texture(ct->normal_map); if (ct->specular_color.a < 0.999) { diff --git a/drivers/gles3/rasterizer_canvas_gles3.h b/drivers/gles3/rasterizer_canvas_gles3.h index 552b2afd6b..2b70df3238 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.h +++ b/drivers/gles3/rasterizer_canvas_gles3.h @@ -269,6 +269,7 @@ public: RID material; GLES3::CanvasMaterialData *material_data = nullptr; CanvasShaderGLES3::ShaderVariant shader_variant = CanvasShaderGLES3::MODE_QUAD; + uint64_t vertex_input_mask; const Item::Command *command = nullptr; Item::Command::Type command_type = Item::Command::TYPE_ANIMATION_SLICE; // Can default to any type that doesn't form a batch. diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp index c048e9bf37..f5296f969f 100644 --- a/drivers/gles3/rasterizer_gles3.cpp +++ b/drivers/gles3/rasterizer_gles3.cpp @@ -103,6 +103,11 @@ void RasterizerGLES3::begin_frame(double frame_step) { } void RasterizerGLES3::end_frame(bool p_swap_buffers) { + GLES3::Utilities *utils = GLES3::Utilities::get_singleton(); + utils->capture_timestamps_end(); +} + +void RasterizerGLES3::end_viewport(bool p_swap_buffers) { if (p_swap_buffers) { DisplayServer::get_singleton()->swap_buffers(); } else { @@ -188,7 +193,7 @@ typedef void(GLAPIENTRY *DEBUGPROCARB)(GLenum source, typedef void(GLAPIENTRY *DebugMessageCallbackARB)(DEBUGPROCARB callback, const void *userParam); void RasterizerGLES3::initialize() { - print_line(vformat("OpenGL API %s - Compatibility - Using Device: %s - %s", RS::get_singleton()->get_video_adapter_api_version(), RS::get_singleton()->get_video_adapter_vendor(), RS::get_singleton()->get_video_adapter_name())); + Engine::get_singleton()->print_header(vformat("OpenGL API %s - Compatibility - Using Device: %s - %s", RS::get_singleton()->get_video_adapter_api_version(), RS::get_singleton()->get_video_adapter_vendor(), 
RS::get_singleton()->get_video_adapter_name())); } void RasterizerGLES3::finalize() { @@ -351,16 +356,6 @@ RasterizerGLES3::RasterizerGLES3() { RasterizerGLES3::~RasterizerGLES3() { } -void RasterizerGLES3::prepare_for_blitting_render_targets() { - // This is a hack, but this function is called one time after all viewports have been updated. - // So it marks the end of the frame for all viewports - // In the OpenGL renderer we have to call end_frame for each viewport so we can swap the - // buffers for each window before proceeding to the next. - // This allows us to only increment the frame after all viewports are done. - GLES3::Utilities *utils = GLES3::Utilities::get_singleton(); - utils->capture_timestamps_end(); -} - void RasterizerGLES3::_blit_render_target_to_screen(RID p_render_target, DisplayServer::WindowID p_screen, const Rect2 &p_screen_rect, uint32_t p_layer, bool p_first) { GLES3::RenderTarget *rt = GLES3::TextureStorage::get_singleton()->get_render_target(p_render_target); @@ -474,7 +469,7 @@ void RasterizerGLES3::set_boot_image(const Ref<Image> &p_image, const Color &p_c copy_effects->copy_to_rect(screenrect); glBindTexture(GL_TEXTURE_2D, 0); - end_frame(true); + end_viewport(true); texture_storage->texture_free(texture); } diff --git a/drivers/gles3/rasterizer_gles3.h b/drivers/gles3/rasterizer_gles3.h index b19ca0e9c9..cf3cedfea1 100644 --- a/drivers/gles3/rasterizer_gles3.h +++ b/drivers/gles3/rasterizer_gles3.h @@ -90,9 +90,9 @@ public: void initialize(); void begin_frame(double frame_step); - void prepare_for_blitting_render_targets(); void blit_render_targets_to_screen(DisplayServer::WindowID p_screen, const BlitToScreen *p_render_targets, int p_amount); + void end_viewport(bool p_swap_buffers); void end_frame(bool p_swap_buffers); void finalize(); diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index a646b1ec7d..efd554eac9 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ 
b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -280,7 +280,9 @@ void RasterizerSceneGLES3::_geometry_instance_add_surface_with_material(Geometry GLES3::Mesh::Surface *s = reinterpret_cast<GLES3::Mesh::Surface *>(sdcache->surface); if (p_material->shader_data->uses_tangent && !(s->format & RS::ARRAY_FORMAT_TANGENT)) { - WARN_PRINT_ED("Attempting to use a shader that requires tangents with a mesh that doesn't contain tangents. Ensure that meshes are imported with the 'ensure_tangents' option. If creating your own meshes, add an `ARRAY_TANGENT` array (when using ArrayMesh) or call `generate_tangents()` (when using SurfaceTool)."); + String shader_path = p_material->shader_data->path.is_empty() ? "" : "(" + p_material->shader_data->path + ")"; + String mesh_path = mesh_storage->mesh_get_path(p_mesh).is_empty() ? "" : "(" + mesh_storage->mesh_get_path(p_mesh) + ")"; + WARN_PRINT_ED(vformat("Attempting to use a shader %s that requires tangents with a mesh %s that doesn't contain tangents. Ensure that meshes are imported with the 'ensure_tangents' option. 
If creating your own meshes, add an `ARRAY_TANGENT` array (when using ArrayMesh) or call `generate_tangents()` (when using SurfaceTool).", shader_path, mesh_path)); } } @@ -541,77 +543,48 @@ void RasterizerSceneGLES3::_invalidate_sky(Sky *p_sky) { } } -void RasterizerSceneGLES3::_update_dirty_skys() { - Sky *sky = dirty_sky_list; - - while (sky) { - if (sky->radiance == 0) { - sky->mipmap_count = Image::get_image_required_mipmaps(sky->radiance_size, sky->radiance_size, Image::FORMAT_RGBA8) - 1; - // Left uninitialized, will attach a texture at render time - glGenFramebuffers(1, &sky->radiance_framebuffer); - - GLenum internal_format = GL_RGB10_A2; - - glGenTextures(1, &sky->radiance); - glBindTexture(GL_TEXTURE_CUBE_MAP, sky->radiance); +GLuint _init_radiance_texture(int p_size, int p_mipmaps, String p_name) { + GLuint radiance_id = 0; + glGenTextures(1, &radiance_id); + glBindTexture(GL_TEXTURE_CUBE_MAP, radiance_id); #ifdef GL_API_ENABLED - if (RasterizerGLES3::is_gles_over_gl()) { - GLenum format = GL_RGBA; - GLenum type = GL_UNSIGNED_INT_2_10_10_10_REV; - //TODO, on low-end compare this to allocating each face of each mip individually - // see: https://www.khronos.org/registry/OpenGL-Refpages/es3.0/html/glTexStorage2D.xhtml - for (int i = 0; i < 6; i++) { - glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + i, 0, internal_format, sky->radiance_size, sky->radiance_size, 0, format, type, nullptr); - } + if (RasterizerGLES3::is_gles_over_gl()) { + //TODO, on low-end compare this to allocating each face of each mip individually + // see: https://www.khronos.org/registry/OpenGL-Refpages/es3.0/html/glTexStorage2D.xhtml + for (int i = 0; i < 6; i++) { + glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + i, 0, GL_RGB10_A2, p_size, p_size, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, nullptr); + } - glGenerateMipmap(GL_TEXTURE_CUBE_MAP); - } + glGenerateMipmap(GL_TEXTURE_CUBE_MAP); + } #endif // GL_API_ENABLED #ifdef GLES_API_ENABLED - if (!RasterizerGLES3::is_gles_over_gl()) { - 
glTexStorage2D(GL_TEXTURE_CUBE_MAP, sky->mipmap_count, internal_format, sky->radiance_size, sky->radiance_size); - } + if (!RasterizerGLES3::is_gles_over_gl()) { + glTexStorage2D(GL_TEXTURE_CUBE_MAP, p_mipmaps, GL_RGB10_A2, p_size, p_size); + } #endif // GLES_API_ENABLED - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAX_LEVEL, sky->mipmap_count - 1); - - GLES3::Utilities::get_singleton()->texture_allocated_data(sky->radiance, Image::get_image_data_size(sky->radiance_size, sky->radiance_size, Image::FORMAT_RGBA8, true), "Sky radiance map"); - - glGenTextures(1, &sky->raw_radiance); - glBindTexture(GL_TEXTURE_CUBE_MAP, sky->raw_radiance); - -#ifdef GL_API_ENABLED - if (RasterizerGLES3::is_gles_over_gl()) { - GLenum format = GL_RGBA; - GLenum type = GL_UNSIGNED_INT_2_10_10_10_REV; - //TODO, on low-end compare this to allocating each face of each mip individually - // see: https://www.khronos.org/registry/OpenGL-Refpages/es3.0/html/glTexStorage2D.xhtml - for (int i = 0; i < 6; i++) { - glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + i, 0, internal_format, sky->radiance_size, sky->radiance_size, 0, format, type, nullptr); - } + glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); + glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAX_LEVEL, p_mipmaps - 1); + + 
GLES3::Utilities::get_singleton()->texture_allocated_data(radiance_id, Image::get_image_data_size(p_size, p_size, Image::FORMAT_RGBA8, true), p_name); + return radiance_id; +} - glGenerateMipmap(GL_TEXTURE_CUBE_MAP); - } -#endif // GL_API_ENABLED -#ifdef GLES_API_ENABLED - if (!RasterizerGLES3::is_gles_over_gl()) { - glTexStorage2D(GL_TEXTURE_CUBE_MAP, sky->mipmap_count, internal_format, sky->radiance_size, sky->radiance_size); - } -#endif // GLES_API_ENABLED - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAX_LEVEL, sky->mipmap_count - 1); +void RasterizerSceneGLES3::_update_dirty_skys() { + Sky *sky = dirty_sky_list; - glBindTexture(GL_TEXTURE_CUBE_MAP, 0); - GLES3::Utilities::get_singleton()->texture_allocated_data(sky->raw_radiance, Image::get_image_data_size(sky->radiance_size, sky->radiance_size, Image::FORMAT_RGBA8, true), "Sky raw radiance map"); + while (sky) { + if (sky->radiance == 0) { + sky->mipmap_count = Image::get_image_required_mipmaps(sky->radiance_size, sky->radiance_size, Image::FORMAT_RGBA8) - 1; + // Left uninitialized, will attach a texture at render time + glGenFramebuffers(1, &sky->radiance_framebuffer); + sky->radiance = _init_radiance_texture(sky->radiance_size, sky->mipmap_count, "Sky radiance texture"); + sky->raw_radiance = _init_radiance_texture(sky->radiance_size, sky->mipmap_count, "Sky raw radiance texture"); } sky->reflection_dirty = true; @@ -1140,7 +1113,80 @@ void RasterizerSceneGLES3::_filter_sky_radiance(Sky *p_sky, int p_base_layer) { } Ref<Image> RasterizerSceneGLES3::sky_bake_panorama(RID p_sky, float p_energy, bool p_bake_irradiance, const 
Size2i &p_size) { - return Ref<Image>(); + Sky *sky = sky_owner.get_or_null(p_sky); + ERR_FAIL_NULL_V(sky, Ref<Image>()); + + _update_dirty_skys(); + + if (sky->radiance == 0) { + return Ref<Image>(); + } + + GLES3::CopyEffects *copy_effects = GLES3::CopyEffects::get_singleton(); + GLES3::Config *config = GLES3::Config::get_singleton(); + + GLuint rad_tex = 0; + glGenTextures(1, &rad_tex); + glBindTexture(GL_TEXTURE_2D, rad_tex); + if (config->float_texture_supported) { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, p_size.width, p_size.height, 0, GL_RGBA, GL_FLOAT, nullptr); + GLES3::Utilities::get_singleton()->texture_allocated_data(rad_tex, p_size.width * p_size.height * 16, "Temp sky panorama"); + } else { + // Fallback to RGBA8 on devices that don't support rendering to floating point textures. This will look bad, but we have no choice. + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, p_size.width, p_size.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + GLES3::Utilities::get_singleton()->texture_allocated_data(rad_tex, p_size.width * p_size.height * 4, "Temp sky panorama"); + } + + GLuint rad_fbo = 0; + glGenFramebuffers(1, &rad_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, rad_fbo); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rad_tex, 0); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_CUBE_MAP, sky->radiance); + glViewport(0, 0, p_size.width, p_size.height); + + glClearColor(0.0, 0.0, 0.0, 1.0); + glClear(GL_COLOR_BUFFER_BIT); + + copy_effects->copy_cube_to_panorama(p_bake_irradiance ? float(sky->mipmap_count) : 0.0); + + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glDeleteFramebuffers(1, &rad_fbo); + // Create a dummy texture so we can use texture_2d_get. 
+ RID tex_rid = GLES3::TextureStorage::get_singleton()->texture_allocate(); + GLES3::Texture texture; + texture.width = p_size.width; + texture.height = p_size.height; + texture.alloc_width = p_size.width; + texture.alloc_height = p_size.height; + texture.format = Image::FORMAT_RGBAF; + texture.real_format = Image::FORMAT_RGBAF; + texture.gl_format_cache = GL_RGBA; + texture.gl_type_cache = GL_FLOAT; + texture.type = GLES3::Texture::TYPE_2D; + texture.target = GL_TEXTURE_2D; + texture.active = true; + texture.tex_id = rad_tex; + texture.is_render_target = true; + + GLES3::TextureStorage::get_singleton()->texture_2d_initialize_from_texture(tex_rid, texture); + Ref<Image> img = GLES3::TextureStorage::get_singleton()->texture_2d_get(tex_rid); + GLES3::Utilities::get_singleton()->texture_free_data(rad_tex); + + texture.is_render_target = false; + texture.tex_id = 0; + GLES3::TextureStorage::get_singleton()->texture_free(tex_rid); + + for (int i = 0; i < p_size.width; i++) { + for (int j = 0; j < p_size.height; j++) { + Color c = img->get_pixel(i, j); + c.r *= p_energy; + c.g *= p_energy; + c.b *= p_energy; + img->set_pixel(i, j, c); + } + } + return img; } /* ENVIRONMENT API */ @@ -1174,7 +1220,65 @@ void RasterizerSceneGLES3::environment_set_volumetric_fog_filter_active(bool p_e } Ref<Image> RasterizerSceneGLES3::environment_bake_panorama(RID p_env, bool p_bake_irradiance, const Size2i &p_size) { - return Ref<Image>(); + ERR_FAIL_COND_V(p_env.is_null(), Ref<Image>()); + + RS::EnvironmentBG environment_background = environment_get_background(p_env); + + if (environment_background == RS::ENV_BG_CAMERA_FEED || environment_background == RS::ENV_BG_CANVAS || environment_background == RS::ENV_BG_KEEP) { + return Ref<Image>(); // Nothing to bake. 
+ } + + RS::EnvironmentAmbientSource ambient_source = environment_get_ambient_source(p_env); + + bool use_ambient_light = false; + bool use_cube_map = false; + if (ambient_source == RS::ENV_AMBIENT_SOURCE_BG && (environment_background == RS::ENV_BG_CLEAR_COLOR || environment_background == RS::ENV_BG_COLOR)) { + use_ambient_light = true; + } else { + use_cube_map = (ambient_source == RS::ENV_AMBIENT_SOURCE_BG && environment_background == RS::ENV_BG_SKY) || ambient_source == RS::ENV_AMBIENT_SOURCE_SKY; + use_ambient_light = use_cube_map || ambient_source == RS::ENV_AMBIENT_SOURCE_COLOR; + } + + use_cube_map = use_cube_map || (environment_background == RS::ENV_BG_SKY && environment_get_sky(p_env).is_valid()); + + Color ambient_color; + float ambient_color_sky_mix = 0.0; + if (use_ambient_light) { + ambient_color_sky_mix = environment_get_ambient_sky_contribution(p_env); + const float ambient_energy = environment_get_ambient_light_energy(p_env); + ambient_color = environment_get_ambient_light(p_env); + ambient_color = ambient_color.srgb_to_linear(); + ambient_color.r *= ambient_energy; + ambient_color.g *= ambient_energy; + ambient_color.b *= ambient_energy; + } + + if (use_cube_map) { + Ref<Image> panorama = sky_bake_panorama(environment_get_sky(p_env), environment_get_bg_energy_multiplier(p_env), p_bake_irradiance, p_size); + if (use_ambient_light) { + for (int x = 0; x < p_size.width; x++) { + for (int y = 0; y < p_size.height; y++) { + panorama->set_pixel(x, y, ambient_color.lerp(panorama->get_pixel(x, y), ambient_color_sky_mix)); + } + } + } + return panorama; + } else { + const float bg_energy_multiplier = environment_get_bg_energy_multiplier(p_env); + Color panorama_color = ((environment_background == RS::ENV_BG_CLEAR_COLOR) ? 
RSG::texture_storage->get_default_clear_color() : environment_get_bg_color(p_env)); + panorama_color = panorama_color.srgb_to_linear(); + panorama_color.r *= bg_energy_multiplier; + panorama_color.g *= bg_energy_multiplier; + panorama_color.b *= bg_energy_multiplier; + + if (use_ambient_light) { + panorama_color = ambient_color.lerp(panorama_color, ambient_color_sky_mix); + } + + Ref<Image> panorama = Image::create_empty(p_size.width, p_size.height, false, Image::FORMAT_RGBAF); + panorama->fill(panorama_color); + return panorama; + } } void RasterizerSceneGLES3::positional_soft_shadow_filter_set_quality(RS::ShadowQuality p_quality) { @@ -1451,6 +1555,7 @@ void RasterizerSceneGLES3::_setup_environment(const RenderDataGLES3 *p_render_da GLES3::MaterialStorage::store_camera(projection.inverse(), scene_state.ubo.inv_projection_matrix); GLES3::MaterialStorage::store_transform(p_render_data->cam_transform, scene_state.ubo.inv_view_matrix); GLES3::MaterialStorage::store_transform(p_render_data->inv_cam_transform, scene_state.ubo.view_matrix); + GLES3::MaterialStorage::store_transform(p_render_data->main_cam_transform, scene_state.ubo.main_cam_inv_view_matrix); scene_state.ubo.camera_visible_layers = p_render_data->camera_visible_layers; if (p_render_data->view_count > 1) { @@ -1485,7 +1590,15 @@ void RasterizerSceneGLES3::_setup_environment(const RenderDataGLES3 *p_render_da //time global variables scene_state.ubo.time = time; - if (is_environment(p_render_data->environment)) { + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_UNSHADED) { + scene_state.ubo.use_ambient_light = true; + scene_state.ubo.ambient_light_color_energy[0] = 1; + scene_state.ubo.ambient_light_color_energy[1] = 1; + scene_state.ubo.ambient_light_color_energy[2] = 1; + scene_state.ubo.ambient_light_color_energy[3] = 1.0; + scene_state.ubo.use_ambient_cubemap = false; + scene_state.ubo.use_reflection_cubemap = false; + } else if (is_environment(p_render_data->environment)) { RS::EnvironmentBG 
env_bg = environment_get_background(p_render_data->environment); RS::EnvironmentAmbientSource ambient_src = environment_get_ambient_source(p_render_data->environment); @@ -1529,8 +1642,12 @@ void RasterizerSceneGLES3::_setup_environment(const RenderDataGLES3 *p_render_da } scene_state.ubo.fog_enabled = environment_get_fog_enabled(p_render_data->environment); + scene_state.ubo.fog_mode = environment_get_fog_mode(p_render_data->environment); scene_state.ubo.fog_density = environment_get_fog_density(p_render_data->environment); scene_state.ubo.fog_height = environment_get_fog_height(p_render_data->environment); + scene_state.ubo.fog_depth_curve = environment_get_fog_depth_curve(p_render_data->environment); + scene_state.ubo.fog_depth_end = environment_get_fog_depth_end(p_render_data->environment) > 0.0 ? environment_get_fog_depth_end(p_render_data->environment) : scene_state.ubo.z_far; + scene_state.ubo.fog_depth_begin = MIN(environment_get_fog_depth_begin(p_render_data->environment), scene_state.ubo.fog_depth_end - 0.001); scene_state.ubo.fog_height_density = environment_get_fog_height_density(p_render_data->environment); scene_state.ubo.fog_aerial_perspective = environment_get_fog_aerial_perspective(p_render_data->environment); @@ -1983,20 +2100,20 @@ void RasterizerSceneGLES3::_render_shadows(const RenderDataGLES3 *p_render_data, // Render cubemap shadows. 
for (const int &index : cube_shadows) { - _render_shadow_pass(p_render_data->render_shadows[index].light, p_render_data->shadow_atlas, p_render_data->render_shadows[index].pass, p_render_data->render_shadows[index].instances, camera_plane, lod_distance_multiplier, p_render_data->screen_mesh_lod_threshold, p_render_data->render_info, p_viewport_size); + _render_shadow_pass(p_render_data->render_shadows[index].light, p_render_data->shadow_atlas, p_render_data->render_shadows[index].pass, p_render_data->render_shadows[index].instances, camera_plane, lod_distance_multiplier, p_render_data->screen_mesh_lod_threshold, p_render_data->render_info, p_viewport_size, p_render_data->cam_transform); } // Render directional shadows. for (uint32_t i = 0; i < directional_shadows.size(); i++) { - _render_shadow_pass(p_render_data->render_shadows[directional_shadows[i]].light, p_render_data->shadow_atlas, p_render_data->render_shadows[directional_shadows[i]].pass, p_render_data->render_shadows[directional_shadows[i]].instances, camera_plane, lod_distance_multiplier, p_render_data->screen_mesh_lod_threshold, p_render_data->render_info, p_viewport_size); + _render_shadow_pass(p_render_data->render_shadows[directional_shadows[i]].light, p_render_data->shadow_atlas, p_render_data->render_shadows[directional_shadows[i]].pass, p_render_data->render_shadows[directional_shadows[i]].instances, camera_plane, lod_distance_multiplier, p_render_data->screen_mesh_lod_threshold, p_render_data->render_info, p_viewport_size, p_render_data->cam_transform); } // Render positional shadows (Spotlight and Omnilight with dual-paraboloid). 
for (uint32_t i = 0; i < shadows.size(); i++) { - _render_shadow_pass(p_render_data->render_shadows[shadows[i]].light, p_render_data->shadow_atlas, p_render_data->render_shadows[shadows[i]].pass, p_render_data->render_shadows[shadows[i]].instances, camera_plane, lod_distance_multiplier, p_render_data->screen_mesh_lod_threshold, p_render_data->render_info, p_viewport_size); + _render_shadow_pass(p_render_data->render_shadows[shadows[i]].light, p_render_data->shadow_atlas, p_render_data->render_shadows[shadows[i]].pass, p_render_data->render_shadows[shadows[i]].instances, camera_plane, lod_distance_multiplier, p_render_data->screen_mesh_lod_threshold, p_render_data->render_info, p_viewport_size, p_render_data->cam_transform); } } } -void RasterizerSceneGLES3::_render_shadow_pass(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<RenderGeometryInstance *> &p_instances, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_mesh_lod_threshold, RenderingMethod::RenderInfo *p_render_info, const Size2i &p_viewport_size) { +void RasterizerSceneGLES3::_render_shadow_pass(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<RenderGeometryInstance *> &p_instances, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_mesh_lod_threshold, RenderingMethod::RenderInfo *p_render_info, const Size2i &p_viewport_size, const Transform3D &p_main_cam_transform) { GLES3::LightStorage *light_storage = GLES3::LightStorage::get_singleton(); ERR_FAIL_COND(!light_storage->owns_light_instance(p_light)); @@ -2130,6 +2247,7 @@ void RasterizerSceneGLES3::_render_shadow_pass(RID p_light, RID p_shadow_atlas, render_data.z_far = zfar; // Only used by OmniLights. 
render_data.z_near = 0.0; render_data.lod_distance_multiplier = p_lod_distance_multiplier; + render_data.main_cam_transform = p_main_cam_transform; render_data.instances = &p_instances; render_data.render_info = p_render_info; @@ -2163,6 +2281,7 @@ void RasterizerSceneGLES3::_render_shadow_pass(RID p_light, RID p_shadow_atlas, scene_state.cull_mode = GLES3::SceneShaderData::CULL_BACK; glColorMask(0, 0, 0, 0); + glDrawBuffers(0, nullptr); RasterizerGLES3::clear_depth(1.0); if (needs_clear) { glClear(GL_DEPTH_BUFFER_BIT); @@ -2217,6 +2336,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ render_data.cam_projection = p_camera_data->main_projection; render_data.cam_orthogonal = p_camera_data->is_orthogonal; render_data.camera_visible_layers = p_camera_data->visible_layers; + render_data.main_cam_transform = p_camera_data->main_transform; render_data.view_count = p_camera_data->view_count; for (uint32_t v = 0; v < p_camera_data->view_count; v++) { @@ -2324,7 +2444,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ bool keep_color = false; float sky_energy_multiplier = 1.0; - if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_OVERDRAW) { + if (unlikely(get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_OVERDRAW)) { clear_color = Color(0, 0, 0, 1); //in overdraw mode, BG should always be black } else if (render_data.environment.is_valid()) { RS::EnvironmentBG bg_mode = environment_get_background(render_data.environment); @@ -2421,8 +2541,9 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ glColorMask(0, 0, 0, 0); RasterizerGLES3::clear_depth(1.0); - glClear(GL_DEPTH_BUFFER_BIT); + glDrawBuffers(0, nullptr); + uint64_t spec_constant = SceneShaderGLES3::DISABLE_FOG | SceneShaderGLES3::DISABLE_LIGHT_DIRECTIONAL | SceneShaderGLES3::DISABLE_LIGHTMAP | SceneShaderGLES3::DISABLE_LIGHT_OMNI | SceneShaderGLES3::DISABLE_LIGHT_SPOT; @@ -2455,6 +2576,11 @@ void 
RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ scene_state.current_depth_test = GLES3::SceneShaderData::DEPTH_TEST_ENABLED; scene_state.current_depth_draw = GLES3::SceneShaderData::DEPTH_DRAW_ALWAYS; + { + GLuint db = GL_COLOR_ATTACHMENT0; + glDrawBuffers(1, &db); + } + if (!fb_cleared) { RasterizerGLES3::clear_depth(1.0); glClear(GL_DEPTH_BUFFER_BIT); @@ -2476,6 +2602,10 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ if (render_data.environment.is_null() || (render_data.environment.is_valid() && !environment_get_fog_enabled(render_data.environment))) { spec_constant_base_flags |= SceneShaderGLES3::DISABLE_FOG; } + + if (render_data.environment.is_valid() && environment_get_fog_mode(render_data.environment) == RS::EnvironmentFogMode::ENV_FOG_MODE_DEPTH) { + spec_constant_base_flags |= SceneShaderGLES3::USE_DEPTH_FOG; + } } // Render Opaque Objects. RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, spec_constant_base_flags, use_wireframe); @@ -2693,6 +2823,7 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, } glBindTexture(GL_TEXTURE_CUBE_MAP, texture_to_bind); } + } else if constexpr (p_pass_mode == PASS_MODE_DEPTH || p_pass_mode == PASS_MODE_SHADOW) { shader_variant = SceneShaderGLES3::MODE_DEPTH; } @@ -2730,8 +2861,16 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, material_data = surf->material_shadow; mesh_surface = surf->surface_shadow; } else { - shader = surf->shader; - material_data = surf->material; + if (unlikely(get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_OVERDRAW)) { + material_data = overdraw_material_data_ptr; + shader = material_data->shader_data; + } else if (unlikely(get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_LIGHTING)) { + material_data = default_material_data_ptr; + shader = material_data->shader_data; 
+ } else { + shader = surf->shader; + material_data = surf->material; + } mesh_surface = surf->surface; } @@ -2864,7 +3003,7 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, // Find cull variant. GLES3::SceneShaderData::Cull cull_mode = shader->cull_mode; - if ((surf->flags & GeometryInstanceSurface::FLAG_USES_DOUBLE_SIDED_SHADOWS)) { + if (p_pass_mode == PASS_MODE_MATERIAL || (surf->flags & GeometryInstanceSurface::FLAG_USES_DOUBLE_SIDED_SHADOWS)) { cull_mode = GLES3::SceneShaderData::CULL_DISABLED; } else { bool mirror = inst->mirror; @@ -2901,7 +3040,7 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, GLuint vertex_array_gl = 0; GLuint index_array_gl = 0; uint64_t vertex_input_mask = shader->vertex_input_mask; - if (inst->lightmap_instance.is_valid()) { + if (inst->lightmap_instance.is_valid() || p_pass_mode == PASS_MODE_MATERIAL) { vertex_input_mask |= 1 << RS::ARRAY_TEX_UV2; } @@ -3175,6 +3314,10 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::MODEL_FLAGS, inst->flags_cache, shader->version, instance_variant, spec_constants); + if (p_pass_mode == PASS_MODE_MATERIAL) { + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::UV_OFFSET, p_params->uv_offset, shader->version, instance_variant, spec_constants); + } + // Can be index count or vertex count uint32_t count = 0; if (surf->lod_index > 0) { @@ -3316,6 +3459,7 @@ void RasterizerSceneGLES3::render_particle_collider_heightfield(RID p_collider, render_data.cam_orthogonal = true; render_data.z_near = 0.0; render_data.z_far = cm.get_z_far(); + render_data.main_cam_transform = cam_xform; render_data.instances = &p_instances; @@ -3345,6 +3489,8 @@ void RasterizerSceneGLES3::render_particle_collider_heightfield(RID p_collider, glEnable(GL_CULL_FACE); scene_state.cull_mode = GLES3::SceneShaderData::CULL_BACK; + 
glDrawBuffers(0, nullptr); + glColorMask(0, 0, 0, 0); RasterizerGLES3::clear_depth(1.0); @@ -3358,6 +3504,93 @@ void RasterizerSceneGLES3::render_particle_collider_heightfield(RID p_collider, glBindFramebuffer(GL_FRAMEBUFFER, 0); } +void RasterizerSceneGLES3::_render_uv2(const PagedArray<RenderGeometryInstance *> &p_instances, GLuint p_framebuffer, const Rect2i &p_region) { + RENDER_TIMESTAMP("Setup Rendering UV2"); + + RenderDataGLES3 render_data; + render_data.instances = &p_instances; + + scene_state.ubo.emissive_exposure_normalization = -1.0; // Use default exposure normalization. + + _setup_environment(&render_data, true, Vector2(1, 1), true, Color(), false); + + PassMode pass_mode = PASS_MODE_MATERIAL; + + _fill_render_list(RENDER_LIST_SECONDARY, &render_data, pass_mode); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + + RENDER_TIMESTAMP("Render 3D Material"); + + { + glBindFramebuffer(GL_FRAMEBUFFER, p_framebuffer); + glViewport(p_region.position.x, p_region.position.y, p_region.size.x, p_region.size.y); + + GLuint global_buffer = GLES3::MaterialStorage::get_singleton()->global_shader_parameters_get_uniform_buffer(); + + glBindBufferBase(GL_UNIFORM_BUFFER, SCENE_GLOBALS_UNIFORM_LOCATION, global_buffer); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + glDisable(GL_BLEND); + glDepthMask(GL_TRUE); + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_LESS); + glDisable(GL_SCISSOR_TEST); + glCullFace(GL_BACK); + glEnable(GL_CULL_FACE); + scene_state.cull_mode = GLES3::SceneShaderData::CULL_BACK; + + TightLocalVector<GLenum> draw_buffers; + draw_buffers.push_back(GL_COLOR_ATTACHMENT0); + draw_buffers.push_back(GL_COLOR_ATTACHMENT1); + draw_buffers.push_back(GL_COLOR_ATTACHMENT2); + draw_buffers.push_back(GL_COLOR_ATTACHMENT3); + glDrawBuffers(draw_buffers.size(), draw_buffers.ptr()); + + glClearColor(0.0, 0.0, 0.0, 0.0); + RasterizerGLES3::clear_depth(1.0); + glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT); + + uint64_t base_spec_constant = 0; + base_spec_constant |= 
SceneShaderGLES3::RENDER_MATERIAL; + base_spec_constant |= SceneShaderGLES3::DISABLE_FOG; + base_spec_constant |= SceneShaderGLES3::DISABLE_LIGHT_DIRECTIONAL; + base_spec_constant |= SceneShaderGLES3::DISABLE_LIGHT_OMNI; + base_spec_constant |= SceneShaderGLES3::DISABLE_LIGHT_SPOT; + base_spec_constant |= SceneShaderGLES3::DISABLE_LIGHTMAP; + + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), false, base_spec_constant, true, Vector2(0, 0)); + + const int uv_offset_count = 9; + static const Vector2 uv_offsets[uv_offset_count] = { + Vector2(-1, 1), + Vector2(1, 1), + Vector2(1, -1), + Vector2(-1, -1), + Vector2(-1, 0), + Vector2(1, 0), + Vector2(0, -1), + Vector2(0, 1), + Vector2(0, 0), + }; + + for (int i = 0; i < uv_offset_count; i++) { + Vector2 ofs = uv_offsets[i]; + ofs.x /= p_region.size.width; + ofs.y /= p_region.size.height; + render_list_params.uv_offset = ofs; + _render_list_template<PASS_MODE_MATERIAL>(&render_list_params, &render_data, 0, render_list[RENDER_LIST_SECONDARY].elements.size()); + } + + render_list_params.uv_offset = Vector2(0, 0); + render_list_params.force_wireframe = false; + _render_list_template<PASS_MODE_MATERIAL>(&render_list_params, &render_data, 0, render_list[RENDER_LIST_SECONDARY].elements.size()); + + GLuint db = GL_COLOR_ATTACHMENT0; + glDrawBuffers(1, &db); + glBindFramebuffer(GL_FRAMEBUFFER, 0); + } +} + void RasterizerSceneGLES3::set_time(double p_time, double p_step) { time = p_time; time_step = p_step; @@ -3505,7 +3738,155 @@ void RasterizerSceneGLES3::sub_surface_scattering_set_scale(float p_scale, float } TypedArray<Image> RasterizerSceneGLES3::bake_render_uv2(RID p_base, const TypedArray<RID> &p_material_overrides, const Size2i &p_image_size) { - return TypedArray<Image>(); + GLES3::Config *config = GLES3::Config::get_singleton(); + ERR_FAIL_COND_V_MSG(p_image_size.width <= 0, TypedArray<Image>(), "Image width must be greater than 
0."); + ERR_FAIL_COND_V_MSG(p_image_size.height <= 0, TypedArray<Image>(), "Image height must be greater than 0."); + + GLuint albedo_alpha_tex = 0; + GLuint normal_tex = 0; + GLuint orm_tex = 0; + GLuint emission_tex = 0; + GLuint depth_tex = 0; + glGenTextures(1, &albedo_alpha_tex); + glGenTextures(1, &normal_tex); + glGenTextures(1, &orm_tex); + glGenTextures(1, &emission_tex); + glGenTextures(1, &depth_tex); + + glBindTexture(GL_TEXTURE_2D, albedo_alpha_tex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, p_image_size.width, p_image_size.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + GLES3::Utilities::get_singleton()->texture_allocated_data(albedo_alpha_tex, p_image_size.width * p_image_size.height * 4, "Lightmap albedo texture"); + + glBindTexture(GL_TEXTURE_2D, normal_tex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, p_image_size.width, p_image_size.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + GLES3::Utilities::get_singleton()->texture_allocated_data(normal_tex, p_image_size.width * p_image_size.height * 4, "Lightmap normal texture"); + + glBindTexture(GL_TEXTURE_2D, orm_tex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, p_image_size.width, p_image_size.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + GLES3::Utilities::get_singleton()->texture_allocated_data(orm_tex, p_image_size.width * p_image_size.height * 4, "Lightmap ORM texture"); + + // Consider rendering to RGBA8 encoded as RGBE, then manually convert to RGBAH on CPU. + glBindTexture(GL_TEXTURE_2D, emission_tex); + if (config->float_texture_supported) { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, p_image_size.width, p_image_size.height, 0, GL_RGBA, GL_FLOAT, nullptr); + GLES3::Utilities::get_singleton()->texture_allocated_data(emission_tex, p_image_size.width * p_image_size.height * 16, "Lightmap emission texture"); + } else { + // Fallback to RGBA8 on devices that don't support rendering to floating point textures. This will look bad, but we have no choice. 
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, p_image_size.width, p_image_size.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + GLES3::Utilities::get_singleton()->texture_allocated_data(emission_tex, p_image_size.width * p_image_size.height * 4, "Lightmap emission texture"); + } + + glBindTexture(GL_TEXTURE_2D, depth_tex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, p_image_size.width, p_image_size.height, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, nullptr); + GLES3::Utilities::get_singleton()->texture_allocated_data(depth_tex, p_image_size.width * p_image_size.height * 3, "Lightmap depth texture"); + + // Temporary framebuffer with four color attachments (albedo+alpha, normal, ORM, emission) plus depth. + GLuint fbo = 0; + glGenFramebuffers(1, &fbo); + glBindFramebuffer(GL_FRAMEBUFFER, fbo); + + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, albedo_alpha_tex, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, normal_tex, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT2, GL_TEXTURE_2D, orm_tex, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT3, GL_TEXTURE_2D, emission_tex, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_tex, 0); + + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + if (status != GL_FRAMEBUFFER_COMPLETE) { + glDeleteFramebuffers(1, &fbo); + GLES3::Utilities::get_singleton()->texture_free_data(albedo_alpha_tex); + GLES3::Utilities::get_singleton()->texture_free_data(normal_tex); + GLES3::Utilities::get_singleton()->texture_free_data(orm_tex); + GLES3::Utilities::get_singleton()->texture_free_data(emission_tex); + GLES3::Utilities::get_singleton()->texture_free_data(depth_tex); + + WARN_PRINT("Could not create render target, status: " + GLES3::TextureStorage::get_singleton()->get_framebuffer_error(status)); + return TypedArray<Image>(); + } + + RenderGeometryInstance *gi_inst = geometry_instance_create(p_base); + if (gi_inst == nullptr) { + // Release the framebuffer and every attachment created above before bailing out, exactly like the incomplete-framebuffer path; otherwise this early exit leaks all six GL objects. + glDeleteFramebuffers(1, &fbo); + GLES3::Utilities::get_singleton()->texture_free_data(albedo_alpha_tex); + GLES3::Utilities::get_singleton()->texture_free_data(normal_tex); + GLES3::Utilities::get_singleton()->texture_free_data(orm_tex); + GLES3::Utilities::get_singleton()->texture_free_data(emission_tex); + GLES3::Utilities::get_singleton()->texture_free_data(depth_tex); + ERR_FAIL_NULL_V(gi_inst, TypedArray<Image>()); // Reports the null instance and returns an empty array. + } + + uint32_t sc = 
RSG::mesh_storage->mesh_get_surface_count(p_base); + Vector<RID> materials; + materials.resize(sc); + + for (uint32_t i = 0; i < sc; i++) { + if (i < (uint32_t)p_material_overrides.size()) { + materials.write[i] = p_material_overrides[i]; + } + } + + gi_inst->set_surface_materials(materials); + + if (cull_argument.size() == 0) { + cull_argument.push_back(nullptr); + } + cull_argument[0] = gi_inst; + _render_uv2(cull_argument, fbo, Rect2i(0, 0, p_image_size.width, p_image_size.height)); + + geometry_instance_free(gi_inst); + + TypedArray<Image> ret; + + // Create a dummy texture so we can use texture_2d_get. + RID tex_rid = GLES3::TextureStorage::get_singleton()->texture_allocate(); + GLES3::Texture texture; + texture.width = p_image_size.width; + texture.height = p_image_size.height; + texture.alloc_width = p_image_size.width; + texture.alloc_height = p_image_size.height; + texture.format = Image::FORMAT_RGBA8; + texture.real_format = Image::FORMAT_RGBA8; + texture.gl_format_cache = GL_RGBA; + texture.gl_type_cache = GL_UNSIGNED_BYTE; + texture.type = GLES3::Texture::TYPE_2D; + texture.target = GL_TEXTURE_2D; + texture.active = true; + texture.is_render_target = true; // Enable this so the texture isn't cached in the editor. 
+ + GLES3::TextureStorage::get_singleton()->texture_2d_initialize_from_texture(tex_rid, texture); + GLES3::Texture *tex = GLES3::TextureStorage::get_singleton()->get_texture(tex_rid); + + { + tex->tex_id = albedo_alpha_tex; + Ref<Image> img = GLES3::TextureStorage::get_singleton()->texture_2d_get(tex_rid); + GLES3::Utilities::get_singleton()->texture_free_data(albedo_alpha_tex); + ret.push_back(img); + } + + { + tex->tex_id = normal_tex; + Ref<Image> img = GLES3::TextureStorage::get_singleton()->texture_2d_get(tex_rid); + GLES3::Utilities::get_singleton()->texture_free_data(normal_tex); + ret.push_back(img); + } + + { + tex->tex_id = orm_tex; + Ref<Image> img = GLES3::TextureStorage::get_singleton()->texture_2d_get(tex_rid); + GLES3::Utilities::get_singleton()->texture_free_data(orm_tex); + ret.push_back(img); + } + + { + tex->tex_id = emission_tex; + if (config->float_texture_supported) { + tex->format = Image::FORMAT_RGBAF; + tex->real_format = Image::FORMAT_RGBAH; + tex->gl_type_cache = GL_FLOAT; + } + Ref<Image> img = GLES3::TextureStorage::get_singleton()->texture_2d_get(tex_rid); + GLES3::Utilities::get_singleton()->texture_free_data(emission_tex); + ret.push_back(img); + } + + tex->is_render_target = false; + tex->tex_id = 0; + GLES3::TextureStorage::get_singleton()->texture_free(tex_rid); + + GLES3::Utilities::get_singleton()->texture_free_data(depth_tex); + glDeleteFramebuffers(1, &fbo); + return ret; } bool RasterizerSceneGLES3::free(RID p_rid) { @@ -3546,6 +3927,8 @@ RasterizerSceneGLES3::RasterizerSceneGLES3() { GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton(); GLES3::Config *config = GLES3::Config::get_singleton(); + cull_argument.set_page_pool(&cull_argument_pool); + // Quality settings. 
use_physical_light_units = GLOBAL_GET("rendering/lights_and_shadows/use_physical_light_units"); @@ -3638,6 +4021,29 @@ void fragment() { scene_globals.default_material = material_storage->material_allocate(); material_storage->material_initialize(scene_globals.default_material); material_storage->material_set_shader(scene_globals.default_material, scene_globals.default_shader); + default_material_data_ptr = static_cast<GLES3::SceneMaterialData *>(GLES3::MaterialStorage::get_singleton()->material_get_data(scene_globals.default_material, RS::SHADER_SPATIAL)); + } + + { + // Overdraw material and shader. + scene_globals.overdraw_shader = material_storage->shader_allocate(); + material_storage->shader_initialize(scene_globals.overdraw_shader); + material_storage->shader_set_code(scene_globals.overdraw_shader, R"( +// 3D editor Overdraw debug draw mode shader. + +shader_type spatial; + +render_mode blend_add, unshaded; + +void fragment() { + ALBEDO = vec3(0.4, 0.8, 0.8); + ALPHA = 0.2; +} +)"); + scene_globals.overdraw_material = material_storage->material_allocate(); + material_storage->material_initialize(scene_globals.overdraw_material); + material_storage->material_set_shader(scene_globals.overdraw_material, scene_globals.overdraw_shader); + overdraw_material_data_ptr = static_cast<GLES3::SceneMaterialData *>(GLES3::MaterialStorage::get_singleton()->material_get_data(scene_globals.overdraw_material, RS::SHADER_SPATIAL)); } { @@ -3752,6 +4158,10 @@ RasterizerSceneGLES3::~RasterizerSceneGLES3() { RSG::material_storage->material_free(scene_globals.default_material); RSG::material_storage->shader_free(scene_globals.default_shader); + // Overdraw Shader + RSG::material_storage->material_free(scene_globals.overdraw_material); + RSG::material_storage->shader_free(scene_globals.overdraw_shader); + // Sky Shader GLES3::MaterialStorage::get_singleton()->shaders.sky_shader.version_free(sky_globals.shader_default_version); 
RSG::material_storage->material_free(sky_globals.default_material); diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h index 045511321a..ed59aba266 100644 --- a/drivers/gles3/rasterizer_scene_gles3.h +++ b/drivers/gles3/rasterizer_scene_gles3.h @@ -61,6 +61,7 @@ enum PassMode { PASS_MODE_COLOR_TRANSPARENT, PASS_MODE_SHADOW, PASS_MODE_DEPTH, + PASS_MODE_MATERIAL, }; // These should share as much as possible with SkyUniform Location @@ -97,6 +98,9 @@ struct RenderDataGLES3 { bool cam_orthogonal = false; uint32_t camera_visible_layers = 0xFFFFFFFF; + // For billboards to cast correct shadows. + Transform3D main_cam_transform; + // For stereo rendering uint32_t view_count = 1; Vector3 view_eye_offset[RendererSceneRender::MAX_RENDER_VIEWS]; @@ -152,8 +156,13 @@ private: RID default_material; RID default_shader; RID cubemap_filter_shader_version; + RID overdraw_material; + RID overdraw_shader; } scene_globals; + GLES3::SceneMaterialData *default_material_data_ptr = nullptr; + GLES3::SceneMaterialData *overdraw_material_data_ptr = nullptr; + /* LIGHT INSTANCE */ struct LightData { @@ -364,13 +373,15 @@ private: float inv_view_matrix[16]; float view_matrix[16]; + float main_cam_inv_view_matrix[16]; + float viewport_size[2]; float screen_pixel_size[2]; float ambient_light_color_energy[4]; float ambient_color_sky_mix; - uint32_t material_uv2_mode; + uint32_t pad2; float emissive_exposure_normalization; uint32_t use_ambient_light = 0; @@ -387,15 +398,21 @@ private: float IBL_exposure_normalization; uint32_t fog_enabled; + uint32_t fog_mode; float fog_density; float fog_height; + float fog_height_density; + float fog_depth_curve; + float pad; + float fog_depth_begin; float fog_light_color[3]; + float fog_depth_end; + float fog_sun_scatter; float shadow_bias; - float pad; uint32_t camera_visible_layers; bool pancake_shadows; }; @@ -460,13 +477,15 @@ private: bool reverse_cull = false; uint64_t spec_constant_base_flags = 0; bool 
force_wireframe = false; + Vector2 uv_offset = Vector2(0, 0); - RenderListParameters(GeometryInstanceSurface **p_elements, int p_element_count, bool p_reverse_cull, uint64_t p_spec_constant_base_flags, bool p_force_wireframe = false) { + RenderListParameters(GeometryInstanceSurface **p_elements, int p_element_count, bool p_reverse_cull, uint64_t p_spec_constant_base_flags, bool p_force_wireframe = false, Vector2 p_uv_offset = Vector2()) { elements = p_elements; element_count = p_element_count; reverse_cull = p_reverse_cull; spec_constant_base_flags = p_spec_constant_base_flags; force_wireframe = p_force_wireframe; + uv_offset = p_uv_offset; } }; @@ -530,7 +549,7 @@ private: void _setup_environment(const RenderDataGLES3 *p_render_data, bool p_no_fog, const Size2i &p_screen_size, bool p_flip_y, const Color &p_default_bg_color, bool p_pancake_shadows, float p_shadow_bias = 0.0); void _fill_render_list(RenderListType p_render_list, const RenderDataGLES3 *p_render_data, PassMode p_pass_mode, bool p_append = false); void _render_shadows(const RenderDataGLES3 *p_render_data, const Size2i &p_viewport_size = Size2i(1, 1)); - void _render_shadow_pass(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<RenderGeometryInstance *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_mesh_lod_threshold = 0.0, RenderingMethod::RenderInfo *p_render_info = nullptr, const Size2i &p_viewport_size = Size2i(1, 1)); + void _render_shadow_pass(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<RenderGeometryInstance *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_mesh_lod_threshold = 0.0, RenderingMethod::RenderInfo *p_render_info = nullptr, const Size2i &p_viewport_size = Size2i(1, 1), const Transform3D &p_main_cam_transform = Transform3D()); void _render_post_processing(const RenderDataGLES3 *p_render_data); template <PassMode p_pass_mode> @@ -642,6 
+661,10 @@ protected: void _draw_sky(RID p_env, const Projection &p_projection, const Transform3D &p_transform, float p_luminance_multiplier, bool p_use_multiview, bool p_flip_y); void _free_sky_data(Sky *p_sky); + // Needed for a single argument calls (material and uv2). + PagedArrayPool<RenderGeometryInstance *> cull_argument_pool; + PagedArray<RenderGeometryInstance *> cull_argument; + public: static RasterizerSceneGLES3 *get_singleton() { return singleton; } @@ -742,6 +765,7 @@ public: void sub_surface_scattering_set_scale(float p_scale, float p_depth_scale) override; TypedArray<Image> bake_render_uv2(RID p_base, const TypedArray<RID> &p_material_overrides, const Size2i &p_image_size) override; + void _render_uv2(const PagedArray<RenderGeometryInstance *> &p_instances, GLuint p_framebuffer, const Rect2i &p_region); bool free(RID p_rid) override; void update() override; diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp index 75b2662a1c..551136ce36 100644 --- a/drivers/gles3/shader_gles3.cpp +++ b/drivers/gles3/shader_gles3.cpp @@ -49,7 +49,7 @@ void ShaderGLES3::_add_stage(const char *p_code, StageType p_stage_type) { String text; for (int i = 0; i < lines.size(); i++) { - String l = lines[i]; + const String &l = lines[i]; bool push_chunk = false; StageTemplate::Chunk chunk; @@ -213,6 +213,7 @@ void ShaderGLES3::_build_variant_code(StringBuilder &builder, uint32_t p_variant builder.append("precision highp sampler2D;\n"); builder.append("precision highp samplerCube;\n"); builder.append("precision highp sampler2DArray;\n"); + builder.append("precision highp sampler3D;\n"); } const StageTemplate &stage_template = stage_templates[p_stage_type]; @@ -561,7 +562,7 @@ bool ShaderGLES3::_load_from_cache(Version *p_version) { } int cache_variant_count = static_cast<int>(f->get_32()); - ERR_FAIL_COND_V_MSG(cache_variant_count != this->variant_count, false, "shader cache variant count mismatch, expected " + itos(this->variant_count) + " got " + 
itos(cache_variant_count)); //should not happen but check + ERR_FAIL_COND_V_MSG(cache_variant_count != variant_count, false, "shader cache variant count mismatch, expected " + itos(variant_count) + " got " + itos(cache_variant_count)); //should not happen but check LocalVector<OAHashMap<uint64_t, Version::Specialization>> variants; for (int i = 0; i < cache_variant_count; i++) { @@ -611,6 +612,7 @@ void ShaderGLES3::_save_to_cache(Version *p_version) { #ifdef WEB_ENABLED // not supported in webgl return; #else + ERR_FAIL_COND(!shader_cache_dir_valid); #if !defined(ANDROID_ENABLED) && !defined(IOS_ENABLED) if (RasterizerGLES3::is_gles_over_gl() && (glGetProgramBinary == NULL)) { // ARB_get_program_binary extension not available. return; diff --git a/drivers/gles3/shader_gles3.h b/drivers/gles3/shader_gles3.h index 0cb53da316..8968e76c12 100644 --- a/drivers/gles3/shader_gles3.h +++ b/drivers/gles3/shader_gles3.h @@ -209,7 +209,9 @@ protected: _compile_specialization(s, p_variant, version, p_specialization); version->variants[p_variant].insert(p_specialization, s); spec = version->variants[p_variant].lookup_ptr(p_specialization); - _save_to_cache(version); + if (shader_cache_dir_valid) { + _save_to_cache(version); + } } } else if (spec->build_queued) { // Still queued, wait diff --git a/drivers/gles3/shaders/canvas.glsl b/drivers/gles3/shaders/canvas.glsl index ce8fe25625..80e28cf9fc 100644 --- a/drivers/gles3/shaders/canvas.glsl +++ b/drivers/gles3/shaders/canvas.glsl @@ -9,7 +9,7 @@ mode_instanced = #define USE_ATTRIBUTES \n#define USE_INSTANCING #[specializations] -DISABLE_LIGHTING = false +DISABLE_LIGHTING = true USE_RGBA_SHADOWS = false SINGLE_INSTANCE = false @@ -32,18 +32,26 @@ layout(location = 5) in highp uvec4 instance_color_custom_data; // Color packed #include "stdlib_inc.glsl" -layout(location = 6) in highp vec4 attrib_A; -layout(location = 7) in highp vec4 attrib_B; -layout(location = 8) in highp vec4 attrib_C; -layout(location = 9) in highp vec4 
attrib_D; -layout(location = 10) in highp vec4 attrib_E; +#if defined(CUSTOM0_USED) +layout(location = 6) in highp vec4 custom0_attrib; +#endif + +#if defined(CUSTOM1_USED) +layout(location = 7) in highp vec4 custom1_attrib; +#endif + +layout(location = 8) in highp vec4 attrib_A; +layout(location = 9) in highp vec4 attrib_B; +layout(location = 10) in highp vec4 attrib_C; +layout(location = 11) in highp vec4 attrib_D; +layout(location = 12) in highp vec4 attrib_E; #ifdef USE_PRIMITIVE -layout(location = 11) in highp uvec4 attrib_F; +layout(location = 13) in highp uvec4 attrib_F; #else -layout(location = 11) in highp vec4 attrib_F; +layout(location = 13) in highp vec4 attrib_F; #endif -layout(location = 12) in highp uvec4 attrib_G; -layout(location = 13) in highp uvec4 attrib_H; +layout(location = 14) in highp uvec4 attrib_G; +layout(location = 15) in highp uvec4 attrib_H; #define read_draw_data_world_x attrib_A.xy #define read_draw_data_world_y attrib_A.zw @@ -137,6 +145,13 @@ void main() { vec4 instance_custom = vec4(0.0); +#if defined(CUSTOM0_USED) + vec4 custom0 = vec4(0.0); +#endif +#if defined(CUSTOM1_USED) + vec4 custom1 = vec4(0.0); +#endif + #ifdef USE_PRIMITIVE vec2 vertex; vec2 uv; @@ -169,9 +184,9 @@ void main() { if (bool(read_draw_data_flags & FLAGS_INSTANCING_HAS_CUSTOM_DATA)) { instance_custom = vec4(unpackHalf2x16(instance_color_custom_data.z), unpackHalf2x16(instance_color_custom_data.w)); } -#endif +#endif // !USE_INSTANCING -#else +#else // !USE_ATTRIBUTES vec2 vertex_base_arr[6] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0), vec2(0.0, 0.0), vec2(1.0, 1.0)); vec2 vertex_base = vertex_base_arr[gl_VertexID % 6]; @@ -179,6 +194,14 @@ void main() { vec4 color = read_draw_data_modulation; vec2 vertex = read_draw_data_dst_rect.xy + abs(read_draw_data_dst_rect.zw) * mix(vertex_base, vec2(1.0, 1.0) - vertex_base, lessThan(read_draw_data_src_rect.zw, vec2(0.0, 0.0))); +#endif // USE_ATTRIBUTES + +#if defined(CUSTOM0_USED) + 
custom0 = custom0_attrib; +#endif + +#if defined(CUSTOM1_USED) + custom1 = custom1_attrib; #endif mat4 model_matrix = mat4(vec4(read_draw_data_world_x, 0.0, 0.0), vec4(read_draw_data_world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(read_draw_data_world_ofs, 0.0, 1.0)); @@ -587,6 +610,9 @@ void main() { if (normal_used || (using_light && bool(read_draw_data_flags & FLAGS_DEFAULT_NORMAL_MAP_USED))) { normal.xy = texture(normal_texture, uv).xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); + if (bool(read_draw_data_flags & FLAGS_TRANSPOSE_RECT)) { + normal.xy = normal.yx; + } if (bool(read_draw_data_flags & FLAGS_FLIP_H)) { normal.x = -normal.x; } diff --git a/drivers/gles3/shaders/copy.glsl b/drivers/gles3/shaders/copy.glsl index 265acc1430..db63b5d348 100644 --- a/drivers/gles3/shaders/copy.glsl +++ b/drivers/gles3/shaders/copy.glsl @@ -3,10 +3,14 @@ mode_default = #define MODE_SIMPLE_COPY mode_copy_section = #define USE_COPY_SECTION \n#define MODE_SIMPLE_COPY +mode_copy_section_source = #define USE_COPY_SECTION \n#define MODE_SIMPLE_COPY \n#define MODE_COPY_FROM +mode_copy_section_3d = #define USE_COPY_SECTION \n#define MODE_SIMPLE_COPY \n#define USE_TEXTURE_3D +mode_copy_section_2d_array = #define USE_COPY_SECTION \n#define MODE_SIMPLE_COPY \n#define USE_TEXTURE_2D_ARRAY mode_gaussian_blur = #define MODE_GAUSSIAN_BLUR mode_mipmap = #define MODE_MIPMAP mode_simple_color = #define MODE_SIMPLE_COLOR \n#define USE_COPY_SECTION mode_cube_to_octahedral = #define CUBE_TO_OCTAHEDRAL \n#define USE_COPY_SECTION +mode_cube_to_panorama = #define CUBE_TO_PANORAMA #[specializations] @@ -21,7 +25,7 @@ out vec2 uv_interp; // Defined in 0-1 coords. 
uniform highp vec4 copy_section; #endif -#ifdef MODE_GAUSSIAN_BLUR +#if defined(MODE_GAUSSIAN_BLUR) || defined(MODE_COPY_FROM) uniform highp vec4 source_section; #endif @@ -32,7 +36,7 @@ void main() { #if defined(USE_COPY_SECTION) || defined(MODE_GAUSSIAN_BLUR) gl_Position.xy = (copy_section.xy + uv_interp.xy * copy_section.zw) * 2.0 - 1.0; #endif -#ifdef MODE_GAUSSIAN_BLUR +#if defined(MODE_GAUSSIAN_BLUR) || defined(MODE_COPY_FROM) uv_interp = source_section.xy + uv_interp * source_section.zw; #endif } @@ -42,6 +46,11 @@ void main() { in vec2 uv_interp; /* clang-format on */ +#if defined(USE_TEXTURE_3D) || defined(USE_TEXTURE_2D_ARRAY) +uniform float layer; +uniform float lod; +#endif + #ifdef MODE_SIMPLE_COLOR uniform vec4 color_in; #endif @@ -52,24 +61,52 @@ uniform highp vec2 pixel_size; #endif #ifdef CUBE_TO_OCTAHEDRAL -uniform samplerCube source_cube; // texunit:0 - vec3 oct_to_vec3(vec2 e) { vec3 v = vec3(e.xy, 1.0 - abs(e.x) - abs(e.y)); float t = max(-v.z, 0.0); v.xy += t * -sign(v.xy); return normalize(v); } +#endif + +#ifdef CUBE_TO_PANORAMA +uniform lowp float mip_level; +#endif + +#if defined(CUBE_TO_OCTAHEDRAL) || defined(CUBE_TO_PANORAMA) +uniform samplerCube source_cube; // texunit:0 + +#else // ~(defined(CUBE_TO_OCTAHEDRAL) || defined(CUBE_TO_PANORAMA)) + +#if defined(USE_TEXTURE_3D) +uniform sampler3D source_3d; // texunit:0 +#elif defined(USE_TEXTURE_2D_ARRAY) +uniform sampler2DArray source_2d_array; // texunit:0 #else uniform sampler2D source; // texunit:0 - #endif +#endif // !(defined(CUBE_TO_OCTAHEDRAL) || defined(CUBE_TO_PANORAMA)) + layout(location = 0) out vec4 frag_color; +// This expects 0-1 range input, outside that range it behaves poorly. 
+vec3 srgb_to_linear(vec3 color) { + // Approximation from http://chilliant.blogspot.com/2012/08/srgb-approximations-for-hlsl.html + return color * (color * (color * 0.305306011 + 0.682171111) + 0.012522878); +} + void main() { #ifdef MODE_SIMPLE_COPY + +#ifdef USE_TEXTURE_3D + vec4 color = textureLod(source_3d, vec3(uv_interp, layer), lod); +#elif defined(USE_TEXTURE_2D_ARRAY) + vec4 color = textureLod(source_2d_array, vec3(uv_interp, layer), lod); +#else vec4 color = texture(source, uv_interp); +#endif + frag_color = color; #endif @@ -110,4 +147,21 @@ void main() { frag_color = texture(source_cube, dir); #endif + +#ifdef CUBE_TO_PANORAMA + + const float PI = 3.14159265359; + + float phi = uv_interp.x * 2.0 * PI; + float theta = uv_interp.y * PI; + + vec3 normal; + normal.x = sin(phi) * sin(theta) * -1.0; + normal.y = cos(theta); + normal.z = cos(phi) * sin(theta) * -1.0; + + vec3 color = srgb_to_linear(textureLod(source_cube, normal, mip_level).rgb); + frag_color = vec4(color, 1.0); + +#endif } diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index 1d9ba623c4..667cbb3d90 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -13,6 +13,7 @@ DISABLE_LIGHT_DIRECTIONAL = false DISABLE_LIGHT_OMNI = false DISABLE_LIGHT_SPOT = false DISABLE_FOG = false +USE_DEPTH_FOG = false USE_RADIANCE_MAP = true USE_LIGHTMAP = false USE_SH_LIGHTMAP = false @@ -31,6 +32,7 @@ USE_ADDITIVE_LIGHTING = false // these are false, we are doing a directional light pass. 
ADDITIVE_OMNI = false ADDITIVE_SPOT = false +RENDER_MATERIAL = false #[vertex] @@ -90,7 +92,7 @@ layout(location = 3) in vec4 color_attrib; layout(location = 4) in vec2 uv_attrib; #endif -#if defined(UV2_USED) || defined(USE_LIGHTMAP) +#if defined(UV2_USED) || defined(USE_LIGHTMAP) || defined(RENDER_MATERIAL) layout(location = 5) in vec2 uv2_attrib; #endif @@ -154,18 +156,21 @@ layout(std140) uniform SceneData { // ubo:2 highp mat4 inv_view_matrix; highp mat4 view_matrix; + // Used for billboards to cast correct shadows. + highp mat4 main_cam_inv_view_matrix; + vec2 viewport_size; vec2 screen_pixel_size; mediump vec4 ambient_light_color_energy; mediump float ambient_color_sky_mix; - bool material_uv2_mode; + float pad2; float emissive_exposure_normalization; bool use_ambient_light; + bool use_ambient_cubemap; bool use_reflection_cubemap; - float fog_aerial_perspective; float time; @@ -177,15 +182,21 @@ layout(std140) uniform SceneData { // ubo:2 float IBL_exposure_normalization; bool fog_enabled; + uint fog_mode; float fog_density; float fog_height; float fog_height_density; + float fog_depth_curve; + float pad; + float fog_depth_begin; + vec3 fog_light_color; + float fog_depth_end; + float fog_sun_scatter; float shadow_bias; - float pad; uint camera_visible_layers; bool pancake_shadows; } @@ -249,6 +260,10 @@ uniform highp vec4 uv_scale; uniform highp uint model_flags; +#ifdef RENDER_MATERIAL +uniform mediump vec2 uv_offset; +#endif + /* Varyings */ out highp vec3 vertex_interp; @@ -511,6 +526,12 @@ void main() { #else gl_Position = projection_matrix * vec4(vertex_interp, 1.0); #endif + +#ifdef RENDER_MATERIAL + gl_Position.xy = (uv2_attrib.xy + uv_offset) * 2.0 - 1.0; + gl_Position.z = 0.00001; + gl_Position.w = 1.0; +#endif } /* clang-format off */ @@ -607,8 +628,7 @@ layout(std140) uniform GlobalShaderUniformData { //ubo:1 vec4 global_shader_uniforms[MAX_GLOBAL_SHADER_UNIFORMS]; }; - /* Material Uniforms */ - +/* Material Uniforms */ #ifdef 
MATERIAL_UNIFORMS_USED /* clang-format off */ @@ -627,18 +647,21 @@ layout(std140) uniform SceneData { // ubo:2 highp mat4 inv_view_matrix; highp mat4 view_matrix; + // Used for billboards to cast correct shadows. + highp mat4 main_cam_inv_view_matrix; + vec2 viewport_size; vec2 screen_pixel_size; mediump vec4 ambient_light_color_energy; mediump float ambient_color_sky_mix; - bool material_uv2_mode; + float pad2; float emissive_exposure_normalization; bool use_ambient_light; + bool use_ambient_cubemap; bool use_reflection_cubemap; - float fog_aerial_perspective; float time; @@ -650,15 +673,21 @@ layout(std140) uniform SceneData { // ubo:2 float IBL_exposure_normalization; bool fog_enabled; + uint fog_mode; float fog_density; float fog_height; float fog_height_density; + float fog_depth_curve; + float pad; + float fog_depth_begin; + vec3 fog_light_color; + float fog_depth_end; + float fog_sun_scatter; float shadow_bias; - float pad; uint camera_visible_layers; bool pancake_shadows; } @@ -868,19 +897,37 @@ uniform highp sampler2DArray color_buffer; // texunit:-5 vec3 multiview_uv(vec2 uv) { return vec3(uv, ViewIndex); } +ivec3 multiview_uv(ivec2 uv) { + return ivec3(uv, int(ViewIndex)); +} #else uniform highp sampler2D depth_buffer; // texunit:-6 uniform highp sampler2D color_buffer; // texunit:-5 vec2 multiview_uv(vec2 uv) { return uv; } +ivec2 multiview_uv(ivec2 uv) { + return uv; +} #endif uniform highp mat4 world_transform; uniform mediump float opaque_prepass_threshold; +#ifndef MODE_RENDER_DEPTH +#ifdef RENDER_MATERIAL +layout(location = 0) out vec4 albedo_output_buffer; +layout(location = 1) out vec4 normal_output_buffer; +layout(location = 2) out vec4 orm_output_buffer; +layout(location = 3) out vec4 emission_output_buffer; + +#else // !RENDER_MATERIAL +// Normal color rendering. 
layout(location = 0) out vec4 frag_color; +#endif // !RENDER_MATERIAL +#endif // !MODE_RENDER_DEPTH + vec3 F0(float metallic, float specular, vec3 albedo) { float dielectric = 0.16 * specular * specular; // use albedo * metallic as colored specular reflectance at 0 angle for metallic materials; @@ -937,7 +984,7 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, bool is_di #endif inout vec3 diffuse_light, inout vec3 specular_light) { -#if defined(USE_LIGHT_SHADER_CODE) +#if defined(LIGHT_CODE_USED) // light is written by the light shader highp mat4 model_matrix = world_transform; @@ -1073,7 +1120,7 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, bool is_di alpha = min(alpha, clamp(1.0 - attenuation, 0.0, 1.0)); #endif -#endif // USE_LIGHT_SHADER_CODE +#endif // LIGHT_CODE_USED } float get_omni_spot_attenuation(float distance, float inv_range, float decay) { @@ -1216,7 +1263,14 @@ vec4 fog_process(vec3 vertex) { } #endif // !DISABLE_LIGHT_DIRECTIONAL - float fog_amount = 1.0 - exp(min(0.0, -length(vertex) * scene_data.fog_density)); + float fog_amount = 0.0; + +#ifdef USE_DEPTH_FOG + float fog_z = smoothstep(scene_data.fog_depth_begin, scene_data.fog_depth_end, length(vertex)); + fog_amount = pow(fog_z, scene_data.fog_depth_curve) * scene_data.fog_density; +#else + fog_amount = 1 - exp(min(0.0, -length(vertex) * scene_data.fog_density)); +#endif // USE_DEPTH_FOG if (abs(scene_data.fog_height_density) >= 0.0001) { float y = (scene_data.inv_view_matrix * vec4(vertex, 1.0)).y; @@ -1661,6 +1715,23 @@ void main() { // Nothing happens, so a tree-ssa optimizer will result in no fragment shader :) #else // !MODE_RENDER_DEPTH + +#ifdef RENDER_MATERIAL + + albedo_output_buffer.rgb = albedo; + albedo_output_buffer.a = alpha; + + normal_output_buffer.rgb = normal * 0.5 + 0.5; + normal_output_buffer.a = 0.0; + + orm_output_buffer.r = ao; + orm_output_buffer.g = roughness; + orm_output_buffer.b = metallic; + orm_output_buffer.a = 1.0; 
+ + emission_output_buffer.rgb = emission; + emission_output_buffer.a = 0.0; +#else // !RENDER_MATERIAL #ifdef BASE_PASS #ifdef MODE_UNSHADED frag_color = vec4(albedo, alpha); @@ -1915,6 +1986,6 @@ void main() { frag_color.rgb += additive_light_color; #endif // USE_ADDITIVE_LIGHTING - +#endif // !RENDER_MATERIAL #endif //!MODE_RENDER_DEPTH } diff --git a/drivers/gles3/storage/light_storage.cpp b/drivers/gles3/storage/light_storage.cpp index 2607a133d6..5421f57646 100644 --- a/drivers/gles3/storage/light_storage.cpp +++ b/drivers/gles3/storage/light_storage.cpp @@ -468,6 +468,9 @@ void LightStorage::reflection_probe_set_enable_shadows(RID p_probe, bool p_enabl void LightStorage::reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers) { } +void LightStorage::reflection_probe_set_reflection_mask(RID p_probe, uint32_t p_layers) { +} + void LightStorage::reflection_probe_set_resolution(RID p_probe, int p_resolution) { } @@ -483,6 +486,10 @@ uint32_t LightStorage::reflection_probe_get_cull_mask(RID p_probe) const { return 0; } +uint32_t LightStorage::reflection_probe_get_reflection_mask(RID p_probe) const { + return 0; +} + Vector3 LightStorage::reflection_probe_get_size(RID p_probe) const { return Vector3(); } diff --git a/drivers/gles3/storage/light_storage.h b/drivers/gles3/storage/light_storage.h index 7ab0286098..96e6200219 100644 --- a/drivers/gles3/storage/light_storage.h +++ b/drivers/gles3/storage/light_storage.h @@ -575,6 +575,7 @@ public: virtual void reflection_probe_set_enable_box_projection(RID p_probe, bool p_enable) override; virtual void reflection_probe_set_enable_shadows(RID p_probe, bool p_enable) override; virtual void reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers) override; + virtual void reflection_probe_set_reflection_mask(RID p_probe, uint32_t p_layers) override; virtual void reflection_probe_set_resolution(RID p_probe, int p_resolution) override; virtual void reflection_probe_set_mesh_lod_threshold(RID p_probe, float 
p_ratio) override; virtual float reflection_probe_get_mesh_lod_threshold(RID p_probe) const override; @@ -582,6 +583,7 @@ public: virtual AABB reflection_probe_get_aabb(RID p_probe) const override; virtual RS::ReflectionProbeUpdateMode reflection_probe_get_update_mode(RID p_probe) const override; virtual uint32_t reflection_probe_get_cull_mask(RID p_probe) const override; + virtual uint32_t reflection_probe_get_reflection_mask(RID p_probe) const override; virtual Vector3 reflection_probe_get_size(RID p_probe) const override; virtual Vector3 reflection_probe_get_origin_offset(RID p_probe) const override; virtual float reflection_probe_get_origin_max_distance(RID p_probe) const override; diff --git a/drivers/gles3/storage/material_storage.cpp b/drivers/gles3/storage/material_storage.cpp index 13ab05c0a0..5600449d00 100644 --- a/drivers/gles3/storage/material_storage.cpp +++ b/drivers/gles3/storage/material_storage.cpp @@ -1160,6 +1160,9 @@ MaterialStorage::MaterialStorage() { actions.renames["INSTANCE_ID"] = "gl_InstanceID"; actions.renames["VERTEX_ID"] = "gl_VertexID"; + actions.renames["CUSTOM0"] = "custom0"; + actions.renames["CUSTOM1"] = "custom1"; + actions.renames["LIGHT_POSITION"] = "light_position"; actions.renames["LIGHT_DIRECTION"] = "light_direction"; actions.renames["LIGHT_IS_DIRECTIONAL"] = "is_directional"; @@ -1178,8 +1181,9 @@ MaterialStorage::MaterialStorage() { actions.usage_defines["SCREEN_PIXEL_SIZE"] = "@SCREEN_UV"; actions.usage_defines["NORMAL"] = "#define NORMAL_USED\n"; actions.usage_defines["NORMAL_MAP"] = "#define NORMAL_MAP_USED\n"; - actions.usage_defines["LIGHT"] = "#define LIGHT_SHADER_CODE_USED\n"; actions.usage_defines["SPECULAR_SHININESS"] = "#define SPECULAR_SHININESS_USED\n"; + actions.usage_defines["CUSTOM0"] = "#define CUSTOM0_USED\n"; + actions.usage_defines["CUSTOM1"] = "#define CUSTOM1_USED\n"; actions.render_mode_defines["skip_vertex_transform"] = "#define SKIP_TRANSFORM_USED\n"; actions.render_mode_defines["unshaded"] = 
"#define MODE_UNSHADED\n"; @@ -1205,6 +1209,7 @@ MaterialStorage::MaterialStorage() { actions.renames["INV_PROJECTION_MATRIX"] = "inv_projection_matrix"; actions.renames["MODELVIEW_MATRIX"] = "modelview"; actions.renames["MODELVIEW_NORMAL_MATRIX"] = "modelview_normal"; + actions.renames["MAIN_CAM_INV_VIEW_MATRIX"] = "scene_data.main_cam_inv_view_matrix"; actions.renames["VERTEX"] = "vertex"; actions.renames["NORMAL"] = "normal"; @@ -1326,9 +1331,6 @@ MaterialStorage::MaterialStorage() { actions.usage_defines["BACKLIGHT"] = "#define LIGHT_BACKLIGHT_USED\n"; actions.usage_defines["SCREEN_UV"] = "#define SCREEN_UV_USED\n"; - actions.usage_defines["DIFFUSE_LIGHT"] = "#define USE_LIGHT_SHADER_CODE\n"; - actions.usage_defines["SPECULAR_LIGHT"] = "#define USE_LIGHT_SHADER_CODE\n"; - actions.usage_defines["FOG"] = "#define CUSTOM_FOG_USED\n"; actions.usage_defines["RADIANCE"] = "#define CUSTOM_RADIANCE_USED\n"; actions.usage_defines["IRRADIANCE"] = "#define CUSTOM_IRRADIANCE_USED\n"; @@ -2541,6 +2543,8 @@ void CanvasShaderData::set_code(const String &p_code) { uses_screen_texture_mipmaps = false; uses_sdf = false; uses_time = false; + uses_custom0 = false; + uses_custom1 = false; if (code.is_empty()) { return; // Just invalid, but no error. 
@@ -2565,6 +2569,8 @@ void CanvasShaderData::set_code(const String &p_code) { actions.usage_flag_pointers["texture_sdf"] = &uses_sdf; actions.usage_flag_pointers["TIME"] = &uses_time; + actions.usage_flag_pointers["CUSTOM0"] = &uses_custom0; + actions.usage_flag_pointers["CUSTOM1"] = &uses_custom1; actions.uniforms = &uniforms; Error err = MaterialStorage::get_singleton()->shaders.compiler_canvas.compile(RS::SHADER_CANVAS_ITEM, code, &actions, path, gen_code); @@ -2601,6 +2607,10 @@ void CanvasShaderData::set_code(const String &p_code) { MaterialStorage::get_singleton()->shaders.canvas_shader.version_set_code(version, gen_code.code, gen_code.uniforms, gen_code.stage_globals[ShaderCompiler::STAGE_VERTEX], gen_code.stage_globals[ShaderCompiler::STAGE_FRAGMENT], gen_code.defines, texture_uniform_data); ERR_FAIL_COND(!MaterialStorage::get_singleton()->shaders.canvas_shader.version_is_valid(version)); + vertex_input_mask = RS::ARRAY_FORMAT_VERTEX | RS::ARRAY_COLOR | RS::ARRAY_TEX_UV; + vertex_input_mask |= uses_custom0 << RS::ARRAY_CUSTOM0; + vertex_input_mask |= uses_custom1 << RS::ARRAY_CUSTOM1; + ubo_size = gen_code.uniform_total_size; ubo_offsets = gen_code.uniform_offsets; texture_uniforms = gen_code.texture_uniforms; diff --git a/drivers/gles3/storage/material_storage.h b/drivers/gles3/storage/material_storage.h index 3c5080c6e1..59f5682362 100644 --- a/drivers/gles3/storage/material_storage.h +++ b/drivers/gles3/storage/material_storage.h @@ -168,6 +168,10 @@ struct CanvasShaderData : public ShaderData { bool uses_screen_texture_mipmaps; bool uses_sdf; bool uses_time; + bool uses_custom0; + bool uses_custom1; + + uint64_t vertex_input_mask; virtual void set_code(const String &p_Code); virtual bool is_animated() const; diff --git a/drivers/gles3/storage/mesh_storage.cpp b/drivers/gles3/storage/mesh_storage.cpp index e5080b39a3..8ab66e2bc6 100644 --- a/drivers/gles3/storage/mesh_storage.cpp +++ b/drivers/gles3/storage/mesh_storage.cpp @@ -471,7 +471,7 @@ void 
MeshStorage::mesh_surface_update_vertex_region(RID p_mesh, int p_surface, i Mesh *mesh = mesh_owner.get_or_null(p_mesh); ERR_FAIL_NULL(mesh); ERR_FAIL_UNSIGNED_INDEX((uint32_t)p_surface, mesh->surface_count); - ERR_FAIL_COND(p_data.size() == 0); + ERR_FAIL_COND(p_data.is_empty()); uint64_t data_size = p_data.size(); ERR_FAIL_COND(p_offset + data_size > mesh->surfaces[p_surface]->vertex_buffer_size); @@ -486,7 +486,7 @@ void MeshStorage::mesh_surface_update_attribute_region(RID p_mesh, int p_surface Mesh *mesh = mesh_owner.get_or_null(p_mesh); ERR_FAIL_NULL(mesh); ERR_FAIL_UNSIGNED_INDEX((uint32_t)p_surface, mesh->surface_count); - ERR_FAIL_COND(p_data.size() == 0); + ERR_FAIL_COND(p_data.is_empty()); uint64_t data_size = p_data.size(); ERR_FAIL_COND(p_offset + data_size > mesh->surfaces[p_surface]->attribute_buffer_size); @@ -501,7 +501,7 @@ void MeshStorage::mesh_surface_update_skin_region(RID p_mesh, int p_surface, int Mesh *mesh = mesh_owner.get_or_null(p_mesh); ERR_FAIL_NULL(mesh); ERR_FAIL_UNSIGNED_INDEX((uint32_t)p_surface, mesh->surface_count); - ERR_FAIL_COND(p_data.size() == 0); + ERR_FAIL_COND(p_data.is_empty()); uint64_t data_size = p_data.size(); ERR_FAIL_COND(p_offset + data_size > mesh->surfaces[p_surface]->skin_buffer_size); @@ -727,6 +727,20 @@ AABB MeshStorage::mesh_get_aabb(RID p_mesh, RID p_skeleton) { return aabb; } +void MeshStorage::mesh_set_path(RID p_mesh, const String &p_path) { + Mesh *mesh = mesh_owner.get_or_null(p_mesh); + ERR_FAIL_NULL(mesh); + + mesh->path = p_path; +} + +String MeshStorage::mesh_get_path(RID p_mesh) const { + Mesh *mesh = mesh_owner.get_or_null(p_mesh); + ERR_FAIL_NULL_V(mesh, String()); + + return mesh->path; +} + void MeshStorage::mesh_set_shadow_mesh(RID p_mesh, RID p_shadow_mesh) { Mesh *mesh = mesh_owner.get_or_null(p_mesh); ERR_FAIL_NULL(mesh); @@ -1538,7 +1552,7 @@ void MeshStorage::_multimesh_make_local(MultiMesh *multimesh) const { memset(w, 0, (size_t)multimesh->instances * multimesh->stride_cache * 
sizeof(float)); } } - uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1; + uint32_t data_cache_dirty_region_count = Math::division_round_up(multimesh->instances, MULTIMESH_DIRTY_REGION_SIZE); multimesh->data_cache_dirty_regions = memnew_arr(bool, data_cache_dirty_region_count); for (uint32_t i = 0; i < data_cache_dirty_region_count; i++) { multimesh->data_cache_dirty_regions[i] = false; @@ -1549,7 +1563,7 @@ void MeshStorage::_multimesh_make_local(MultiMesh *multimesh) const { void MeshStorage::_multimesh_mark_dirty(MultiMesh *multimesh, int p_index, bool p_aabb) { uint32_t region_index = p_index / MULTIMESH_DIRTY_REGION_SIZE; #ifdef DEBUG_ENABLED - uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1; + uint32_t data_cache_dirty_region_count = Math::division_round_up(multimesh->instances, MULTIMESH_DIRTY_REGION_SIZE); ERR_FAIL_UNSIGNED_INDEX(region_index, data_cache_dirty_region_count); //bug #endif if (!multimesh->data_cache_dirty_regions[region_index]) { @@ -1570,7 +1584,7 @@ void MeshStorage::_multimesh_mark_dirty(MultiMesh *multimesh, int p_index, bool void MeshStorage::_multimesh_mark_all_dirty(MultiMesh *multimesh, bool p_data, bool p_aabb) { if (p_data) { - uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1; + uint32_t data_cache_dirty_region_count = Math::division_round_up(multimesh->instances, MULTIMESH_DIRTY_REGION_SIZE); for (uint32_t i = 0; i < data_cache_dirty_region_count; i++) { if (!multimesh->data_cache_dirty_regions[i]) { @@ -1593,6 +1607,9 @@ void MeshStorage::_multimesh_mark_all_dirty(MultiMesh *multimesh, bool p_data, b void MeshStorage::_multimesh_re_create_aabb(MultiMesh *multimesh, const float *p_data, int p_instances) { ERR_FAIL_COND(multimesh->mesh.is_null()); + if (multimesh->custom_aabb != AABB()) { + return; + } AABB aabb; AABB mesh_aabb = mesh_get_aabb(multimesh->mesh); for (int i = 0; 
i < p_instances; i++) { @@ -1735,9 +1752,25 @@ RID MeshStorage::multimesh_get_mesh(RID p_multimesh) const { return multimesh->mesh; } +void MeshStorage::multimesh_set_custom_aabb(RID p_multimesh, const AABB &p_aabb) { + MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); + ERR_FAIL_NULL(multimesh); + multimesh->custom_aabb = p_aabb; + multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); +} + +AABB MeshStorage::multimesh_get_custom_aabb(RID p_multimesh) const { + MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); + ERR_FAIL_NULL_V(multimesh, AABB()); + return multimesh->custom_aabb; +} + AABB MeshStorage::multimesh_get_aabb(RID p_multimesh) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL_V(multimesh, AABB()); + if (multimesh->custom_aabb != AABB()) { + return multimesh->custom_aabb; + } if (multimesh->aabb_dirty) { const_cast<MeshStorage *>(this)->_update_dirty_multimeshes(); } @@ -1917,7 +1950,7 @@ void MeshStorage::multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_b multimesh->data_cache = multimesh->data_cache; { //clear dirty since nothing will be dirty anymore - uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1; + uint32_t data_cache_dirty_region_count = Math::division_round_up(multimesh->instances, MULTIMESH_DIRTY_REGION_SIZE); for (uint32_t i = 0; i < data_cache_dirty_region_count; i++) { multimesh->data_cache_dirty_regions[i] = false; } @@ -1929,8 +1962,10 @@ void MeshStorage::multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_b //if we have a mesh set, we need to re-generate the AABB from the new data const float *data = p_buffer.ptr(); - _multimesh_re_create_aabb(multimesh, data, multimesh->instances); - multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); + if (multimesh->custom_aabb == AABB()) { + _multimesh_re_create_aabb(multimesh, data, multimesh->instances); + 
multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); + } } } @@ -2044,8 +2079,8 @@ void MeshStorage::_update_dirty_multimeshes() { uint32_t visible_instances = multimesh->visible_instances >= 0 ? multimesh->visible_instances : multimesh->instances; if (multimesh->data_cache_used_dirty_regions) { - uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1; - uint32_t visible_region_count = visible_instances == 0 ? 0 : (visible_instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1; + uint32_t data_cache_dirty_region_count = Math::division_round_up(multimesh->instances, (int)MULTIMESH_DIRTY_REGION_SIZE); + uint32_t visible_region_count = visible_instances == 0 ? 0 : Math::division_round_up(visible_instances, (uint32_t)MULTIMESH_DIRTY_REGION_SIZE); GLint region_size = multimesh->stride_cache * MULTIMESH_DIRTY_REGION_SIZE * sizeof(float); @@ -2077,9 +2112,11 @@ void MeshStorage::_update_dirty_multimeshes() { } if (multimesh->aabb_dirty && multimesh->mesh.is_valid()) { - _multimesh_re_create_aabb(multimesh, data, visible_instances); multimesh->aabb_dirty = false; - multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); + if (multimesh->custom_aabb == AABB()) { + _multimesh_re_create_aabb(multimesh, data, visible_instances); + multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); + } } } diff --git a/drivers/gles3/storage/mesh_storage.h b/drivers/gles3/storage/mesh_storage.h index 217c4dabf0..d246e7725c 100644 --- a/drivers/gles3/storage/mesh_storage.h +++ b/drivers/gles3/storage/mesh_storage.h @@ -142,6 +142,8 @@ struct Mesh { RID shadow_mesh; HashSet<Mesh *> shadow_owners; + String path; + Dependency dependency; }; @@ -187,6 +189,7 @@ struct MultiMesh { bool uses_custom_data = false; int visible_instances = -1; AABB aabb; + AABB custom_aabb; bool aabb_dirty = false; bool buffer_set = false; uint32_t stride_cache = 0; @@ -304,8 +307,11 @@ public: virtual void 
mesh_set_custom_aabb(RID p_mesh, const AABB &p_aabb) override; virtual AABB mesh_get_custom_aabb(RID p_mesh) const override; - virtual AABB mesh_get_aabb(RID p_mesh, RID p_skeleton = RID()) override; + + virtual void mesh_set_path(RID p_mesh, const String &p_path) override; + virtual String mesh_get_path(RID p_mesh) const override; + virtual void mesh_set_shadow_mesh(RID p_mesh, RID p_shadow_mesh) override; virtual void mesh_clear(RID p_mesh) override; @@ -500,6 +506,8 @@ public: virtual void multimesh_instance_set_custom_data(RID p_multimesh, int p_index, const Color &p_color) override; virtual RID multimesh_get_mesh(RID p_multimesh) const override; + virtual void multimesh_set_custom_aabb(RID p_multimesh, const AABB &p_aabb) override; + virtual AABB multimesh_get_custom_aabb(RID p_multimesh) const override; virtual AABB multimesh_get_aabb(RID p_multimesh) const override; virtual Transform3D multimesh_instance_get_transform(RID p_multimesh, int p_index) const override; diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.h b/drivers/gles3/storage/render_scene_buffers_gles3.h index 7a6811bf3c..39aa1fb973 100644 --- a/drivers/gles3/storage/render_scene_buffers_gles3.h +++ b/drivers/gles3/storage/render_scene_buffers_gles3.h @@ -108,7 +108,7 @@ public: void free_render_buffer_data(); - void check_backbuffer(bool p_need_color, bool p_need_depth); // check if we need to initialise our backbuffer + void check_backbuffer(bool p_need_color, bool p_need_depth); // Check if we need to initialize our backbuffer. 
GLuint get_render_fbo(); GLuint get_msaa3d_fbo() const { return msaa3d.fbo; } diff --git a/drivers/gles3/storage/texture_storage.cpp b/drivers/gles3/storage/texture_storage.cpp index 4e34fbcf0a..bd4793f4dc 100644 --- a/drivers/gles3/storage/texture_storage.cpp +++ b/drivers/gles3/storage/texture_storage.cpp @@ -77,19 +77,24 @@ TextureStorage::TextureStorage() { default_gl_textures[DEFAULT_GL_TEXTURE_2D_ARRAY_WHITE] = texture_allocate(); texture_2d_layered_initialize(default_gl_textures[DEFAULT_GL_TEXTURE_2D_ARRAY_WHITE], images, RS::TEXTURE_LAYERED_2D_ARRAY); - for (int i = 0; i < 3; i++) { + for (int i = 0; i < 5; i++) { images.push_back(image); } - default_gl_textures[DEFAULT_GL_TEXTURE_3D_WHITE] = texture_allocate(); - texture_3d_initialize(default_gl_textures[DEFAULT_GL_TEXTURE_3D_WHITE], image->get_format(), 4, 4, 4, false, images); + default_gl_textures[DEFAULT_GL_TEXTURE_CUBEMAP_WHITE] = texture_allocate(); + texture_2d_layered_initialize(default_gl_textures[DEFAULT_GL_TEXTURE_CUBEMAP_WHITE], images, RS::TEXTURE_LAYERED_CUBEMAP); + } + + { + Ref<Image> image = Image::create_empty(4, 4, false, Image::FORMAT_RGBA8); + image->fill(Color(1, 1, 1, 1)); - for (int i = 0; i < 2; i++) { + Vector<Ref<Image>> images; + for (int i = 0; i < 4; i++) { images.push_back(image); } - - default_gl_textures[DEFAULT_GL_TEXTURE_CUBEMAP_WHITE] = texture_allocate(); - texture_2d_layered_initialize(default_gl_textures[DEFAULT_GL_TEXTURE_CUBEMAP_WHITE], images, RS::TEXTURE_LAYERED_CUBEMAP); + default_gl_textures[DEFAULT_GL_TEXTURE_3D_WHITE] = texture_allocate(); + texture_3d_initialize(default_gl_textures[DEFAULT_GL_TEXTURE_3D_WHITE], image->get_format(), 4, 4, 4, false, images); } { // black @@ -101,19 +106,23 @@ TextureStorage::TextureStorage() { texture_2d_initialize(default_gl_textures[DEFAULT_GL_TEXTURE_BLACK], image); Vector<Ref<Image>> images; - - for (int i = 0; i < 4; i++) { + for (int i = 0; i < 6; i++) { images.push_back(image); } + 
default_gl_textures[DEFAULT_GL_TEXTURE_CUBEMAP_BLACK] = texture_allocate(); + texture_2d_layered_initialize(default_gl_textures[DEFAULT_GL_TEXTURE_CUBEMAP_BLACK], images, RS::TEXTURE_LAYERED_CUBEMAP); + } - default_gl_textures[DEFAULT_GL_TEXTURE_3D_BLACK] = texture_allocate(); - texture_3d_initialize(default_gl_textures[DEFAULT_GL_TEXTURE_3D_BLACK], image->get_format(), 4, 4, 4, false, images); + { + Ref<Image> image = Image::create_empty(4, 4, false, Image::FORMAT_RGBA8); + image->fill(Color()); - for (int i = 0; i < 2; i++) { + Vector<Ref<Image>> images; + for (int i = 0; i < 4; i++) { images.push_back(image); } - default_gl_textures[DEFAULT_GL_TEXTURE_CUBEMAP_BLACK] = texture_allocate(); - texture_2d_layered_initialize(default_gl_textures[DEFAULT_GL_TEXTURE_CUBEMAP_BLACK], images, RS::TEXTURE_LAYERED_CUBEMAP); + default_gl_textures[DEFAULT_GL_TEXTURE_3D_BLACK] = texture_allocate(); + texture_3d_initialize(default_gl_textures[DEFAULT_GL_TEXTURE_3D_BLACK], image->get_format(), 4, 4, 4, false, images); } { // transparent black @@ -766,7 +775,7 @@ void TextureStorage::texture_2d_layered_initialize(RID p_texture, const Vector<R ERR_FAIL_COND(p_layered_type == RS::TEXTURE_LAYERED_CUBEMAP && p_layers.size() != 6); ERR_FAIL_COND_MSG(p_layered_type == RS::TEXTURE_LAYERED_CUBEMAP_ARRAY, "Cubemap Arrays are not supported in the GL Compatibility backend."); - Ref<Image> image = p_layers[0]; + const Ref<Image> &image = p_layers[0]; { int valid_width = 0; int valid_height = 0; @@ -812,8 +821,42 @@ void TextureStorage::texture_2d_layered_initialize(RID p_texture, const Vector<R } } -void TextureStorage::texture_3d_initialize(RID p_texture, Image::Format, int p_width, int p_height, int p_depth, bool p_mipmaps, const Vector<Ref<Image>> &p_data) { - texture_owner.initialize_rid(p_texture, Texture()); +void TextureStorage::texture_3d_initialize(RID p_texture, Image::Format p_format, int p_width, int p_height, int p_depth, bool p_mipmaps, const Vector<Ref<Image>> &p_data) { + 
ERR_FAIL_COND(p_data.is_empty()); + + Image::Image3DValidateError verr = Image::validate_3d_image(p_format, p_width, p_height, p_depth, p_mipmaps, p_data); + ERR_FAIL_COND_MSG(verr != Image::VALIDATE_3D_OK, Image::get_3d_image_validation_error_text(verr)); + + Ref<Image> image = p_data[0]; + int mipmap_count = 0; + { + Size2i prev_size; + for (int i = 0; i < p_data.size(); i++) { + Size2i img_size(p_data[i]->get_width(), p_data[i]->get_height()); + if (img_size != prev_size) { + mipmap_count++; + } + prev_size = img_size; + } + } + + Texture texture; + texture.width = p_width; + texture.height = p_height; + texture.depth = p_depth; + texture.alloc_width = texture.width; + texture.alloc_height = texture.height; + texture.mipmaps = mipmap_count; + texture.format = image->get_format(); + texture.type = Texture::TYPE_3D; + texture.target = GL_TEXTURE_3D; + _get_gl_image_and_format(Ref<Image>(), texture.format, texture.real_format, texture.gl_format_cache, texture.gl_internal_format_cache, texture.gl_type_cache, texture.compressed, false); + texture.total_data_size = p_data[0]->get_image_data_size(texture.width, texture.height, texture.format, texture.mipmaps) * texture.depth; + texture.active = true; + glGenTextures(1, &texture.tex_id); + GLES3::Utilities::get_singleton()->texture_allocated_data(texture.tex_id, texture.total_data_size, "Texture 3D"); + texture_owner.initialize_rid(p_texture, texture); + _texture_set_3d_data(p_texture, p_data, true); } // Called internally when texture_proxy_create(p_base) is called. 
@@ -872,6 +915,19 @@ void TextureStorage::texture_2d_update(RID p_texture, const Ref<Image> &p_image, #endif } +void TextureStorage::texture_3d_update(RID p_texture, const Vector<Ref<Image>> &p_data) { + Texture *tex = texture_owner.get_or_null(p_texture); + ERR_FAIL_NULL(tex); + ERR_FAIL_COND(tex->type != Texture::TYPE_3D); + + Image::Image3DValidateError verr = Image::validate_3d_image(tex->format, tex->width, tex->height, tex->depth, tex->mipmaps > 1, p_data); + ERR_FAIL_COND_MSG(verr != Image::VALIDATE_3D_OK, Image::get_3d_image_validation_error_text(verr)); + + _texture_set_3d_data(p_texture, p_data, false); + + GLES3::Utilities::get_singleton()->texture_resize_data(tex->tex_id, tex->total_data_size); +} + void TextureStorage::texture_proxy_update(RID p_texture, RID p_proxy_to) { Texture *tex = texture_owner.get_or_null(p_texture); ERR_FAIL_NULL(tex); @@ -984,7 +1040,7 @@ Ref<Image> TextureStorage::texture_2d_get(RID p_texture) const { data.resize(data_size); - ERR_FAIL_COND_V(data.size() == 0, Ref<Image>()); + ERR_FAIL_COND_V(data.is_empty(), Ref<Image>()); image = Image::create_from_data(texture->width, texture->height, texture->mipmaps > 1, texture->real_format, data); ERR_FAIL_COND_V(image->is_empty(), Ref<Image>()); if (texture->format != texture->real_format) { @@ -1040,7 +1096,7 @@ Ref<Image> TextureStorage::texture_2d_get(RID p_texture) const { data.resize(data_size); - ERR_FAIL_COND_V(data.size() == 0, Ref<Image>()); + ERR_FAIL_COND_V(data.is_empty(), Ref<Image>()); image = Image::create_from_data(texture->width, texture->height, false, Image::FORMAT_RGBA8, data); ERR_FAIL_COND_V(image->is_empty(), Ref<Image>()); @@ -1063,6 +1119,165 @@ Ref<Image> TextureStorage::texture_2d_get(RID p_texture) const { return image; } +Ref<Image> TextureStorage::texture_2d_layer_get(RID p_texture, int p_layer) const { + Texture *texture = texture_owner.get_or_null(p_texture); + ERR_FAIL_NULL_V(texture, Ref<Image>()); + + Vector<uint8_t> data; + + int data_size = 
Image::get_image_data_size(texture->alloc_width, texture->alloc_height, Image::FORMAT_RGBA8, false); + + data.resize(data_size * 2); //add some memory at the end, just in case for buggy drivers + uint8_t *w = data.ptrw(); + + GLuint temp_framebuffer; + glGenFramebuffers(1, &temp_framebuffer); + + GLuint temp_color_texture; + glGenTextures(1, &temp_color_texture); + + glBindFramebuffer(GL_FRAMEBUFFER, temp_framebuffer); + + glBindTexture(GL_TEXTURE_2D, temp_color_texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texture->alloc_width, texture->alloc_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, temp_color_texture, 0); + + glDepthMask(GL_FALSE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_CULL_FACE); + glDisable(GL_BLEND); + glDepthFunc(GL_LEQUAL); + glColorMask(1, 1, 1, 1); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D_ARRAY, texture->tex_id); + + glViewport(0, 0, texture->alloc_width, texture->alloc_height); + glClearColor(0.0, 0.0, 0.0, 0.0); + glClear(GL_COLOR_BUFFER_BIT); + + CopyEffects::get_singleton()->copy_to_rect_3d(Rect2i(0, 0, 1, 1), p_layer, Texture::TYPE_LAYERED); + + glReadPixels(0, 0, texture->alloc_width, texture->alloc_height, GL_RGBA, GL_UNSIGNED_BYTE, &w[0]); + + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glDeleteTextures(1, &temp_color_texture); + glDeleteFramebuffers(1, &temp_framebuffer); + + data.resize(data_size); + + ERR_FAIL_COND_V(data.is_empty(), Ref<Image>()); + Ref<Image> image = Image::create_from_data(texture->width, texture->height, false, Image::FORMAT_RGBA8, data); + ERR_FAIL_COND_V(image->is_empty(), Ref<Image>()); + + if (texture->format != Image::FORMAT_RGBA8) { + image->convert(texture->format); + } + + if (texture->mipmaps > 1) { + image->generate_mipmaps(); + } + + return image; +} + 
+Vector<Ref<Image>> TextureStorage::_texture_3d_read_framebuffer(Texture *p_texture) const { + ERR_FAIL_NULL_V(p_texture, Vector<Ref<Image>>()); + + Vector<Ref<Image>> ret; + Vector<uint8_t> data; + + int width = p_texture->width; + int height = p_texture->height; + int depth = p_texture->depth; + + for (int mipmap_level = 0; mipmap_level < p_texture->mipmaps; mipmap_level++) { + int data_size = Image::get_image_data_size(width, height, Image::FORMAT_RGBA8, false); + glViewport(0, 0, width, height); + glClearColor(0.0, 0.0, 0.0, 0.0); + glClear(GL_COLOR_BUFFER_BIT); + + for (int layer = 0; layer < depth; layer++) { + data.resize(data_size * 2); //add some memory at the end, just in case for buggy drivers + uint8_t *w = data.ptrw(); + + float layer_f = layer / float(depth); + CopyEffects::get_singleton()->copy_to_rect_3d(Rect2i(0, 0, 1, 1), layer_f, Texture::TYPE_3D, mipmap_level); + glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, &w[0]); + + data.resize(data_size); + ERR_FAIL_COND_V(data.is_empty(), Vector<Ref<Image>>()); + + Ref<Image> img = Image::create_from_data(width, height, false, Image::FORMAT_RGBA8, data); + ERR_FAIL_COND_V(img->is_empty(), Vector<Ref<Image>>()); + + if (p_texture->format != Image::FORMAT_RGBA8) { + img->convert(p_texture->format); + } + + ret.push_back(img); + } + + width = MAX(1, width >> 1); + height = MAX(1, height >> 1); + depth = MAX(1, depth >> 1); + } + + return ret; +} + +Vector<Ref<Image>> TextureStorage::texture_3d_get(RID p_texture) const { + Texture *texture = texture_owner.get_or_null(p_texture); + ERR_FAIL_NULL_V(texture, Vector<Ref<Image>>()); + ERR_FAIL_COND_V(texture->type != Texture::TYPE_3D, Vector<Ref<Image>>()); + +#ifdef TOOLS_ENABLED + if (!texture->image_cache_3d.is_empty() && !texture->is_render_target) { + return texture->image_cache_3d; + } +#endif + + GLuint temp_framebuffer; + glGenFramebuffers(1, &temp_framebuffer); + + GLuint temp_color_texture; + glGenTextures(1, &temp_color_texture); + + 
glBindFramebuffer(GL_FRAMEBUFFER, temp_framebuffer); + + glBindTexture(GL_TEXTURE_2D, temp_color_texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texture->alloc_width, texture->alloc_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, temp_color_texture, 0); + + glDepthMask(GL_FALSE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_CULL_FACE); + glDisable(GL_BLEND); + glDepthFunc(GL_LEQUAL); + glColorMask(1, 1, 1, 1); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_3D, texture->tex_id); + + Vector<Ref<Image>> ret = _texture_3d_read_framebuffer(texture); + + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glDeleteTextures(1, &temp_color_texture); + glDeleteFramebuffers(1, &temp_framebuffer); + +#ifdef TOOLS_ENABLED + if (Engine::get_singleton()->is_editor_hint() && !texture->is_render_target) { + texture->image_cache_3d = ret; + } +#endif + + return ret; +} + void TextureStorage::texture_replace(RID p_texture, RID p_by_texture) { Texture *tex_to = texture_owner.get_or_null(p_texture); ERR_FAIL_NULL(tex_to); @@ -1218,7 +1433,7 @@ void TextureStorage::texture_set_data(RID p_texture, const Ref<Image> &p_image, _texture_set_data(p_texture, p_image, p_layer, false); } -void TextureStorage::_texture_set_data(RID p_texture, const Ref<Image> &p_image, int p_layer, bool initialize) { +void TextureStorage::_texture_set_data(RID p_texture, const Ref<Image> &p_image, int p_layer, bool p_initialize) { Texture *texture = texture_owner.get_or_null(p_texture); ERR_FAIL_NULL(texture); @@ -1259,56 +1474,7 @@ void TextureStorage::_texture_set_data(RID p_texture, const Ref<Image> &p_image, glActiveTexture(GL_TEXTURE0); glBindTexture(texture->target, texture->tex_id); - -#ifndef WEB_ENABLED - switch (texture->format) { - case Image::FORMAT_L8: { - if 
(RasterizerGLES3::is_gles_over_gl()) { - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_R, GL_RED); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_G, GL_RED); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_B, GL_RED); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_A, GL_ONE); - } else { - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_R, GL_RED); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_G, GL_GREEN); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_B, GL_BLUE); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_A, GL_ALPHA); - } - } break; - case Image::FORMAT_LA8: { - if (RasterizerGLES3::is_gles_over_gl()) { - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_R, GL_RED); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_G, GL_RED); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_B, GL_RED); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_A, GL_GREEN); - } else { - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_R, GL_RED); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_G, GL_GREEN); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_B, GL_BLUE); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_A, GL_ALPHA); - } - } break; - case Image::FORMAT_ETC2_RA_AS_RG: - case Image::FORMAT_DXT5_RA_AS_RG: { - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_R, GL_RED); - if (texture->format == real_format) { - // Swizzle RA from compressed texture into RG - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_G, GL_ALPHA); - } else { - // Converted textures are already in RG, leave as-is - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_G, GL_GREEN); - } - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_B, GL_ZERO); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_A, GL_ONE); - } break; - default: { - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_R, GL_RED); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_G, GL_GREEN); - glTexParameteri(texture->target, 
GL_TEXTURE_SWIZZLE_B, GL_BLUE); - glTexParameteri(texture->target, GL_TEXTURE_SWIZZLE_A, GL_ALPHA); - } break; - } -#endif // WEB_ENABLED + _texture_set_swizzle(texture, real_format); int mipmaps = img->has_mipmaps() ? img->get_mipmap_count() + 1 : 1; @@ -1340,7 +1506,7 @@ void TextureStorage::_texture_set_data(RID p_texture, const Ref<Image> &p_image, } else { glPixelStorei(GL_UNPACK_ALIGNMENT, 1); if (texture->target == GL_TEXTURE_2D_ARRAY) { - if (initialize) { + if (p_initialize) { glTexImage3D(GL_TEXTURE_2D_ARRAY, i, internal_format, w, h, texture->layers, 0, format, type, nullptr); } glTexSubImage3D(GL_TEXTURE_2D_ARRAY, i, 0, 0, p_layer, w, h, 1, format, type, &read[ofs]); @@ -1362,6 +1528,140 @@ void TextureStorage::_texture_set_data(RID p_texture, const Ref<Image> &p_image, texture->mipmaps = mipmaps; } +void TextureStorage::_texture_set_3d_data(RID p_texture, const Vector<Ref<Image>> &p_data, bool p_initialize) { + Texture *texture = texture_owner.get_or_null(p_texture); + + ERR_FAIL_NULL(texture); + ERR_FAIL_COND(!texture->active); + ERR_FAIL_COND(texture->is_render_target); + ERR_FAIL_COND(texture->target != GL_TEXTURE_3D); + ERR_FAIL_COND(p_data.is_empty()); + + GLenum type; + GLenum format; + GLenum internal_format; + bool compressed = false; + + Image::Format real_format; + Ref<Image> img = _get_gl_image_and_format(p_data[0], p_data[0]->get_format(), real_format, format, internal_format, type, compressed, texture->resize_to_po2); + ERR_FAIL_COND(img.is_null()); + + ERR_FAIL_COND_MSG(compressed, "Compressed 3D textures are not supported in the GL Compatibility backend."); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(texture->target, texture->tex_id); + _texture_set_swizzle(texture, texture->real_format); + + // Set filtering and repeat state to default. 
+ if (texture->mipmaps > 1) { + texture->gl_set_filter(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST_WITH_MIPMAPS); + } else { + texture->gl_set_filter(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST); + } + + texture->gl_set_repeat(RS::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED); + + Vector<Ref<Image>> images; + images.resize(p_data.size()); + for (int i = 0; i < p_data.size(); i++) { + Ref<Image> image = p_data[i]; + if (image->get_format() != texture->format) { + image = image->duplicate(); + image->convert(texture->format); + } + images.write[i] = image; + } + + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + + int all_data_size = 0; + int mipmap_level = 0; + int layer = 0; + int depth = texture->depth; + Size2i prev_size(images[0]->get_width(), images[0]->get_height()); + for (int i = 0; i < images.size(); i++) { + Ref<Image> image = images[i]; + Size2i img_size(image->get_width(), image->get_height()); + + if (img_size != prev_size) { + mipmap_level++; + depth = MAX(1, depth >> 1); + layer = 0; + } + prev_size = img_size; + all_data_size += image->get_data().size(); + + if (layer == 0 && p_initialize) { + glTexImage3D(GL_TEXTURE_3D, mipmap_level, internal_format, img_size.width, img_size.height, depth, 0, format, type, nullptr); + } + + glTexSubImage3D(GL_TEXTURE_3D, mipmap_level, 0, 0, layer, img_size.width, img_size.height, 1, format, type, image->get_data().ptr()); + + layer++; + } + + texture->total_data_size = all_data_size; + texture->mipmaps = mipmap_level + 1; + +#ifdef TOOLS_ENABLED + if (Engine::get_singleton()->is_editor_hint() && !texture->is_render_target) { + texture->image_cache_3d = images; + } +#endif +} + +void TextureStorage::_texture_set_swizzle(Texture *p_texture, Image::Format p_real_format) { +#ifndef WEB_ENABLED + switch (p_texture->format) { + case Image::FORMAT_L8: { + if (RasterizerGLES3::is_gles_over_gl()) { + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_R, GL_RED); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_G, GL_RED); + 
glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_B, GL_RED); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_A, GL_ONE); + } else { + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_R, GL_RED); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_G, GL_GREEN); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_B, GL_BLUE); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_A, GL_ALPHA); + } + } break; + case Image::FORMAT_LA8: { + if (RasterizerGLES3::is_gles_over_gl()) { + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_R, GL_RED); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_G, GL_RED); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_B, GL_RED); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_A, GL_GREEN); + } else { + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_R, GL_RED); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_G, GL_GREEN); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_B, GL_BLUE); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_A, GL_ALPHA); + } + } break; + case Image::FORMAT_ETC2_RA_AS_RG: + case Image::FORMAT_DXT5_RA_AS_RG: { + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_R, GL_RED); + if (p_texture->format == p_real_format) { + // Swizzle RA from compressed texture into RG. + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_G, GL_ALPHA); + } else { + // Converted textures are already in RG, leave as-is. 
+ glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_G, GL_GREEN); + } + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_B, GL_ZERO); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_A, GL_ONE); + } break; + default: { + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_R, GL_RED); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_G, GL_GREEN); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_B, GL_BLUE); + glTexParameteri(p_texture->target, GL_TEXTURE_SWIZZLE_A, GL_ALPHA); + } break; + } +#endif // WEB_ENABLED +} + Image::Format TextureStorage::texture_get_format(RID p_texture) const { Texture *texture = texture_owner.get_or_null(p_texture); @@ -1475,7 +1775,7 @@ void TextureStorage::update_texture_atlas() { //generate atlas Vector<TextureAtlas::SortItem> itemsv; itemsv.resize(texture_atlas.textures.size()); - int base_size = 8; + uint32_t base_size = 8; int idx = 0; @@ -1488,7 +1788,7 @@ void TextureStorage::update_texture_atlas() { si.size.height = (src_tex->height / border) + 1; si.pixel_size = Size2i(src_tex->width, src_tex->height); - if (base_size < si.size.width) { + if (base_size < (uint32_t)si.size.width) { base_size = nearest_power_of_2_templated(si.size.width); } @@ -1519,7 +1819,7 @@ void TextureStorage::update_texture_atlas() { TextureAtlas::SortItem &si = items[i]; int best_idx = -1; int best_height = 0x7FFFFFFF; - for (int j = 0; j <= base_size - si.size.width; j++) { + for (uint32_t j = 0; j <= base_size - si.size.width; j++) { int height = 0; for (int k = 0; k < si.size.width; k++) { int h = v_offsets[k + j]; @@ -1550,7 +1850,7 @@ void TextureStorage::update_texture_atlas() { } } - if (max_height <= base_size * 2) { + if ((uint32_t)max_height <= base_size * 2) { atlas_height = max_height; break; //good ratio, break; } @@ -2647,7 +2947,10 @@ void TextureStorage::render_target_copy_to_back_buffer(RID p_render_target, cons glBindFramebuffer(GL_FRAMEBUFFER, rt->backbuffer_fbo); glActiveTexture(GL_TEXTURE0); 
glBindTexture(GL_TEXTURE_2D, rt->color); - GLES3::CopyEffects::get_singleton()->copy_screen(); + Rect2 normalized_region = region; + normalized_region.position = normalized_region.position / Size2(rt->size); + normalized_region.size = normalized_region.size / Size2(rt->size); + GLES3::CopyEffects::get_singleton()->copy_to_and_from_rect(normalized_region); if (p_gen_mipmaps) { GLES3::CopyEffects::get_singleton()->gaussian_blur(rt->backbuffer, rt->mipmap_count, region, rt->size); diff --git a/drivers/gles3/storage/texture_storage.h b/drivers/gles3/storage/texture_storage.h index a4e5eb260e..91bb676711 100644 --- a/drivers/gles3/storage/texture_storage.h +++ b/drivers/gles3/storage/texture_storage.h @@ -191,6 +191,7 @@ struct Texture { RenderTarget *render_target = nullptr; Ref<Image> image_cache_2d; + Vector<Ref<Image>> image_cache_3d; bool redraw_if_visible = false; @@ -403,8 +404,8 @@ private: RID_Owner<CanvasTexture, true> canvas_texture_owner; /* Texture API */ - - mutable RID_Owner<Texture> texture_owner; + // Textures can be created from threads, so this RID_Owner is thread safe. 
+ mutable RID_Owner<Texture, true> texture_owner; Ref<Image> _get_gl_image_and_format(const Ref<Image> &p_image, Image::Format p_format, Image::Format &r_real_format, GLenum &r_gl_format, GLenum &r_gl_internal_format, GLenum &r_gl_type, bool &r_compressed, bool p_force_decompress) const; @@ -451,7 +452,10 @@ private: void _render_target_clear_sdf(RenderTarget *rt); Rect2i _render_target_get_sdf_rect(const RenderTarget *rt) const; - void _texture_set_data(RID p_texture, const Ref<Image> &p_image, int p_layer, bool initialize); + void _texture_set_data(RID p_texture, const Ref<Image> &p_image, int p_layer, bool p_initialize); + void _texture_set_3d_data(RID p_texture, const Vector<Ref<Image>> &p_data, bool p_initialize); + void _texture_set_swizzle(Texture *p_texture, Image::Format p_real_format); + Vector<Ref<Image>> _texture_3d_read_framebuffer(Texture *p_texture) const; struct RenderTargetSDF { CanvasSdfShaderGLES3 shader; @@ -494,6 +498,10 @@ public: }; bool owns_texture(RID p_rid) { return texture_owner.owns(p_rid); }; + void texture_2d_initialize_from_texture(RID p_texture, Texture &p_tex) { + texture_owner.initialize_rid(p_texture, p_tex); + } + virtual bool can_create_resources_async() const override; virtual RID texture_allocate() override; @@ -507,7 +515,7 @@ public: RID texture_create_external(Texture::Type p_type, Image::Format p_format, unsigned int p_image, int p_width, int p_height, int p_depth, int p_layers, RS::TextureLayeredType p_layered_type = RS::TEXTURE_LAYERED_2D_ARRAY); virtual void texture_2d_update(RID p_texture, const Ref<Image> &p_image, int p_layer = 0) override; - virtual void texture_3d_update(RID p_texture, const Vector<Ref<Image>> &p_data) override{}; + virtual void texture_3d_update(RID p_texture, const Vector<Ref<Image>> &p_data) override; virtual void texture_proxy_update(RID p_proxy, RID p_base) override; //these two APIs can be used together or in combination with the others. 
@@ -516,8 +524,8 @@ public: virtual void texture_3d_placeholder_initialize(RID p_texture) override; virtual Ref<Image> texture_2d_get(RID p_texture) const override; - virtual Ref<Image> texture_2d_layer_get(RID p_texture, int p_layer) const override { return Ref<Image>(); }; - virtual Vector<Ref<Image>> texture_3d_get(RID p_texture) const override { return Vector<Ref<Image>>(); }; + virtual Ref<Image> texture_2d_layer_get(RID p_texture, int p_layer) const override; + virtual Vector<Ref<Image>> texture_3d_get(RID p_texture) const override; virtual void texture_replace(RID p_texture, RID p_by_texture) override; virtual void texture_set_size_override(RID p_texture, int p_width, int p_height) override; diff --git a/drivers/gles3/storage/utilities.cpp b/drivers/gles3/storage/utilities.cpp index 8a9e61c725..793b3f64f0 100644 --- a/drivers/gles3/storage/utilities.cpp +++ b/drivers/gles3/storage/utilities.cpp @@ -355,19 +355,16 @@ bool Utilities::has_os_feature(const String &p_feature) const { if (p_feature == "rgtc") { return config->rgtc_supported; } - if (p_feature == "s3tc") { return config->s3tc_supported; } - if (p_feature == "bptc") { return config->bptc_supported; } if (p_feature == "astc") { return config->astc_supported; } - - if (p_feature == "etc" || p_feature == "etc2") { + if (p_feature == "etc2") { return config->etc2_supported; } diff --git a/drivers/gles3/storage/utilities.h b/drivers/gles3/storage/utilities.h index ea7bf4a4c2..b9603b972e 100644 --- a/drivers/gles3/storage/utilities.h +++ b/drivers/gles3/storage/utilities.h @@ -111,6 +111,7 @@ public: } // Records that data was allocated for state tracking purposes. + // Size is measured in bytes. 
_FORCE_INLINE_ void texture_allocated_data(GLuint p_id, uint32_t p_size, String p_name = "") { texture_mem_cache += p_size; #ifdef DEV_ENABLED diff --git a/drivers/png/image_loader_png.cpp b/drivers/png/image_loader_png.cpp index cbcb54bc11..6f98f072dd 100644 --- a/drivers/png/image_loader_png.cpp +++ b/drivers/png/image_loader_png.cpp @@ -66,12 +66,14 @@ Ref<Image> ImageLoaderPNG::load_mem_png(const uint8_t *p_png, int p_size) { return img; } +Ref<Image> ImageLoaderPNG::unpack_mem_png(const uint8_t *p_png, int p_size) { + ERR_FAIL_COND_V(p_size < 4, Ref<Image>()); + ERR_FAIL_COND_V(p_png[0] != 'P' || p_png[1] != 'N' || p_png[2] != 'G' || p_png[3] != ' ', Ref<Image>()); + return load_mem_png(&p_png[4], p_size - 4); +} + Ref<Image> ImageLoaderPNG::lossless_unpack_png(const Vector<uint8_t> &p_data) { - const int len = p_data.size(); - ERR_FAIL_COND_V(len < 4, Ref<Image>()); - const uint8_t *r = p_data.ptr(); - ERR_FAIL_COND_V(r[0] != 'P' || r[1] != 'N' || r[2] != 'G' || r[3] != ' ', Ref<Image>()); - return load_mem_png(&r[4], len - 4); + return unpack_mem_png(p_data.ptr(), p_data.size()); } Vector<uint8_t> ImageLoaderPNG::lossless_pack_png(const Ref<Image> &p_image) { @@ -99,6 +101,7 @@ Vector<uint8_t> ImageLoaderPNG::lossless_pack_png(const Ref<Image> &p_image) { ImageLoaderPNG::ImageLoaderPNG() { Image::_png_mem_loader_func = load_mem_png; + Image::_png_mem_unpacker_func = unpack_mem_png; Image::png_unpacker = lossless_unpack_png; Image::png_packer = lossless_pack_png; } diff --git a/drivers/png/image_loader_png.h b/drivers/png/image_loader_png.h index d587672dd1..ecce9a405b 100644 --- a/drivers/png/image_loader_png.h +++ b/drivers/png/image_loader_png.h @@ -37,6 +37,7 @@ class ImageLoaderPNG : public ImageFormatLoader { private: static Vector<uint8_t> lossless_pack_png(const Ref<Image> &p_image); static Ref<Image> lossless_unpack_png(const Vector<uint8_t> &p_data); + static Ref<Image> unpack_mem_png(const uint8_t *p_png, int p_size); static Ref<Image> 
load_mem_png(const uint8_t *p_png, int p_size); public: diff --git a/drivers/unix/file_access_unix.cpp b/drivers/unix/file_access_unix.cpp index ca5a13799e..d1e4d207e7 100644 --- a/drivers/unix/file_access_unix.cpp +++ b/drivers/unix/file_access_unix.cpp @@ -228,6 +228,51 @@ uint8_t FileAccessUnix::get_8() const { return b; } +uint16_t FileAccessUnix::get_16() const { + ERR_FAIL_NULL_V_MSG(f, 0, "File must be opened before use."); + + uint16_t b = 0; + if (fread(&b, 1, 2, f) != 2) { + check_errors(); + } + + if (big_endian) { + b = BSWAP16(b); + } + + return b; +} + +uint32_t FileAccessUnix::get_32() const { + ERR_FAIL_NULL_V_MSG(f, 0, "File must be opened before use."); + + uint32_t b = 0; + if (fread(&b, 1, 4, f) != 4) { + check_errors(); + } + + if (big_endian) { + b = BSWAP32(b); + } + + return b; +} + +uint64_t FileAccessUnix::get_64() const { + ERR_FAIL_NULL_V_MSG(f, 0, "File must be opened before use."); + + uint64_t b = 0; + if (fread(&b, 1, 8, f) != 8) { + check_errors(); + } + + if (big_endian) { + b = BSWAP64(b); + } + + return b; +} + uint64_t FileAccessUnix::get_buffer(uint8_t *p_dst, uint64_t p_length) const { ERR_FAIL_COND_V(!p_dst && p_length > 0, -1); ERR_FAIL_NULL_V_MSG(f, -1, "File must be opened before use."); @@ -251,6 +296,36 @@ void FileAccessUnix::store_8(uint8_t p_dest) { ERR_FAIL_COND(fwrite(&p_dest, 1, 1, f) != 1); } +void FileAccessUnix::store_16(uint16_t p_dest) { + ERR_FAIL_NULL_MSG(f, "File must be opened before use."); + + if (big_endian) { + p_dest = BSWAP16(p_dest); + } + + ERR_FAIL_COND(fwrite(&p_dest, 1, 2, f) != 2); +} + +void FileAccessUnix::store_32(uint32_t p_dest) { + ERR_FAIL_NULL_MSG(f, "File must be opened before use."); + + if (big_endian) { + p_dest = BSWAP32(p_dest); + } + + ERR_FAIL_COND(fwrite(&p_dest, 1, 4, f) != 4); +} + +void FileAccessUnix::store_64(uint64_t p_dest) { + ERR_FAIL_NULL_MSG(f, "File must be opened before use."); + + if (big_endian) { + p_dest = BSWAP64(p_dest); + } + + ERR_FAIL_COND(fwrite(&p_dest, 
1, 8, f) != 8); +} + void FileAccessUnix::store_buffer(const uint8_t *p_src, uint64_t p_length) { ERR_FAIL_NULL_MSG(f, "File must be opened before use."); ERR_FAIL_COND(!p_src && p_length > 0); diff --git a/drivers/unix/file_access_unix.h b/drivers/unix/file_access_unix.h index 2bfac27c4f..553fbcf355 100644 --- a/drivers/unix/file_access_unix.h +++ b/drivers/unix/file_access_unix.h @@ -68,12 +68,18 @@ public: virtual bool eof_reached() const override; ///< reading passed EOF virtual uint8_t get_8() const override; ///< get a byte + virtual uint16_t get_16() const override; + virtual uint32_t get_32() const override; + virtual uint64_t get_64() const override; virtual uint64_t get_buffer(uint8_t *p_dst, uint64_t p_length) const override; virtual Error get_error() const override; ///< get last error virtual void flush() override; virtual void store_8(uint8_t p_dest) override; ///< store a byte + virtual void store_16(uint16_t p_dest) override; + virtual void store_32(uint32_t p_dest) override; + virtual void store_64(uint64_t p_dest) override; virtual void store_buffer(const uint8_t *p_src, uint64_t p_length) override; ///< store an array of bytes virtual bool file_exists(const String &p_path) override; ///< return true if a file exists diff --git a/drivers/unix/net_socket_posix.cpp b/drivers/unix/net_socket_posix.cpp index a8074aa3f6..1e52b39be1 100644 --- a/drivers/unix/net_socket_posix.cpp +++ b/drivers/unix/net_socket_posix.cpp @@ -787,11 +787,11 @@ Ref<NetSocket> NetSocketPosix::accept(IPAddress &r_ip, uint16_t &r_port) { return Ref<NetSocket>(ns); } -Error NetSocketPosix::join_multicast_group(const IPAddress &p_multi_address, String p_if_name) { +Error NetSocketPosix::join_multicast_group(const IPAddress &p_multi_address, const String &p_if_name) { return _change_multicast_group(p_multi_address, p_if_name, true); } -Error NetSocketPosix::leave_multicast_group(const IPAddress &p_multi_address, String p_if_name) { +Error 
NetSocketPosix::leave_multicast_group(const IPAddress &p_multi_address, const String &p_if_name) { return _change_multicast_group(p_multi_address, p_if_name, false); } diff --git a/drivers/unix/net_socket_posix.h b/drivers/unix/net_socket_posix.h index 2682530e15..aa59ff36ee 100644 --- a/drivers/unix/net_socket_posix.h +++ b/drivers/unix/net_socket_posix.h @@ -98,8 +98,8 @@ public: virtual void set_tcp_no_delay_enabled(bool p_enabled); virtual void set_reuse_address_enabled(bool p_enabled); virtual void set_reuse_port_enabled(bool p_enabled); - virtual Error join_multicast_group(const IPAddress &p_multi_address, String p_if_name); - virtual Error leave_multicast_group(const IPAddress &p_multi_address, String p_if_name); + virtual Error join_multicast_group(const IPAddress &p_multi_address, const String &p_if_name); + virtual Error leave_multicast_group(const IPAddress &p_multi_address, const String &p_if_name); NetSocketPosix(); ~NetSocketPosix(); diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp index 9a77930d75..83a332dea9 100644 --- a/drivers/unix/os_unix.cpp +++ b/drivers/unix/os_unix.cpp @@ -153,7 +153,9 @@ int OS_Unix::unix_initialize_audio(int p_audio_driver) { } void OS_Unix::initialize_core() { +#ifdef THREADS_ENABLED init_thread_posix(); +#endif FileAccess::make_default<FileAccessUnix>(FileAccess::ACCESS_RESOURCES); FileAccess::make_default<FileAccessUnix>(FileAccess::ACCESS_USERDATA); @@ -637,7 +639,7 @@ String OS_Unix::get_locale() const { return locale; } -Error OS_Unix::open_dynamic_library(const String p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path) { +Error OS_Unix::open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path) { String path = p_path; if (FileAccess::exists(path) && path.is_relative_path()) { @@ -656,6 +658,8 @@ Error OS_Unix::open_dynamic_library(const String p_path, void *&p_library_handle path = 
get_executable_path().get_base_dir().path_join("../lib").path_join(p_path.get_file()); } + ERR_FAIL_COND_V(!FileAccess::exists(path), ERR_FILE_NOT_FOUND); + p_library_handle = dlopen(path.utf8().get_data(), GODOT_DLOPEN_MODE); ERR_FAIL_NULL_V_MSG(p_library_handle, ERR_CANT_OPEN, vformat("Can't open dynamic library: %s. Error: %s.", p_path, dlerror())); @@ -673,7 +677,7 @@ Error OS_Unix::close_dynamic_library(void *p_library_handle) { return OK; } -Error OS_Unix::get_dynamic_library_symbol_handle(void *p_library_handle, const String p_name, void *&p_symbol_handle, bool p_optional) { +Error OS_Unix::get_dynamic_library_symbol_handle(void *p_library_handle, const String &p_name, void *&p_symbol_handle, bool p_optional) { const char *error; dlerror(); // Clear existing errors @@ -751,12 +755,27 @@ String OS_Unix::get_executable_path() const { return OS::get_executable_path(); } return b; -#elif defined(__OpenBSD__) || defined(__NetBSD__) +#elif defined(__OpenBSD__) char resolved_path[MAXPATHLEN]; realpath(OS::get_executable_path().utf8().get_data(), resolved_path); return String(resolved_path); +#elif defined(__NetBSD__) + int mib[4] = { CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME }; + char buf[MAXPATHLEN]; + size_t len = sizeof(buf); + if (sysctl(mib, 4, buf, &len, nullptr, 0) != 0) { + WARN_PRINT("Couldn't get executable path from sysctl"); + return OS::get_executable_path(); + } + + // NetBSD does not always return a normalized path. For example if argv[0] is "./a.out" then executable path is "/home/netbsd/./a.out". 
Normalize with realpath: + char resolved_path[MAXPATHLEN]; + + realpath(buf, resolved_path); + + return String(resolved_path); #elif defined(__FreeBSD__) int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 }; char buf[MAXPATHLEN]; diff --git a/drivers/unix/os_unix.h b/drivers/unix/os_unix.h index 86b0e38e92..d3393c98ec 100644 --- a/drivers/unix/os_unix.h +++ b/drivers/unix/os_unix.h @@ -55,9 +55,9 @@ public: virtual Error get_entropy(uint8_t *r_buffer, int p_bytes) override; - virtual Error open_dynamic_library(const String p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr) override; + virtual Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr) override; virtual Error close_dynamic_library(void *p_library_handle) override; - virtual Error get_dynamic_library_symbol_handle(void *p_library_handle, const String p_name, void *&p_symbol_handle, bool p_optional = false) override; + virtual Error get_dynamic_library_symbol_handle(void *p_library_handle, const String &p_name, void *&p_symbol_handle, bool p_optional = false) override; virtual Error set_cwd(const String &p_cwd) override; diff --git a/drivers/vulkan/SCsub b/drivers/vulkan/SCsub index 5e4bc986b8..80d5f35305 100644 --- a/drivers/vulkan/SCsub +++ b/drivers/vulkan/SCsub @@ -2,8 +2,6 @@ Import("env") -env.Append(CPPDEFINES=["RD_ENABLED"]) - thirdparty_obj = [] thirdparty_dir = "#thirdparty/vulkan" thirdparty_volk_dir = "#thirdparty/volk" @@ -19,8 +17,11 @@ if env["platform"] == "android": env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_ANDROID_KHR"]) elif env["platform"] == "ios": env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_IOS_MVK"]) -elif env["platform"] == "linuxbsd" and env["x11"]: - env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_XLIB_KHR"]) +elif env["platform"] == "linuxbsd": + if env["x11"]: + env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_XLIB_KHR"]) + 
if env["wayland"]: + env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_WAYLAND_KHR"]) elif env["platform"] == "macos": env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_MACOS_MVK"]) elif env["platform"] == "windows": diff --git a/drivers/vulkan/rendering_context_driver_vulkan.cpp b/drivers/vulkan/rendering_context_driver_vulkan.cpp new file mode 100644 index 0000000000..6eb25743f9 --- /dev/null +++ b/drivers/vulkan/rendering_context_driver_vulkan.cpp @@ -0,0 +1,686 @@ +/**************************************************************************/ +/* rendering_context_driver_vulkan.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifdef VULKAN_ENABLED + +#include "rendering_context_driver_vulkan.h" + +#include "vk_enum_string_helper.h" + +#include "core/config/project_settings.h" +#include "core/version.h" + +#include "rendering_device_driver_vulkan.h" +#include "vulkan_hooks.h" + +RenderingContextDriverVulkan::RenderingContextDriverVulkan() { + // Empty constructor. +} + +RenderingContextDriverVulkan::~RenderingContextDriverVulkan() { + if (debug_messenger != VK_NULL_HANDLE && functions.DestroyDebugUtilsMessengerEXT != nullptr) { + functions.DestroyDebugUtilsMessengerEXT(instance, debug_messenger, nullptr); + } + + if (debug_report != VK_NULL_HANDLE && functions.DestroyDebugReportCallbackEXT != nullptr) { + functions.DestroyDebugReportCallbackEXT(instance, debug_report, nullptr); + } + + if (instance != VK_NULL_HANDLE) { + vkDestroyInstance(instance, nullptr); + } +} + +Error RenderingContextDriverVulkan::_initialize_vulkan_version() { + // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkApplicationInfo.html#_description + // For Vulkan 1.0 vkEnumerateInstanceVersion is not available, including not in the loader we compile against on Android. 
+ typedef VkResult(VKAPI_PTR * _vkEnumerateInstanceVersion)(uint32_t *); + _vkEnumerateInstanceVersion func = (_vkEnumerateInstanceVersion)vkGetInstanceProcAddr(nullptr, "vkEnumerateInstanceVersion"); + if (func != nullptr) { + uint32_t api_version; + VkResult res = func(&api_version); + if (res == VK_SUCCESS) { + instance_api_version = api_version; + } else { + // According to the documentation this shouldn't fail with anything except a memory allocation error + // in which case we're in deep trouble anyway. + ERR_FAIL_V(ERR_CANT_CREATE); + } + } else { + print_line("vkEnumerateInstanceVersion not available, assuming Vulkan 1.0."); + instance_api_version = VK_API_VERSION_1_0; + } + + return OK; +} + +void RenderingContextDriverVulkan::_register_requested_instance_extension(const CharString &p_extension_name, bool p_required) { + ERR_FAIL_COND(requested_instance_extensions.has(p_extension_name)); + requested_instance_extensions[p_extension_name] = p_required; +} + +Error RenderingContextDriverVulkan::_initialize_instance_extensions() { + enabled_instance_extension_names.clear(); + + // The surface extension and the platform-specific surface extension are core requirements. + _register_requested_instance_extension(VK_KHR_SURFACE_EXTENSION_NAME, true); + if (_get_platform_surface_extension()) { + _register_requested_instance_extension(_get_platform_surface_extension(), true); + } + + if (_use_validation_layers()) { + _register_requested_instance_extension(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, false); + } + + // This extension allows us to use the properties2 features to query additional device capabilities. + _register_requested_instance_extension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, false); + + // Only enable debug utils in verbose mode or DEV_ENABLED. 
+ // End users would get spammed with messages of varying verbosity due to the + // mess that thirdparty layers/extensions and drivers seem to leave in their + // wake, making the Windows registry a bottomless pit of broken layer JSON. +#ifdef DEV_ENABLED + bool want_debug_utils = true; +#else + bool want_debug_utils = OS::get_singleton()->is_stdout_verbose(); +#endif + if (want_debug_utils) { + _register_requested_instance_extension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, false); + } + + // Load instance extensions that are available. + uint32_t instance_extension_count = 0; + VkResult err = vkEnumerateInstanceExtensionProperties(nullptr, &instance_extension_count, nullptr); + ERR_FAIL_COND_V(err != VK_SUCCESS && err != VK_INCOMPLETE, ERR_CANT_CREATE); + ERR_FAIL_COND_V_MSG(instance_extension_count == 0, ERR_CANT_CREATE, "No instance extensions were found."); + + TightLocalVector<VkExtensionProperties> instance_extensions; + instance_extensions.resize(instance_extension_count); + err = vkEnumerateInstanceExtensionProperties(nullptr, &instance_extension_count, instance_extensions.ptr()); + if (err != VK_SUCCESS && err != VK_INCOMPLETE) { + ERR_FAIL_V(ERR_CANT_CREATE); + } + +#ifdef DEV_ENABLED + for (uint32_t i = 0; i < instance_extension_count; i++) { + print_verbose(String("VULKAN: Found instance extension ") + String::utf8(instance_extensions[i].extensionName) + String(".")); + } +#endif + + // Enable all extensions that are supported and requested. + for (uint32_t i = 0; i < instance_extension_count; i++) { + CharString extension_name(instance_extensions[i].extensionName); + if (requested_instance_extensions.has(extension_name)) { + enabled_instance_extension_names.insert(extension_name); + } + } + + // Now check our requested extensions. 
+ for (KeyValue<CharString, bool> &requested_extension : requested_instance_extensions) { + if (!enabled_instance_extension_names.has(requested_extension.key)) { + if (requested_extension.value) { + ERR_FAIL_V_MSG(ERR_BUG, String("Required extension ") + String::utf8(requested_extension.key) + String(" not found.")); + } else { + print_verbose(String("Optional extension ") + String::utf8(requested_extension.key) + String(" not found.")); + } + } + } + + return OK; +} + +Error RenderingContextDriverVulkan::_find_validation_layers(TightLocalVector<const char *> &r_layer_names) const { + r_layer_names.clear(); + + uint32_t instance_layer_count = 0; + VkResult err = vkEnumerateInstanceLayerProperties(&instance_layer_count, nullptr); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + if (instance_layer_count > 0) { + TightLocalVector<VkLayerProperties> layer_properties; + layer_properties.resize(instance_layer_count); + err = vkEnumerateInstanceLayerProperties(&instance_layer_count, layer_properties.ptr()); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + + // Preferred set of validation layers. + const std::initializer_list<const char *> preferred = { "VK_LAYER_KHRONOS_validation" }; + + // Alternative (deprecated, removed in SDK 1.1.126.0) set of validation layers. + const std::initializer_list<const char *> lunarg = { "VK_LAYER_LUNARG_standard_validation" }; + + // Alternative (deprecated, removed in SDK 1.1.121.1) set of validation layers. + const std::initializer_list<const char *> google = { "VK_LAYER_GOOGLE_threading", "VK_LAYER_LUNARG_parameter_validation", "VK_LAYER_LUNARG_object_tracker", "VK_LAYER_LUNARG_core_validation", "VK_LAYER_GOOGLE_unique_objects" }; + + // Verify all the layers of the list are present. 
+ for (const std::initializer_list<const char *> &list : { preferred, lunarg, google }) { + bool layers_found = false; + for (const char *layer_name : list) { + layers_found = false; + + for (const VkLayerProperties &properties : layer_properties) { + if (!strcmp(properties.layerName, layer_name)) { + layers_found = true; + break; + } + } + + if (!layers_found) { + break; + } + } + + if (layers_found) { + r_layer_names.reserve(list.size()); + for (const char *layer_name : list) { + r_layer_names.push_back(layer_name); + } + + break; + } + } + } + + return OK; +} + +VKAPI_ATTR VkBool32 VKAPI_CALL RenderingContextDriverVulkan::_debug_messenger_callback(VkDebugUtilsMessageSeverityFlagBitsEXT p_message_severity, VkDebugUtilsMessageTypeFlagsEXT p_message_type, const VkDebugUtilsMessengerCallbackDataEXT *p_callback_data, void *p_user_data) { + // This error needs to be ignored because the AMD allocator will mix up memory types on IGP processors. + if (strstr(p_callback_data->pMessage, "Mapping an image with layout") != nullptr && strstr(p_callback_data->pMessage, "can result in undefined behavior if this memory is used by the device") != nullptr) { + return VK_FALSE; + } + // This needs to be ignored because Validator is wrong here. + if (strstr(p_callback_data->pMessage, "Invalid SPIR-V binary version 1.3") != nullptr) { + return VK_FALSE; + } + // This needs to be ignored because Validator is wrong here. + if (strstr(p_callback_data->pMessage, "Shader requires flag") != nullptr) { + return VK_FALSE; + } + + // This needs to be ignored because Validator is wrong here. 
+ if (strstr(p_callback_data->pMessage, "SPIR-V module not valid: Pointer operand") != nullptr && strstr(p_callback_data->pMessage, "must be a memory object") != nullptr) { + return VK_FALSE; + } + + if (p_callback_data->pMessageIdName && strstr(p_callback_data->pMessageIdName, "UNASSIGNED-CoreValidation-DrawState-ClearCmdBeforeDraw") != nullptr) { + return VK_FALSE; + } + + // Translate the message type bitmask into a readable prefix for the log entry. + String type_string; + switch (p_message_type) { + case (VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT): + type_string = "GENERAL"; + break; + case (VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT): + type_string = "VALIDATION"; + break; + case (VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT): + type_string = "PERFORMANCE"; + break; + // Both bits set at once: the label must OR them, since AND-ing two distinct bits yields 0 and would never match. + case (VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT): + type_string = "VALIDATION|PERFORMANCE"; + break; + } + + // List every object attached to the message (type, handle and, when set, its debug name). + String objects_string; + if (p_callback_data->objectCount > 0) { + objects_string = "\n\tObjects - " + String::num_int64(p_callback_data->objectCount); + for (uint32_t object = 0; object < p_callback_data->objectCount; ++object) { + objects_string += + "\n\t\tObject[" + String::num_int64(object) + "]" + + " - " + string_VkObjectType(p_callback_data->pObjects[object].objectType) + + ", Handle " + String::num_int64(p_callback_data->pObjects[object].objectHandle); + + if (p_callback_data->pObjects[object].pObjectName != nullptr && strlen(p_callback_data->pObjects[object].pObjectName) > 0) { + objects_string += ", Name \"" + String(p_callback_data->pObjects[object].pObjectName) + "\""; + } + } + } + + // List the command buffer labels attached to the message, each followed by its RGBA color. + String labels_string; + if (p_callback_data->cmdBufLabelCount > 0) { + labels_string = "\n\tCommand Buffer Labels - " + String::num_int64(p_callback_data->cmdBufLabelCount); + for (uint32_t cmd_buf_label = 0; cmd_buf_label < p_callback_data->cmdBufLabelCount; ++cmd_buf_label) { + labels_string += + "\n\t\tLabel[" + String::num_int64(cmd_buf_label) + "]" + + " - " + 
p_callback_data->pCmdBufLabels[cmd_buf_label].pLabelName + + "{ "; + + for (int color_idx = 0; color_idx < 4; ++color_idx) { + labels_string += String::num(p_callback_data->pCmdBufLabels[cmd_buf_label].color[color_idx]); + if (color_idx < 3) { + labels_string += ", "; + } + } + + labels_string += " }"; + } + } + + String error_message(type_string + + " - Message Id Number: " + String::num_int64(p_callback_data->messageIdNumber) + + " | Message Id Name: " + p_callback_data->pMessageIdName + + "\n\t" + p_callback_data->pMessage + + objects_string + labels_string); + + // Convert VK severity to our own log macros. + switch (p_message_severity) { + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: + print_verbose(error_message); + break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: + print_line(error_message); + break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: + WARN_PRINT(error_message); + break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: + ERR_PRINT(error_message); + CRASH_COND_MSG(Engine::get_singleton()->is_abort_on_gpu_errors_enabled(), "Crashing, because abort on GPU errors is enabled."); + break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_FLAG_BITS_MAX_ENUM_EXT: + break; // Shouldn't happen, only handling to make compilers happy. 
+ } + + return VK_FALSE; +} + +VKAPI_ATTR VkBool32 VKAPI_CALL RenderingContextDriverVulkan::_debug_report_callback(VkDebugReportFlagsEXT p_flags, VkDebugReportObjectTypeEXT p_object_type, uint64_t p_object, size_t p_location, int32_t p_message_code, const char *p_layer_prefix, const char *p_message, void *p_user_data) { + String debug_message = String("Vulkan Debug Report: object - ") + String::num_int64(p_object) + "\n" + p_message; + + switch (p_flags) { + case VK_DEBUG_REPORT_DEBUG_BIT_EXT: + case VK_DEBUG_REPORT_INFORMATION_BIT_EXT: + print_line(debug_message); + break; + case VK_DEBUG_REPORT_WARNING_BIT_EXT: + case VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT: + WARN_PRINT(debug_message); + break; + case VK_DEBUG_REPORT_ERROR_BIT_EXT: + ERR_PRINT(debug_message); + break; + } + + return VK_FALSE; +} + +Error RenderingContextDriverVulkan::_initialize_instance() { + Error err; + TightLocalVector<const char *> enabled_extension_names; + enabled_extension_names.reserve(enabled_instance_extension_names.size()); + for (const CharString &extension_name : enabled_instance_extension_names) { + enabled_extension_names.push_back(extension_name.ptr()); + } + + // We'll set application version to the Vulkan version we're developing against, even if our instance is based on an older Vulkan + // version, devices can still support newer versions of Vulkan. The exception is when we're on Vulkan 1.0, we should not set this + // to anything but 1.0. Note that this value is only used by validation layers to warn us about version issues. + uint32_t application_api_version = instance_api_version == VK_API_VERSION_1_0 ? 
VK_API_VERSION_1_0 : VK_API_VERSION_1_2; + + CharString cs = GLOBAL_GET("application/config/name").operator String().utf8(); + VkApplicationInfo app_info = {}; + app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + app_info.pApplicationName = cs.get_data(); + app_info.pEngineName = VERSION_NAME; + app_info.engineVersion = VK_MAKE_VERSION(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH); + app_info.apiVersion = application_api_version; + + TightLocalVector<const char *> enabled_layer_names; + if (_use_validation_layers()) { + err = _find_validation_layers(enabled_layer_names); + ERR_FAIL_COND_V(err != OK, err); + } + + VkInstanceCreateInfo instance_info = {}; + instance_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + instance_info.pApplicationInfo = &app_info; + instance_info.enabledExtensionCount = enabled_extension_names.size(); + instance_info.ppEnabledExtensionNames = enabled_extension_names.ptr(); + instance_info.enabledLayerCount = enabled_layer_names.size(); + instance_info.ppEnabledLayerNames = enabled_layer_names.ptr(); + + // This is info for a temp callback to use during CreateInstance. After the instance is created, we use the instance-based function to register the final callback. 
+ VkDebugUtilsMessengerCreateInfoEXT debug_messenger_create_info = {}; + VkDebugReportCallbackCreateInfoEXT debug_report_callback_create_info = {}; + const bool has_debug_utils_extension = enabled_instance_extension_names.has(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + const bool has_debug_report_extension = enabled_instance_extension_names.has(VK_EXT_DEBUG_REPORT_EXTENSION_NAME); + if (has_debug_utils_extension) { + debug_messenger_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; + debug_messenger_create_info.pNext = nullptr; + debug_messenger_create_info.flags = 0; + debug_messenger_create_info.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + debug_messenger_create_info.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; + debug_messenger_create_info.pfnUserCallback = _debug_messenger_callback; + debug_messenger_create_info.pUserData = this; + instance_info.pNext = &debug_messenger_create_info; + } else if (has_debug_report_extension) { + debug_report_callback_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT; + debug_report_callback_create_info.flags = VK_DEBUG_REPORT_INFORMATION_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT | VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_DEBUG_BIT_EXT; + debug_report_callback_create_info.pfnCallback = _debug_report_callback; + debug_report_callback_create_info.pUserData = this; + instance_info.pNext = &debug_report_callback_create_info; + } + + err = _create_vulkan_instance(&instance_info, &instance); + ERR_FAIL_COND_V(err != OK, err); + +#ifdef USE_VOLK + volkLoadInstance(instance); +#endif + + // Physical device. 
+ if (enabled_instance_extension_names.has(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME)) { + functions.GetPhysicalDeviceFeatures2 = PFN_vkGetPhysicalDeviceFeatures2(vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceFeatures2")); + functions.GetPhysicalDeviceProperties2 = PFN_vkGetPhysicalDeviceProperties2(vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceProperties2")); + + // In Vulkan 1.0, the functions might be accessible under their original extension names. + if (functions.GetPhysicalDeviceFeatures2 == nullptr) { + functions.GetPhysicalDeviceFeatures2 = PFN_vkGetPhysicalDeviceFeatures2(vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceFeatures2KHR")); + } + + if (functions.GetPhysicalDeviceProperties2 == nullptr) { + functions.GetPhysicalDeviceProperties2 = PFN_vkGetPhysicalDeviceProperties2(vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceProperties2KHR")); + } + } + + // Device. + functions.GetDeviceProcAddr = PFN_vkGetDeviceProcAddr(vkGetInstanceProcAddr(instance, "vkGetDeviceProcAddr")); + + // Surfaces. + functions.GetPhysicalDeviceSurfaceSupportKHR = PFN_vkGetPhysicalDeviceSurfaceSupportKHR(vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceSurfaceSupportKHR")); + functions.GetPhysicalDeviceSurfaceFormatsKHR = PFN_vkGetPhysicalDeviceSurfaceFormatsKHR(vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceSurfaceFormatsKHR")); + functions.GetPhysicalDeviceSurfaceCapabilitiesKHR = PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceSurfaceCapabilitiesKHR")); + functions.GetPhysicalDeviceSurfacePresentModesKHR = PFN_vkGetPhysicalDeviceSurfacePresentModesKHR(vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceSurfacePresentModesKHR")); + + // Debug utils and report. + if (has_debug_utils_extension) { + // Setup VK_EXT_debug_utils function pointers always (we use them for debug labels and names). 
+ functions.CreateDebugUtilsMessengerEXT = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT"); + functions.DestroyDebugUtilsMessengerEXT = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkDestroyDebugUtilsMessengerEXT"); + functions.CmdBeginDebugUtilsLabelEXT = (PFN_vkCmdBeginDebugUtilsLabelEXT)vkGetInstanceProcAddr(instance, "vkCmdBeginDebugUtilsLabelEXT"); + functions.CmdEndDebugUtilsLabelEXT = (PFN_vkCmdEndDebugUtilsLabelEXT)vkGetInstanceProcAddr(instance, "vkCmdEndDebugUtilsLabelEXT"); + functions.SetDebugUtilsObjectNameEXT = (PFN_vkSetDebugUtilsObjectNameEXT)vkGetInstanceProcAddr(instance, "vkSetDebugUtilsObjectNameEXT"); + + if (!functions.debug_util_functions_available()) { + ERR_FAIL_V_MSG(ERR_CANT_CREATE, "GetProcAddr: Failed to init VK_EXT_debug_utils\nGetProcAddr: Failure"); + } + + VkResult res = functions.CreateDebugUtilsMessengerEXT(instance, &debug_messenger_create_info, nullptr, &debug_messenger); + switch (res) { + case VK_SUCCESS: + break; + case VK_ERROR_OUT_OF_HOST_MEMORY: + ERR_FAIL_V_MSG(ERR_CANT_CREATE, "CreateDebugUtilsMessengerEXT: out of host memory\nCreateDebugUtilsMessengerEXT Failure"); + break; + default: + ERR_FAIL_V_MSG(ERR_CANT_CREATE, "CreateDebugUtilsMessengerEXT: unknown failure\nCreateDebugUtilsMessengerEXT Failure"); + break; + } + } else if (has_debug_report_extension) { + functions.CreateDebugReportCallbackEXT = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugReportCallbackEXT"); + functions.DebugReportMessageEXT = (PFN_vkDebugReportMessageEXT)vkGetInstanceProcAddr(instance, "vkDebugReportMessageEXT"); + functions.DestroyDebugReportCallbackEXT = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(instance, "vkDestroyDebugReportCallbackEXT"); + + if (!functions.debug_report_functions_available()) { + ERR_FAIL_V_MSG(ERR_CANT_CREATE, "GetProcAddr: Failed to init VK_EXT_debug_report\nGetProcAddr: Failure"); + } + + 
VkResult res = functions.CreateDebugReportCallbackEXT(instance, &debug_report_callback_create_info, nullptr, &debug_report); + switch (res) { + case VK_SUCCESS: + break; + case VK_ERROR_OUT_OF_HOST_MEMORY: + ERR_FAIL_V_MSG(ERR_CANT_CREATE, "CreateDebugReportCallbackEXT: out of host memory\nCreateDebugReportCallbackEXT Failure"); + break; + default: + ERR_FAIL_V_MSG(ERR_CANT_CREATE, "CreateDebugReportCallbackEXT: unknown failure\nCreateDebugReportCallbackEXT Failure"); + break; + } + } + + return OK; +} + +Error RenderingContextDriverVulkan::_initialize_devices() { + if (VulkanHooks::get_singleton() != nullptr) { + VkPhysicalDevice physical_device; + bool device_retrieved = VulkanHooks::get_singleton()->get_physical_device(&physical_device); + ERR_FAIL_COND_V(!device_retrieved, ERR_CANT_CREATE); + + // When a hook is active, pretend the device returned by the hook is the only device available. + driver_devices.resize(1); + physical_devices.resize(1); + device_queue_families.resize(1); + physical_devices[0] = physical_device; + + } else { + uint32_t physical_device_count = 0; + VkResult err = vkEnumeratePhysicalDevices(instance, &physical_device_count, nullptr); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + ERR_FAIL_COND_V_MSG(physical_device_count == 0, ERR_CANT_CREATE, "vkEnumeratePhysicalDevices reported zero accessible devices.\n\nDo you have a compatible Vulkan installable client driver (ICD) installed?\nvkEnumeratePhysicalDevices Failure."); + + driver_devices.resize(physical_device_count); + physical_devices.resize(physical_device_count); + device_queue_families.resize(physical_device_count); + err = vkEnumeratePhysicalDevices(instance, &physical_device_count, physical_devices.ptr()); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + } + + // Fill the list of driver devices with the properties from the physical devices. 
+ for (uint32_t i = 0; i < physical_devices.size(); i++) { + VkPhysicalDeviceProperties props; + vkGetPhysicalDeviceProperties(physical_devices[i], &props); + + Device &driver_device = driver_devices[i]; + driver_device.name = String::utf8(props.deviceName); + driver_device.vendor = Vendor(props.vendorID); + driver_device.type = DeviceType(props.deviceType); + + uint32_t queue_family_properties_count = 0; + vkGetPhysicalDeviceQueueFamilyProperties(physical_devices[i], &queue_family_properties_count, nullptr); + + if (queue_family_properties_count > 0) { + device_queue_families[i].properties.resize(queue_family_properties_count); + vkGetPhysicalDeviceQueueFamilyProperties(physical_devices[i], &queue_family_properties_count, device_queue_families[i].properties.ptr()); + } + } + + return OK; +} + +bool RenderingContextDriverVulkan::_use_validation_layers() const { + return Engine::get_singleton()->is_validation_layers_enabled(); +} + +Error RenderingContextDriverVulkan::_create_vulkan_instance(const VkInstanceCreateInfo *p_create_info, VkInstance *r_instance) { + if (VulkanHooks::get_singleton() != nullptr) { + return VulkanHooks::get_singleton()->create_vulkan_instance(p_create_info, r_instance) ? 
OK : ERR_CANT_CREATE; + } else { + VkResult err = vkCreateInstance(p_create_info, nullptr, r_instance); + ERR_FAIL_COND_V_MSG(err == VK_ERROR_INCOMPATIBLE_DRIVER, ERR_CANT_CREATE, + "Cannot find a compatible Vulkan installable client driver (ICD).\n\n" + "vkCreateInstance Failure"); + ERR_FAIL_COND_V_MSG(err == VK_ERROR_EXTENSION_NOT_PRESENT, ERR_CANT_CREATE, + "Cannot find a specified extension library.\n" + "Make sure your layers path is set appropriately.\n" + "vkCreateInstance Failure"); + ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, + "vkCreateInstance failed.\n\n" + "Do you have a compatible Vulkan installable client driver (ICD) installed?\n" + "Please look at the Getting Started guide for additional information.\n" + "vkCreateInstance Failure"); + } + + return OK; +} + +Error RenderingContextDriverVulkan::initialize() { + Error err; + +#ifdef USE_VOLK + if (volkInitialize() != VK_SUCCESS) { + return FAILED; + } +#endif + + err = _initialize_vulkan_version(); + ERR_FAIL_COND_V(err != OK, err); + + err = _initialize_instance_extensions(); + ERR_FAIL_COND_V(err != OK, err); + + err = _initialize_instance(); + ERR_FAIL_COND_V(err != OK, err); + + err = _initialize_devices(); + ERR_FAIL_COND_V(err != OK, err); + + return OK; +} + +const RenderingContextDriver::Device &RenderingContextDriverVulkan::device_get(uint32_t p_device_index) const { + DEV_ASSERT(p_device_index < driver_devices.size()); + return driver_devices[p_device_index]; +} + +uint32_t RenderingContextDriverVulkan::device_get_count() const { + return driver_devices.size(); +} + +bool RenderingContextDriverVulkan::device_supports_present(uint32_t p_device_index, SurfaceID p_surface) const { + DEV_ASSERT(p_device_index < physical_devices.size()); + + // Check if any of the queues supported by the device supports presenting to the window's surface. 
+ const VkPhysicalDevice physical_device = physical_devices[p_device_index]; + const DeviceQueueFamilies &queue_families = device_queue_families[p_device_index]; + for (uint32_t i = 0; i < queue_families.properties.size(); i++) { + if ((queue_families.properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) && queue_family_supports_present(physical_device, i, p_surface)) { + return true; + } + } + + return false; +} + +RenderingDeviceDriver *RenderingContextDriverVulkan::driver_create() { + return memnew(RenderingDeviceDriverVulkan(this)); +} + +void RenderingContextDriverVulkan::driver_free(RenderingDeviceDriver *p_driver) { + memdelete(p_driver); +} + +RenderingContextDriver::SurfaceID RenderingContextDriverVulkan::surface_create(const void *p_platform_data) { + DEV_ASSERT(false && "Surface creation should not be called on the platform-agnostic version of the driver."); + return SurfaceID(); +} + +void RenderingContextDriverVulkan::surface_set_size(SurfaceID p_surface, uint32_t p_width, uint32_t p_height) { + Surface *surface = (Surface *)(p_surface); + surface->width = p_width; + surface->height = p_height; + surface->needs_resize = true; +} + +void RenderingContextDriverVulkan::surface_set_vsync_mode(SurfaceID p_surface, DisplayServer::VSyncMode p_vsync_mode) { + Surface *surface = (Surface *)(p_surface); + surface->vsync_mode = p_vsync_mode; + surface->needs_resize = true; +} + +DisplayServer::VSyncMode RenderingContextDriverVulkan::surface_get_vsync_mode(SurfaceID p_surface) const { + Surface *surface = (Surface *)(p_surface); + return surface->vsync_mode; +} + +uint32_t RenderingContextDriverVulkan::surface_get_width(SurfaceID p_surface) const { + Surface *surface = (Surface *)(p_surface); + return surface->width; +} + +uint32_t RenderingContextDriverVulkan::surface_get_height(SurfaceID p_surface) const { + Surface *surface = (Surface *)(p_surface); + return surface->height; +} + +void RenderingContextDriverVulkan::surface_set_needs_resize(SurfaceID p_surface, bool 
p_needs_resize) { + Surface *surface = (Surface *)(p_surface); + surface->needs_resize = p_needs_resize; +} + +bool RenderingContextDriverVulkan::surface_get_needs_resize(SurfaceID p_surface) const { + Surface *surface = (Surface *)(p_surface); + return surface->needs_resize; +} + +void RenderingContextDriverVulkan::surface_destroy(SurfaceID p_surface) { + Surface *surface = (Surface *)(p_surface); + vkDestroySurfaceKHR(instance, surface->vk_surface, nullptr); + memdelete(surface); +} + +bool RenderingContextDriverVulkan::is_debug_utils_enabled() const { + return enabled_instance_extension_names.has(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); +} + +VkInstance RenderingContextDriverVulkan::instance_get() const { + return instance; +} + +VkPhysicalDevice RenderingContextDriverVulkan::physical_device_get(uint32_t p_device_index) const { + DEV_ASSERT(p_device_index < physical_devices.size()); + return physical_devices[p_device_index]; +} + +uint32_t RenderingContextDriverVulkan::queue_family_get_count(uint32_t p_device_index) const { + DEV_ASSERT(p_device_index < physical_devices.size()); + return device_queue_families[p_device_index].properties.size(); +} + +VkQueueFamilyProperties RenderingContextDriverVulkan::queue_family_get(uint32_t p_device_index, uint32_t p_queue_family_index) const { + DEV_ASSERT(p_device_index < physical_devices.size()); + DEV_ASSERT(p_queue_family_index < queue_family_get_count(p_device_index)); + return device_queue_families[p_device_index].properties[p_queue_family_index]; +} + +bool RenderingContextDriverVulkan::queue_family_supports_present(VkPhysicalDevice p_physical_device, uint32_t p_queue_family_index, SurfaceID p_surface) const { + DEV_ASSERT(p_physical_device != VK_NULL_HANDLE); + DEV_ASSERT(p_surface != 0); + Surface *surface = (Surface *)(p_surface); + VkBool32 present_supported = false; + VkResult err = vkGetPhysicalDeviceSurfaceSupportKHR(p_physical_device, p_queue_family_index, surface->vk_surface, &present_supported); + return err == 
VK_SUCCESS && present_supported; +} + +const RenderingContextDriverVulkan::Functions &RenderingContextDriverVulkan::functions_get() const { + return functions; +} + +#endif // VULKAN_ENABLED diff --git a/drivers/vulkan/rendering_context_driver_vulkan.h b/drivers/vulkan/rendering_context_driver_vulkan.h new file mode 100644 index 0000000000..6348f90d55 --- /dev/null +++ b/drivers/vulkan/rendering_context_driver_vulkan.h @@ -0,0 +1,161 @@ +/**************************************************************************/ +/* rendering_context_driver_vulkan.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef RENDERING_CONTEXT_DRIVER_VULKAN_H +#define RENDERING_CONTEXT_DRIVER_VULKAN_H + +#ifdef VULKAN_ENABLED + +#include "servers/rendering/rendering_context_driver.h" + +#ifdef USE_VOLK +#include <volk.h> +#else +#include <vulkan/vulkan.h> +#endif + +class RenderingContextDriverVulkan : public RenderingContextDriver { +public: + struct Functions { + // Physical device. + PFN_vkGetPhysicalDeviceFeatures2 GetPhysicalDeviceFeatures2 = nullptr; + PFN_vkGetPhysicalDeviceProperties2 GetPhysicalDeviceProperties2 = nullptr; + + // Device. + PFN_vkGetDeviceProcAddr GetDeviceProcAddr = nullptr; + + // Surfaces. + PFN_vkGetPhysicalDeviceSurfaceSupportKHR GetPhysicalDeviceSurfaceSupportKHR = nullptr; + PFN_vkGetPhysicalDeviceSurfaceFormatsKHR GetPhysicalDeviceSurfaceFormatsKHR = nullptr; + PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR GetPhysicalDeviceSurfaceCapabilitiesKHR = nullptr; + PFN_vkGetPhysicalDeviceSurfacePresentModesKHR GetPhysicalDeviceSurfacePresentModesKHR = nullptr; + + // Debug utils. 
+ PFN_vkCreateDebugUtilsMessengerEXT CreateDebugUtilsMessengerEXT = nullptr; + PFN_vkDestroyDebugUtilsMessengerEXT DestroyDebugUtilsMessengerEXT = nullptr; + PFN_vkCmdBeginDebugUtilsLabelEXT CmdBeginDebugUtilsLabelEXT = nullptr; + PFN_vkCmdEndDebugUtilsLabelEXT CmdEndDebugUtilsLabelEXT = nullptr; + PFN_vkSetDebugUtilsObjectNameEXT SetDebugUtilsObjectNameEXT = nullptr; + + bool debug_util_functions_available() const { + return CreateDebugUtilsMessengerEXT != nullptr && + DestroyDebugUtilsMessengerEXT != nullptr && + CmdBeginDebugUtilsLabelEXT != nullptr && + CmdEndDebugUtilsLabelEXT != nullptr && + SetDebugUtilsObjectNameEXT != nullptr; + } + + // Debug report. + PFN_vkCreateDebugReportCallbackEXT CreateDebugReportCallbackEXT = nullptr; + PFN_vkDebugReportMessageEXT DebugReportMessageEXT = nullptr; + PFN_vkDestroyDebugReportCallbackEXT DestroyDebugReportCallbackEXT = nullptr; + + bool debug_report_functions_available() const { + return CreateDebugReportCallbackEXT != nullptr && + DebugReportMessageEXT != nullptr && + DestroyDebugReportCallbackEXT != nullptr; + } + }; + +private: + struct DeviceQueueFamilies { + TightLocalVector<VkQueueFamilyProperties> properties; + }; + + VkInstance instance = VK_NULL_HANDLE; + uint32_t instance_api_version = VK_API_VERSION_1_0; + HashMap<CharString, bool> requested_instance_extensions; + HashSet<CharString> enabled_instance_extension_names; + TightLocalVector<Device> driver_devices; + TightLocalVector<VkPhysicalDevice> physical_devices; + TightLocalVector<DeviceQueueFamilies> device_queue_families; + VkDebugUtilsMessengerEXT debug_messenger = VK_NULL_HANDLE; + VkDebugReportCallbackEXT debug_report = VK_NULL_HANDLE; + Functions functions; + + Error _initialize_vulkan_version(); + void _register_requested_instance_extension(const CharString &p_extension_name, bool p_required); + Error _initialize_instance_extensions(); + Error _initialize_instance(); + Error _initialize_devices(); + + // Static callbacks. 
+ static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_messenger_callback(VkDebugUtilsMessageSeverityFlagBitsEXT p_message_severity, VkDebugUtilsMessageTypeFlagsEXT p_message_type, const VkDebugUtilsMessengerCallbackDataEXT *p_callback_data, void *p_user_data); + static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_report_callback(VkDebugReportFlagsEXT p_flags, VkDebugReportObjectTypeEXT p_object_type, uint64_t p_object, size_t p_location, int32_t p_message_code, const char *p_layer_prefix, const char *p_message, void *p_user_data); + +protected: + Error _find_validation_layers(TightLocalVector<const char *> &r_layer_names) const; + + // Can be overridden by platform-specific drivers. + virtual const char *_get_platform_surface_extension() const { return nullptr; } + virtual bool _use_validation_layers() const; + virtual Error _create_vulkan_instance(const VkInstanceCreateInfo *p_create_info, VkInstance *r_instance); + +public: + virtual Error initialize() override; + virtual const Device &device_get(uint32_t p_device_index) const override; + virtual uint32_t device_get_count() const override; + virtual bool device_supports_present(uint32_t p_device_index, SurfaceID p_surface) const override; + virtual RenderingDeviceDriver *driver_create() override; + virtual void driver_free(RenderingDeviceDriver *p_driver) override; + virtual SurfaceID surface_create(const void *p_platform_data) override; + virtual void surface_set_size(SurfaceID p_surface, uint32_t p_width, uint32_t p_height) override; + virtual void surface_set_vsync_mode(SurfaceID p_surface, DisplayServer::VSyncMode p_vsync_mode) override; + virtual DisplayServer::VSyncMode surface_get_vsync_mode(SurfaceID p_surface) const override; + virtual uint32_t surface_get_width(SurfaceID p_surface) const override; + virtual uint32_t surface_get_height(SurfaceID p_surface) const override; + virtual void surface_set_needs_resize(SurfaceID p_surface, bool p_needs_resize) override; + virtual bool surface_get_needs_resize(SurfaceID 
p_surface) const override; + virtual void surface_destroy(SurfaceID p_surface) override; + virtual bool is_debug_utils_enabled() const override; + + // Vulkan-only methods. + struct Surface { + VkSurfaceKHR vk_surface = VK_NULL_HANDLE; + uint32_t width = 0; + uint32_t height = 0; + DisplayServer::VSyncMode vsync_mode = DisplayServer::VSYNC_ENABLED; + bool needs_resize = false; + }; + + VkInstance instance_get() const; + VkPhysicalDevice physical_device_get(uint32_t p_device_index) const; + uint32_t queue_family_get_count(uint32_t p_device_index) const; + VkQueueFamilyProperties queue_family_get(uint32_t p_device_index, uint32_t p_queue_family_index) const; + bool queue_family_supports_present(VkPhysicalDevice p_physical_device, uint32_t p_queue_family_index, SurfaceID p_surface) const; + const Functions &functions_get() const; + + RenderingContextDriverVulkan(); + virtual ~RenderingContextDriverVulkan() override; +}; + +#endif // VULKAN_ENABLED + +#endif // RENDERING_CONTEXT_DRIVER_VULKAN_H diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index e18161c974..f48e6eb7ed 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -33,7 +33,11 @@ #include "core/config/project_settings.h" #include "core/io/marshalls.h" #include "thirdparty/misc/smolv.h" -#include "vulkan_context.h" +#include "vulkan_hooks.h" + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +#define PRINT_NATIVE_COMMANDS 0 /*****************/ /**** GENERIC ****/ @@ -272,6 +276,883 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_ALWAYS, VK_COMPARE_OP_ALWAYS)); static_assert(ARRAYS_COMPATIBLE_FIELDWISE(Rect2i, VkRect2D)); +uint32_t RenderingDeviceDriverVulkan::SubgroupCapabilities::supported_stages_flags_rd() const { + uint32_t flags = 0; + + if (supported_stages & VK_SHADER_STAGE_VERTEX_BIT) { + flags += SHADER_STAGE_VERTEX_BIT; + } + if (supported_stages & 
VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) { + flags += SHADER_STAGE_TESSELATION_CONTROL_BIT; + } + if (supported_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) { + flags += SHADER_STAGE_TESSELATION_EVALUATION_BIT; + } + if (supported_stages & VK_SHADER_STAGE_GEOMETRY_BIT) { + // FIXME: Add shader stage geometry bit. + } + if (supported_stages & VK_SHADER_STAGE_FRAGMENT_BIT) { + flags += SHADER_STAGE_FRAGMENT_BIT; + } + if (supported_stages & VK_SHADER_STAGE_COMPUTE_BIT) { + flags += SHADER_STAGE_COMPUTE_BIT; + } + + return flags; +} + +String RenderingDeviceDriverVulkan::SubgroupCapabilities::supported_stages_desc() const { + String res; + + if (supported_stages & VK_SHADER_STAGE_VERTEX_BIT) { + res += ", STAGE_VERTEX"; + } + if (supported_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) { + res += ", STAGE_TESSELLATION_CONTROL"; + } + if (supported_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) { + res += ", STAGE_TESSELLATION_EVALUATION"; + } + if (supported_stages & VK_SHADER_STAGE_GEOMETRY_BIT) { + res += ", STAGE_GEOMETRY"; + } + if (supported_stages & VK_SHADER_STAGE_FRAGMENT_BIT) { + res += ", STAGE_FRAGMENT"; + } + if (supported_stages & VK_SHADER_STAGE_COMPUTE_BIT) { + res += ", STAGE_COMPUTE"; + } + + // These are not defined on Android GRMBL. 
+ if (supported_stages & 0x00000100 /* VK_SHADER_STAGE_RAYGEN_BIT_KHR */) { + res += ", STAGE_RAYGEN_KHR"; + } + if (supported_stages & 0x00000200 /* VK_SHADER_STAGE_ANY_HIT_BIT_KHR */) { + res += ", STAGE_ANY_HIT_KHR"; + } + if (supported_stages & 0x00000400 /* VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR */) { + res += ", STAGE_CLOSEST_HIT_KHR"; + } + if (supported_stages & 0x00000800 /* VK_SHADER_STAGE_MISS_BIT_KHR */) { + res += ", STAGE_MISS_KHR"; + } + if (supported_stages & 0x00001000 /* VK_SHADER_STAGE_INTERSECTION_BIT_KHR */) { + res += ", STAGE_INTERSECTION_KHR"; + } + if (supported_stages & 0x00002000 /* VK_SHADER_STAGE_CALLABLE_BIT_KHR */) { + res += ", STAGE_CALLABLE_KHR"; + } + if (supported_stages & 0x00000040 /* VK_SHADER_STAGE_TASK_BIT_NV */) { + res += ", STAGE_TASK_NV"; + } + if (supported_stages & 0x00000080 /* VK_SHADER_STAGE_MESH_BIT_NV */) { + res += ", STAGE_MESH_NV"; + } + + return res.substr(2); // Remove first ", ". +} + +uint32_t RenderingDeviceDriverVulkan::SubgroupCapabilities::supported_operations_flags_rd() const { + uint32_t flags = 0; + + if (supported_operations & VK_SUBGROUP_FEATURE_BASIC_BIT) { + flags += SUBGROUP_BASIC_BIT; + } + if (supported_operations & VK_SUBGROUP_FEATURE_VOTE_BIT) { + flags += SUBGROUP_VOTE_BIT; + } + if (supported_operations & VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) { + flags += SUBGROUP_ARITHMETIC_BIT; + } + if (supported_operations & VK_SUBGROUP_FEATURE_BALLOT_BIT) { + flags += SUBGROUP_BALLOT_BIT; + } + if (supported_operations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT) { + flags += SUBGROUP_SHUFFLE_BIT; + } + if (supported_operations & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT) { + flags += SUBGROUP_SHUFFLE_RELATIVE_BIT; + } + if (supported_operations & VK_SUBGROUP_FEATURE_CLUSTERED_BIT) { + flags += SUBGROUP_CLUSTERED_BIT; + } + if (supported_operations & VK_SUBGROUP_FEATURE_QUAD_BIT) { + flags += SUBGROUP_QUAD_BIT; + } + + return flags; +} + +String 
RenderingDeviceDriverVulkan::SubgroupCapabilities::supported_operations_desc() const { + String res; + + if (supported_operations & VK_SUBGROUP_FEATURE_BASIC_BIT) { + res += ", FEATURE_BASIC"; + } + if (supported_operations & VK_SUBGROUP_FEATURE_VOTE_BIT) { + res += ", FEATURE_VOTE"; + } + if (supported_operations & VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) { + res += ", FEATURE_ARITHMETIC"; + } + if (supported_operations & VK_SUBGROUP_FEATURE_BALLOT_BIT) { + res += ", FEATURE_BALLOT"; + } + if (supported_operations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT) { + res += ", FEATURE_SHUFFLE"; + } + if (supported_operations & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT) { + res += ", FEATURE_SHUFFLE_RELATIVE"; + } + if (supported_operations & VK_SUBGROUP_FEATURE_CLUSTERED_BIT) { + res += ", FEATURE_CLUSTERED"; + } + if (supported_operations & VK_SUBGROUP_FEATURE_QUAD_BIT) { + res += ", FEATURE_QUAD"; + } + if (supported_operations & VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV) { + res += ", FEATURE_PARTITIONED_NV"; + } + + return res.substr(2); // Remove first ", ". 
+} + +/*****************/ +/**** GENERIC ****/ +/*****************/ + +void RenderingDeviceDriverVulkan::_register_requested_device_extension(const CharString &p_extension_name, bool p_required) { + ERR_FAIL_COND(requested_device_extensions.has(p_extension_name)); + requested_device_extensions[p_extension_name] = p_required; +} + +Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { + enabled_device_extension_names.clear(); + + _register_requested_device_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true); + _register_requested_device_extension(VK_KHR_MULTIVIEW_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_16BIT_STORAGE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_MAINTENANCE_2_EXTENSION_NAME, false); + _register_requested_device_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, false); + _register_requested_device_extension(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + + if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) { + _register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true); + } + + uint32_t device_extension_count = 0; + VkResult err = vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &device_extension_count, nullptr); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + ERR_FAIL_COND_V_MSG(device_extension_count == 0, ERR_CANT_CREATE, "vkEnumerateDeviceExtensionProperties failed to find any extensions\n\nDo you have a compatible Vulkan installable client 
driver (ICD) installed?"); + + TightLocalVector<VkExtensionProperties> device_extensions; + device_extensions.resize(device_extension_count); + err = vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &device_extension_count, device_extensions.ptr()); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + +#ifdef DEV_ENABLED + for (uint32_t i = 0; i < device_extension_count; i++) { + print_verbose(String("VULKAN: Found device extension ") + String::utf8(device_extensions[i].extensionName)); + } +#endif + + // Enable all extensions that are supported and requested. + for (uint32_t i = 0; i < device_extension_count; i++) { + CharString extension_name(device_extensions[i].extensionName); + if (requested_device_extensions.has(extension_name)) { + enabled_device_extension_names.insert(extension_name); + } + } + + // Now check our requested extensions. + for (KeyValue<CharString, bool> &requested_extension : requested_device_extensions) { + if (!enabled_device_extension_names.has(requested_extension.key)) { + if (requested_extension.value) { + ERR_FAIL_V_MSG(ERR_BUG, String("Required extension ") + String::utf8(requested_extension.key) + String(" not found.")); + } else { + print_verbose(String("Optional extension ") + String::utf8(requested_extension.key) + String(" not found")); + } + } + } + + return OK; +} + +Error RenderingDeviceDriverVulkan::_check_device_features() { + vkGetPhysicalDeviceFeatures(physical_device, &physical_device_features); + + // Check for required features. 
+ if (!physical_device_features.imageCubeArray || !physical_device_features.independentBlend) { + String error_string = vformat("Your GPU (%s) does not support the following features which are required to use Vulkan-based renderers in Godot:\n\n", context_device.name); + if (!physical_device_features.imageCubeArray) { + error_string += "- No support for image cube arrays.\n"; + } + if (!physical_device_features.independentBlend) { + error_string += "- No support for independentBlend.\n"; + } + error_string += "\nThis is usually a hardware limitation, so updating graphics drivers won't help in most cases."; + +#if defined(ANDROID_ENABLED) || defined(IOS_ENABLED) + // Android/iOS platform ports currently don't exit themselves when this method returns `ERR_CANT_CREATE`. + OS::get_singleton()->alert(error_string + "\nClick OK to exit (black screen will be visible)."); +#else + OS::get_singleton()->alert(error_string + "\nClick OK to exit."); +#endif + + return ERR_CANT_CREATE; + } + + // Opt-in to the features we actually need/use. These can be changed in the future. + // We do this for multiple reasons: + // + // 1. Certain features (like sparse* stuff) cause unnecessary internal driver allocations. + // 2. Others like shaderStorageImageMultisample are a huge red flag + // (MSAA + Storage is rarely needed). + // 3. Most features when turned off aren't actually off (we just promise the driver not to use them) + // and it is validation what will complain. This allows us to target a minimum baseline. + // + // TODO: Allow the user to override these settings (i.e. turn off more stuff) using profiles + // so they can target a broad range of HW. For example Mali HW does not have + // shaderClipDistance/shaderCullDistance; thus validation would complain if such feature is used; + // allowing them to fix the problem without even owning Mali HW to test on. 
+ // + // The excluded features are: + // - robustBufferAccess (can hamper performance on some hardware) + // - occlusionQueryPrecise + // - pipelineStatisticsQuery + // - shaderStorageImageMultisample (unsupported by Intel Arc, prevents from using MSAA storage accidentally) + // - shaderResourceResidency + // - sparseBinding (we don't use sparse features and enabling them cause extra internal allocations inside the Vulkan driver we don't need) + // - sparseResidencyBuffer + // - sparseResidencyImage2D + // - sparseResidencyImage3D + // - sparseResidency2Samples + // - sparseResidency4Samples + // - sparseResidency8Samples + // - sparseResidency16Samples + // - sparseResidencyAliased + // - inheritedQueries + +#define VK_DEVICEFEATURE_ENABLE_IF(x) \ + if (physical_device_features.x) { \ + requested_device_features.x = physical_device_features.x; \ + } else \ + ((void)0) + + requested_device_features = {}; + VK_DEVICEFEATURE_ENABLE_IF(fullDrawIndexUint32); + VK_DEVICEFEATURE_ENABLE_IF(imageCubeArray); + VK_DEVICEFEATURE_ENABLE_IF(independentBlend); + VK_DEVICEFEATURE_ENABLE_IF(geometryShader); + VK_DEVICEFEATURE_ENABLE_IF(tessellationShader); + VK_DEVICEFEATURE_ENABLE_IF(sampleRateShading); + VK_DEVICEFEATURE_ENABLE_IF(dualSrcBlend); + VK_DEVICEFEATURE_ENABLE_IF(logicOp); + VK_DEVICEFEATURE_ENABLE_IF(multiDrawIndirect); + VK_DEVICEFEATURE_ENABLE_IF(drawIndirectFirstInstance); + VK_DEVICEFEATURE_ENABLE_IF(depthClamp); + VK_DEVICEFEATURE_ENABLE_IF(depthBiasClamp); + VK_DEVICEFEATURE_ENABLE_IF(fillModeNonSolid); + VK_DEVICEFEATURE_ENABLE_IF(depthBounds); + VK_DEVICEFEATURE_ENABLE_IF(wideLines); + VK_DEVICEFEATURE_ENABLE_IF(largePoints); + VK_DEVICEFEATURE_ENABLE_IF(alphaToOne); + VK_DEVICEFEATURE_ENABLE_IF(multiViewport); + VK_DEVICEFEATURE_ENABLE_IF(samplerAnisotropy); + VK_DEVICEFEATURE_ENABLE_IF(textureCompressionETC2); + VK_DEVICEFEATURE_ENABLE_IF(textureCompressionASTC_LDR); + VK_DEVICEFEATURE_ENABLE_IF(textureCompressionBC); + 
VK_DEVICEFEATURE_ENABLE_IF(vertexPipelineStoresAndAtomics); + VK_DEVICEFEATURE_ENABLE_IF(fragmentStoresAndAtomics); + VK_DEVICEFEATURE_ENABLE_IF(shaderTessellationAndGeometryPointSize); + VK_DEVICEFEATURE_ENABLE_IF(shaderImageGatherExtended); + VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageExtendedFormats); + VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageReadWithoutFormat); + VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageWriteWithoutFormat); + VK_DEVICEFEATURE_ENABLE_IF(shaderUniformBufferArrayDynamicIndexing); + VK_DEVICEFEATURE_ENABLE_IF(shaderSampledImageArrayDynamicIndexing); + VK_DEVICEFEATURE_ENABLE_IF(shaderStorageBufferArrayDynamicIndexing); + VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageArrayDynamicIndexing); + VK_DEVICEFEATURE_ENABLE_IF(shaderClipDistance); + VK_DEVICEFEATURE_ENABLE_IF(shaderCullDistance); + VK_DEVICEFEATURE_ENABLE_IF(shaderFloat64); + VK_DEVICEFEATURE_ENABLE_IF(shaderInt64); + VK_DEVICEFEATURE_ENABLE_IF(shaderInt16); + VK_DEVICEFEATURE_ENABLE_IF(shaderResourceMinLod); + VK_DEVICEFEATURE_ENABLE_IF(variableMultisampleRate); + + return OK; +} + +Error RenderingDeviceDriverVulkan::_check_device_capabilities() { + // Fill device family and version. + device_capabilities.device_family = DEVICE_VULKAN; + device_capabilities.version_major = VK_API_VERSION_MAJOR(physical_device_properties.apiVersion); + device_capabilities.version_minor = VK_API_VERSION_MINOR(physical_device_properties.apiVersion); + + // References: + // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VK_KHR_multiview.html + // https://www.khronos.org/blog/vulkan-subgroup-tutorial + const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); + if (functions.GetPhysicalDeviceFeatures2 != nullptr) { + // We must check that the corresponding extension is present before assuming a feature as enabled. 
+ // See also: https://github.com/godotengine/godot/issues/65409 + + void *next_features = nullptr; + VkPhysicalDeviceVulkan12Features device_features_vk_1_2 = {}; + VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_features = {}; + VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features = {}; + VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {}; + VkPhysicalDeviceMultiviewFeatures multiview_features = {}; + VkPhysicalDevicePipelineCreationCacheControlFeatures pipeline_cache_control_features = {}; + + const bool use_1_2_features = physical_device_properties.apiVersion >= VK_API_VERSION_1_2; + if (use_1_2_features) { + device_features_vk_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; + device_features_vk_1_2.pNext = next_features; + next_features = &device_features_vk_1_2; + } else if (enabled_device_extension_names.has(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { + shader_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR; + shader_features.pNext = next_features; + next_features = &shader_features; + } + + if (enabled_device_extension_names.has(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { + vrs_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR; + vrs_features.pNext = next_features; + next_features = &vrs_features; + } + + if (enabled_device_extension_names.has(VK_KHR_16BIT_STORAGE_EXTENSION_NAME)) { + storage_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR; + storage_feature.pNext = next_features; + next_features = &storage_feature; + } + + if (enabled_device_extension_names.has(VK_KHR_MULTIVIEW_EXTENSION_NAME)) { + multiview_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES; + multiview_features.pNext = next_features; + next_features = &multiview_features; + } + + if (enabled_device_extension_names.has(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME)) { + pipeline_cache_control_features.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES; + pipeline_cache_control_features.pNext = next_features; + next_features = &pipeline_cache_control_features; + } + + VkPhysicalDeviceFeatures2 device_features_2 = {}; + device_features_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + device_features_2.pNext = next_features; + functions.GetPhysicalDeviceFeatures2(physical_device, &device_features_2); + + if (use_1_2_features) { +#ifdef MACOS_ENABLED + ERR_FAIL_COND_V_MSG(!device_features_vk_1_2.shaderSampledImageArrayNonUniformIndexing, ERR_CANT_CREATE, "Your GPU doesn't support shaderSampledImageArrayNonUniformIndexing which is required to use the Vulkan-based renderers in Godot."); +#endif + if (enabled_device_extension_names.has(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { + shader_capabilities.shader_float16_is_supported = device_features_vk_1_2.shaderFloat16; + shader_capabilities.shader_int8_is_supported = device_features_vk_1_2.shaderInt8; + } + } else { + if (enabled_device_extension_names.has(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { + shader_capabilities.shader_float16_is_supported = shader_features.shaderFloat16; + shader_capabilities.shader_int8_is_supported = shader_features.shaderInt8; + } + } + + if (enabled_device_extension_names.has(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { + vrs_capabilities.pipeline_vrs_supported = vrs_features.pipelineFragmentShadingRate; + vrs_capabilities.primitive_vrs_supported = vrs_features.primitiveFragmentShadingRate; + vrs_capabilities.attachment_vrs_supported = vrs_features.attachmentFragmentShadingRate; + } + + if (enabled_device_extension_names.has(VK_KHR_MULTIVIEW_EXTENSION_NAME)) { + multiview_capabilities.is_supported = multiview_features.multiview; + multiview_capabilities.geometry_shader_is_supported = multiview_features.multiviewGeometryShader; + multiview_capabilities.tessellation_shader_is_supported = multiview_features.multiviewTessellationShader; + } + + if 
(enabled_device_extension_names.has(VK_KHR_16BIT_STORAGE_EXTENSION_NAME)) { + storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = storage_feature.storageBuffer16BitAccess; + storage_buffer_capabilities.uniform_and_storage_buffer_16_bit_access_is_supported = storage_feature.uniformAndStorageBuffer16BitAccess; + storage_buffer_capabilities.storage_push_constant_16_is_supported = storage_feature.storagePushConstant16; + storage_buffer_capabilities.storage_input_output_16 = storage_feature.storageInputOutput16; + } + + if (enabled_device_extension_names.has(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME)) { + pipeline_cache_control_support = pipeline_cache_control_features.pipelineCreationCacheControl; + } + } + + if (functions.GetPhysicalDeviceProperties2 != nullptr) { + void *next_properties = nullptr; + VkPhysicalDeviceFragmentShadingRatePropertiesKHR vrs_properties = {}; + VkPhysicalDeviceMultiviewProperties multiview_properties = {}; + VkPhysicalDeviceSubgroupProperties subgroup_properties = {}; + VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control_properties = {}; + VkPhysicalDeviceProperties2 physical_device_properties_2 = {}; + + const bool use_1_1_properties = physical_device_properties.apiVersion >= VK_API_VERSION_1_1; + if (use_1_1_properties) { + subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; + subgroup_properties.pNext = next_properties; + next_properties = &subgroup_properties; + + subgroup_capabilities.size_control_is_supported = enabled_device_extension_names.has(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); + if (subgroup_capabilities.size_control_is_supported) { + subgroup_size_control_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES; + subgroup_size_control_properties.pNext = next_properties; + next_properties = &subgroup_size_control_properties; + } + } + + if (multiview_capabilities.is_supported) { + multiview_properties.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES; + multiview_properties.pNext = next_properties; + next_properties = &multiview_properties; + } + + if (vrs_capabilities.attachment_vrs_supported) { + vrs_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; + vrs_properties.pNext = next_properties; + next_properties = &vrs_properties; + } + + physical_device_properties_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + physical_device_properties_2.pNext = next_properties; + functions.GetPhysicalDeviceProperties2(physical_device, &physical_device_properties_2); + + subgroup_capabilities.size = subgroup_properties.subgroupSize; + subgroup_capabilities.min_size = subgroup_properties.subgroupSize; + subgroup_capabilities.max_size = subgroup_properties.subgroupSize; + subgroup_capabilities.supported_stages = subgroup_properties.supportedStages; + subgroup_capabilities.supported_operations = subgroup_properties.supportedOperations; + + // Note: quadOperationsInAllStages will be true if: + // - supportedStages has VK_SHADER_STAGE_ALL_GRAPHICS + VK_SHADER_STAGE_COMPUTE_BIT. + // - supportedOperations has VK_SUBGROUP_FEATURE_QUAD_BIT. 
+ subgroup_capabilities.quad_operations_in_all_stages = subgroup_properties.quadOperationsInAllStages; + + if (subgroup_capabilities.size_control_is_supported && (subgroup_size_control_properties.requiredSubgroupSizeStages & VK_SHADER_STAGE_COMPUTE_BIT)) { + subgroup_capabilities.min_size = subgroup_size_control_properties.minSubgroupSize; + subgroup_capabilities.max_size = subgroup_size_control_properties.maxSubgroupSize; + } + + if (vrs_capabilities.pipeline_vrs_supported || vrs_capabilities.primitive_vrs_supported || vrs_capabilities.attachment_vrs_supported) { + print_verbose("- Vulkan Variable Rate Shading supported:"); + if (vrs_capabilities.pipeline_vrs_supported) { + print_verbose(" Pipeline fragment shading rate"); + } + if (vrs_capabilities.primitive_vrs_supported) { + print_verbose(" Primitive fragment shading rate"); + } + if (vrs_capabilities.attachment_vrs_supported) { + // TODO: Expose these somehow to the end user. + vrs_capabilities.min_texel_size.x = vrs_properties.minFragmentShadingRateAttachmentTexelSize.width; + vrs_capabilities.min_texel_size.y = vrs_properties.minFragmentShadingRateAttachmentTexelSize.height; + vrs_capabilities.max_texel_size.x = vrs_properties.maxFragmentShadingRateAttachmentTexelSize.width; + vrs_capabilities.max_texel_size.y = vrs_properties.maxFragmentShadingRateAttachmentTexelSize.height; + + // We'll attempt to default to a texel size of 16x16. 
+ vrs_capabilities.texel_size.x = CLAMP(16, vrs_capabilities.min_texel_size.x, vrs_capabilities.max_texel_size.x); + vrs_capabilities.texel_size.y = CLAMP(16, vrs_capabilities.min_texel_size.y, vrs_capabilities.max_texel_size.y); + + print_verbose(String(" Attachment fragment shading rate") + String(", min texel size: (") + itos(vrs_capabilities.min_texel_size.x) + String(", ") + itos(vrs_capabilities.min_texel_size.y) + String(")") + String(", max texel size: (") + itos(vrs_capabilities.max_texel_size.x) + String(", ") + itos(vrs_capabilities.max_texel_size.y) + String(")")); + } + + } else { + print_verbose("- Vulkan Variable Rate Shading not supported"); + } + + if (multiview_capabilities.is_supported) { + multiview_capabilities.max_view_count = multiview_properties.maxMultiviewViewCount; + multiview_capabilities.max_instance_count = multiview_properties.maxMultiviewInstanceIndex; + + print_verbose("- Vulkan multiview supported:"); + print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count)); + print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count)); + } else { + print_verbose("- Vulkan multiview not supported"); + } + + print_verbose("- Vulkan subgroup:"); + print_verbose(" size: " + itos(subgroup_capabilities.size)); + print_verbose(" min size: " + itos(subgroup_capabilities.min_size)); + print_verbose(" max size: " + itos(subgroup_capabilities.max_size)); + print_verbose(" stages: " + subgroup_capabilities.supported_stages_desc()); + print_verbose(" supported ops: " + subgroup_capabilities.supported_operations_desc()); + if (subgroup_capabilities.quad_operations_in_all_stages) { + print_verbose(" quad operations in all stages"); + } + } + + return OK; +} + +Error RenderingDeviceDriverVulkan::_add_queue_create_info(LocalVector<VkDeviceQueueCreateInfo> &r_queue_create_info) { + uint32_t queue_family_count = queue_family_properties.size(); + queue_families.resize(queue_family_count); + + VkQueueFlags 
queue_flags_mask = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT; + const uint32_t max_queue_count_per_family = 1; + static const float queue_priorities[max_queue_count_per_family] = {}; + for (uint32_t i = 0; i < queue_family_count; i++) { + if ((queue_family_properties[i].queueFlags & queue_flags_mask) == 0) { + // We ignore creating queues in families that don't support any of the operations we require. + continue; + } + + VkDeviceQueueCreateInfo create_info = {}; + create_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + create_info.queueFamilyIndex = i; + create_info.queueCount = MIN(queue_family_properties[i].queueCount, max_queue_count_per_family); + create_info.pQueuePriorities = queue_priorities; + r_queue_create_info.push_back(create_info); + + // Prepare the vectors where the queues will be filled out. + queue_families[i].resize(create_info.queueCount); + } + + return OK; +} + +Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVector<VkDeviceQueueCreateInfo> &p_queue_create_info) { + TightLocalVector<const char *> enabled_extension_names; + enabled_extension_names.reserve(enabled_device_extension_names.size()); + for (const CharString &extension_name : enabled_device_extension_names) { + enabled_extension_names.push_back(extension_name.ptr()); + } + + void *create_info_next = nullptr; + VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_features = {}; + shader_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR; + shader_features.pNext = create_info_next; + shader_features.shaderFloat16 = shader_capabilities.shader_float16_is_supported; + shader_features.shaderInt8 = shader_capabilities.shader_int8_is_supported; + create_info_next = &shader_features; + + VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features = {}; + if (vrs_capabilities.pipeline_vrs_supported || vrs_capabilities.primitive_vrs_supported || vrs_capabilities.attachment_vrs_supported) { + vrs_features.sType 
= VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR; + vrs_features.pNext = create_info_next; + vrs_features.pipelineFragmentShadingRate = vrs_capabilities.pipeline_vrs_supported; + vrs_features.primitiveFragmentShadingRate = vrs_capabilities.primitive_vrs_supported; + vrs_features.attachmentFragmentShadingRate = vrs_capabilities.attachment_vrs_supported; + create_info_next = &vrs_features; + } + + VkPhysicalDevicePipelineCreationCacheControlFeatures pipeline_cache_control_features = {}; + if (pipeline_cache_control_support) { + pipeline_cache_control_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES; + pipeline_cache_control_features.pNext = create_info_next; + pipeline_cache_control_features.pipelineCreationCacheControl = pipeline_cache_control_support; + create_info_next = &pipeline_cache_control_features; + } + + VkPhysicalDeviceVulkan11Features vulkan_1_1_features = {}; + VkPhysicalDevice16BitStorageFeaturesKHR storage_features = {}; + VkPhysicalDeviceMultiviewFeatures multiview_features = {}; + const bool enable_1_2_features = physical_device_properties.apiVersion >= VK_API_VERSION_1_2; + if (enable_1_2_features) { + // In Vulkan 1.2 and newer we use a newer struct to enable various features. 
+ vulkan_1_1_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; + vulkan_1_1_features.pNext = create_info_next; + vulkan_1_1_features.storageBuffer16BitAccess = storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported; + vulkan_1_1_features.uniformAndStorageBuffer16BitAccess = storage_buffer_capabilities.uniform_and_storage_buffer_16_bit_access_is_supported; + vulkan_1_1_features.storagePushConstant16 = storage_buffer_capabilities.storage_push_constant_16_is_supported; + vulkan_1_1_features.storageInputOutput16 = storage_buffer_capabilities.storage_input_output_16; + vulkan_1_1_features.multiview = multiview_capabilities.is_supported; + vulkan_1_1_features.multiviewGeometryShader = multiview_capabilities.geometry_shader_is_supported; + vulkan_1_1_features.multiviewTessellationShader = multiview_capabilities.tessellation_shader_is_supported; + vulkan_1_1_features.variablePointersStorageBuffer = 0; + vulkan_1_1_features.variablePointers = 0; + vulkan_1_1_features.protectedMemory = 0; + vulkan_1_1_features.samplerYcbcrConversion = 0; + vulkan_1_1_features.shaderDrawParameters = 0; + create_info_next = &vulkan_1_1_features; + } else { + // On Vulkan 1.0 and 1.1 we use our older structs to initialize these features. 
+ storage_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR; + storage_features.pNext = create_info_next; + storage_features.storageBuffer16BitAccess = storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported; + storage_features.uniformAndStorageBuffer16BitAccess = storage_buffer_capabilities.uniform_and_storage_buffer_16_bit_access_is_supported; + storage_features.storagePushConstant16 = storage_buffer_capabilities.storage_push_constant_16_is_supported; + storage_features.storageInputOutput16 = storage_buffer_capabilities.storage_input_output_16; + create_info_next = &storage_features; + + const bool enable_1_1_features = physical_device_properties.apiVersion >= VK_API_VERSION_1_1; + if (enable_1_1_features) { + multiview_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES; + multiview_features.pNext = create_info_next; + multiview_features.multiview = multiview_capabilities.is_supported; + multiview_features.multiviewGeometryShader = multiview_capabilities.geometry_shader_is_supported; + multiview_features.multiviewTessellationShader = multiview_capabilities.tessellation_shader_is_supported; + create_info_next = &multiview_features; + } + } + + VkDeviceCreateInfo create_info = {}; + create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + create_info.pNext = create_info_next; + create_info.queueCreateInfoCount = p_queue_create_info.size(); + create_info.pQueueCreateInfos = p_queue_create_info.ptr(); + create_info.enabledExtensionCount = enabled_extension_names.size(); + create_info.ppEnabledExtensionNames = enabled_extension_names.ptr(); + create_info.pEnabledFeatures = &requested_device_features; + + if (VulkanHooks::get_singleton() != nullptr) { + bool device_created = VulkanHooks::get_singleton()->create_vulkan_device(&create_info, &vk_device); + ERR_FAIL_COND_V(!device_created, ERR_CANT_CREATE); + } else { + VkResult err = vkCreateDevice(physical_device, &create_info, nullptr, &vk_device); + 
ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + } + + for (uint32_t i = 0; i < queue_families.size(); i++) { + for (uint32_t j = 0; j < queue_families[i].size(); j++) { + vkGetDeviceQueue(vk_device, i, j, &queue_families[i][j].queue); + } + } + + const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); + if (functions.GetDeviceProcAddr != nullptr) { + device_functions.CreateSwapchainKHR = PFN_vkCreateSwapchainKHR(functions.GetDeviceProcAddr(vk_device, "vkCreateSwapchainKHR")); + device_functions.DestroySwapchainKHR = PFN_vkDestroySwapchainKHR(functions.GetDeviceProcAddr(vk_device, "vkDestroySwapchainKHR")); + device_functions.GetSwapchainImagesKHR = PFN_vkGetSwapchainImagesKHR(functions.GetDeviceProcAddr(vk_device, "vkGetSwapchainImagesKHR")); + device_functions.AcquireNextImageKHR = PFN_vkAcquireNextImageKHR(functions.GetDeviceProcAddr(vk_device, "vkAcquireNextImageKHR")); + device_functions.QueuePresentKHR = PFN_vkQueuePresentKHR(functions.GetDeviceProcAddr(vk_device, "vkQueuePresentKHR")); + + if (enabled_device_extension_names.has(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME)) { + device_functions.CreateRenderPass2KHR = PFN_vkCreateRenderPass2KHR(functions.GetDeviceProcAddr(vk_device, "vkCreateRenderPass2KHR")); + } + } + + return OK; +} + +Error RenderingDeviceDriverVulkan::_initialize_allocator() { + VmaAllocatorCreateInfo allocator_info = {}; + allocator_info.physicalDevice = physical_device; + allocator_info.device = vk_device; + allocator_info.instance = context_driver->instance_get(); + VkResult err = vmaCreateAllocator(&allocator_info, &allocator); + ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "vmaCreateAllocator failed with error " + itos(err) + "."); + + return OK; +} + +Error RenderingDeviceDriverVulkan::_initialize_pipeline_cache() { + pipelines_cache.buffer.resize(sizeof(PipelineCacheHeader)); + PipelineCacheHeader *header = (PipelineCacheHeader *)(pipelines_cache.buffer.ptrw()); + *header = {}; + header->magic = 
868 + VK_PIPELINE_CACHE_HEADER_VERSION_ONE; + header->device_id = physical_device_properties.deviceID; + header->vendor_id = physical_device_properties.vendorID; + header->driver_version = physical_device_properties.driverVersion; + memcpy(header->uuid, physical_device_properties.pipelineCacheUUID, VK_UUID_SIZE); + header->driver_abi = sizeof(void *); + + pipeline_cache_id = String::hex_encode_buffer(physical_device_properties.pipelineCacheUUID, VK_UUID_SIZE); + pipeline_cache_id += "-driver-" + itos(physical_device_properties.driverVersion); + + return OK; +} + +static void _convert_subpass_attachments(const VkAttachmentReference2 *p_attachment_references_2, uint32_t p_attachment_references_count, TightLocalVector<VkAttachmentReference> &r_attachment_references) { + r_attachment_references.resize(p_attachment_references_count); + for (uint32_t i = 0; i < p_attachment_references_count; i++) { + // Ignore sType, pNext and aspectMask (which is currently unused). + r_attachment_references[i].attachment = p_attachment_references_2[i].attachment; + r_attachment_references[i].layout = p_attachment_references_2[i].layout; + } +} + +VkResult RenderingDeviceDriverVulkan::_create_render_pass(VkDevice p_device, const VkRenderPassCreateInfo2 *p_create_info, const VkAllocationCallbacks *p_allocator, VkRenderPass *p_render_pass) { + if (device_functions.CreateRenderPass2KHR != nullptr) { + return device_functions.CreateRenderPass2KHR(p_device, p_create_info, p_allocator, p_render_pass); + } else { + // Compatibility fallback with regular create render pass but by converting the inputs from the newer version to the older one. + TightLocalVector<VkAttachmentDescription> attachments; + attachments.resize(p_create_info->attachmentCount); + for (uint32_t i = 0; i < p_create_info->attachmentCount; i++) { + // Ignores sType and pNext from the attachment. 
+ const VkAttachmentDescription2 &src = p_create_info->pAttachments[i]; + VkAttachmentDescription &dst = attachments[i]; + dst.flags = src.flags; + dst.format = src.format; + dst.samples = src.samples; + dst.loadOp = src.loadOp; + dst.storeOp = src.storeOp; + dst.stencilLoadOp = src.stencilLoadOp; + dst.stencilStoreOp = src.stencilStoreOp; + dst.initialLayout = src.initialLayout; + dst.finalLayout = src.finalLayout; + } + + const uint32_t attachment_vectors_per_subpass = 4; + TightLocalVector<TightLocalVector<VkAttachmentReference>> subpasses_attachments; + TightLocalVector<VkSubpassDescription> subpasses; + subpasses_attachments.resize(p_create_info->subpassCount * attachment_vectors_per_subpass); + subpasses.resize(p_create_info->subpassCount); + + for (uint32_t i = 0; i < p_create_info->subpassCount; i++) { + const uint32_t vector_base_index = i * attachment_vectors_per_subpass; + const uint32_t input_attachments_index = vector_base_index + 0; + const uint32_t color_attachments_index = vector_base_index + 1; + const uint32_t resolve_attachments_index = vector_base_index + 2; + const uint32_t depth_attachment_index = vector_base_index + 3; + _convert_subpass_attachments(p_create_info->pSubpasses[i].pInputAttachments, p_create_info->pSubpasses[i].inputAttachmentCount, subpasses_attachments[input_attachments_index]); + _convert_subpass_attachments(p_create_info->pSubpasses[i].pColorAttachments, p_create_info->pSubpasses[i].colorAttachmentCount, subpasses_attachments[color_attachments_index]); + _convert_subpass_attachments(p_create_info->pSubpasses[i].pResolveAttachments, p_create_info->pSubpasses[i].colorAttachmentCount, subpasses_attachments[resolve_attachments_index]); + _convert_subpass_attachments(p_create_info->pSubpasses[i].pDepthStencilAttachment, (p_create_info->pSubpasses[i].pDepthStencilAttachment != nullptr) ? 1 : 0, subpasses_attachments[depth_attachment_index]); + + // Ignores sType and pNext from the subpass. 
+ const VkSubpassDescription2 &src_subpass = p_create_info->pSubpasses[i]; + VkSubpassDescription &dst_subpass = subpasses[i]; + dst_subpass.flags = src_subpass.flags; + dst_subpass.pipelineBindPoint = src_subpass.pipelineBindPoint; + dst_subpass.inputAttachmentCount = src_subpass.inputAttachmentCount; + dst_subpass.pInputAttachments = subpasses_attachments[input_attachments_index].ptr(); + dst_subpass.colorAttachmentCount = src_subpass.colorAttachmentCount; + dst_subpass.pColorAttachments = subpasses_attachments[color_attachments_index].ptr(); + dst_subpass.pResolveAttachments = subpasses_attachments[resolve_attachments_index].ptr(); + dst_subpass.pDepthStencilAttachment = subpasses_attachments[depth_attachment_index].ptr(); + dst_subpass.preserveAttachmentCount = src_subpass.preserveAttachmentCount; + dst_subpass.pPreserveAttachments = src_subpass.pPreserveAttachments; + } + + TightLocalVector<VkSubpassDependency> dependencies; + dependencies.resize(p_create_info->dependencyCount); + + for (uint32_t i = 0; i < p_create_info->dependencyCount; i++) { + // Ignores sType and pNext from the dependency, and viewMask which is currently unused. 
+ const VkSubpassDependency2 &src_dependency = p_create_info->pDependencies[i]; + VkSubpassDependency &dst_dependency = dependencies[i]; + dst_dependency.srcSubpass = src_dependency.srcSubpass; + dst_dependency.dstSubpass = src_dependency.dstSubpass; + dst_dependency.srcStageMask = src_dependency.srcStageMask; + dst_dependency.dstStageMask = src_dependency.dstStageMask; + dst_dependency.srcAccessMask = src_dependency.srcAccessMask; + dst_dependency.dstAccessMask = src_dependency.dstAccessMask; + dst_dependency.dependencyFlags = src_dependency.dependencyFlags; + } + + VkRenderPassCreateInfo create_info = {}; + create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + create_info.pNext = p_create_info->pNext; + create_info.flags = p_create_info->flags; + create_info.attachmentCount = attachments.size(); + create_info.pAttachments = attachments.ptr(); + create_info.subpassCount = subpasses.size(); + create_info.pSubpasses = subpasses.ptr(); + create_info.dependencyCount = dependencies.size(); + create_info.pDependencies = dependencies.ptr(); + return vkCreateRenderPass(vk_device, &create_info, p_allocator, p_render_pass); + } +} + +bool RenderingDeviceDriverVulkan::_release_image_semaphore(CommandQueue *p_command_queue, uint32_t p_semaphore_index, bool p_release_on_swap_chain) { + SwapChain *swap_chain = p_command_queue->image_semaphores_swap_chains[p_semaphore_index]; + if (swap_chain != nullptr) { + // Clear the swap chain from the command queue's vector. + p_command_queue->image_semaphores_swap_chains[p_semaphore_index] = nullptr; + + if (p_release_on_swap_chain) { + // Remove the acquired semaphore from the swap chain's vectors. 
+ for (uint32_t i = 0; i < swap_chain->command_queues_acquired.size(); i++) { + if (swap_chain->command_queues_acquired[i] == p_command_queue && swap_chain->command_queues_acquired_semaphores[i] == p_semaphore_index) { + swap_chain->command_queues_acquired.remove_at(i); + swap_chain->command_queues_acquired_semaphores.remove_at(i); + break; + } + } + } + + return true; + } + + return false; +} + +bool RenderingDeviceDriverVulkan::_recreate_image_semaphore(CommandQueue *p_command_queue, uint32_t p_semaphore_index, bool p_release_on_swap_chain) { + _release_image_semaphore(p_command_queue, p_semaphore_index, p_release_on_swap_chain); + + VkSemaphore semaphore; + VkSemaphoreCreateInfo create_info = {}; + create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + VkResult err = vkCreateSemaphore(vk_device, &create_info, nullptr, &semaphore); + ERR_FAIL_COND_V(err != VK_SUCCESS, false); + + // Indicate the semaphore is free again and destroy the previous one before storing the new one. + vkDestroySemaphore(vk_device, p_command_queue->image_semaphores[p_semaphore_index], nullptr); + + p_command_queue->image_semaphores[p_semaphore_index] = semaphore; + p_command_queue->free_image_semaphores.push_back(p_semaphore_index); + + return true; +} + +void RenderingDeviceDriverVulkan::_set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name) { + const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); + if (functions.SetDebugUtilsObjectNameEXT != nullptr) { + CharString obj_data = p_object_name.utf8(); + VkDebugUtilsObjectNameInfoEXT name_info; + name_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; + name_info.pNext = nullptr; + name_info.objectType = p_object_type; + name_info.objectHandle = p_object_handle; + name_info.pObjectName = obj_data.get_data(); + functions.SetDebugUtilsObjectNameEXT(vk_device, &name_info); + } +} + +Error RenderingDeviceDriverVulkan::initialize(uint32_t 
p_device_index, uint32_t p_frame_count) { + // Frame count is not required for the Vulkan driver, so we just ignore it. + + context_device = context_driver->device_get(p_device_index); + physical_device = context_driver->physical_device_get(p_device_index); + vkGetPhysicalDeviceProperties(physical_device, &physical_device_properties); + + // Copy the queue family properties the context already retrieved. + uint32_t queue_family_count = context_driver->queue_family_get_count(p_device_index); + queue_family_properties.resize(queue_family_count); + for (uint32_t i = 0; i < queue_family_count; i++) { + queue_family_properties[i] = context_driver->queue_family_get(p_device_index, i); + } + + Error err = _initialize_device_extensions(); + ERR_FAIL_COND_V(err != OK, err); + + err = _check_device_features(); + ERR_FAIL_COND_V(err != OK, err); + + err = _check_device_capabilities(); + ERR_FAIL_COND_V(err != OK, err); + + LocalVector<VkDeviceQueueCreateInfo> queue_create_info; + err = _add_queue_create_info(queue_create_info); + ERR_FAIL_COND_V(err != OK, err); + + err = _initialize_device(queue_create_info); + ERR_FAIL_COND_V(err != OK, err); + + err = _initialize_allocator(); + ERR_FAIL_COND_V(err != OK, err); + + err = _initialize_pipeline_cache(); + ERR_FAIL_COND_V(err != OK, err); + + max_descriptor_sets_per_pool = GLOBAL_GET("rendering/rendering_device/vulkan/max_descriptors_per_pool"); + + return OK; +} + /****************/ /**** MEMORY ****/ /****************/ @@ -470,7 +1351,7 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_ASPECT_DEPTH_BIT, VK_IMAGE_ASPECT_ static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_ASPECT_STENCIL_BIT, VK_IMAGE_ASPECT_STENCIL_BIT)); VkSampleCountFlagBits RenderingDeviceDriverVulkan::_ensure_supported_sample_count(TextureSamples p_requested_sample_count) { - VkSampleCountFlags sample_count_flags = (context->get_device_limits().framebufferColorSampleCounts & limits.framebufferDepthSampleCounts); + VkSampleCountFlags sample_count_flags = 
(physical_device_properties.limits.framebufferColorSampleCounts & physical_device_properties.limits.framebufferDepthSampleCounts); if ((sample_count_flags & RD_TO_VK_SAMPLE_COUNT[p_requested_sample_count])) { // The requested sample count is supported. @@ -495,7 +1376,7 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create(const TextureFormat & if (p_format.shareable_formats.size()) { create_info.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; - if (context->is_device_extension_enabled(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME)) { + if (enabled_device_extension_names.has(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME)) { VkFormat *vk_allowed_formats = ALLOCA_ARRAY(VkFormat, p_format.shareable_formats.size()); for (int i = 0; i < p_format.shareable_formats.size(); i++) { vk_allowed_formats[i] = RD_TO_VK_FORMAT[p_format.shareable_formats[i]]; @@ -622,6 +1503,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create(const TextureFormat & tex_info->allocation.handle = allocation; vmaGetAllocationInfo(allocator, tex_info->allocation.handle, &tex_info->allocation.info); +#if PRINT_NATIVE_COMMANDS + print_line(vformat("vkCreateImageView: 0x%uX for 0x%uX", uint64_t(vk_image_view), uint64_t(vk_image))); +#endif + return TextureID(tex_info); } @@ -672,7 +1557,7 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared(TextureID p_or image_view_create_info.components.b = (VkComponentSwizzle)p_view.swizzle_b; image_view_create_info.components.a = (VkComponentSwizzle)p_view.swizzle_a; - if (context->is_device_extension_enabled(VK_KHR_MAINTENANCE_2_EXTENSION_NAME)) { + if (enabled_device_extension_names.has(VK_KHR_MAINTENANCE_2_EXTENSION_NAME)) { // May need to make VK_KHR_maintenance2 mandatory and thus has Vulkan 1.1 be our minimum supported version // if we require setting this information. Vulkan 1.0 may simply not care. 
if (image_view_create_info.format != owner_tex_info->vk_view_create_info.format) { @@ -684,7 +1569,7 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared(TextureID p_or // Certain features may not be available for the format of the view. { VkFormatProperties properties = {}; - vkGetPhysicalDeviceFormatProperties(context->get_physical_device(), RD_TO_VK_FORMAT[p_view.format], &properties); + vkGetPhysicalDeviceFormatProperties(physical_device, RD_TO_VK_FORMAT[p_view.format], &properties); const VkFormatFeatureFlags &supported_flags = owner_tex_info->vk_create_info.tiling == VK_IMAGE_TILING_LINEAR ? properties.linearTilingFeatures : properties.optimalTilingFeatures; if ((usage_info->usage & VK_IMAGE_USAGE_STORAGE_BIT) && !(supported_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { usage_info->usage &= ~VK_IMAGE_USAGE_STORAGE_BIT; @@ -710,6 +1595,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared(TextureID p_or tex_info->vk_view_create_info = image_view_create_info; tex_info->allocation = {}; +#if PRINT_NATIVE_COMMANDS + print_line(vformat("vkCreateImageView: 0x%uX for 0x%uX", uint64_t(new_vk_image_view), uint64_t(owner_tex_info->vk_view_create_info.image))); +#endif + return TextureID(tex_info); } @@ -733,6 +1622,9 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared_from_slice(Tex case TEXTURE_SLICE_2D_ARRAY: { image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; } break; + default: { + return TextureID(nullptr); + } } image_view_create_info.format = RD_TO_VK_FORMAT[p_view.format]; image_view_create_info.components.r = (VkComponentSwizzle)p_view.swizzle_r; @@ -756,6 +1648,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared_from_slice(Tex tex_info->vk_view_create_info = image_view_create_info; tex_info->allocation = {}; +#if PRINT_NATIVE_COMMANDS + print_line(vformat("vkCreateImageView: 0x%uX for 0x%uX (%d %d %d %d)", uint64_t(new_vk_image_view), 
uint64_t(owner_tex_info->vk_view_create_info.image), p_mipmap, p_mipmaps, p_layer, p_layers)); +#endif + return TextureID(tex_info); } @@ -805,8 +1701,11 @@ void RenderingDeviceDriverVulkan::texture_get_copyable_layout(TextureID p_textur h = MAX(1u, h >> 1); d = MAX(1u, d >> 1); } - r_layout->size = get_image_format_required_size(tex_info->rd_format, w, h, d, 1); - r_layout->row_pitch = r_layout->size / (h * d); + uint32_t bw = 0, bh = 0; + get_compressed_image_format_block_dimensions(tex_info->rd_format, bw, bh); + uint32_t sbw = 0, sbh = 0; + r_layout->size = get_image_format_required_size(tex_info->rd_format, w, h, d, 1, &sbw, &sbh); + r_layout->row_pitch = r_layout->size / ((sbh / bh) * d); r_layout->depth_pitch = r_layout->size / d; r_layout->layer_pitch = r_layout->size / tex_info->vk_create_info.arrayLayers; } @@ -844,7 +1743,7 @@ void RenderingDeviceDriverVulkan::texture_unmap(TextureID p_texture) { BitField<RDD::TextureUsageBits> RenderingDeviceDriverVulkan::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) { VkFormatProperties properties = {}; - vkGetPhysicalDeviceFormatProperties(context->get_physical_device(), RD_TO_VK_FORMAT[p_format], &properties); + vkGetPhysicalDeviceFormatProperties(physical_device, RD_TO_VK_FORMAT[p_format], &properties); const VkFormatFeatureFlags &flags = p_cpu_readable ? 
properties.linearTilingFeatures : properties.optimalTilingFeatures; @@ -905,7 +1804,7 @@ RDD::SamplerID RenderingDeviceDriverVulkan::sampler_create(const SamplerState &p sampler_create_info.addressModeV = (VkSamplerAddressMode)p_state.repeat_v; sampler_create_info.addressModeW = (VkSamplerAddressMode)p_state.repeat_w; sampler_create_info.mipLodBias = p_state.lod_bias; - sampler_create_info.anisotropyEnable = p_state.use_anisotropy && context->get_physical_device_features().samplerAnisotropy; + sampler_create_info.anisotropyEnable = p_state.use_anisotropy && (physical_device_features.samplerAnisotropy == VK_TRUE); sampler_create_info.maxAnisotropy = p_state.anisotropy_max; sampler_create_info.compareEnable = p_state.enable_compare; sampler_create_info.compareOp = (VkCompareOp)p_state.compare_op; @@ -927,12 +1826,12 @@ void RenderingDeviceDriverVulkan::sampler_free(SamplerID p_sampler) { bool RenderingDeviceDriverVulkan::sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_filter) { switch (p_filter) { - case RD::SAMPLER_FILTER_NEAREST: { + case SAMPLER_FILTER_NEAREST: { return true; } - case RD::SAMPLER_FILTER_LINEAR: { + case SAMPLER_FILTER_LINEAR: { VkFormatProperties properties = {}; - vkGetPhysicalDeviceFormatProperties(context->get_physical_device(), RD_TO_VK_FORMAT[p_format], &properties); + vkGetPhysicalDeviceFormatProperties(physical_device, RD_TO_VK_FORMAT[p_format], &properties); return (properties.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT); } } @@ -1065,6 +1964,23 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( vk_image_barriers[i].subresourceRange.layerCount = p_texture_barriers[i].subresources.layer_count; } +#if PRINT_NATIVE_COMMANDS + print_line(vformat("vkCmdPipelineBarrier MEMORY %d BUFFER %d TEXTURE %d", p_memory_barriers.size(), p_buffer_barriers.size(), p_texture_barriers.size())); + for (uint32_t i = 0; i < p_memory_barriers.size(); i++) { + print_line(vformat(" 
VkMemoryBarrier #%d src 0x%uX dst 0x%uX", i, vk_memory_barriers[i].srcAccessMask, vk_memory_barriers[i].dstAccessMask)); + } + + for (uint32_t i = 0; i < p_buffer_barriers.size(); i++) { + print_line(vformat(" VkBufferMemoryBarrier #%d src 0x%uX dst 0x%uX buffer 0x%ux", i, vk_buffer_barriers[i].srcAccessMask, vk_buffer_barriers[i].dstAccessMask, uint64_t(vk_buffer_barriers[i].buffer))); + } + + for (uint32_t i = 0; i < p_texture_barriers.size(); i++) { + print_line(vformat(" VkImageMemoryBarrier #%d src 0x%uX dst 0x%uX image 0x%ux old %d new %d (%d %d %d %d)", i, vk_image_barriers[i].srcAccessMask, vk_image_barriers[i].dstAccessMask, + uint64_t(vk_image_barriers[i].image), vk_image_barriers[i].oldLayout, vk_image_barriers[i].newLayout, vk_image_barriers[i].subresourceRange.baseMipLevel, vk_image_barriers[i].subresourceRange.levelCount, + vk_image_barriers[i].subresourceRange.baseArrayLayer, vk_image_barriers[i].subresourceRange.layerCount)); + } +#endif + vkCmdPipelineBarrier( (VkCommandBuffer)p_cmd_buffer.id, (VkPipelineStageFlags)p_src_stages, @@ -1075,76 +1991,365 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( p_texture_barriers.size(), vk_image_barriers); } -/*************************/ -/**** COMMAND BUFFERS ****/ -/*************************/ +/****************/ +/**** FENCES ****/ +/****************/ + +RDD::FenceID RenderingDeviceDriverVulkan::fence_create() { + VkFence vk_fence = VK_NULL_HANDLE; + VkFenceCreateInfo create_info = {}; + create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + VkResult err = vkCreateFence(vk_device, &create_info, nullptr, &vk_fence); + ERR_FAIL_COND_V(err != VK_SUCCESS, FenceID()); + + Fence *fence = memnew(Fence); + fence->vk_fence = vk_fence; + fence->queue_signaled_from = nullptr; + return FenceID(fence); +} + +Error RenderingDeviceDriverVulkan::fence_wait(FenceID p_fence) { + Fence *fence = (Fence *)(p_fence.id); + VkResult err = vkWaitForFences(vk_device, 1, &fence->vk_fence, VK_TRUE, UINT64_MAX); + 
ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED); + + err = vkResetFences(vk_device, 1, &fence->vk_fence); + ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED); + + if (fence->queue_signaled_from != nullptr) { + // Release all semaphores that the command queue associated to the fence waited on the last time it was submitted. + LocalVector<Pair<Fence *, uint32_t>> &pairs = fence->queue_signaled_from->image_semaphores_for_fences; + uint32_t i = 0; + while (i < pairs.size()) { + if (pairs[i].first == fence) { + _release_image_semaphore(fence->queue_signaled_from, pairs[i].second, true); + fence->queue_signaled_from->free_image_semaphores.push_back(pairs[i].second); + pairs.remove_at(i); + } else { + i++; + } + } + + fence->queue_signaled_from = nullptr; + } + + return OK; +} + +void RenderingDeviceDriverVulkan::fence_free(FenceID p_fence) { + Fence *fence = (Fence *)(p_fence.id); + vkDestroyFence(vk_device, fence->vk_fence, nullptr); + memdelete(fence); +} + +/********************/ +/**** SEMAPHORES ****/ +/********************/ + +RDD::SemaphoreID RenderingDeviceDriverVulkan::semaphore_create() { + VkSemaphore semaphore = VK_NULL_HANDLE; + VkSemaphoreCreateInfo create_info = {}; + create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + VkResult err = vkCreateSemaphore(vk_device, &create_info, nullptr, &semaphore); + ERR_FAIL_COND_V(err != VK_SUCCESS, SemaphoreID()); + + return SemaphoreID(semaphore); +} + +void RenderingDeviceDriverVulkan::semaphore_free(SemaphoreID p_semaphore) { + vkDestroySemaphore(vk_device, VkSemaphore(p_semaphore.id), nullptr); +} + +/******************/ +/**** COMMANDS ****/ +/******************/ + +// ----- QUEUE FAMILY ----- + +RDD::CommandQueueFamilyID RenderingDeviceDriverVulkan::command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface) { + // Pick the queue with the least amount of bits that can fulfill the requirements. 
+ VkQueueFlags picked_queue_flags = VK_QUEUE_FLAG_BITS_MAX_ENUM; + uint32_t picked_family_index = UINT_MAX; + for (uint32_t i = 0; i < queue_family_properties.size(); i++) { + if (queue_families[i].is_empty()) { + // Ignore empty queue families. + continue; + } + + if (p_surface != 0 && !context_driver->queue_family_supports_present(physical_device, i, p_surface)) { + // Present is not an actual bit but something that must be queried manually. + continue; + } + + // Preferring a queue with less bits will get us closer to getting a queue that performs better for our requirements. + // For example, dedicated compute and transfer queues are usually indicated as such. + const VkQueueFlags option_queue_flags = queue_family_properties[i].queueFlags; + const bool includes_all_bits = (option_queue_flags & p_cmd_queue_family_bits) == p_cmd_queue_family_bits; + const bool prefer_less_bits = option_queue_flags < picked_queue_flags; + if (includes_all_bits && prefer_less_bits) { + picked_family_index = i; + picked_queue_flags = option_queue_flags; + } + } + + ERR_FAIL_COND_V_MSG(picked_family_index >= queue_family_properties.size(), CommandQueueFamilyID(), "A queue family with the requested bits could not be found."); + + // Since 0 is a valid index and we use 0 as the error case, we make the index start from 1 instead. + return CommandQueueFamilyID(picked_family_index + 1); +} + +// ----- QUEUE ----- + +RDD::CommandQueueID RenderingDeviceDriverVulkan::command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue) { + DEV_ASSERT(p_cmd_queue_family.id != 0); + + // Make a virtual queue on top of a real queue. Use the queue from the family with the least amount of virtual queues created. 
+ uint32_t family_index = p_cmd_queue_family.id - 1; + TightLocalVector<Queue> &queue_family = queue_families[family_index]; + uint32_t picked_queue_index = UINT_MAX; + uint32_t picked_virtual_count = UINT_MAX; + for (uint32_t i = 0; i < queue_family.size(); i++) { + if (queue_family[i].virtual_count < picked_virtual_count) { + picked_queue_index = i; + picked_virtual_count = queue_family[i].virtual_count; + } + } + + ERR_FAIL_COND_V_MSG(picked_queue_index >= queue_family.size(), CommandQueueID(), "A queue in the picked family could not be found."); + + // Create the virtual queue. + CommandQueue *command_queue = memnew(CommandQueue); + command_queue->queue_family = family_index; + command_queue->queue_index = picked_queue_index; + queue_family[picked_queue_index].virtual_count++; + + // If is was identified as the main queue and a hook is active, indicate it as such to the hook. + if (p_identify_as_main_queue && (VulkanHooks::get_singleton() != nullptr)) { + VulkanHooks::get_singleton()->set_direct_queue_family_and_index(family_index, picked_queue_index); + } + + return CommandQueueID(command_queue); +} + +Error RenderingDeviceDriverVulkan::command_queue_execute(CommandQueueID p_cmd_queue, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) { + DEV_ASSERT(p_cmd_queue.id != 0); + + CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id); + Queue &device_queue = queue_families[command_queue->queue_family][command_queue->queue_index]; + Fence *fence = (Fence *)(p_signal_fence.id); + VkFence vk_fence = (fence != nullptr) ? 
fence->vk_fence : VK_NULL_HANDLE; + + thread_local LocalVector<VkCommandBuffer> command_buffers; + thread_local LocalVector<VkSemaphore> wait_semaphores; + thread_local LocalVector<VkSemaphore> signal_semaphores; + thread_local LocalVector<VkPipelineStageFlags> wait_semaphores_stages; + command_buffers.clear(); + wait_semaphores.clear(); + signal_semaphores.clear(); + wait_semaphores_stages.clear(); + + if (!command_queue->pending_semaphores_for_execute.is_empty()) { + for (uint32_t i = 0; i < command_queue->pending_semaphores_for_execute.size(); i++) { + VkSemaphore wait_semaphore = command_queue->image_semaphores[command_queue->pending_semaphores_for_execute[i]]; + wait_semaphores.push_back(wait_semaphore); + wait_semaphores_stages.push_back(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); + } + + command_queue->pending_semaphores_for_execute.clear(); + } + + for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) { + command_buffers.push_back(VkCommandBuffer(p_cmd_buffers[i].id)); + } + + for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) { + // FIXME: Allow specifying the stage mask in more detail. 
+ wait_semaphores.push_back(VkSemaphore(p_wait_semaphores[i].id)); + wait_semaphores_stages.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); + } + + for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) { + signal_semaphores.push_back(VkSemaphore(p_signal_semaphores[i].id)); + } + + VkSubmitInfo submit_info = {}; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.waitSemaphoreCount = wait_semaphores.size(); + submit_info.pWaitSemaphores = wait_semaphores.ptr(); + submit_info.pWaitDstStageMask = wait_semaphores_stages.ptr(); + submit_info.commandBufferCount = command_buffers.size(); + submit_info.pCommandBuffers = command_buffers.ptr(); + submit_info.signalSemaphoreCount = signal_semaphores.size(); + submit_info.pSignalSemaphores = signal_semaphores.ptr(); + + device_queue.submit_mutex.lock(); + VkResult err = vkQueueSubmit(device_queue.queue, 1, &submit_info, vk_fence); + device_queue.submit_mutex.unlock(); + ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED); + + if (fence != nullptr && !command_queue->pending_semaphores_for_fence.is_empty()) { + fence->queue_signaled_from = command_queue; + + // Indicate to the fence that it should release the semaphores that were waited on this submission the next time the fence is waited on. 
+ for (uint32_t i = 0; i < command_queue->pending_semaphores_for_fence.size(); i++) { + command_queue->image_semaphores_for_fences.push_back({ fence, command_queue->pending_semaphores_for_fence[i] }); + } + + command_queue->pending_semaphores_for_fence.clear(); + } + + return OK; +} + +Error RenderingDeviceDriverVulkan::command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) { + DEV_ASSERT(p_cmd_queue.id != 0); + + CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id); + Queue &device_queue = queue_families[command_queue->queue_family][command_queue->queue_index]; + + thread_local LocalVector<VkSwapchainKHR> swapchains; + thread_local LocalVector<uint32_t> image_indices; + thread_local LocalVector<VkSemaphore> wait_semaphores; + thread_local LocalVector<VkResult> results; + swapchains.clear(); + image_indices.clear(); + for (uint32_t i = 0; i < p_swap_chains.size(); i++) { + SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id); + swapchains.push_back(swap_chain->vk_swapchain); + DEV_ASSERT(swap_chain->image_index < swap_chain->images.size()); + image_indices.push_back(swap_chain->image_index); + } + + wait_semaphores.clear(); + for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) { + wait_semaphores.push_back(VkSemaphore(p_wait_semaphores[i].id)); + } + + results.resize(swapchains.size()); + + VkPresentInfoKHR present_info = {}; + present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + present_info.waitSemaphoreCount = wait_semaphores.size(); + present_info.pWaitSemaphores = wait_semaphores.ptr(); + present_info.swapchainCount = swapchains.size(); + present_info.pSwapchains = swapchains.ptr(); + present_info.pImageIndices = image_indices.ptr(); + present_info.pResults = results.ptr(); + device_queue.submit_mutex.lock(); + VkResult err = device_functions.QueuePresentKHR(device_queue.queue, &present_info); + device_queue.submit_mutex.unlock(); + + // Set the index to an 
invalid value. If any of the swap chains returned out of date, indicate it should be resized the next time it's acquired. + bool any_result_is_out_of_date = false; + for (uint32_t i = 0; i < p_swap_chains.size(); i++) { + SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id); + swap_chain->image_index = UINT_MAX; + if (results[i] == VK_ERROR_OUT_OF_DATE_KHR) { + context_driver->surface_set_needs_resize(swap_chain->surface, true); + any_result_is_out_of_date = true; + } + } + + if (any_result_is_out_of_date || err == VK_ERROR_OUT_OF_DATE_KHR) { + // It is possible for presentation to fail with out of date while acquire might've succeeded previously. This case + // will be considered a silent failure as it can be triggered easily by resizing a window in the OS natively. + return FAILED; + } + + // Handling VK_SUBOPTIMAL_KHR the same as VK_SUCCESS is completely intentional. + // + // Godot does not currently support native rotation in Android when creating the swap chain. It intentionally uses + // VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR instead of the current transform bits available in the surface capabilities. + // Choosing the transform that leads to optimal presentation leads to distortion that makes the application unusable, + // as the rotation of all the content is not handled at the moment. + // + // VK_SUBOPTIMAL_KHR is accepted as a successful case even if it's not the most efficient solution to work around this + // problem. This behavior should not be changed unless the swap chain recreation uses the current transform bits, as + // it'll lead to very low performance in Android by entering an endless loop where it'll always resize the swap chain + // every frame. 
+ + ERR_FAIL_COND_V(err != VK_SUCCESS && err != VK_SUBOPTIMAL_KHR, FAILED); + + return OK; +} + +void RenderingDeviceDriverVulkan::command_queue_free(CommandQueueID p_cmd_queue) { + DEV_ASSERT(p_cmd_queue); + + CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id); + + // Erase all the semaphores used for image acquisition. + for (VkSemaphore semaphore : command_queue->image_semaphores) { + vkDestroySemaphore(vk_device, semaphore, nullptr); + } + + // Retrieve the queue family corresponding to the virtual queue. + DEV_ASSERT(command_queue->queue_family < queue_families.size()); + TightLocalVector<Queue> &queue_family = queue_families[command_queue->queue_family]; + + // Decrease the virtual queue count. + DEV_ASSERT(command_queue->queue_index < queue_family.size()); + DEV_ASSERT(queue_family[command_queue->queue_index].virtual_count > 0); + queue_family[command_queue->queue_index].virtual_count--; + + // Destroy the virtual queue structure. + memdelete(command_queue); +} // ----- POOL ----- -RDD::CommandPoolID RenderingDeviceDriverVulkan::command_pool_create(CommandBufferType p_cmd_buffer_type) { +RDD::CommandPoolID RenderingDeviceDriverVulkan::command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) { + DEV_ASSERT(p_cmd_queue_family.id != 0); + + uint32_t family_index = p_cmd_queue_family.id - 1; VkCommandPoolCreateInfo cmd_pool_info = {}; cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - cmd_pool_info.queueFamilyIndex = context->get_graphics_queue_family_index(); + cmd_pool_info.queueFamilyIndex = family_index; cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - VkCommandPool vk_cmd_pool = VK_NULL_HANDLE; - VkResult res = vkCreateCommandPool(vk_device, &cmd_pool_info, nullptr, &vk_cmd_pool); + VkCommandPool vk_command_pool = VK_NULL_HANDLE; + VkResult res = vkCreateCommandPool(vk_device, &cmd_pool_info, nullptr, &vk_command_pool); ERR_FAIL_COND_V_MSG(res, CommandPoolID(), 
"vkCreateCommandPool failed with error " + itos(res) + "."); -#ifdef DEBUG_ENABLED - if (p_cmd_buffer_type == COMMAND_BUFFER_TYPE_SECONDARY) { - secondary_cmd_pools.insert(CommandPoolID(vk_cmd_pool)); - } -#endif - - return CommandPoolID(vk_cmd_pool); + CommandPool *command_pool = memnew(CommandPool); + command_pool->vk_command_pool = vk_command_pool; + command_pool->buffer_type = p_cmd_buffer_type; + return CommandPoolID(command_pool); } void RenderingDeviceDriverVulkan::command_pool_free(CommandPoolID p_cmd_pool) { - vkDestroyCommandPool(vk_device, (VkCommandPool)p_cmd_pool.id, nullptr); + DEV_ASSERT(p_cmd_pool); -#ifdef DEBUG_ENABLED - secondary_cmd_pools.erase(p_cmd_pool); -#endif + CommandPool *command_pool = (CommandPool *)(p_cmd_pool.id); + vkDestroyCommandPool(vk_device, command_pool->vk_command_pool, nullptr); + memdelete(command_pool); } // ----- BUFFER ----- -RDD::CommandBufferID RenderingDeviceDriverVulkan::command_buffer_create(CommandBufferType p_cmd_buffer_type, CommandPoolID p_cmd_pool) { -#ifdef DEBUG_ENABLED - if (p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY) { - ERR_FAIL_COND_V(secondary_cmd_pools.has(p_cmd_pool), CommandBufferID()); - } else { - ERR_FAIL_COND_V(!secondary_cmd_pools.has(p_cmd_pool), CommandBufferID()); - } -#endif +RDD::CommandBufferID RenderingDeviceDriverVulkan::command_buffer_create(CommandPoolID p_cmd_pool) { + DEV_ASSERT(p_cmd_pool); + const CommandPool *command_pool = (const CommandPool *)(p_cmd_pool.id); VkCommandBufferAllocateInfo cmd_buf_info = {}; cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - cmd_buf_info.commandPool = (VkCommandPool)p_cmd_pool.id; - cmd_buf_info.level = p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY ? 
VK_COMMAND_BUFFER_LEVEL_PRIMARY : VK_COMMAND_BUFFER_LEVEL_SECONDARY; + cmd_buf_info.commandPool = command_pool->vk_command_pool; cmd_buf_info.commandBufferCount = 1; + if (command_pool->buffer_type == COMMAND_BUFFER_TYPE_SECONDARY) { + cmd_buf_info.level = VK_COMMAND_BUFFER_LEVEL_SECONDARY; + } else { + cmd_buf_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + } + VkCommandBuffer vk_cmd_buffer = VK_NULL_HANDLE; VkResult err = vkAllocateCommandBuffers(vk_device, &cmd_buf_info, &vk_cmd_buffer); ERR_FAIL_COND_V_MSG(err, CommandBufferID(), "vkAllocateCommandBuffers failed with error " + itos(err) + "."); - CommandBufferID cmd_buffer_id = CommandBufferID(vk_cmd_buffer); -#ifdef DEBUG_ENABLED - // Erase first because Vulkan may reuse a handle. - secondary_cmd_buffers.erase(cmd_buffer_id); - if (p_cmd_buffer_type == COMMAND_BUFFER_TYPE_SECONDARY) { - secondary_cmd_buffers.insert(cmd_buffer_id); - } -#endif - return cmd_buffer_id; + return CommandBufferID(vk_cmd_buffer); } bool RenderingDeviceDriverVulkan::command_buffer_begin(CommandBufferID p_cmd_buffer) { -#ifdef DEBUG_ENABLED - ERR_FAIL_COND_V(secondary_cmd_buffers.has(p_cmd_buffer), false); -#endif - // Reset is implicit (VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT). VkCommandBufferBeginInfo cmd_buf_begin_info = {}; @@ -1158,10 +2363,6 @@ bool RenderingDeviceDriverVulkan::command_buffer_begin(CommandBufferID p_cmd_buf } bool RenderingDeviceDriverVulkan::command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) { -#ifdef DEBUG_ENABLED - ERR_FAIL_COND_V(!secondary_cmd_buffers.has(p_cmd_buffer), false); -#endif - // Reset is implicit (VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT). 
VkCommandBufferInheritanceInfo inheritance_info = {}; @@ -1172,7 +2373,7 @@ bool RenderingDeviceDriverVulkan::command_buffer_begin_secondary(CommandBufferID VkCommandBufferBeginInfo cmd_buf_begin_info = {}; cmd_buf_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - cmd_buf_begin_info.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT | VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; + cmd_buf_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT | VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; cmd_buf_begin_info.pInheritanceInfo = &inheritance_info; VkResult err = vkBeginCommandBuffer((VkCommandBuffer)p_cmd_buffer.id, &cmd_buf_begin_info); @@ -1186,14 +2387,407 @@ void RenderingDeviceDriverVulkan::command_buffer_end(CommandBufferID p_cmd_buffe } void RenderingDeviceDriverVulkan::command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) { -#ifdef DEBUG_ENABLED - ERR_FAIL_COND(secondary_cmd_buffers.has(p_cmd_buffer)); - for (uint32_t i = 0; i < p_secondary_cmd_buffers.size(); i++) { - ERR_FAIL_COND(!secondary_cmd_buffers.has(p_secondary_cmd_buffers[i])); + vkCmdExecuteCommands((VkCommandBuffer)p_cmd_buffer.id, p_secondary_cmd_buffers.size(), (const VkCommandBuffer *)p_secondary_cmd_buffers.ptr()); +} + +/********************/ +/**** SWAP CHAIN ****/ +/********************/ + +void RenderingDeviceDriverVulkan::_swap_chain_release(SwapChain *swap_chain) { + // Destroy views and framebuffers associated to the swapchain's images. 
+ for (FramebufferID framebuffer : swap_chain->framebuffers) { + framebuffer_free(framebuffer); } -#endif - vkCmdExecuteCommands((VkCommandBuffer)p_cmd_buffer.id, p_secondary_cmd_buffers.size(), (const VkCommandBuffer *)p_secondary_cmd_buffers.ptr()); + for (VkImageView view : swap_chain->image_views) { + vkDestroyImageView(vk_device, view, nullptr); + } + + swap_chain->image_index = UINT_MAX; + swap_chain->images.clear(); + swap_chain->image_views.clear(); + swap_chain->framebuffers.clear(); + + if (swap_chain->vk_swapchain != VK_NULL_HANDLE) { + device_functions.DestroySwapchainKHR(vk_device, swap_chain->vk_swapchain, nullptr); + swap_chain->vk_swapchain = VK_NULL_HANDLE; + } + + for (uint32_t i = 0; i < swap_chain->command_queues_acquired.size(); i++) { + _recreate_image_semaphore(swap_chain->command_queues_acquired[i], swap_chain->command_queues_acquired_semaphores[i], false); + } + + swap_chain->command_queues_acquired.clear(); + swap_chain->command_queues_acquired_semaphores.clear(); +} + +RenderingDeviceDriver::SwapChainID RenderingDeviceDriverVulkan::swap_chain_create(RenderingContextDriver::SurfaceID p_surface) { + DEV_ASSERT(p_surface != 0); + + RenderingContextDriverVulkan::Surface *surface = (RenderingContextDriverVulkan::Surface *)(p_surface); + const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); + + // Retrieve the formats supported by the surface. 
+ uint32_t format_count = 0; + VkResult err = functions.GetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface->vk_surface, &format_count, nullptr); + ERR_FAIL_COND_V(err != VK_SUCCESS, SwapChainID()); + + TightLocalVector<VkSurfaceFormatKHR> formats; + formats.resize(format_count); + err = functions.GetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface->vk_surface, &format_count, formats.ptr()); + ERR_FAIL_COND_V(err != VK_SUCCESS, SwapChainID()); + + VkFormat format = VK_FORMAT_UNDEFINED; + VkColorSpaceKHR color_space = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; + if (format_count == 1 && formats[0].format == VK_FORMAT_UNDEFINED) { + // If the format list includes just one entry of VK_FORMAT_UNDEFINED, the surface has no preferred format. + format = VK_FORMAT_B8G8R8A8_UNORM; + color_space = formats[0].colorSpace; + } else if (format_count > 0) { + // Use one of the supported formats, prefer B8G8R8A8_UNORM. + const VkFormat preferred_format = VK_FORMAT_B8G8R8A8_UNORM; + const VkFormat second_format = VK_FORMAT_R8G8B8A8_UNORM; + for (uint32_t i = 0; i < format_count; i++) { + if (formats[i].format == preferred_format || formats[i].format == second_format) { + format = formats[i].format; + if (formats[i].format == preferred_format) { + // This is the preferred format, stop searching. + break; + } + } + } + } + + // No formats are supported. + ERR_FAIL_COND_V_MSG(format == VK_FORMAT_UNDEFINED, SwapChainID(), "Surface did not return any valid formats."); + + // Create the render pass for the chosen format. 
+ VkAttachmentDescription2KHR attachment = {}; + attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR; + attachment.format = format; + attachment.samples = VK_SAMPLE_COUNT_1_BIT; + attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + + VkAttachmentReference2KHR color_reference = {}; + color_reference.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR; + color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkSubpassDescription2KHR subpass = {}; + subpass.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &color_reference; + + VkRenderPassCreateInfo2KHR pass_info = {}; + pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR; + pass_info.attachmentCount = 1; + pass_info.pAttachments = &attachment; + pass_info.subpassCount = 1; + pass_info.pSubpasses = &subpass; + + VkRenderPass render_pass = VK_NULL_HANDLE; + err = _create_render_pass(vk_device, &pass_info, nullptr, &render_pass); + ERR_FAIL_COND_V(err != VK_SUCCESS, SwapChainID()); + + SwapChain *swap_chain = memnew(SwapChain); + swap_chain->surface = p_surface; + swap_chain->format = format; + swap_chain->color_space = color_space; + swap_chain->render_pass = RenderPassID(render_pass); + return SwapChainID(swap_chain); +} + +Error RenderingDeviceDriverVulkan::swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) { + DEV_ASSERT(p_cmd_queue.id != 0); + DEV_ASSERT(p_swap_chain.id != 0); + + CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id); + SwapChain *swap_chain = (SwapChain 
*)(p_swap_chain.id); + + // Release all current contents of the swap chain. + _swap_chain_release(swap_chain); + + // Validate if the command queue being used supports creating the swap chain for this surface. + const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); + if (!context_driver->queue_family_supports_present(physical_device, command_queue->queue_family, swap_chain->surface)) { + ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Surface is not supported by device. Did the GPU go offline? Was the window created on another monitor? Check" + "previous errors & try launching with --gpu-validation."); + } + + // Retrieve the surface's capabilities. + RenderingContextDriverVulkan::Surface *surface = (RenderingContextDriverVulkan::Surface *)(swap_chain->surface); + VkSurfaceCapabilitiesKHR surface_capabilities = {}; + VkResult err = functions.GetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface->vk_surface, &surface_capabilities); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + + VkExtent2D extent; + if (surface_capabilities.currentExtent.width == 0xFFFFFFFF) { + // The current extent is currently undefined, so the current surface width and height will be clamped to the surface's capabilities. + extent.width = CLAMP(surface->width, surface_capabilities.minImageExtent.width, surface_capabilities.maxImageExtent.width); + extent.height = CLAMP(surface->height, surface_capabilities.minImageExtent.height, surface_capabilities.maxImageExtent.height); + } else { + // Grab the dimensions from the current extent. + extent = surface_capabilities.currentExtent; + surface->width = extent.width; + surface->height = extent.height; + } + + if (surface->width == 0 || surface->height == 0) { + // The surface doesn't have valid dimensions, so we can't create a swap chain. + return ERR_SKIP; + } + + // Find what present modes are supported. 
+ TightLocalVector<VkPresentModeKHR> present_modes; + uint32_t present_modes_count = 0; + err = functions.GetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface->vk_surface, &present_modes_count, nullptr); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + + present_modes.resize(present_modes_count); + err = functions.GetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface->vk_surface, &present_modes_count, present_modes.ptr()); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + + // Choose the present mode based on the display server setting. + VkPresentModeKHR present_mode = VkPresentModeKHR::VK_PRESENT_MODE_FIFO_KHR; + String present_mode_name = "Enabled"; + switch (surface->vsync_mode) { + case DisplayServer::VSYNC_MAILBOX: + present_mode = VK_PRESENT_MODE_MAILBOX_KHR; + present_mode_name = "Mailbox"; + break; + case DisplayServer::VSYNC_ADAPTIVE: + present_mode = VK_PRESENT_MODE_FIFO_RELAXED_KHR; + present_mode_name = "Adaptive"; + break; + case DisplayServer::VSYNC_ENABLED: + present_mode = VK_PRESENT_MODE_FIFO_KHR; + present_mode_name = "Enabled"; + break; + case DisplayServer::VSYNC_DISABLED: + present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR; + present_mode_name = "Disabled"; + break; + } + + bool present_mode_available = present_modes.find(present_mode) >= 0; + if (present_mode_available) { + print_verbose("Using present mode: " + present_mode_name); + } else { + // Present mode is not available, fall back to FIFO which is guaranteed to be supported. + WARN_PRINT(vformat("The requested V-Sync mode %s is not available. Falling back to V-Sync mode Enabled.", present_mode_name)); + surface->vsync_mode = DisplayServer::VSYNC_ENABLED; + present_mode = VkPresentModeKHR::VK_PRESENT_MODE_FIFO_KHR; + } + + // Clamp the desired image count to the surface's capabilities. 
+ uint32_t desired_swapchain_images = MAX(p_desired_framebuffer_count, surface_capabilities.minImageCount); + if (surface_capabilities.maxImageCount > 0) { + // Only clamp to the max image count if it's defined. A max image count of 0 means there's no upper limit to the amount of images. + desired_swapchain_images = MIN(desired_swapchain_images, surface_capabilities.maxImageCount); + } + + // Prefer identity transform if it's supported, use the current transform otherwise. + // This behavior is intended as Godot does not supported native rotation in platforms that use these bits. + // Refer to the comment in command_queue_present() for more details. + VkSurfaceTransformFlagBitsKHR surface_transform_bits; + if (surface_capabilities.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR) { + surface_transform_bits = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + } else { + surface_transform_bits = surface_capabilities.currentTransform; + } + + VkCompositeAlphaFlagBitsKHR composite_alpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + if (OS::get_singleton()->is_layered_allowed() || !(surface_capabilities.supportedCompositeAlpha & composite_alpha)) { + // Find a supported composite alpha mode - one of these is guaranteed to be set. 
+ VkCompositeAlphaFlagBitsKHR composite_alpha_flags[4] = { + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR, + VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR, + VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR, + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR + }; + + for (uint32_t i = 0; i < ARRAY_SIZE(composite_alpha_flags); i++) { + if (surface_capabilities.supportedCompositeAlpha & composite_alpha_flags[i]) { + composite_alpha = composite_alpha_flags[i]; + break; + } + } + } + + VkSwapchainCreateInfoKHR swap_create_info = {}; + swap_create_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; + swap_create_info.surface = surface->vk_surface; + swap_create_info.minImageCount = desired_swapchain_images; + swap_create_info.imageFormat = swap_chain->format; + swap_create_info.imageColorSpace = swap_chain->color_space; + swap_create_info.imageExtent = extent; + swap_create_info.imageArrayLayers = 1; + swap_create_info.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + swap_create_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; + swap_create_info.preTransform = surface_transform_bits; + swap_create_info.compositeAlpha = composite_alpha; + swap_create_info.presentMode = present_mode; + swap_create_info.clipped = true; + err = device_functions.CreateSwapchainKHR(vk_device, &swap_create_info, nullptr, &swap_chain->vk_swapchain); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + + uint32_t image_count = 0; + err = device_functions.GetSwapchainImagesKHR(vk_device, swap_chain->vk_swapchain, &image_count, nullptr); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + + swap_chain->images.resize(image_count); + err = device_functions.GetSwapchainImagesKHR(vk_device, swap_chain->vk_swapchain, &image_count, swap_chain->images.ptr()); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + + VkImageViewCreateInfo view_create_info = {}; + view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_create_info.format = 
swap_chain->format; + view_create_info.components.r = VK_COMPONENT_SWIZZLE_R; + view_create_info.components.g = VK_COMPONENT_SWIZZLE_G; + view_create_info.components.b = VK_COMPONENT_SWIZZLE_B; + view_create_info.components.a = VK_COMPONENT_SWIZZLE_A; + view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + view_create_info.subresourceRange.levelCount = 1; + view_create_info.subresourceRange.layerCount = 1; + + swap_chain->image_views.reserve(image_count); + + VkImageView image_view; + for (uint32_t i = 0; i < image_count; i++) { + view_create_info.image = swap_chain->images[i]; + err = vkCreateImageView(vk_device, &view_create_info, nullptr, &image_view); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + + swap_chain->image_views.push_back(image_view); + } + + swap_chain->framebuffers.reserve(image_count); + + VkFramebufferCreateInfo fb_create_info = {}; + fb_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + fb_create_info.renderPass = VkRenderPass(swap_chain->render_pass.id); + fb_create_info.attachmentCount = 1; + fb_create_info.width = surface->width; + fb_create_info.height = surface->height; + fb_create_info.layers = 1; + + VkFramebuffer framebuffer; + for (uint32_t i = 0; i < image_count; i++) { + fb_create_info.pAttachments = &swap_chain->image_views[i]; + err = vkCreateFramebuffer(vk_device, &fb_create_info, nullptr, &framebuffer); + ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + + swap_chain->framebuffers.push_back(RDD::FramebufferID(framebuffer)); + } + + // Once everything's been created correctly, indicate the surface no longer needs to be resized. 
+ context_driver->surface_set_needs_resize(swap_chain->surface, false); + + return OK; +} + +RDD::FramebufferID RenderingDeviceDriverVulkan::swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) { + DEV_ASSERT(p_cmd_queue); + DEV_ASSERT(p_swap_chain); + + CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id); + SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id); + if ((swap_chain->vk_swapchain == VK_NULL_HANDLE) || context_driver->surface_get_needs_resize(swap_chain->surface)) { + // The surface does not have a valid swap chain or it indicates it requires a resize. + r_resize_required = true; + return FramebufferID(); + } + + VkResult err; + VkSemaphore semaphore = VK_NULL_HANDLE; + uint32_t semaphore_index = 0; + if (command_queue->free_image_semaphores.is_empty()) { + // Add a new semaphore if none are free. + VkSemaphoreCreateInfo create_info = {}; + create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + err = vkCreateSemaphore(vk_device, &create_info, nullptr, &semaphore); + ERR_FAIL_COND_V(err != VK_SUCCESS, FramebufferID()); + + semaphore_index = command_queue->image_semaphores.size(); + command_queue->image_semaphores.push_back(semaphore); + command_queue->image_semaphores_swap_chains.push_back(swap_chain); + } else { + // Pick a free semaphore. + uint32_t free_index = command_queue->free_image_semaphores.size() - 1; + semaphore_index = command_queue->free_image_semaphores[free_index]; + command_queue->image_semaphores_swap_chains[semaphore_index] = swap_chain; + command_queue->free_image_semaphores.remove_at(free_index); + semaphore = command_queue->image_semaphores[semaphore_index]; + } + + // Store in the swap chain the acquired semaphore. 
+ swap_chain->command_queues_acquired.push_back(command_queue); + swap_chain->command_queues_acquired_semaphores.push_back(semaphore_index); + + err = device_functions.AcquireNextImageKHR(vk_device, swap_chain->vk_swapchain, UINT64_MAX, semaphore, VK_NULL_HANDLE, &swap_chain->image_index); + if (err == VK_ERROR_OUT_OF_DATE_KHR) { + // Out of date leaves the semaphore in a signaled state that will never finish, so it's necessary to recreate it. + bool semaphore_recreated = _recreate_image_semaphore(command_queue, semaphore_index, true); + ERR_FAIL_COND_V(!semaphore_recreated, FramebufferID()); + + // Swap chain is out of date and must be recreated. + r_resize_required = true; + return FramebufferID(); + } else if (err != VK_SUCCESS && err != VK_SUBOPTIMAL_KHR) { + // Swap chain failed to present but the reason is unknown. + // Refer to the comment in command_queue_present() as to why VK_SUBOPTIMAL_KHR is handled the same as VK_SUCCESS. + return FramebufferID(); + } + + // Indicate the command queue should wait on these semaphores on the next submission and that it should + // indicate they're free again on the next fence. + command_queue->pending_semaphores_for_execute.push_back(semaphore_index); + command_queue->pending_semaphores_for_fence.push_back(semaphore_index); + + // Return the corresponding framebuffer to the new current image. 
+ return swap_chain->framebuffers[swap_chain->image_index]; +} + +RDD::RenderPassID RenderingDeviceDriverVulkan::swap_chain_get_render_pass(SwapChainID p_swap_chain) { + DEV_ASSERT(p_swap_chain.id != 0); + + SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id); + return swap_chain->render_pass; +} + +RDD::DataFormat RenderingDeviceDriverVulkan::swap_chain_get_format(SwapChainID p_swap_chain) { + DEV_ASSERT(p_swap_chain.id != 0); + + SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id); + switch (swap_chain->format) { + case VK_FORMAT_B8G8R8A8_UNORM: + return DATA_FORMAT_B8G8R8A8_UNORM; + case VK_FORMAT_R8G8B8A8_UNORM: + return DATA_FORMAT_R8G8B8A8_UNORM; + default: + DEV_ASSERT(false && "Unknown swap chain format."); + return DATA_FORMAT_MAX; + } +} + +void RenderingDeviceDriverVulkan::swap_chain_free(SwapChainID p_swap_chain) { + DEV_ASSERT(p_swap_chain.id != 0); + + SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id); + _swap_chain_release(swap_chain); + + if (swap_chain->render_pass.id != 0) { + vkDestroyRenderPass(vk_device, VkRenderPass(swap_chain->render_pass.id), nullptr); + } + + memdelete(swap_chain); } /*********************/ @@ -1219,6 +2813,14 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID VkResult err = vkCreateFramebuffer(vk_device, &framebuffer_create_info, nullptr, &vk_framebuffer); ERR_FAIL_COND_V_MSG(err, FramebufferID(), "vkCreateFramebuffer failed with error " + itos(err) + "."); +#if PRINT_NATIVE_COMMANDS + print_line(vformat("vkCreateFramebuffer 0x%uX with %d attachments", uint64_t(vk_framebuffer), p_attachments.size())); + for (uint32_t i = 0; i < p_attachments.size(); i++) { + const TextureInfo *attachment_info = (const TextureInfo *)p_attachments[i].id; + print_line(vformat(" Attachment #%d: IMAGE 0x%uX VIEW 0x%uX", i, uint64_t(attachment_info->vk_view_create_info.image), uint64_t(attachment_info->vk_view))); + } +#endif + return FramebufferID(vk_framebuffer); } @@ -1248,8 +2850,8 @@ 
Vector<uint8_t> RenderingDeviceDriverVulkan::shader_compile_binary_from_spirv(Ve return Vector<uint8_t>(); } - ERR_FAIL_COND_V_MSG((uint32_t)shader_refl.uniform_sets.size() > limits.maxBoundDescriptorSets, Vector<uint8_t>(), - "Number of uniform sets is larger than what is supported by the hardware (" + itos(limits.maxBoundDescriptorSets) + ")."); + ERR_FAIL_COND_V_MSG((uint32_t)shader_refl.uniform_sets.size() > physical_device_properties.limits.maxBoundDescriptorSets, Vector<uint8_t>(), + "Number of uniform sets is larger than what is supported by the hardware (" + itos(physical_device_properties.limits.maxBoundDescriptorSets) + ")."); // Collect reflection data into binary data. ShaderBinary::Data binary_data; @@ -1557,11 +3159,9 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec read_offset += sizeof(ShaderBinary::SpecializationConstant); } - struct Stage { - ShaderStage type = SHADER_STAGE_MAX; - Vector<uint8_t> spirv; - }; - Vector<Stage> stages; + Vector<Vector<uint8_t>> stages_spirv; + stages_spirv.resize(binary_data.stage_count); + r_shader_desc.stages.resize(binary_data.stage_count); for (uint32_t i = 0; i < binary_data.stage_count; i++) { ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) * 3 >= binsize, ShaderID()); @@ -1587,17 +3187,14 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec src_smolv = binptr + read_offset; } - Vector<uint8_t> spirv; + Vector<uint8_t> &spirv = stages_spirv.ptrw()[i]; uint32_t spirv_size = smolv::GetDecodedBufferSize(src_smolv, smolv_size); spirv.resize(spirv_size); if (!smolv::Decode(src_smolv, smolv_size, spirv.ptrw(), spirv_size)) { ERR_FAIL_V_MSG(ShaderID(), "Malformed smolv input uncompressing shader stage:" + String(SHADER_STAGE_NAMES[stage])); } - Stage stage_entry; - stage_entry.type = ShaderStage(stage); - stage_entry.spirv = spirv; - stages.push_back(stage_entry); + r_shader_desc.stages.set(i, ShaderStage(stage)); if (buf_size % 4 != 0) { buf_size += 4 - 
(buf_size % 4); @@ -1614,22 +3211,22 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec String error_text; - for (int i = 0; i < stages.size(); i++) { + for (int i = 0; i < r_shader_desc.stages.size(); i++) { VkShaderModuleCreateInfo shader_module_create_info = {}; shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - shader_module_create_info.codeSize = stages[i].spirv.size(); - shader_module_create_info.pCode = (const uint32_t *)stages[i].spirv.ptr(); + shader_module_create_info.codeSize = stages_spirv[i].size(); + shader_module_create_info.pCode = (const uint32_t *)stages_spirv[i].ptr(); VkShaderModule vk_module = VK_NULL_HANDLE; VkResult res = vkCreateShaderModule(vk_device, &shader_module_create_info, nullptr, &vk_module); if (res) { - error_text = "Error (" + itos(res) + ") creating shader module for stage: " + String(SHADER_STAGE_NAMES[stages[i].type]); + error_text = "Error (" + itos(res) + ") creating shader module for stage: " + String(SHADER_STAGE_NAMES[r_shader_desc.stages[i]]); break; } VkPipelineShaderStageCreateInfo create_info = {}; create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - create_info.stage = RD_STAGE_TO_VK_SHADER_STAGE_BITS[stages[i].type]; + create_info.stage = RD_STAGE_TO_VK_SHADER_STAGE_BITS[r_shader_desc.stages[i]]; create_info.module = vk_module; create_info.pName = "main"; @@ -2234,12 +3831,13 @@ bool RenderingDeviceDriverVulkan::pipeline_cache_create(const Vector<uint8_t> &p { VkPipelineCacheCreateInfo cache_info = {}; cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; - if (context->get_pipeline_cache_control_support()) { - cache_info.flags = VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; - } cache_info.initialDataSize = pipelines_cache.buffer.size() - sizeof(PipelineCacheHeader); cache_info.pInitialData = pipelines_cache.buffer.ptr() + sizeof(PipelineCacheHeader); + if (pipeline_cache_control_support) { + cache_info.flags = 
VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; + } + VkResult err = vkCreatePipelineCache(vk_device, &cache_info, nullptr, &pipelines_cache.vk_cache); if (err != VK_SUCCESS) { WARN_PRINT("vkCreatePipelinecache failed with error " + itos(err) + "."); @@ -2254,6 +3852,7 @@ void RenderingDeviceDriverVulkan::pipeline_cache_free() { DEV_ASSERT(pipelines_cache.vk_cache); vkDestroyPipelineCache(vk_device, pipelines_cache.vk_cache, nullptr); + pipelines_cache.vk_cache = VK_NULL_HANDLE; DEV_ASSERT(caching_instance_count > 0); caching_instance_count--; @@ -2372,7 +3971,7 @@ RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView<Att vk_subpasses[i].pPreserveAttachments = p_subpasses[i].preserve_attachments.ptr(); // VRS. - if (context->get_vrs_capabilities().attachment_vrs_supported && p_subpasses[i].vrs_reference.attachment != AttachmentReference::UNUSED) { + if (vrs_capabilities.attachment_vrs_supported && p_subpasses[i].vrs_reference.attachment != AttachmentReference::UNUSED) { VkAttachmentReference2KHR *vk_subpass_vrs_attachment = ALLOCA_SINGLE(VkAttachmentReference2KHR); *vk_subpass_vrs_attachment = {}; vk_subpass_vrs_attachment->sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR; @@ -2383,8 +3982,8 @@ RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView<Att *vk_vrs_info = {}; vk_vrs_info->sType = VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR; vk_vrs_info->pFragmentShadingRateAttachment = vk_subpass_vrs_attachment; - vk_vrs_info->shadingRateAttachmentTexelSize.width = context->get_vrs_capabilities().texel_size.x; - vk_vrs_info->shadingRateAttachmentTexelSize.height = context->get_vrs_capabilities().texel_size.y; + vk_vrs_info->shadingRateAttachmentTexelSize.width = vrs_capabilities.texel_size.x; + vk_vrs_info->shadingRateAttachmentTexelSize.height = vrs_capabilities.texel_size.y; vk_subpasses[i].pNext = vk_vrs_info; } @@ -2414,8 +4013,8 @@ RDD::RenderPassID 
RenderingDeviceDriverVulkan::render_pass_create(VectorView<Att create_info.pCorrelatedViewMasks = p_view_count == 1 ? nullptr : &correlation_mask; // Multiview. - if (p_view_count > 1 && !context->supports_renderpass2()) { - // This is only required when using vkCreateRenderPass. + if (p_view_count > 1 && device_functions.CreateRenderPass2KHR == nullptr) { + // This is only required when not using vkCreateRenderPass2. // We add it if vkCreateRenderPass2KHR is not supported, // resulting this in being passed to our vkCreateRenderPass fallback. @@ -2436,7 +4035,7 @@ RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView<Att } VkRenderPass vk_render_pass = VK_NULL_HANDLE; - VkResult res = context->vkCreateRenderPass2KHR(vk_device, &create_info, nullptr, &vk_render_pass); + VkResult res = _create_render_pass(vk_device, &create_info, nullptr, &vk_render_pass); ERR_FAIL_COND_V_MSG(res, RenderPassID(), "vkCreateRenderPass2KHR failed with error " + itos(res) + "."); return RenderPassID(vk_render_pass); @@ -2466,10 +4065,18 @@ void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cm VkSubpassContents vk_subpass_contents = p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY ? 
VK_SUBPASS_CONTENTS_INLINE : VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS; vkCmdBeginRenderPass((VkCommandBuffer)p_cmd_buffer.id, &render_pass_begin, vk_subpass_contents); + +#if PRINT_NATIVE_COMMANDS + print_line(vformat("vkCmdBeginRenderPass Pass 0x%uX Framebuffer 0x%uX", p_render_pass.id, p_framebuffer.id)); +#endif } void RenderingDeviceDriverVulkan::command_end_render_pass(CommandBufferID p_cmd_buffer) { vkCmdEndRenderPass((VkCommandBuffer)p_cmd_buffer.id); + +#if PRINT_NATIVE_COMMANDS + print_line("vkCmdEndRenderPass"); +#endif } void RenderingDeviceDriverVulkan::command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) { @@ -2690,7 +4297,7 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( // Tessellation. VkPipelineTessellationStateCreateInfo tessellation_create_info = {}; tessellation_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; - ERR_FAIL_COND_V(limits.maxTessellationPatchSize > 0 && (p_rasterization_state.patch_control_points < 1 || p_rasterization_state.patch_control_points > limits.maxTessellationPatchSize), PipelineID()); + ERR_FAIL_COND_V(physical_device_properties.limits.maxTessellationPatchSize > 0 && (p_rasterization_state.patch_control_points < 1 || p_rasterization_state.patch_control_points > physical_device_properties.limits.maxTessellationPatchSize), PipelineID()); tessellation_create_info.patchControlPoints = p_rasterization_state.patch_control_points; // Viewport. @@ -2852,7 +4459,7 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( void *graphics_pipeline_nextptr = nullptr; - if (context->get_vrs_capabilities().attachment_vrs_supported) { + if (vrs_capabilities.attachment_vrs_supported) { // If VRS is used, this defines how the different VRS types are combined. // combinerOps[0] decides how we use the output of pipeline and primitive (drawcall) VRS. 
// combinerOps[1] decides how we use the output of combinerOps[0] and our attachment VRS. @@ -3038,7 +4645,7 @@ uint64_t RenderingDeviceDriverVulkan::timestamp_query_result_to_time(uint64_t p_ uint64_t shift_bits = 16; uint64_t h = 0, l = 0; - mult64to128(p_result, uint64_t(double(context->get_device_limits().timestampPeriod) * double(1 << shift_bits)), h, l); + mult64to128(p_result, uint64_t(double(physical_device_properties.limits.timestampPeriod) * double(1 << shift_bits)), h, l); l >>= shift_bits; l |= h << (64 - shift_bits); @@ -3054,30 +4661,37 @@ void RenderingDeviceDriverVulkan::command_timestamp_write(CommandBufferID p_cmd_ } /****************/ -/**** SCREEN ****/ +/**** LABELS ****/ /****************/ -RDD::DataFormat RenderingDeviceDriverVulkan::screen_get_format() { - // Very hacky, but not used often per frame so I guess ok. - VkFormat vk_format = context->get_screen_format(); - DataFormat format = DATA_FORMAT_MAX; - for (int i = 0; i < DATA_FORMAT_MAX; i++) { - if (vk_format == RD_TO_VK_FORMAT[i]) { - format = DataFormat(i); - break; - } - } - return format; +void RenderingDeviceDriverVulkan::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) { + const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); + VkDebugUtilsLabelEXT label; + label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; + label.pNext = nullptr; + label.pLabelName = p_label_name; + label.color[0] = p_color[0]; + label.color[1] = p_color[1]; + label.color[2] = p_color[2]; + label.color[3] = p_color[3]; + functions.CmdBeginDebugUtilsLabelEXT((VkCommandBuffer)p_cmd_buffer.id, &label); +} + +void RenderingDeviceDriverVulkan::command_end_label(CommandBufferID p_cmd_buffer) { + const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); + functions.CmdEndDebugUtilsLabelEXT((VkCommandBuffer)p_cmd_buffer.id); } /********************/ /**** SUBMISSION ****/ /********************/ 
-void RenderingDeviceDriverVulkan::begin_segment(CommandBufferID p_cmd_buffer, uint32_t p_frame_index, uint32_t p_frames_drawn) { +void RenderingDeviceDriverVulkan::begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) { + // Per-frame segments are not required in Vulkan. } void RenderingDeviceDriverVulkan::end_segment() { + // Per-frame segments are not required in Vulkan. } /**************/ @@ -3089,33 +4703,33 @@ void RenderingDeviceDriverVulkan::set_object_name(ObjectType p_type, ID p_driver case OBJECT_TYPE_TEXTURE: { const TextureInfo *tex_info = (const TextureInfo *)p_driver_id.id; if (tex_info->allocation.handle) { - context->set_object_name(VK_OBJECT_TYPE_IMAGE, (uint64_t)tex_info->vk_view_create_info.image, p_name); + _set_object_name(VK_OBJECT_TYPE_IMAGE, (uint64_t)tex_info->vk_view_create_info.image, p_name); } - context->set_object_name(VK_OBJECT_TYPE_IMAGE_VIEW, (uint64_t)tex_info->vk_view, p_name + " View"); + _set_object_name(VK_OBJECT_TYPE_IMAGE_VIEW, (uint64_t)tex_info->vk_view, p_name + " View"); } break; case OBJECT_TYPE_SAMPLER: { - context->set_object_name(VK_OBJECT_TYPE_SAMPLER, p_driver_id.id, p_name); + _set_object_name(VK_OBJECT_TYPE_SAMPLER, p_driver_id.id, p_name); } break; case OBJECT_TYPE_BUFFER: { const BufferInfo *buf_info = (const BufferInfo *)p_driver_id.id; - context->set_object_name(VK_OBJECT_TYPE_BUFFER, (uint64_t)buf_info->vk_buffer, p_name); + _set_object_name(VK_OBJECT_TYPE_BUFFER, (uint64_t)buf_info->vk_buffer, p_name); if (buf_info->vk_view) { - context->set_object_name(VK_OBJECT_TYPE_BUFFER_VIEW, (uint64_t)buf_info->vk_view, p_name + " View"); + _set_object_name(VK_OBJECT_TYPE_BUFFER_VIEW, (uint64_t)buf_info->vk_view, p_name + " View"); } } break; case OBJECT_TYPE_SHADER: { const ShaderInfo *shader_info = (const ShaderInfo *)p_driver_id.id; for (uint32_t i = 0; i < shader_info->vk_descriptor_set_layouts.size(); i++) { - context->set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, 
(uint64_t)shader_info->vk_descriptor_set_layouts[i], p_name); + _set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, (uint64_t)shader_info->vk_descriptor_set_layouts[i], p_name); } - context->set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, (uint64_t)shader_info->vk_pipeline_layout, p_name + " Pipeline Layout"); + _set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, (uint64_t)shader_info->vk_pipeline_layout, p_name + " Pipeline Layout"); } break; case OBJECT_TYPE_UNIFORM_SET: { const UniformSetInfo *usi = (const UniformSetInfo *)p_driver_id.id; - context->set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET, (uint64_t)usi->vk_descriptor_set, p_name); + _set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET, (uint64_t)usi->vk_descriptor_set, p_name); } break; case OBJECT_TYPE_PIPELINE: { - context->set_object_name(VK_OBJECT_TYPE_PIPELINE, (uint64_t)p_driver_id.id, p_name); + _set_object_name(VK_OBJECT_TYPE_PIPELINE, (uint64_t)p_driver_id.id, p_name); } break; default: { DEV_ASSERT(false); @@ -3129,16 +4743,17 @@ uint64_t RenderingDeviceDriverVulkan::get_resource_native_handle(DriverResource return (uint64_t)vk_device; } case DRIVER_RESOURCE_PHYSICAL_DEVICE: { - return (uint64_t)context->get_physical_device(); + return (uint64_t)physical_device; } case DRIVER_RESOURCE_TOPMOST_OBJECT: { - return (uint64_t)context->get_instance(); + return (uint64_t)context_driver->instance_get(); } case DRIVER_RESOURCE_COMMAND_QUEUE: { - return (uint64_t)context->get_graphics_queue(); + const CommandQueue *queue_info = (const CommandQueue *)p_driver_id.id; + return (uint64_t)queue_families[queue_info->queue_family][queue_info->queue_index].queue; } case DRIVER_RESOURCE_QUEUE_FAMILY: { - return context->get_graphics_queue_family_index(); + return uint32_t(p_driver_id.id) - 1; } case DRIVER_RESOURCE_TEXTURE: { const TextureInfo *tex_info = (const TextureInfo *)p_driver_id.id; @@ -3172,6 +4787,7 @@ uint64_t RenderingDeviceDriverVulkan::get_total_memory_used() { } uint64_t 
RenderingDeviceDriverVulkan::limit_get(Limit p_limit) { + const VkPhysicalDeviceLimits &limits = physical_device_properties.limits; switch (p_limit) { case LIMIT_MAX_BOUND_UNIFORM_SETS: return limits.maxBoundDescriptorSets; @@ -3245,30 +4861,20 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) { return limits.maxViewportDimensions[0]; case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y: return limits.maxViewportDimensions[1]; - case LIMIT_SUBGROUP_SIZE: { - VulkanContext::SubgroupCapabilities subgroup_capabilities = context->get_subgroup_capabilities(); + case LIMIT_SUBGROUP_SIZE: return subgroup_capabilities.size; - } - case LIMIT_SUBGROUP_MIN_SIZE: { - VulkanContext::SubgroupCapabilities subgroup_capabilities = context->get_subgroup_capabilities(); + case LIMIT_SUBGROUP_MIN_SIZE: return subgroup_capabilities.min_size; - } - case LIMIT_SUBGROUP_MAX_SIZE: { - VulkanContext::SubgroupCapabilities subgroup_capabilities = context->get_subgroup_capabilities(); + case LIMIT_SUBGROUP_MAX_SIZE: return subgroup_capabilities.max_size; - } - case LIMIT_SUBGROUP_IN_SHADERS: { - VulkanContext::SubgroupCapabilities subgroup_capabilities = context->get_subgroup_capabilities(); + case LIMIT_SUBGROUP_IN_SHADERS: return subgroup_capabilities.supported_stages_flags_rd(); - } - case LIMIT_SUBGROUP_OPERATIONS: { - VulkanContext::SubgroupCapabilities subgroup_capabilities = context->get_subgroup_capabilities(); + case LIMIT_SUBGROUP_OPERATIONS: return subgroup_capabilities.supported_operations_flags_rd(); - } case LIMIT_VRS_TEXEL_WIDTH: - return context->get_vrs_capabilities().texel_size.x; + return vrs_capabilities.texel_size.x; case LIMIT_VRS_TEXEL_HEIGHT: - return context->get_vrs_capabilities().texel_size.y; + return vrs_capabilities.texel_size.y; default: ERR_FAIL_V(0); } @@ -3277,7 +4883,7 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) { uint64_t RenderingDeviceDriverVulkan::api_trait_get(ApiTrait p_trait) { switch (p_trait) { case 
API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT: - return (uint64_t)MAX((uint64_t)16, limits.optimalBufferCopyOffsetAlignment); + return (uint64_t)MAX((uint64_t)16, physical_device_properties.limits.optimalBufferCopyOffsetAlignment); case API_TRAIT_SHADER_CHANGE_INVALIDATION: return (uint64_t)SHADER_CHANGE_INVALIDATION_INCOMPATIBLE_SETS_PLUS_CASCADE; default: @@ -3287,57 +4893,46 @@ uint64_t RenderingDeviceDriverVulkan::api_trait_get(ApiTrait p_trait) { bool RenderingDeviceDriverVulkan::has_feature(Features p_feature) { switch (p_feature) { - case SUPPORTS_MULTIVIEW: { - MultiviewCapabilities multiview_capabilies = context->get_multiview_capabilities(); - return multiview_capabilies.is_supported && multiview_capabilies.max_view_count > 1; - } break; - case SUPPORTS_FSR_HALF_FLOAT: { - return context->get_shader_capabilities().shader_float16_is_supported && context->get_physical_device_features().shaderInt16 && context->get_storage_buffer_capabilities().storage_buffer_16_bit_access_is_supported; - } break; - case SUPPORTS_ATTACHMENT_VRS: { - VulkanContext::VRSCapabilities vrs_capabilities = context->get_vrs_capabilities(); - return vrs_capabilities.attachment_vrs_supported && context->get_physical_device_features().shaderStorageImageExtendedFormats; - } break; - case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: { + case SUPPORTS_MULTIVIEW: + return multiview_capabilities.is_supported && multiview_capabilities.max_view_count > 1; + case SUPPORTS_FSR_HALF_FLOAT: + return shader_capabilities.shader_float16_is_supported && physical_device_features.shaderInt16 && storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported; + case SUPPORTS_ATTACHMENT_VRS: + return vrs_capabilities.attachment_vrs_supported && physical_device_features.shaderStorageImageExtendedFormats; + case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; - } break; - default: { + default: return false; - } } } const RDD::MultiviewCapabilities 
&RenderingDeviceDriverVulkan::get_multiview_capabilities() { - return context->get_multiview_capabilities(); + return multiview_capabilities; } -/******************/ +String RenderingDeviceDriverVulkan::get_api_name() const { + return "Vulkan"; +} -RenderingDeviceDriverVulkan::RenderingDeviceDriverVulkan(VulkanContext *p_context, VkDevice p_vk_device) : - context(p_context), - vk_device(p_vk_device) { - VmaAllocatorCreateInfo allocator_info = {}; - allocator_info.physicalDevice = context->get_physical_device(); - allocator_info.device = vk_device; - allocator_info.instance = context->get_instance(); - VkResult err = vmaCreateAllocator(&allocator_info, &allocator); - ERR_FAIL_COND_MSG(err, "vmaCreateAllocator failed with error " + itos(err) + "."); +String RenderingDeviceDriverVulkan::get_api_version() const { + uint32_t api_version = physical_device_properties.apiVersion; + return vformat("%d.%d.%d", VK_API_VERSION_MAJOR(api_version), VK_API_VERSION_MINOR(api_version), VK_API_VERSION_PATCH(api_version)); +} - max_descriptor_sets_per_pool = GLOBAL_GET("rendering/rendering_device/vulkan/max_descriptors_per_pool"); +String RenderingDeviceDriverVulkan::get_pipeline_cache_uuid() const { + return pipeline_cache_id; +} - VkPhysicalDeviceProperties props = {}; - vkGetPhysicalDeviceProperties(context->get_physical_device(), &props); - pipelines_cache.buffer.resize(sizeof(PipelineCacheHeader)); - PipelineCacheHeader *header = (PipelineCacheHeader *)pipelines_cache.buffer.ptrw(); - *header = {}; - header->magic = 868 + VK_PIPELINE_CACHE_HEADER_VERSION_ONE; - header->device_id = props.deviceID; - header->vendor_id = props.vendorID; - header->driver_version = props.driverVersion; - memcpy(header->uuid, props.pipelineCacheUUID, VK_UUID_SIZE); - header->driver_abi = sizeof(void *); +const RDD::Capabilities &RenderingDeviceDriverVulkan::get_capabilities() const { + return device_capabilities; +} + +/******************/ - limits = context->get_device_limits(); 
+RenderingDeviceDriverVulkan::RenderingDeviceDriverVulkan(RenderingContextDriverVulkan *p_context_driver) { + DEV_ASSERT(p_context_driver != nullptr); + + context_driver = p_context_driver; } RenderingDeviceDriverVulkan::~RenderingDeviceDriverVulkan() { @@ -3347,4 +4942,8 @@ RenderingDeviceDriverVulkan::~RenderingDeviceDriverVulkan() { small_allocs_pools.remove(E); } vmaDestroyAllocator(allocator); + + if (vk_device != VK_NULL_HANDLE) { + vkDestroyDevice(vk_device, nullptr); + } } diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 6d8f6fd0e0..4abaeecd11 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -33,6 +33,7 @@ #include "core/templates/hash_map.h" #include "core/templates/paged_allocator.h" +#include "drivers/vulkan/rendering_context_driver_vulkan.h" #include "servers/rendering/rendering_device_driver.h" #ifdef DEBUG_ENABLED @@ -48,8 +49,6 @@ #include <vulkan/vulkan.h> #endif -class VulkanContext; - // Design principles: // - Vulkan structs are zero-initialized and fields not requiring a non-zero value are omitted (except in cases where expresivity reasons apply). class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { @@ -57,9 +56,99 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { /**** GENERIC ****/ /*****************/ - VulkanContext *context = nullptr; - VkDevice vk_device = VK_NULL_HANDLE; // Owned by the context. 
+ struct CommandQueue; + struct SwapChain; + + struct Queue { + VkQueue queue = VK_NULL_HANDLE; + uint32_t virtual_count = 0; + BinaryMutex submit_mutex; + }; + + struct SubgroupCapabilities { + uint32_t size = 0; + uint32_t min_size = 0; + uint32_t max_size = 0; + VkShaderStageFlags supported_stages = 0; + VkSubgroupFeatureFlags supported_operations = 0; + VkBool32 quad_operations_in_all_stages = false; + bool size_control_is_supported = false; + + uint32_t supported_stages_flags_rd() const; + String supported_stages_desc() const; + uint32_t supported_operations_flags_rd() const; + String supported_operations_desc() const; + }; + + struct VRSCapabilities { + bool pipeline_vrs_supported = false; // We can specify our fragment rate on a pipeline level. + bool primitive_vrs_supported = false; // We can specify our fragment rate on each drawcall. + bool attachment_vrs_supported = false; // We can provide a density map attachment on our framebuffer. + + Size2i min_texel_size; + Size2i max_texel_size; + Size2i texel_size; // The texel size we'll use + }; + + struct ShaderCapabilities { + bool shader_float16_is_supported = false; + bool shader_int8_is_supported = false; + }; + + struct StorageBufferCapabilities { + bool storage_buffer_16_bit_access_is_supported = false; + bool uniform_and_storage_buffer_16_bit_access_is_supported = false; + bool storage_push_constant_16_is_supported = false; + bool storage_input_output_16 = false; + }; + + struct DeviceFunctions { + PFN_vkCreateSwapchainKHR CreateSwapchainKHR = nullptr; + PFN_vkDestroySwapchainKHR DestroySwapchainKHR = nullptr; + PFN_vkGetSwapchainImagesKHR GetSwapchainImagesKHR = nullptr; + PFN_vkAcquireNextImageKHR AcquireNextImageKHR = nullptr; + PFN_vkQueuePresentKHR QueuePresentKHR = nullptr; + PFN_vkCreateRenderPass2KHR CreateRenderPass2KHR = nullptr; + }; + + VkDevice vk_device = VK_NULL_HANDLE; + RenderingContextDriverVulkan *context_driver = nullptr; + RenderingContextDriver::Device context_device = {}; + 
VkPhysicalDevice physical_device = VK_NULL_HANDLE; + VkPhysicalDeviceProperties physical_device_properties = {}; + VkPhysicalDeviceFeatures physical_device_features = {}; + VkPhysicalDeviceFeatures requested_device_features = {}; + HashMap<CharString, bool> requested_device_extensions; + HashSet<CharString> enabled_device_extension_names; + TightLocalVector<TightLocalVector<Queue>> queue_families; + TightLocalVector<VkQueueFamilyProperties> queue_family_properties; + RDD::Capabilities device_capabilities; + SubgroupCapabilities subgroup_capabilities; + MultiviewCapabilities multiview_capabilities; + VRSCapabilities vrs_capabilities; + ShaderCapabilities shader_capabilities; + StorageBufferCapabilities storage_buffer_capabilities; + bool pipeline_cache_control_support = false; + DeviceFunctions device_functions; + + void _register_requested_device_extension(const CharString &p_extension_name, bool p_required); + Error _initialize_device_extensions(); + Error _check_device_features(); + Error _check_device_capabilities(); + Error _add_queue_create_info(LocalVector<VkDeviceQueueCreateInfo> &r_queue_create_info); + Error _initialize_device(const LocalVector<VkDeviceQueueCreateInfo> &p_queue_create_info); + Error _initialize_allocator(); + Error _initialize_pipeline_cache(); + VkResult _create_render_pass(VkDevice p_device, const VkRenderPassCreateInfo2 *p_create_info, const VkAllocationCallbacks *p_allocator, VkRenderPass *p_render_pass); + bool _release_image_semaphore(CommandQueue *p_command_queue, uint32_t p_semaphore_index, bool p_release_on_swap_chain); + bool _recreate_image_semaphore(CommandQueue *p_command_queue, uint32_t p_semaphore_index, bool p_release_on_swap_chain); + void _set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name); + +public: + Error initialize(uint32_t p_device_index, uint32_t p_frame_count) override final; + +private: /****************/ /**** MEMORY ****/ /****************/ @@ -154,32 +243,104 @@ public: 
VectorView<BufferBarrier> p_buffer_barriers, VectorView<TextureBarrier> p_texture_barriers) override final; - /*************************/ - /**** COMMAND BUFFERS ****/ - /*************************/ + /****************/ + /**** FENCES ****/ + /****************/ + private: -#ifdef DEBUG_ENABLED - // Vulkan doesn't need to know if the command buffers created in a pool - // will be primary or secondary, but RDD works like that, so we will enforce. + struct Fence { + VkFence vk_fence = VK_NULL_HANDLE; + CommandQueue *queue_signaled_from = nullptr; + }; - HashSet<CommandPoolID> secondary_cmd_pools; - HashSet<CommandBufferID> secondary_cmd_buffers; -#endif +public: + virtual FenceID fence_create() override final; + virtual Error fence_wait(FenceID p_fence) override final; + virtual void fence_free(FenceID p_fence) override final; + + /********************/ + /**** SEMAPHORES ****/ + /********************/ + + virtual SemaphoreID semaphore_create() override final; + virtual void semaphore_free(SemaphoreID p_semaphore) override final; + + /******************/ + /**** COMMANDS ****/ + /******************/ + + // ----- QUEUE FAMILY ----- + + virtual CommandQueueFamilyID command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface = 0) override final; + + // ----- QUEUE ----- +private: + struct CommandQueue { + LocalVector<VkSemaphore> image_semaphores; + LocalVector<SwapChain *> image_semaphores_swap_chains; + LocalVector<uint32_t> pending_semaphores_for_execute; + LocalVector<uint32_t> pending_semaphores_for_fence; + LocalVector<uint32_t> free_image_semaphores; + LocalVector<Pair<Fence *, uint32_t>> image_semaphores_for_fences; + uint32_t queue_family = 0; + uint32_t queue_index = 0; + }; public: + virtual CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) override final; + virtual Error command_queue_execute(CommandQueueID p_cmd_queue, 
VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) override final; + virtual Error command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) override final; + virtual void command_queue_free(CommandQueueID p_cmd_queue) override final; + +private: // ----- POOL ----- - virtual CommandPoolID command_pool_create(CommandBufferType p_cmd_buffer_type) override final; + struct CommandPool { + VkCommandPool vk_command_pool = VK_NULL_HANDLE; + CommandBufferType buffer_type = COMMAND_BUFFER_TYPE_PRIMARY; + }; + +public: + virtual CommandPoolID command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) override final; virtual void command_pool_free(CommandPoolID p_cmd_pool) override final; // ----- BUFFER ----- - virtual CommandBufferID command_buffer_create(CommandBufferType p_cmd_buffer_type, CommandPoolID p_cmd_pool) override final; + virtual CommandBufferID command_buffer_create(CommandPoolID p_cmd_pool) override final; virtual bool command_buffer_begin(CommandBufferID p_cmd_buffer) override final; virtual bool command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) override final; virtual void command_buffer_end(CommandBufferID p_cmd_buffer) override final; virtual void command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) override final; + /********************/ + /**** SWAP CHAIN ****/ + /********************/ + +private: + struct SwapChain { + VkSwapchainKHR vk_swapchain = VK_NULL_HANDLE; + RenderingContextDriver::SurfaceID surface = RenderingContextDriver::SurfaceID(); + VkFormat format = VK_FORMAT_UNDEFINED; + VkColorSpaceKHR color_space = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; + TightLocalVector<VkImage> images; + 
TightLocalVector<VkImageView> image_views; + TightLocalVector<FramebufferID> framebuffers; + LocalVector<CommandQueue *> command_queues_acquired; + LocalVector<uint32_t> command_queues_acquired_semaphores; + RenderPassID render_pass; + uint32_t image_index = 0; + }; + + void _swap_chain_release(SwapChain *p_swap_chain); + +public: + virtual SwapChainID swap_chain_create(RenderingContextDriver::SurfaceID p_surface) override final; + virtual Error swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) override final; + virtual FramebufferID swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) override final; + virtual RenderPassID swap_chain_get_render_pass(SwapChainID p_swap_chain) override final; + virtual DataFormat swap_chain_get_format(SwapChainID p_swap_chain) override final; + virtual void swap_chain_free(SwapChainID p_swap_chain) override final; + /*********************/ /**** FRAMEBUFFER ****/ /*********************/ @@ -329,6 +490,7 @@ private: static int caching_instance_count; PipelineCache pipelines_cache; + String pipeline_cache_id; public: virtual void pipeline_free(PipelineID p_pipeline) override final; @@ -433,24 +595,23 @@ public: virtual void command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) override final; /****************/ - /**** SCREEN ****/ + /**** LABELS ****/ /****************/ - virtual DataFormat screen_get_format() override final; + virtual void command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) override final; + virtual void command_end_label(CommandBufferID p_cmd_buffer) override final; /********************/ /**** SUBMISSION ****/ /********************/ - virtual void begin_segment(CommandBufferID p_cmd_buffer, uint32_t p_frame_index, uint32_t p_frames_drawn) override final; + virtual void begin_segment(uint32_t p_frame_index, 
uint32_t p_frames_drawn) override final; virtual void end_segment() override final; /**************/ /**** MISC ****/ /**************/ - VkPhysicalDeviceLimits limits = {}; - virtual void set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) override final; virtual uint64_t get_resource_native_handle(DriverResource p_type, ID p_driver_id) override final; virtual uint64_t get_total_memory_used() override final; @@ -458,6 +619,10 @@ public: virtual uint64_t api_trait_get(ApiTrait p_trait) override final; virtual bool has_feature(Features p_feature) override final; virtual const MultiviewCapabilities &get_multiview_capabilities() override final; + virtual String get_api_name() const override final; + virtual String get_api_version() const override final; + virtual String get_pipeline_cache_uuid() const override final; + virtual const Capabilities &get_capabilities() const override final; private: /*********************/ @@ -475,7 +640,7 @@ private: /******************/ public: - RenderingDeviceDriverVulkan(VulkanContext *p_context, VkDevice p_vk_device); + RenderingDeviceDriverVulkan(RenderingContextDriverVulkan *p_context_driver); virtual ~RenderingDeviceDriverVulkan(); }; diff --git a/drivers/vulkan/vulkan_context.cpp b/drivers/vulkan/vulkan_context.cpp deleted file mode 100644 index 1b1d4fa50f..0000000000 --- a/drivers/vulkan/vulkan_context.cpp +++ /dev/null @@ -1,2947 +0,0 @@ -/**************************************************************************/ -/* vulkan_context.cpp */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/**************************************************************************/ - -#include "vulkan_context.h" - -#include "core/config/engine.h" -#include "core/config/project_settings.h" -#include "core/string/ustring.h" -#include "core/templates/local_vector.h" -#include "core/version.h" -#include "servers/rendering/rendering_device.h" - -#include "vk_enum_string_helper.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) -#define APP_SHORT_NAME "GodotEngine" - -VulkanHooks *VulkanContext::vulkan_hooks = nullptr; - -Vector<VkAttachmentReference> VulkanContext::_convert_VkAttachmentReference2(uint32_t p_count, const VkAttachmentReference2 *p_refs) { - Vector<VkAttachmentReference> att_refs; - - if (p_refs != nullptr) { - for (uint32_t i = 0; i < p_count; i++) { - // We lose aspectMask in this conversion but we don't use it currently. - - VkAttachmentReference ref = { - p_refs[i].attachment, /* attachment */ - p_refs[i].layout /* layout */ - }; - - att_refs.push_back(ref); - } - } - - return att_refs; -} - -VkResult VulkanContext::vkCreateRenderPass2KHR(VkDevice p_device, const VkRenderPassCreateInfo2 *p_create_info, const VkAllocationCallbacks *p_allocator, VkRenderPass *p_render_pass) { - if (is_device_extension_enabled(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME)) { - if (fpCreateRenderPass2KHR == nullptr) { - fpCreateRenderPass2KHR = (PFN_vkCreateRenderPass2KHR)vkGetDeviceProcAddr(p_device, "vkCreateRenderPass2KHR"); - } - - if (fpCreateRenderPass2KHR == nullptr) { - return VK_ERROR_EXTENSION_NOT_PRESENT; - } else { - return (fpCreateRenderPass2KHR)(p_device, p_create_info, p_allocator, p_render_pass); - } - } else { - // need to fall back on vkCreateRenderPass - - const void *next = p_create_info->pNext; // ATM we only support multiview which should work if supported. 
- - Vector<VkAttachmentDescription> attachments; - for (uint32_t i = 0; i < p_create_info->attachmentCount; i++) { - // Basically the old layout just misses type and next. - VkAttachmentDescription att = { - p_create_info->pAttachments[i].flags, /* flags */ - p_create_info->pAttachments[i].format, /* format */ - p_create_info->pAttachments[i].samples, /* samples */ - p_create_info->pAttachments[i].loadOp, /* loadOp */ - p_create_info->pAttachments[i].storeOp, /* storeOp */ - p_create_info->pAttachments[i].stencilLoadOp, /* stencilLoadOp */ - p_create_info->pAttachments[i].stencilStoreOp, /* stencilStoreOp */ - p_create_info->pAttachments[i].initialLayout, /* initialLayout */ - p_create_info->pAttachments[i].finalLayout /* finalLayout */ - }; - - attachments.push_back(att); - } - - Vector<Vector<VkAttachmentReference>> attachment_references; - Vector<VkSubpassDescription> subpasses; - for (uint32_t i = 0; i < p_create_info->subpassCount; i++) { - // Here we need to do more, again it's just stripping out type and next - // but we have VkAttachmentReference2 to convert to VkAttachmentReference. - // Also viewmask is not supported but we don't use it outside of multiview. 
- - Vector<VkAttachmentReference> input_attachments = _convert_VkAttachmentReference2(p_create_info->pSubpasses[i].inputAttachmentCount, p_create_info->pSubpasses[i].pInputAttachments); - Vector<VkAttachmentReference> color_attachments = _convert_VkAttachmentReference2(p_create_info->pSubpasses[i].colorAttachmentCount, p_create_info->pSubpasses[i].pColorAttachments); - Vector<VkAttachmentReference> resolve_attachments = _convert_VkAttachmentReference2(p_create_info->pSubpasses[i].colorAttachmentCount, p_create_info->pSubpasses[i].pResolveAttachments); - Vector<VkAttachmentReference> depth_attachments = _convert_VkAttachmentReference2(p_create_info->pSubpasses[i].colorAttachmentCount, p_create_info->pSubpasses[i].pDepthStencilAttachment); - - VkSubpassDescription subpass = { - p_create_info->pSubpasses[i].flags, /* flags */ - p_create_info->pSubpasses[i].pipelineBindPoint, /* pipelineBindPoint */ - p_create_info->pSubpasses[i].inputAttachmentCount, /* inputAttachmentCount */ - input_attachments.size() == 0 ? nullptr : input_attachments.ptr(), /* pInputAttachments */ - p_create_info->pSubpasses[i].colorAttachmentCount, /* colorAttachmentCount */ - color_attachments.size() == 0 ? nullptr : color_attachments.ptr(), /* pColorAttachments */ - resolve_attachments.size() == 0 ? nullptr : resolve_attachments.ptr(), /* pResolveAttachments */ - depth_attachments.size() == 0 ? 
nullptr : depth_attachments.ptr(), /* pDepthStencilAttachment */ - p_create_info->pSubpasses[i].preserveAttachmentCount, /* preserveAttachmentCount */ - p_create_info->pSubpasses[i].pPreserveAttachments /* pPreserveAttachments */ - }; - attachment_references.push_back(input_attachments); - attachment_references.push_back(color_attachments); - attachment_references.push_back(resolve_attachments); - attachment_references.push_back(depth_attachments); - - subpasses.push_back(subpass); - } - - Vector<VkSubpassDependency> dependencies; - for (uint32_t i = 0; i < p_create_info->dependencyCount; i++) { - // We lose viewOffset here but again I don't believe we use this anywhere. - VkSubpassDependency dep = { - p_create_info->pDependencies[i].srcSubpass, /* srcSubpass */ - p_create_info->pDependencies[i].dstSubpass, /* dstSubpass */ - p_create_info->pDependencies[i].srcStageMask, /* srcStageMask */ - p_create_info->pDependencies[i].dstStageMask, /* dstStageMask */ - p_create_info->pDependencies[i].srcAccessMask, /* srcAccessMask */ - p_create_info->pDependencies[i].dstAccessMask, /* dstAccessMask */ - p_create_info->pDependencies[i].dependencyFlags, /* dependencyFlags */ - }; - - dependencies.push_back(dep); - } - - // CorrelatedViewMask is not supported in vkCreateRenderPass but we - // currently only use this for multiview. - // We'll need to look into this. 
- - VkRenderPassCreateInfo create_info = { - VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, /* sType */ - next, /* pNext*/ - p_create_info->flags, /* flags */ - (uint32_t)attachments.size(), /* attachmentCount */ - attachments.ptr(), /* pAttachments */ - (uint32_t)subpasses.size(), /* subpassCount */ - subpasses.ptr(), /* pSubpasses */ - (uint32_t)dependencies.size(), /* */ - dependencies.ptr(), /* */ - }; - - return vkCreateRenderPass(device, &create_info, p_allocator, p_render_pass); - } -} - -VKAPI_ATTR VkBool32 VKAPI_CALL VulkanContext::_debug_messenger_callback( - VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, - VkDebugUtilsMessageTypeFlagsEXT messageType, - const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, - void *pUserData) { - // This error needs to be ignored because the AMD allocator will mix up memory types on IGP processors. - if (strstr(pCallbackData->pMessage, "Mapping an image with layout") != nullptr && - strstr(pCallbackData->pMessage, "can result in undefined behavior if this memory is used by the device") != nullptr) { - return VK_FALSE; - } - // This needs to be ignored because Validator is wrong here. - if (strstr(pCallbackData->pMessage, "Invalid SPIR-V binary version 1.3") != nullptr) { - return VK_FALSE; - } - // This needs to be ignored because Validator is wrong here. - if (strstr(pCallbackData->pMessage, "Shader requires flag") != nullptr) { - return VK_FALSE; - } - - // This needs to be ignored because Validator is wrong here. 
- if (strstr(pCallbackData->pMessage, "SPIR-V module not valid: Pointer operand") != nullptr && - strstr(pCallbackData->pMessage, "must be a memory object") != nullptr) { - return VK_FALSE; - } - - if (pCallbackData->pMessageIdName && strstr(pCallbackData->pMessageIdName, "UNASSIGNED-CoreValidation-DrawState-ClearCmdBeforeDraw") != nullptr) { - return VK_FALSE; - } - - String type_string; - switch (messageType) { - case (VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT): - type_string = "GENERAL"; - break; - case (VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT): - type_string = "VALIDATION"; - break; - case (VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT): - type_string = "PERFORMANCE"; - break; - case (VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT & VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT): - type_string = "VALIDATION|PERFORMANCE"; - break; - } - - String objects_string; - if (pCallbackData->objectCount > 0) { - objects_string = "\n\tObjects - " + String::num_int64(pCallbackData->objectCount); - for (uint32_t object = 0; object < pCallbackData->objectCount; ++object) { - objects_string += - "\n\t\tObject[" + String::num_int64(object) + "]" + - " - " + string_VkObjectType(pCallbackData->pObjects[object].objectType) + - ", Handle " + String::num_int64(pCallbackData->pObjects[object].objectHandle); - if (nullptr != pCallbackData->pObjects[object].pObjectName && strlen(pCallbackData->pObjects[object].pObjectName) > 0) { - objects_string += ", Name \"" + String(pCallbackData->pObjects[object].pObjectName) + "\""; - } - } - } - - String labels_string; - if (pCallbackData->cmdBufLabelCount > 0) { - labels_string = "\n\tCommand Buffer Labels - " + String::num_int64(pCallbackData->cmdBufLabelCount); - for (uint32_t cmd_buf_label = 0; cmd_buf_label < pCallbackData->cmdBufLabelCount; ++cmd_buf_label) { - labels_string += - "\n\t\tLabel[" + String::num_int64(cmd_buf_label) + "]" + - " - " + pCallbackData->pCmdBufLabels[cmd_buf_label].pLabelName + - "{ "; - for (int 
color_idx = 0; color_idx < 4; ++color_idx) { - labels_string += String::num(pCallbackData->pCmdBufLabels[cmd_buf_label].color[color_idx]); - if (color_idx < 3) { - labels_string += ", "; - } - } - labels_string += " }"; - } - } - - String error_message(type_string + - " - Message Id Number: " + String::num_int64(pCallbackData->messageIdNumber) + - " | Message Id Name: " + pCallbackData->pMessageIdName + - "\n\t" + pCallbackData->pMessage + - objects_string + labels_string); - - // Convert VK severity to our own log macros. - switch (messageSeverity) { - case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: - print_verbose(error_message); - break; - case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: - print_line(error_message); - break; - case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: - WARN_PRINT(error_message); - break; - case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: - ERR_PRINT(error_message); - CRASH_COND_MSG(Engine::get_singleton()->is_abort_on_gpu_errors_enabled(), - "Crashing, because abort on GPU errors is enabled."); - break; - case VK_DEBUG_UTILS_MESSAGE_SEVERITY_FLAG_BITS_MAX_ENUM_EXT: - break; // Shouldn't happen, only handling to make compilers happy. 
- } - - return VK_FALSE; -} - -VKAPI_ATTR VkBool32 VKAPI_CALL VulkanContext::_debug_report_callback( - VkDebugReportFlagsEXT flags, - VkDebugReportObjectTypeEXT objectType, - uint64_t object, - size_t location, - int32_t messageCode, - const char *pLayerPrefix, - const char *pMessage, - void *pUserData) { - String debugMessage = String("Vulkan Debug Report: object - ") + - String::num_int64(object) + "\n" + pMessage; - - switch (flags) { - case VK_DEBUG_REPORT_DEBUG_BIT_EXT: - case VK_DEBUG_REPORT_INFORMATION_BIT_EXT: - print_line(debugMessage); - break; - case VK_DEBUG_REPORT_WARNING_BIT_EXT: - case VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT: - WARN_PRINT(debugMessage); - break; - case VK_DEBUG_REPORT_ERROR_BIT_EXT: - ERR_PRINT(debugMessage); - break; - } - - return VK_FALSE; -} - -VkBool32 VulkanContext::_check_layers(uint32_t check_count, const char *const *check_names, uint32_t layer_count, VkLayerProperties *layers) { - for (uint32_t i = 0; i < check_count; i++) { - VkBool32 found = 0; - for (uint32_t j = 0; j < layer_count; j++) { - if (!strcmp(check_names[i], layers[j].layerName)) { - found = 1; - break; - } - } - if (!found) { - WARN_PRINT("Can't find layer: " + String(check_names[i])); - return 0; - } - } - return 1; -} - -Error VulkanContext::_get_preferred_validation_layers(uint32_t *count, const char *const **names) { - static const LocalVector<LocalVector<const char *>> instance_validation_layers_alt{ - // Preferred set of validation layers. - { "VK_LAYER_KHRONOS_validation" }, - - // Alternative (deprecated, removed in SDK 1.1.126.0) set of validation layers. - { "VK_LAYER_LUNARG_standard_validation" }, - - // Alternative (deprecated, removed in SDK 1.1.121.1) set of validation layers. - { "VK_LAYER_GOOGLE_threading", "VK_LAYER_LUNARG_parameter_validation", "VK_LAYER_LUNARG_object_tracker", "VK_LAYER_LUNARG_core_validation", "VK_LAYER_GOOGLE_unique_objects" } - }; - - // Clear out-arguments. 
- *count = 0; - if (names != nullptr) { - *names = nullptr; - } - - VkResult err; - uint32_t instance_layer_count; - - err = vkEnumerateInstanceLayerProperties(&instance_layer_count, nullptr); - if (err) { - ERR_FAIL_V(ERR_CANT_CREATE); - } - - if (instance_layer_count < 1) { - return OK; - } - - VkLayerProperties *instance_layers = (VkLayerProperties *)malloc(sizeof(VkLayerProperties) * instance_layer_count); - err = vkEnumerateInstanceLayerProperties(&instance_layer_count, instance_layers); - if (err) { - free(instance_layers); - ERR_FAIL_V(ERR_CANT_CREATE); - } - - for (const LocalVector<const char *> &layer : instance_validation_layers_alt) { - if (_check_layers(layer.size(), layer.ptr(), instance_layer_count, instance_layers)) { - *count = layer.size(); - if (names != nullptr) { - *names = layer.ptr(); - } - break; - } - } - - free(instance_layers); - - return OK; -} - -typedef VkResult(VKAPI_PTR *_vkEnumerateInstanceVersion)(uint32_t *); - -Error VulkanContext::_obtain_vulkan_version() { - // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkApplicationInfo.html#_description - // For Vulkan 1.0 vkEnumerateInstanceVersion is not available, including not in the loader we compile against on Android. - _vkEnumerateInstanceVersion func = (_vkEnumerateInstanceVersion)vkGetInstanceProcAddr(nullptr, "vkEnumerateInstanceVersion"); - if (func != nullptr) { - uint32_t api_version; - VkResult res = func(&api_version); - if (res == VK_SUCCESS) { - instance_api_version = api_version; - } else { - // According to the documentation this shouldn't fail with anything except a memory allocation error - // in which case we're in deep trouble anyway. 
- ERR_FAIL_V(ERR_CANT_CREATE); - } - } else { - print_line("vkEnumerateInstanceVersion not available, assuming Vulkan 1.0."); - instance_api_version = VK_API_VERSION_1_0; - } - - return OK; -} - -bool VulkanContext::instance_extensions_initialized = false; -HashMap<CharString, bool> VulkanContext::requested_instance_extensions; - -void VulkanContext::register_requested_instance_extension(const CharString &extension_name, bool p_required) { - ERR_FAIL_COND_MSG(instance_extensions_initialized, "You can only registered extensions before the Vulkan instance is created"); - ERR_FAIL_COND(requested_instance_extensions.has(extension_name)); - - requested_instance_extensions[extension_name] = p_required; -} - -Error VulkanContext::_initialize_instance_extensions() { - enabled_instance_extension_names.clear(); - - // Make sure our core extensions are here - register_requested_instance_extension(VK_KHR_SURFACE_EXTENSION_NAME, true); - if (_get_platform_surface_extension()) { - register_requested_instance_extension(_get_platform_surface_extension(), true); - } - - if (_use_validation_layers()) { - register_requested_instance_extension(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, false); - } - - // This extension allows us to use the properties2 features to query additional device capabilities - register_requested_instance_extension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, false); - - // Only enable debug utils in verbose mode or DEV_ENABLED. - // End users would get spammed with messages of varying verbosity due to the - // mess that thirdparty layers/extensions and drivers seem to leave in their - // wake, making the Windows registry a bottomless pit of broken layer JSON. -#ifdef DEV_ENABLED - bool want_debug_utils = true; -#else - bool want_debug_utils = OS::get_singleton()->is_stdout_verbose(); -#endif - if (want_debug_utils) { - register_requested_instance_extension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, false); - } - - // Load instance extensions that are available... 
- uint32_t instance_extension_count = 0; - VkResult err = vkEnumerateInstanceExtensionProperties(nullptr, &instance_extension_count, nullptr); - ERR_FAIL_COND_V(err != VK_SUCCESS && err != VK_INCOMPLETE, ERR_CANT_CREATE); - ERR_FAIL_COND_V_MSG(instance_extension_count == 0, ERR_CANT_CREATE, "No instance extensions found, is a driver installed?"); - - VkExtensionProperties *instance_extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * instance_extension_count); - err = vkEnumerateInstanceExtensionProperties(nullptr, &instance_extension_count, instance_extensions); - if (err != VK_SUCCESS && err != VK_INCOMPLETE) { - free(instance_extensions); - ERR_FAIL_V(ERR_CANT_CREATE); - } -#ifdef DEV_ENABLED - for (uint32_t i = 0; i < instance_extension_count; i++) { - print_verbose(String("VULKAN: Found instance extension ") + String::utf8(instance_extensions[i].extensionName)); - } -#endif - - // Enable all extensions that are supported and requested - for (uint32_t i = 0; i < instance_extension_count; i++) { - CharString extension_name(instance_extensions[i].extensionName); - if (requested_instance_extensions.has(extension_name)) { - enabled_instance_extension_names.insert(extension_name); - } - } - - // Now check our requested extensions - for (KeyValue<CharString, bool> &requested_extension : requested_instance_extensions) { - if (!enabled_instance_extension_names.has(requested_extension.key)) { - if (requested_extension.value) { - free(instance_extensions); - ERR_FAIL_V_MSG(ERR_BUG, String("Required extension ") + String::utf8(requested_extension.key) + String(" not found, is a driver installed?")); - } else { - print_verbose(String("Optional extension ") + String::utf8(requested_extension.key) + String(" not found")); - } - } - } - - free(instance_extensions); - - instance_extensions_initialized = true; - return OK; -} - -bool VulkanContext::device_extensions_initialized = false; -HashMap<CharString, bool> 
VulkanContext::requested_device_extensions; - -void VulkanContext::register_requested_device_extension(const CharString &extension_name, bool p_required) { - ERR_FAIL_COND_MSG(device_extensions_initialized, "You can only registered extensions before the Vulkan instance is created"); - ERR_FAIL_COND(requested_device_extensions.has(extension_name)); - - requested_device_extensions[extension_name] = p_required; -} - -Error VulkanContext::_initialize_device_extensions() { - // Look for device extensions. - enabled_device_extension_names.clear(); - - // Make sure our core extensions are here - register_requested_device_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true); - - register_requested_device_extension(VK_KHR_MULTIVIEW_EXTENSION_NAME, false); - register_requested_device_extension(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, false); - register_requested_device_extension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false); - register_requested_device_extension(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); - register_requested_device_extension(VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, false); - register_requested_device_extension(VK_KHR_16BIT_STORAGE_EXTENSION_NAME, false); - register_requested_device_extension(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false); - register_requested_device_extension(VK_KHR_MAINTENANCE_2_EXTENSION_NAME, false); - register_requested_device_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, false); - register_requested_device_extension(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); - - if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) { - register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true); - } - - // TODO consider the following extensions: - // - VK_KHR_spirv_1_4 - // - VK_KHR_swapchain_mutable_format - // - VK_EXT_full_screen_exclusive - // - VK_EXT_hdr_metadata - // - VK_KHR_depth_stencil_resolve - - // Even though the user "enabled" the extension 
via the command - // line, we must make sure that it's enumerated for use with the - // device. Therefore, disable it here, and re-enable it again if - // enumerated. - if (VK_KHR_incremental_present_enabled) { - register_requested_device_extension(VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME, false); - } - if (VK_GOOGLE_display_timing_enabled) { - register_requested_device_extension(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME, false); - } - - // obtain available device extensions - uint32_t device_extension_count = 0; - VkResult err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, nullptr); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - ERR_FAIL_COND_V_MSG(device_extension_count == 0, ERR_CANT_CREATE, - "vkEnumerateDeviceExtensionProperties failed to find any extensions\n\n" - "Do you have a compatible Vulkan installable client driver (ICD) installed?\n" - "vkCreateInstance Failure"); - - VkExtensionProperties *device_extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * device_extension_count); - err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, device_extensions); - if (err) { - free(device_extensions); - ERR_FAIL_V(ERR_CANT_CREATE); - } - -#ifdef DEV_ENABLED - for (uint32_t i = 0; i < device_extension_count; i++) { - print_verbose(String("VULKAN: Found device extension ") + String::utf8(device_extensions[i].extensionName)); - } -#endif - - // Enable all extensions that are supported and requested - for (uint32_t i = 0; i < device_extension_count; i++) { - CharString extension_name(device_extensions[i].extensionName); - if (requested_device_extensions.has(extension_name)) { - enabled_device_extension_names.insert(extension_name); - } - } - - // Now check our requested extensions - for (KeyValue<CharString, bool> &requested_extension : requested_device_extensions) { - if (!enabled_device_extension_names.has(requested_extension.key)) { - if (requested_extension.value) { - 
free(device_extensions); - ERR_FAIL_V_MSG(ERR_BUG, - String("vkEnumerateDeviceExtensionProperties failed to find the ") + String::utf8(requested_extension.key) + String(" extension.\n\nDo you have a compatible Vulkan installable client driver (ICD) installed?\nvkCreateInstance Failure")); - } else { - print_verbose(String("Optional extension ") + String::utf8(requested_extension.key) + String(" not found")); - } - } - } - - free(device_extensions); - - device_extensions_initialized = true; - return OK; -} - -uint32_t VulkanContext::SubgroupCapabilities::supported_stages_flags_rd() const { - uint32_t flags = 0; - - if (supportedStages & VK_SHADER_STAGE_VERTEX_BIT) { - flags += RenderingDevice::ShaderStage::SHADER_STAGE_VERTEX_BIT; - } - if (supportedStages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) { - flags += RenderingDevice::ShaderStage::SHADER_STAGE_TESSELATION_CONTROL_BIT; - } - if (supportedStages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) { - flags += RenderingDevice::ShaderStage::SHADER_STAGE_TESSELATION_EVALUATION_BIT; - } - // if (supportedStages & VK_SHADER_STAGE_GEOMETRY_BIT) { - // flags += RenderingDevice::ShaderStage::SHADER_STAGE_GEOMETRY_BIT; - // } - if (supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT) { - flags += RenderingDevice::ShaderStage::SHADER_STAGE_FRAGMENT_BIT; - } - if (supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) { - flags += RenderingDevice::ShaderStage::SHADER_STAGE_COMPUTE_BIT; - } - - return flags; -} - -String VulkanContext::SubgroupCapabilities::supported_stages_desc() const { - String res; - - if (supportedStages & VK_SHADER_STAGE_VERTEX_BIT) { - res += ", STAGE_VERTEX"; - } - if (supportedStages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) { - res += ", STAGE_TESSELLATION_CONTROL"; - } - if (supportedStages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) { - res += ", STAGE_TESSELLATION_EVALUATION"; - } - if (supportedStages & VK_SHADER_STAGE_GEOMETRY_BIT) { - res += ", STAGE_GEOMETRY"; - } - if (supportedStages & 
VK_SHADER_STAGE_FRAGMENT_BIT) { - res += ", STAGE_FRAGMENT"; - } - if (supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) { - res += ", STAGE_COMPUTE"; - } - - // These are not defined on Android GRMBL. - if (supportedStages & 0x00000100 /* VK_SHADER_STAGE_RAYGEN_BIT_KHR */) { - res += ", STAGE_RAYGEN_KHR"; - } - if (supportedStages & 0x00000200 /* VK_SHADER_STAGE_ANY_HIT_BIT_KHR */) { - res += ", STAGE_ANY_HIT_KHR"; - } - if (supportedStages & 0x00000400 /* VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR */) { - res += ", STAGE_CLOSEST_HIT_KHR"; - } - if (supportedStages & 0x00000800 /* VK_SHADER_STAGE_MISS_BIT_KHR */) { - res += ", STAGE_MISS_KHR"; - } - if (supportedStages & 0x00001000 /* VK_SHADER_STAGE_INTERSECTION_BIT_KHR */) { - res += ", STAGE_INTERSECTION_KHR"; - } - if (supportedStages & 0x00002000 /* VK_SHADER_STAGE_CALLABLE_BIT_KHR */) { - res += ", STAGE_CALLABLE_KHR"; - } - if (supportedStages & 0x00000040 /* VK_SHADER_STAGE_TASK_BIT_NV */) { - res += ", STAGE_TASK_NV"; - } - if (supportedStages & 0x00000080 /* VK_SHADER_STAGE_MESH_BIT_NV */) { - res += ", STAGE_MESH_NV"; - } - - return res.substr(2); // Remove first ", ". 
-} - -uint32_t VulkanContext::SubgroupCapabilities::supported_operations_flags_rd() const { - uint32_t flags = 0; - - if (supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) { - flags += RenderingDevice::SubgroupOperations::SUBGROUP_BASIC_BIT; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_VOTE_BIT) { - flags += RenderingDevice::SubgroupOperations::SUBGROUP_VOTE_BIT; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) { - flags += RenderingDevice::SubgroupOperations::SUBGROUP_ARITHMETIC_BIT; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) { - flags += RenderingDevice::SubgroupOperations::SUBGROUP_BALLOT_BIT; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT) { - flags += RenderingDevice::SubgroupOperations::SUBGROUP_SHUFFLE_BIT; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT) { - flags += RenderingDevice::SubgroupOperations::SUBGROUP_SHUFFLE_RELATIVE_BIT; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_CLUSTERED_BIT) { - flags += RenderingDevice::SubgroupOperations::SUBGROUP_CLUSTERED_BIT; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_QUAD_BIT) { - flags += RenderingDevice::SubgroupOperations::SUBGROUP_QUAD_BIT; - } - - return flags; -} - -String VulkanContext::SubgroupCapabilities::supported_operations_desc() const { - String res; - - if (supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) { - res += ", FEATURE_BASIC"; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_VOTE_BIT) { - res += ", FEATURE_VOTE"; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) { - res += ", FEATURE_ARITHMETIC"; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) { - res += ", FEATURE_BALLOT"; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT) { - res += ", FEATURE_SHUFFLE"; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT) { - res += ", FEATURE_SHUFFLE_RELATIVE"; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_CLUSTERED_BIT) 
{ - res += ", FEATURE_CLUSTERED"; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_QUAD_BIT) { - res += ", FEATURE_QUAD"; - } - if (supportedOperations & VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV) { - res += ", FEATURE_PARTITIONED_NV"; - } - - return res.substr(2); // Remove first ", ". -} - -Error VulkanContext::_check_capabilities() { - // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VK_KHR_multiview.html - // https://www.khronos.org/blog/vulkan-subgroup-tutorial - - // For Vulkan 1.0 vkGetPhysicalDeviceProperties2 is not available, including not in the loader we compile against on Android. - - // So we check if the functions are accessible by getting their function pointers and skipping if not - // (note that the desktop loader does a better job here but the android loader doesn't.) - - // Assume not supported until proven otherwise. - vrs_capabilities.pipeline_vrs_supported = false; - vrs_capabilities.primitive_vrs_supported = false; - vrs_capabilities.attachment_vrs_supported = false; - vrs_capabilities.min_texel_size = Size2i(); - vrs_capabilities.max_texel_size = Size2i(); - vrs_capabilities.texel_size = Size2i(); - multiview_capabilities.is_supported = false; - multiview_capabilities.geometry_shader_is_supported = false; - multiview_capabilities.tessellation_shader_is_supported = false; - multiview_capabilities.max_view_count = 0; - multiview_capabilities.max_instance_count = 0; - subgroup_capabilities.size = 0; - subgroup_capabilities.min_size = 0; - subgroup_capabilities.max_size = 0; - subgroup_capabilities.supportedStages = 0; - subgroup_capabilities.supportedOperations = 0; - subgroup_capabilities.quadOperationsInAllStages = false; - subgroup_capabilities.size_control_is_supported = false; - shader_capabilities.shader_float16_is_supported = false; - shader_capabilities.shader_int8_is_supported = false; - storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = false; - 
storage_buffer_capabilities.uniform_and_storage_buffer_16_bit_access_is_supported = false; - storage_buffer_capabilities.storage_push_constant_16_is_supported = false; - storage_buffer_capabilities.storage_input_output_16 = false; - - if (is_instance_extension_enabled(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME)) { - // Check for extended features. - PFN_vkGetPhysicalDeviceFeatures2 vkGetPhysicalDeviceFeatures2_func = (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceFeatures2"); - if (vkGetPhysicalDeviceFeatures2_func == nullptr) { - // In Vulkan 1.0 might be accessible under its original extension name. - vkGetPhysicalDeviceFeatures2_func = (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceFeatures2KHR"); - } - if (vkGetPhysicalDeviceFeatures2_func != nullptr) { - // Check our extended features. - void *next = nullptr; - - // We must check that the relative extension is present before assuming a - // feature as enabled. 
- // See also: https://github.com/godotengine/godot/issues/65409 - - VkPhysicalDeviceVulkan12Features device_features_vk12 = {}; - VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_features = {}; - VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features = {}; - VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {}; - VkPhysicalDeviceMultiviewFeatures multiview_features = {}; - VkPhysicalDevicePipelineCreationCacheControlFeatures pipeline_cache_control_features = {}; - - if (device_api_version >= VK_API_VERSION_1_2) { - device_features_vk12.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; - device_features_vk12.pNext = next; - next = &device_features_vk12; - } else { - if (is_device_extension_enabled(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { - shader_features = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR, - /*pNext*/ next, - /*shaderFloat16*/ false, - /*shaderInt8*/ false, - }; - next = &shader_features; - } - } - - if (is_device_extension_enabled(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { - vrs_features = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR, - /*pNext*/ next, - /*pipelineFragmentShadingRate*/ false, - /*primitiveFragmentShadingRate*/ false, - /*attachmentFragmentShadingRate*/ false, - }; - next = &vrs_features; - } - - if (is_device_extension_enabled(VK_KHR_16BIT_STORAGE_EXTENSION_NAME)) { - storage_feature = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, - /*pNext*/ next, - /*storageBuffer16BitAccess*/ false, - /*uniformAndStorageBuffer16BitAccess*/ false, - /*storagePushConstant16*/ false, - /*storageInputOutput16*/ false, - }; - next = &storage_feature; - } - - if (is_device_extension_enabled(VK_KHR_MULTIVIEW_EXTENSION_NAME)) { - multiview_features = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, - /*pNext*/ next, - /*multiview*/ false, - /*multiviewGeometryShader*/ false, - 
/*multiviewTessellationShader*/ false, - }; - next = &multiview_features; - } - - if (is_device_extension_enabled(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME)) { - pipeline_cache_control_features = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES, - /*pNext*/ next, - /*pipelineCreationCacheControl*/ false, - }; - next = &pipeline_cache_control_features; - } - - VkPhysicalDeviceFeatures2 device_features; - device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - device_features.pNext = next; - - vkGetPhysicalDeviceFeatures2_func(gpu, &device_features); - - if (device_api_version >= VK_API_VERSION_1_2) { -#ifdef MACOS_ENABLED - ERR_FAIL_COND_V_MSG(!device_features_vk12.shaderSampledImageArrayNonUniformIndexing, ERR_CANT_CREATE, "Your GPU doesn't support shaderSampledImageArrayNonUniformIndexing which is required to use the Vulkan-based renderers in Godot."); -#endif - - if (is_device_extension_enabled(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { - shader_capabilities.shader_float16_is_supported = device_features_vk12.shaderFloat16; - shader_capabilities.shader_int8_is_supported = device_features_vk12.shaderInt8; - } - } else { - if (is_device_extension_enabled(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { - shader_capabilities.shader_float16_is_supported = shader_features.shaderFloat16; - shader_capabilities.shader_int8_is_supported = shader_features.shaderInt8; - } - } - - if (is_device_extension_enabled(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { - vrs_capabilities.pipeline_vrs_supported = vrs_features.pipelineFragmentShadingRate; - vrs_capabilities.primitive_vrs_supported = vrs_features.primitiveFragmentShadingRate; - vrs_capabilities.attachment_vrs_supported = vrs_features.attachmentFragmentShadingRate; - } - - if (is_device_extension_enabled(VK_KHR_MULTIVIEW_EXTENSION_NAME)) { - multiview_capabilities.is_supported = multiview_features.multiview; - 
multiview_capabilities.geometry_shader_is_supported = multiview_features.multiviewGeometryShader; - multiview_capabilities.tessellation_shader_is_supported = multiview_features.multiviewTessellationShader; - } - - if (is_device_extension_enabled(VK_KHR_16BIT_STORAGE_EXTENSION_NAME)) { - storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = storage_feature.storageBuffer16BitAccess; - storage_buffer_capabilities.uniform_and_storage_buffer_16_bit_access_is_supported = storage_feature.uniformAndStorageBuffer16BitAccess; - storage_buffer_capabilities.storage_push_constant_16_is_supported = storage_feature.storagePushConstant16; - storage_buffer_capabilities.storage_input_output_16 = storage_feature.storageInputOutput16; - } - - if (is_device_extension_enabled(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME)) { - pipeline_cache_control_support = pipeline_cache_control_features.pipelineCreationCacheControl; - } - } - - // Check extended properties. - PFN_vkGetPhysicalDeviceProperties2 device_properties_func = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceProperties2"); - if (device_properties_func == nullptr) { - // In Vulkan 1.0 might be accessible under its original extension name. 
- device_properties_func = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceProperties2KHR"); - } - if (device_properties_func != nullptr) { - VkPhysicalDeviceFragmentShadingRatePropertiesKHR vrsProperties{}; - VkPhysicalDeviceMultiviewProperties multiviewProperties{}; - VkPhysicalDeviceSubgroupProperties subgroupProperties{}; - VkPhysicalDeviceSubgroupSizeControlProperties subgroupSizeControlProperties = {}; - VkPhysicalDeviceProperties2 physicalDeviceProperties{}; - void *nextptr = nullptr; - - if (device_api_version >= VK_API_VERSION_1_1) { // Vulkan 1.1 or higher - subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; - subgroupProperties.pNext = nextptr; - - nextptr = &subgroupProperties; - - subgroup_capabilities.size_control_is_supported = is_device_extension_enabled(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); - - if (subgroup_capabilities.size_control_is_supported) { - subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES; - subgroupSizeControlProperties.pNext = nextptr; - - nextptr = &subgroupSizeControlProperties; - } - } - - if (multiview_capabilities.is_supported) { - multiviewProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES; - multiviewProperties.pNext = nextptr; - - nextptr = &multiviewProperties; - } - - if (vrs_capabilities.attachment_vrs_supported) { - vrsProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; - vrsProperties.pNext = nextptr; - - nextptr = &vrsProperties; - } - - physicalDeviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - physicalDeviceProperties.pNext = nextptr; - - device_properties_func(gpu, &physicalDeviceProperties); - - subgroup_capabilities.size = subgroupProperties.subgroupSize; - subgroup_capabilities.min_size = subgroupProperties.subgroupSize; - subgroup_capabilities.max_size = subgroupProperties.subgroupSize; - 
subgroup_capabilities.supportedStages = subgroupProperties.supportedStages; - subgroup_capabilities.supportedOperations = subgroupProperties.supportedOperations; - // Note: quadOperationsInAllStages will be true if: - // - supportedStages has VK_SHADER_STAGE_ALL_GRAPHICS + VK_SHADER_STAGE_COMPUTE_BIT. - // - supportedOperations has VK_SUBGROUP_FEATURE_QUAD_BIT. - subgroup_capabilities.quadOperationsInAllStages = subgroupProperties.quadOperationsInAllStages; - - if (subgroup_capabilities.size_control_is_supported && (subgroupSizeControlProperties.requiredSubgroupSizeStages & VK_SHADER_STAGE_COMPUTE_BIT)) { - subgroup_capabilities.min_size = subgroupSizeControlProperties.minSubgroupSize; - subgroup_capabilities.max_size = subgroupSizeControlProperties.maxSubgroupSize; - } - - if (vrs_capabilities.pipeline_vrs_supported || vrs_capabilities.primitive_vrs_supported || vrs_capabilities.attachment_vrs_supported) { - print_verbose("- Vulkan Variable Rate Shading supported:"); - if (vrs_capabilities.pipeline_vrs_supported) { - print_verbose(" Pipeline fragment shading rate"); - } - if (vrs_capabilities.primitive_vrs_supported) { - print_verbose(" Primitive fragment shading rate"); - } - if (vrs_capabilities.attachment_vrs_supported) { - // TODO expose these somehow to the end user. 
- vrs_capabilities.min_texel_size.x = vrsProperties.minFragmentShadingRateAttachmentTexelSize.width; - vrs_capabilities.min_texel_size.y = vrsProperties.minFragmentShadingRateAttachmentTexelSize.height; - vrs_capabilities.max_texel_size.x = vrsProperties.maxFragmentShadingRateAttachmentTexelSize.width; - vrs_capabilities.max_texel_size.y = vrsProperties.maxFragmentShadingRateAttachmentTexelSize.height; - - // We'll attempt to default to a texel size of 16x16 - vrs_capabilities.texel_size.x = CLAMP(16, vrs_capabilities.min_texel_size.x, vrs_capabilities.max_texel_size.x); - vrs_capabilities.texel_size.y = CLAMP(16, vrs_capabilities.min_texel_size.y, vrs_capabilities.max_texel_size.y); - - print_verbose(String(" Attachment fragment shading rate") + String(", min texel size: (") + itos(vrs_capabilities.min_texel_size.x) + String(", ") + itos(vrs_capabilities.min_texel_size.y) + String(")") + String(", max texel size: (") + itos(vrs_capabilities.max_texel_size.x) + String(", ") + itos(vrs_capabilities.max_texel_size.y) + String(")")); - } - - } else { - print_verbose("- Vulkan Variable Rate Shading not supported"); - } - - if (multiview_capabilities.is_supported) { - multiview_capabilities.max_view_count = multiviewProperties.maxMultiviewViewCount; - multiview_capabilities.max_instance_count = multiviewProperties.maxMultiviewInstanceIndex; - - print_verbose("- Vulkan multiview supported:"); - print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count)); - print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count)); - } else { - print_verbose("- Vulkan multiview not supported"); - } - - print_verbose("- Vulkan subgroup:"); - print_verbose(" size: " + itos(subgroup_capabilities.size)); - print_verbose(" min size: " + itos(subgroup_capabilities.min_size)); - print_verbose(" max size: " + itos(subgroup_capabilities.max_size)); - print_verbose(" stages: " + subgroup_capabilities.supported_stages_desc()); - print_verbose(" 
supported ops: " + subgroup_capabilities.supported_operations_desc()); - if (subgroup_capabilities.quadOperationsInAllStages) { - print_verbose(" quad operations in all stages"); - } - } else { - print_verbose("- Couldn't call vkGetPhysicalDeviceProperties2"); - } - } - - return OK; -} - -Error VulkanContext::_create_instance() { - // Obtain Vulkan version. - _obtain_vulkan_version(); - - // Initialize extensions. - { - Error err = _initialize_instance_extensions(); - if (err != OK) { - return err; - } - } - - int enabled_extension_count = 0; - const char *enabled_extension_names[MAX_EXTENSIONS]; - ERR_FAIL_COND_V(enabled_instance_extension_names.size() > MAX_EXTENSIONS, ERR_CANT_CREATE); - for (const CharString &extension_name : enabled_instance_extension_names) { - enabled_extension_names[enabled_extension_count++] = extension_name.ptr(); - } - - // We'll set application version to the Vulkan version we're developing against, even if our instance is based on - // an older Vulkan version, devices can still support newer versions of Vulkan. - // The exception is when we're on Vulkan 1.0, we should not set this to anything but 1.0. - // Note that this value is only used by validation layers to warn us about version issues. - uint32_t application_api_version = instance_api_version == VK_API_VERSION_1_0 ? 
VK_API_VERSION_1_0 : VK_API_VERSION_1_2; - - CharString cs = GLOBAL_GET("application/config/name").operator String().utf8(); - const VkApplicationInfo app = { - /*sType*/ VK_STRUCTURE_TYPE_APPLICATION_INFO, - /*pNext*/ nullptr, - /*pApplicationName*/ cs.get_data(), - /*applicationVersion*/ 0, // It would be really nice if we store a version number in project settings, say "application/config/version" - /*pEngineName*/ VERSION_NAME, - /*engineVersion*/ VK_MAKE_VERSION(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH), - /*apiVersion*/ application_api_version - }; - VkInstanceCreateInfo inst_info{}; - inst_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - inst_info.pApplicationInfo = &app; - inst_info.enabledExtensionCount = enabled_extension_count; - inst_info.ppEnabledExtensionNames = (const char *const *)enabled_extension_names; - if (_use_validation_layers()) { - _get_preferred_validation_layers(&inst_info.enabledLayerCount, &inst_info.ppEnabledLayerNames); - } - - /* - * This is info for a temp callback to use during CreateInstance. - * After the instance is created, we use the instance-based - * function to register the final callback. - */ - VkDebugUtilsMessengerCreateInfoEXT dbg_messenger_create_info = {}; - VkDebugReportCallbackCreateInfoEXT dbg_report_callback_create_info = {}; - if (is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { - // VK_EXT_debug_utils style. 
- dbg_messenger_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; - dbg_messenger_create_info.pNext = nullptr; - dbg_messenger_create_info.flags = 0; - dbg_messenger_create_info.messageSeverity = - VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; - dbg_messenger_create_info.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; - dbg_messenger_create_info.pfnUserCallback = _debug_messenger_callback; - dbg_messenger_create_info.pUserData = this; - inst_info.pNext = &dbg_messenger_create_info; - } else if (is_instance_extension_enabled(VK_EXT_DEBUG_REPORT_EXTENSION_NAME)) { - dbg_report_callback_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT; - dbg_report_callback_create_info.flags = VK_DEBUG_REPORT_INFORMATION_BIT_EXT | - VK_DEBUG_REPORT_WARNING_BIT_EXT | - VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT | - VK_DEBUG_REPORT_ERROR_BIT_EXT | - VK_DEBUG_REPORT_DEBUG_BIT_EXT; - dbg_report_callback_create_info.pfnCallback = _debug_report_callback; - dbg_report_callback_create_info.pUserData = this; - inst_info.pNext = &dbg_report_callback_create_info; - } - - VkResult err; - - if (vulkan_hooks) { - if (!vulkan_hooks->create_vulkan_instance(&inst_info, &inst)) { - return ERR_CANT_CREATE; - } - } else { - err = vkCreateInstance(&inst_info, nullptr, &inst); - ERR_FAIL_COND_V_MSG(err == VK_ERROR_INCOMPATIBLE_DRIVER, ERR_CANT_CREATE, - "Cannot find a compatible Vulkan installable client driver (ICD).\n\n" - "vkCreateInstance Failure"); - ERR_FAIL_COND_V_MSG(err == VK_ERROR_EXTENSION_NOT_PRESENT, ERR_CANT_CREATE, - "Cannot find a specified extension library.\n" - "Make sure your layers path is set appropriately.\n" - "vkCreateInstance Failure"); - ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, - "vkCreateInstance failed.\n\n" - "Do you have a compatible Vulkan installable client 
driver (ICD) installed?\n" - "Please look at the Getting Started guide for additional information.\n" - "vkCreateInstance Failure"); - } - - inst_initialized = true; - -#ifdef USE_VOLK - volkLoadInstance(inst); -#endif - - if (is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { - // Setup VK_EXT_debug_utils function pointers always (we use them for debug labels and names). - CreateDebugUtilsMessengerEXT = - (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(inst, "vkCreateDebugUtilsMessengerEXT"); - DestroyDebugUtilsMessengerEXT = - (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(inst, "vkDestroyDebugUtilsMessengerEXT"); - SubmitDebugUtilsMessageEXT = - (PFN_vkSubmitDebugUtilsMessageEXT)vkGetInstanceProcAddr(inst, "vkSubmitDebugUtilsMessageEXT"); - CmdBeginDebugUtilsLabelEXT = - (PFN_vkCmdBeginDebugUtilsLabelEXT)vkGetInstanceProcAddr(inst, "vkCmdBeginDebugUtilsLabelEXT"); - CmdEndDebugUtilsLabelEXT = - (PFN_vkCmdEndDebugUtilsLabelEXT)vkGetInstanceProcAddr(inst, "vkCmdEndDebugUtilsLabelEXT"); - CmdInsertDebugUtilsLabelEXT = - (PFN_vkCmdInsertDebugUtilsLabelEXT)vkGetInstanceProcAddr(inst, "vkCmdInsertDebugUtilsLabelEXT"); - SetDebugUtilsObjectNameEXT = - (PFN_vkSetDebugUtilsObjectNameEXT)vkGetInstanceProcAddr(inst, "vkSetDebugUtilsObjectNameEXT"); - if (nullptr == CreateDebugUtilsMessengerEXT || nullptr == DestroyDebugUtilsMessengerEXT || - nullptr == SubmitDebugUtilsMessageEXT || nullptr == CmdBeginDebugUtilsLabelEXT || - nullptr == CmdEndDebugUtilsLabelEXT || nullptr == CmdInsertDebugUtilsLabelEXT || - nullptr == SetDebugUtilsObjectNameEXT) { - ERR_FAIL_V_MSG(ERR_CANT_CREATE, - "GetProcAddr: Failed to init VK_EXT_debug_utils\n" - "GetProcAddr: Failure"); - } - - err = CreateDebugUtilsMessengerEXT(inst, &dbg_messenger_create_info, nullptr, &dbg_messenger); - switch (err) { - case VK_SUCCESS: - break; - case VK_ERROR_OUT_OF_HOST_MEMORY: - ERR_FAIL_V_MSG(ERR_CANT_CREATE, - "CreateDebugUtilsMessengerEXT: out of host memory\n" - 
"CreateDebugUtilsMessengerEXT Failure"); - break; - default: - ERR_FAIL_V_MSG(ERR_CANT_CREATE, - "CreateDebugUtilsMessengerEXT: unknown failure\n" - "CreateDebugUtilsMessengerEXT Failure"); - ERR_FAIL_V(ERR_CANT_CREATE); - break; - } - } else if (is_instance_extension_enabled(VK_EXT_DEBUG_REPORT_EXTENSION_NAME)) { - CreateDebugReportCallbackEXT = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(inst, "vkCreateDebugReportCallbackEXT"); - DebugReportMessageEXT = (PFN_vkDebugReportMessageEXT)vkGetInstanceProcAddr(inst, "vkDebugReportMessageEXT"); - DestroyDebugReportCallbackEXT = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(inst, "vkDestroyDebugReportCallbackEXT"); - - if (nullptr == CreateDebugReportCallbackEXT || nullptr == DebugReportMessageEXT || nullptr == DestroyDebugReportCallbackEXT) { - ERR_FAIL_V_MSG(ERR_CANT_CREATE, - "GetProcAddr: Failed to init VK_EXT_debug_report\n" - "GetProcAddr: Failure"); - } - - err = CreateDebugReportCallbackEXT(inst, &dbg_report_callback_create_info, nullptr, &dbg_debug_report); - switch (err) { - case VK_SUCCESS: - break; - case VK_ERROR_OUT_OF_HOST_MEMORY: - ERR_FAIL_V_MSG(ERR_CANT_CREATE, - "CreateDebugReportCallbackEXT: out of host memory\n" - "CreateDebugReportCallbackEXT Failure"); - break; - default: - ERR_FAIL_V_MSG(ERR_CANT_CREATE, - "CreateDebugReportCallbackEXT: unknown failure\n" - "CreateDebugReportCallbackEXT Failure"); - ERR_FAIL_V(ERR_CANT_CREATE); - break; - } - } - - return OK; -} - -Error VulkanContext::_create_physical_device(VkSurfaceKHR p_surface) { - // Make initial call to query gpu_count, then second call for gpu info. 
- uint32_t gpu_count = 0; - VkResult err = vkEnumeratePhysicalDevices(inst, &gpu_count, nullptr); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - ERR_FAIL_COND_V_MSG(gpu_count == 0, ERR_CANT_CREATE, - "vkEnumeratePhysicalDevices reported zero accessible devices.\n\n" - "Do you have a compatible Vulkan installable client driver (ICD) installed?\n" - "vkEnumeratePhysicalDevices Failure"); - - VkPhysicalDevice *physical_devices = (VkPhysicalDevice *)malloc(sizeof(VkPhysicalDevice) * gpu_count); - err = vkEnumeratePhysicalDevices(inst, &gpu_count, physical_devices); - if (err) { - free(physical_devices); - ERR_FAIL_V(ERR_CANT_CREATE); - } - - static const struct { - uint32_t id; - const char *name; - } vendor_names[] = { - { 0x1002, "AMD" }, - { 0x1010, "ImgTec" }, - { 0x106B, "Apple" }, - { 0x10DE, "NVIDIA" }, - { 0x13B5, "ARM" }, - { 0x5143, "Qualcomm" }, - { 0x8086, "Intel" }, - { 0, nullptr }, - }; - - int32_t device_index = -1; - if (vulkan_hooks) { - if (!vulkan_hooks->get_physical_device(&gpu)) { - return ERR_CANT_CREATE; - } - - // Not really needed but nice to print the correct entry. - for (uint32_t i = 0; i < gpu_count; ++i) { - if (physical_devices[i] == gpu) { - device_index = i; - break; - } - } - } else { - // TODO: At least on Linux Laptops integrated GPUs fail with Vulkan in many instances. - // The device should really be a preference, but for now choosing a discrete GPU over the - // integrated one is better than the default. 
- - int type_selected = -1; - print_verbose("Vulkan devices:"); - for (uint32_t i = 0; i < gpu_count; ++i) { - VkPhysicalDeviceProperties props; - vkGetPhysicalDeviceProperties(physical_devices[i], &props); - - bool present_supported = false; - - if (p_surface) { - uint32_t device_queue_family_count = 0; - vkGetPhysicalDeviceQueueFamilyProperties(physical_devices[i], &device_queue_family_count, nullptr); - VkQueueFamilyProperties *device_queue_props = (VkQueueFamilyProperties *)malloc(device_queue_family_count * sizeof(VkQueueFamilyProperties)); - vkGetPhysicalDeviceQueueFamilyProperties(physical_devices[i], &device_queue_family_count, device_queue_props); - for (uint32_t j = 0; j < device_queue_family_count; j++) { - if ((device_queue_props[j].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) { - VkBool32 supports; - err = vkGetPhysicalDeviceSurfaceSupportKHR( - physical_devices[i], j, p_surface, &supports); - if (err == VK_SUCCESS && supports) { - present_supported = true; - } else { - continue; - } - } - } - free(device_queue_props); - } - String name = String::utf8(props.deviceName); - String vendor = "Unknown"; - String dev_type; - switch (props.deviceType) { - case VkPhysicalDeviceType::VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: { - dev_type = "Discrete"; - } break; - case VkPhysicalDeviceType::VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: { - dev_type = "Integrated"; - } break; - case VkPhysicalDeviceType::VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: { - dev_type = "Virtual"; - } break; - case VkPhysicalDeviceType::VK_PHYSICAL_DEVICE_TYPE_CPU: { - dev_type = "CPU"; - } break; - default: { - dev_type = "Other"; - } break; - } - uint32_t vendor_idx = 0; - while (vendor_names[vendor_idx].name != nullptr) { - if (props.vendorID == vendor_names[vendor_idx].id) { - vendor = vendor_names[vendor_idx].name; - break; - } - vendor_idx++; - } - print_verbose(" #" + itos(i) + ": " + vendor + " " + name + " - " + (present_supported ? 
"Supported" : "Unsupported") + ", " + dev_type); - - if (present_supported || !p_surface) { // Select first supported device of preferred type: Discrete > Integrated > Virtual > CPU > Other. - switch (props.deviceType) { - case VkPhysicalDeviceType::VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: { - if (type_selected < 4) { - type_selected = 4; - device_index = i; - } - } break; - case VkPhysicalDeviceType::VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: { - if (type_selected < 3) { - type_selected = 3; - device_index = i; - } - } break; - case VkPhysicalDeviceType::VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: { - if (type_selected < 2) { - type_selected = 2; - device_index = i; - } - } break; - case VkPhysicalDeviceType::VK_PHYSICAL_DEVICE_TYPE_CPU: { - if (type_selected < 1) { - type_selected = 1; - device_index = i; - } - } break; - default: { - if (type_selected < 0) { - type_selected = 0; - device_index = i; - } - } break; - } - } - } - - int32_t user_device_index = Engine::get_singleton()->get_gpu_index(); // Force user selected GPU. - if (user_device_index >= 0 && user_device_index < (int32_t)gpu_count) { - device_index = user_device_index; - } - - ERR_FAIL_COND_V_MSG(device_index == -1, ERR_CANT_CREATE, "None of Vulkan devices supports both graphics and present queues."); - - gpu = physical_devices[device_index]; - } - - free(physical_devices); - - // Get identifier properties. 
- vkGetPhysicalDeviceProperties(gpu, &gpu_props); - - device_name = String::utf8(gpu_props.deviceName); - device_type = gpu_props.deviceType; - pipeline_cache_id = String::hex_encode_buffer(gpu_props.pipelineCacheUUID, VK_UUID_SIZE); - pipeline_cache_id += "-driver-" + itos(gpu_props.driverVersion); - { - device_vendor = "Unknown"; - uint32_t vendor_idx = 0; - while (vendor_names[vendor_idx].name != nullptr) { - if (gpu_props.vendorID == vendor_names[vendor_idx].id) { - device_vendor = vendor_names[vendor_idx].name; - break; - } - vendor_idx++; - } - } - - // Get device version - device_api_version = gpu_props.apiVersion; - - String rendering_method; - if (OS::get_singleton()->get_current_rendering_method() == "mobile") { - rendering_method = "Forward Mobile"; - } else { - rendering_method = "Forward+"; - } - - // Output our device version - print_line(vformat("Vulkan API %s - %s - Using Vulkan Device #%d: %s - %s", get_device_api_version(), rendering_method, device_index, device_vendor, device_name)); - - { - Error _err = _initialize_device_extensions(); - if (_err != OK) { - return _err; - } - } - - // Call with nullptr data to get count. - vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_family_count, nullptr); - ERR_FAIL_COND_V(queue_family_count == 0, ERR_CANT_CREATE); - - queue_props = (VkQueueFamilyProperties *)malloc(queue_family_count * sizeof(VkQueueFamilyProperties)); - vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_family_count, queue_props); - // Query fine-grained feature support for this device. - // If app has specific feature requirements it should check supported - // features based on this query - VkPhysicalDeviceFeatures features = {}; - vkGetPhysicalDeviceFeatures(gpu, &features); - - // Check required features and abort if any of them is missing. 
- if (!features.imageCubeArray || !features.independentBlend) { - String error_string = vformat("Your GPU (%s) does not support the following features which are required to use Vulkan-based renderers in Godot:\n\n", device_name); - if (!features.imageCubeArray) { - error_string += "- No support for image cube arrays.\n"; - } - if (!features.independentBlend) { - error_string += "- No support for independentBlend.\n"; - } - error_string += "\nThis is usually a hardware limitation, so updating graphics drivers won't help in most cases."; - -#if defined(ANDROID_ENABLED) || defined(IOS_ENABLED) - // Android/iOS platform ports currently don't exit themselves when this method returns `ERR_CANT_CREATE`. - OS::get_singleton()->alert(error_string + "\nClick OK to exit (black screen will be visible)."); -#else - OS::get_singleton()->alert(error_string + "\nClick OK to exit."); -#endif - - return ERR_CANT_CREATE; - } - - memset(&physical_device_features, 0, sizeof(physical_device_features)); -#define VK_DEVICEFEATURE_ENABLE_IF(x) \ - if (features.x) { \ - physical_device_features.x = features.x; \ - } else \ - ((void)0) - - // - // Opt-in to the features we actually need/use. These can be changed in the future. - // We do this for multiple reasons: - // - // 1. Certain features (like sparse* stuff) cause unnecessary internal driver allocations. - // 2. Others like shaderStorageImageMultisample are a huge red flag - // (MSAA + Storage is rarely needed). - // 3. Most features when turned off aren't actually off (we just promise the driver not to use them) - // and it is validation what will complain. This allows us to target a minimum baseline. - // - // TODO: Allow the user to override these settings (i.e. turn off more stuff) using profiles - // so they can target a broad range of HW. 
For example Mali HW does not have - // shaderClipDistance/shaderCullDistance; thus validation would complain if such feature is used; - // allowing them to fix the problem without even owning Mali HW to test on. - // - - // Turn off robust buffer access, which can hamper performance on some hardware. - //VK_DEVICEFEATURE_ENABLE_IF(robustBufferAccess); - VK_DEVICEFEATURE_ENABLE_IF(fullDrawIndexUint32); - VK_DEVICEFEATURE_ENABLE_IF(imageCubeArray); - VK_DEVICEFEATURE_ENABLE_IF(independentBlend); - VK_DEVICEFEATURE_ENABLE_IF(geometryShader); - VK_DEVICEFEATURE_ENABLE_IF(tessellationShader); - VK_DEVICEFEATURE_ENABLE_IF(sampleRateShading); - VK_DEVICEFEATURE_ENABLE_IF(dualSrcBlend); - VK_DEVICEFEATURE_ENABLE_IF(logicOp); - VK_DEVICEFEATURE_ENABLE_IF(multiDrawIndirect); - VK_DEVICEFEATURE_ENABLE_IF(drawIndirectFirstInstance); - VK_DEVICEFEATURE_ENABLE_IF(depthClamp); - VK_DEVICEFEATURE_ENABLE_IF(depthBiasClamp); - VK_DEVICEFEATURE_ENABLE_IF(fillModeNonSolid); - VK_DEVICEFEATURE_ENABLE_IF(depthBounds); - VK_DEVICEFEATURE_ENABLE_IF(wideLines); - VK_DEVICEFEATURE_ENABLE_IF(largePoints); - VK_DEVICEFEATURE_ENABLE_IF(alphaToOne); - VK_DEVICEFEATURE_ENABLE_IF(multiViewport); - VK_DEVICEFEATURE_ENABLE_IF(samplerAnisotropy); - VK_DEVICEFEATURE_ENABLE_IF(textureCompressionETC2); - VK_DEVICEFEATURE_ENABLE_IF(textureCompressionASTC_LDR); - VK_DEVICEFEATURE_ENABLE_IF(textureCompressionBC); - //VK_DEVICEFEATURE_ENABLE_IF(occlusionQueryPrecise); - //VK_DEVICEFEATURE_ENABLE_IF(pipelineStatisticsQuery); - VK_DEVICEFEATURE_ENABLE_IF(vertexPipelineStoresAndAtomics); - VK_DEVICEFEATURE_ENABLE_IF(fragmentStoresAndAtomics); - VK_DEVICEFEATURE_ENABLE_IF(shaderTessellationAndGeometryPointSize); - VK_DEVICEFEATURE_ENABLE_IF(shaderImageGatherExtended); - VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageExtendedFormats); - // Intel Arc doesn't support shaderStorageImageMultisample (yet? could be a driver thing), so it's - // better for Validation to scream at us if we use it. 
Furthermore MSAA Storage is a huge red flag - // for performance. - //VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageMultisample); - VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageReadWithoutFormat); - VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageWriteWithoutFormat); - VK_DEVICEFEATURE_ENABLE_IF(shaderUniformBufferArrayDynamicIndexing); - VK_DEVICEFEATURE_ENABLE_IF(shaderSampledImageArrayDynamicIndexing); - VK_DEVICEFEATURE_ENABLE_IF(shaderStorageBufferArrayDynamicIndexing); - VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageArrayDynamicIndexing); - VK_DEVICEFEATURE_ENABLE_IF(shaderClipDistance); - VK_DEVICEFEATURE_ENABLE_IF(shaderCullDistance); - VK_DEVICEFEATURE_ENABLE_IF(shaderFloat64); - VK_DEVICEFEATURE_ENABLE_IF(shaderInt64); - VK_DEVICEFEATURE_ENABLE_IF(shaderInt16); - //VK_DEVICEFEATURE_ENABLE_IF(shaderResourceResidency); - VK_DEVICEFEATURE_ENABLE_IF(shaderResourceMinLod); - // We don't use sparse features and enabling them cause extra internal - // allocations inside the Vulkan driver we don't need. 
- //VK_DEVICEFEATURE_ENABLE_IF(sparseBinding); - //VK_DEVICEFEATURE_ENABLE_IF(sparseResidencyBuffer); - //VK_DEVICEFEATURE_ENABLE_IF(sparseResidencyImage2D); - //VK_DEVICEFEATURE_ENABLE_IF(sparseResidencyImage3D); - //VK_DEVICEFEATURE_ENABLE_IF(sparseResidency2Samples); - //VK_DEVICEFEATURE_ENABLE_IF(sparseResidency4Samples); - //VK_DEVICEFEATURE_ENABLE_IF(sparseResidency8Samples); - //VK_DEVICEFEATURE_ENABLE_IF(sparseResidency16Samples); - //VK_DEVICEFEATURE_ENABLE_IF(sparseResidencyAliased); - VK_DEVICEFEATURE_ENABLE_IF(variableMultisampleRate); - //VK_DEVICEFEATURE_ENABLE_IF(inheritedQueries); - -#define GET_INSTANCE_PROC_ADDR(inst, entrypoint) \ - { \ - fp##entrypoint = (PFN_vk##entrypoint)vkGetInstanceProcAddr(inst, "vk" #entrypoint); \ - ERR_FAIL_NULL_V_MSG(fp##entrypoint, ERR_CANT_CREATE, \ - "vkGetInstanceProcAddr failed to find vk" #entrypoint); \ - } - - GET_INSTANCE_PROC_ADDR(inst, GetPhysicalDeviceSurfaceSupportKHR); - GET_INSTANCE_PROC_ADDR(inst, GetPhysicalDeviceSurfaceCapabilitiesKHR); - GET_INSTANCE_PROC_ADDR(inst, GetPhysicalDeviceSurfaceFormatsKHR); - GET_INSTANCE_PROC_ADDR(inst, GetPhysicalDeviceSurfacePresentModesKHR); - GET_INSTANCE_PROC_ADDR(inst, GetSwapchainImagesKHR); - - // Gets capability info for current Vulkan driver. - { - Error res = _check_capabilities(); - if (res != OK) { - return res; - } - } - - device_initialized = true; - return OK; -} - -Error VulkanContext::_create_device(VkDevice &r_vk_device) { - VkResult err; - float queue_priorities[1] = { 0.0 }; - VkDeviceQueueCreateInfo queues[2]; - queues[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - queues[0].pNext = nullptr; - queues[0].queueFamilyIndex = graphics_queue_family_index; - queues[0].queueCount = 1; - queues[0].pQueuePriorities = queue_priorities; - queues[0].flags = 0; - - // Before we retrieved what is supported, here we tell Vulkan we want to enable these features using the same structs. 
- void *nextptr = nullptr; - - VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_features = { - /*sType*/ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR, - /*pNext*/ nextptr, - /*shaderFloat16*/ shader_capabilities.shader_float16_is_supported, - /*shaderInt8*/ shader_capabilities.shader_int8_is_supported, - }; - nextptr = &shader_features; - - VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features = {}; - if (vrs_capabilities.pipeline_vrs_supported || vrs_capabilities.primitive_vrs_supported || vrs_capabilities.attachment_vrs_supported) { - // Insert into our chain to enable these features if they are available. - vrs_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR; - vrs_features.pNext = nextptr; - vrs_features.pipelineFragmentShadingRate = vrs_capabilities.pipeline_vrs_supported; - vrs_features.primitiveFragmentShadingRate = vrs_capabilities.primitive_vrs_supported; - vrs_features.attachmentFragmentShadingRate = vrs_capabilities.attachment_vrs_supported; - - nextptr = &vrs_features; - } - - VkPhysicalDevicePipelineCreationCacheControlFeatures pipeline_cache_control_features = {}; - if (pipeline_cache_control_support) { - pipeline_cache_control_features.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES; - pipeline_cache_control_features.pNext = nextptr; - pipeline_cache_control_features.pipelineCreationCacheControl = pipeline_cache_control_support; - - nextptr = &pipeline_cache_control_features; - } - - VkPhysicalDeviceVulkan11Features vulkan11features = {}; - VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {}; - VkPhysicalDeviceMultiviewFeatures multiview_features = {}; - if (device_api_version >= VK_API_VERSION_1_2) { - // In Vulkan 1.2 and newer we use a newer struct to enable various features. 
- - vulkan11features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; - vulkan11features.pNext = nextptr; - vulkan11features.storageBuffer16BitAccess = storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported; - vulkan11features.uniformAndStorageBuffer16BitAccess = storage_buffer_capabilities.uniform_and_storage_buffer_16_bit_access_is_supported; - vulkan11features.storagePushConstant16 = storage_buffer_capabilities.storage_push_constant_16_is_supported; - vulkan11features.storageInputOutput16 = storage_buffer_capabilities.storage_input_output_16; - vulkan11features.multiview = multiview_capabilities.is_supported; - vulkan11features.multiviewGeometryShader = multiview_capabilities.geometry_shader_is_supported; - vulkan11features.multiviewTessellationShader = multiview_capabilities.tessellation_shader_is_supported; - vulkan11features.variablePointersStorageBuffer = 0; - vulkan11features.variablePointers = 0; - vulkan11features.protectedMemory = 0; - vulkan11features.samplerYcbcrConversion = 0; - vulkan11features.shaderDrawParameters = 0; - nextptr = &vulkan11features; - } else { - // On Vulkan 1.0 and 1.1 we use our older structs to initialize these features. 
- storage_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR; - storage_feature.pNext = nextptr; - storage_feature.storageBuffer16BitAccess = storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported; - storage_feature.uniformAndStorageBuffer16BitAccess = storage_buffer_capabilities.uniform_and_storage_buffer_16_bit_access_is_supported; - storage_feature.storagePushConstant16 = storage_buffer_capabilities.storage_push_constant_16_is_supported; - storage_feature.storageInputOutput16 = storage_buffer_capabilities.storage_input_output_16; - nextptr = &storage_feature; - - if (device_api_version >= VK_API_VERSION_1_1) { // any Vulkan 1.1.x version - multiview_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES; - multiview_features.pNext = nextptr; - multiview_features.multiview = multiview_capabilities.is_supported; - multiview_features.multiviewGeometryShader = multiview_capabilities.geometry_shader_is_supported; - multiview_features.multiviewTessellationShader = multiview_capabilities.tessellation_shader_is_supported; - nextptr = &multiview_features; - } - } - - uint32_t enabled_extension_count = 0; - const char *enabled_extension_names[MAX_EXTENSIONS]; - ERR_FAIL_COND_V(enabled_device_extension_names.size() > MAX_EXTENSIONS, ERR_CANT_CREATE); - for (const CharString &extension_name : enabled_device_extension_names) { - enabled_extension_names[enabled_extension_count++] = extension_name.ptr(); - } - - VkDeviceCreateInfo sdevice = { - /*sType*/ VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, - /*pNext*/ nextptr, - /*flags*/ 0, - /*queueCreateInfoCount*/ 1, - /*pQueueCreateInfos*/ queues, - /*enabledLayerCount*/ 0, - /*ppEnabledLayerNames*/ nullptr, - /*enabledExtensionCount*/ enabled_extension_count, - /*ppEnabledExtensionNames*/ (const char *const *)enabled_extension_names, - /*pEnabledFeatures*/ &physical_device_features, // If specific features are required, pass them in here. 
- }; - if (separate_present_queue) { - queues[1].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - queues[1].pNext = nullptr; - queues[1].queueFamilyIndex = present_queue_family_index; - queues[1].queueCount = 1; - queues[1].pQueuePriorities = queue_priorities; - queues[1].flags = 0; - sdevice.queueCreateInfoCount = 2; - } - - if (vulkan_hooks) { - if (!vulkan_hooks->create_vulkan_device(&sdevice, &r_vk_device)) { - return ERR_CANT_CREATE; - } - } else { - err = vkCreateDevice(gpu, &sdevice, nullptr, &r_vk_device); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - } - - return OK; -} - -Error VulkanContext::_initialize_queues(VkSurfaceKHR p_surface) { - // Iterate over each queue to learn whether it supports presenting: - VkBool32 *supportsPresent = nullptr; - - if (p_surface) { - supportsPresent = (VkBool32 *)malloc(queue_family_count * sizeof(VkBool32)); - for (uint32_t i = 0; i < queue_family_count; i++) { - fpGetPhysicalDeviceSurfaceSupportKHR(gpu, i, p_surface, &supportsPresent[i]); - } - } - - // Search for a graphics and a present queue in the array of queue - // families, try to find one that supports both. - uint32_t graphicsQueueFamilyIndex = UINT32_MAX; - uint32_t presentQueueFamilyIndex = UINT32_MAX; - for (uint32_t i = 0; i < queue_family_count; i++) { - if ((queue_props[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) { - if (graphicsQueueFamilyIndex == UINT32_MAX) { - graphicsQueueFamilyIndex = i; - } - - if (p_surface && supportsPresent[i] == VK_TRUE) { - graphicsQueueFamilyIndex = i; - presentQueueFamilyIndex = i; - break; - } - } - } - - if (p_surface && presentQueueFamilyIndex == UINT32_MAX) { - // If didn't find a queue that supports both graphics and present, then - // find a separate present queue. 
- for (uint32_t i = 0; i < queue_family_count; ++i) { - if (supportsPresent[i] == VK_TRUE) { - presentQueueFamilyIndex = i; - break; - } - } - } - - if (p_surface) { - free(supportsPresent); - - // Generate error if could not find both a graphics and a present queue. - ERR_FAIL_COND_V_MSG(graphicsQueueFamilyIndex == UINT32_MAX || presentQueueFamilyIndex == UINT32_MAX, ERR_CANT_CREATE, - "Could not find both graphics and present queues\n"); - - graphics_queue_family_index = graphicsQueueFamilyIndex; - present_queue_family_index = presentQueueFamilyIndex; - separate_present_queue = (graphics_queue_family_index != present_queue_family_index); - } else { - graphics_queue_family_index = graphicsQueueFamilyIndex; - } - - _create_device(device); - driver = memnew(RenderingDeviceDriverVulkan(this, device)); - - static PFN_vkGetDeviceProcAddr g_gdpa = nullptr; -#define GET_DEVICE_PROC_ADDR(dev, entrypoint) \ - { \ - if (!g_gdpa) \ - g_gdpa = (PFN_vkGetDeviceProcAddr)vkGetInstanceProcAddr(inst, "vkGetDeviceProcAddr"); \ - fp##entrypoint = (PFN_vk##entrypoint)g_gdpa(dev, "vk" #entrypoint); \ - ERR_FAIL_NULL_V_MSG(fp##entrypoint, ERR_CANT_CREATE, \ - "vkGetDeviceProcAddr failed to find vk" #entrypoint); \ - } - - GET_DEVICE_PROC_ADDR(device, CreateSwapchainKHR); - GET_DEVICE_PROC_ADDR(device, DestroySwapchainKHR); - GET_DEVICE_PROC_ADDR(device, GetSwapchainImagesKHR); - GET_DEVICE_PROC_ADDR(device, AcquireNextImageKHR); - GET_DEVICE_PROC_ADDR(device, QueuePresentKHR); - if (is_device_extension_enabled(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME)) { - GET_DEVICE_PROC_ADDR(device, GetRefreshCycleDurationGOOGLE); - GET_DEVICE_PROC_ADDR(device, GetPastPresentationTimingGOOGLE); - } - - vkGetDeviceQueue(device, graphics_queue_family_index, 0, &graphics_queue); - - if (p_surface) { - if (!separate_present_queue) { - present_queue = graphics_queue; - } else { - vkGetDeviceQueue(device, present_queue_family_index, 0, &present_queue); - } - - // Get the list of VkFormat's that are 
supported: - uint32_t formatCount; - VkResult err = fpGetPhysicalDeviceSurfaceFormatsKHR(gpu, p_surface, &formatCount, nullptr); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - VkSurfaceFormatKHR *surfFormats = (VkSurfaceFormatKHR *)malloc(formatCount * sizeof(VkSurfaceFormatKHR)); - err = fpGetPhysicalDeviceSurfaceFormatsKHR(gpu, p_surface, &formatCount, surfFormats); - if (err) { - free(surfFormats); - ERR_FAIL_V(ERR_CANT_CREATE); - } - // If the format list includes just one entry of VK_FORMAT_UNDEFINED, - // the surface has no preferred format. Otherwise, at least one - // supported format will be returned. - if (formatCount == 1 && surfFormats[0].format == VK_FORMAT_UNDEFINED) { - format = VK_FORMAT_B8G8R8A8_UNORM; - color_space = surfFormats[0].colorSpace; - } else { - // These should be ordered with the ones we want to use on top and fallback modes further down - // we want a 32bit RGBA unsigned normalized buffer or similar. - const VkFormat allowed_formats[] = { - VK_FORMAT_B8G8R8A8_UNORM, - VK_FORMAT_R8G8B8A8_UNORM - }; - uint32_t allowed_formats_count = sizeof(allowed_formats) / sizeof(VkFormat); - - if (formatCount < 1) { - free(surfFormats); - ERR_FAIL_V_MSG(ERR_CANT_CREATE, "formatCount less than 1"); - } - - // Find the first format that we support. 
- format = VK_FORMAT_UNDEFINED; - for (uint32_t af = 0; af < allowed_formats_count && format == VK_FORMAT_UNDEFINED; af++) { - for (uint32_t sf = 0; sf < formatCount && format == VK_FORMAT_UNDEFINED; sf++) { - if (surfFormats[sf].format == allowed_formats[af]) { - format = surfFormats[sf].format; - color_space = surfFormats[sf].colorSpace; - } - } - } - - if (format == VK_FORMAT_UNDEFINED) { - free(surfFormats); - ERR_FAIL_V_MSG(ERR_CANT_CREATE, "No usable surface format found."); - } - } - - free(surfFormats); - } - - Error serr = _create_semaphores(); - if (serr) { - return serr; - } - - queues_initialized = true; - return OK; -} - -Error VulkanContext::_create_semaphores() { - VkResult err; - - // Create semaphores to synchronize acquiring presentable buffers before - // rendering and waiting for drawing to be complete before presenting. - VkSemaphoreCreateInfo semaphoreCreateInfo = { - /*sType*/ VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - /*pNext*/ nullptr, - /*flags*/ 0, - }; - - // Create fences that we can use to throttle if we get too far - // ahead of the image presents. - VkFenceCreateInfo fence_ci = { - /*sType*/ VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - /*pNext*/ nullptr, - /*flags*/ VK_FENCE_CREATE_SIGNALED_BIT - }; - for (uint32_t i = 0; i < FRAME_LAG; i++) { - err = vkCreateFence(device, &fence_ci, nullptr, &fences[i]); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - err = vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &draw_complete_semaphores[i]); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - if (separate_present_queue) { - err = vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &image_ownership_semaphores[i]); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - } - } - frame_index = 0; - - // Get Memory information and properties. 
- vkGetPhysicalDeviceMemoryProperties(gpu, &memory_properties); - - return OK; -} - -bool VulkanContext::_use_validation_layers() { - return Engine::get_singleton()->is_validation_layers_enabled(); -} - -VkExtent2D VulkanContext::_compute_swapchain_extent(const VkSurfaceCapabilitiesKHR &p_surf_capabilities, int *p_window_width, int *p_window_height) const { - // Width and height are either both 0xFFFFFFFF, or both not 0xFFFFFFFF. - if (p_surf_capabilities.currentExtent.width == 0xFFFFFFFF) { - // If the surface size is undefined, the size is set to the size - // of the images requested, which must fit within the minimum and - // maximum values. - VkExtent2D extent = {}; - extent.width = CLAMP((uint32_t)(*p_window_width), p_surf_capabilities.minImageExtent.width, p_surf_capabilities.maxImageExtent.width); - extent.height = CLAMP((uint32_t)(*p_window_height), p_surf_capabilities.minImageExtent.height, p_surf_capabilities.maxImageExtent.height); - return extent; - } else { - // If the surface size is defined, the swap chain size must match. - *p_window_width = p_surf_capabilities.currentExtent.width; - *p_window_height = p_surf_capabilities.currentExtent.height; - return p_surf_capabilities.currentExtent; - } -} - -Error VulkanContext::_window_create(DisplayServer::WindowID p_window_id, DisplayServer::VSyncMode p_vsync_mode, VkSurfaceKHR p_surface, int p_width, int p_height) { - ERR_FAIL_NULL_V_MSG(_get_platform_surface_extension(), ERR_UNAVAILABLE, "This Vulkan context is headless."); - - ERR_FAIL_COND_V(windows.has(p_window_id), ERR_INVALID_PARAMETER); - - if (!device_initialized) { - Error err = _create_physical_device(p_surface); - ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); - } - - if (!queues_initialized) { - // We use a single GPU, but we need a surface to initialize the - // queues, so this process must be deferred until a surface - // is created. 
- Error err = _initialize_queues(p_surface); - ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); - } - - Window window; - window.surface = p_surface; - window.width = p_width; - window.height = p_height; - window.vsync_mode = p_vsync_mode; - Error err = _update_swap_chain(&window); - ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); - - windows[p_window_id] = window; - return OK; -} - -void VulkanContext::window_resize(DisplayServer::WindowID p_window, int p_width, int p_height) { - ERR_FAIL_COND(!windows.has(p_window)); - windows[p_window].width = p_width; - windows[p_window].height = p_height; - _update_swap_chain(&windows[p_window]); -} - -int VulkanContext::window_get_width(DisplayServer::WindowID p_window) { - ERR_FAIL_COND_V(!windows.has(p_window), -1); - return windows[p_window].width; -} - -int VulkanContext::window_get_height(DisplayServer::WindowID p_window) { - ERR_FAIL_COND_V(!windows.has(p_window), -1); - return windows[p_window].height; -} - -bool VulkanContext::window_is_valid_swapchain(DisplayServer::WindowID p_window) { - ERR_FAIL_COND_V(!windows.has(p_window), false); - Window *w = &windows[p_window]; - return w->swapchain_image_resources != VK_NULL_HANDLE; -} - -RDD::RenderPassID VulkanContext::window_get_render_pass(DisplayServer::WindowID p_window) { - ERR_FAIL_COND_V(!windows.has(p_window), RDD::RenderPassID()); - Window *w = &windows[p_window]; - return (RDD::RenderPassID)w->render_pass; -} - -RDD::FramebufferID VulkanContext::window_get_framebuffer(DisplayServer::WindowID p_window) { - ERR_FAIL_COND_V(!windows.has(p_window), RDD::FramebufferID()); - ERR_FAIL_COND_V(!buffers_prepared, RDD::FramebufferID()); - Window *w = &windows[p_window]; - if (w->swapchain_image_resources != VK_NULL_HANDLE) { - return (RDD::FramebufferID)w->swapchain_image_resources[w->current_buffer].framebuffer; - } else { - return RDD::FramebufferID(); - } -} - -void VulkanContext::window_destroy(DisplayServer::WindowID p_window_id) { - ERR_FAIL_COND(!windows.has(p_window_id)); - 
_clean_up_swap_chain(&windows[p_window_id]); - - vkDestroySurfaceKHR(inst, windows[p_window_id].surface, nullptr); - windows.erase(p_window_id); -} - -Error VulkanContext::_clean_up_swap_chain(Window *window) { - if (!window->swapchain) { - return OK; - } - vkDeviceWaitIdle(device); - - // This destroys images associated it seems. - fpDestroySwapchainKHR(device, window->swapchain, nullptr); - window->swapchain = VK_NULL_HANDLE; - vkDestroyRenderPass(device, window->render_pass, nullptr); - window->render_pass = VK_NULL_HANDLE; - if (window->swapchain_image_resources) { - for (uint32_t i = 0; i < swapchainImageCount; i++) { - vkDestroyImageView(device, window->swapchain_image_resources[i].view, nullptr); - vkDestroyFramebuffer(device, window->swapchain_image_resources[i].framebuffer, nullptr); - } - - free(window->swapchain_image_resources); - window->swapchain_image_resources = nullptr; - swapchainImageCount = 0; - } - if (separate_present_queue) { - vkDestroyCommandPool(device, window->present_cmd_pool, nullptr); - } - - for (uint32_t i = 0; i < FRAME_LAG; i++) { - // Destroy the semaphores now (we'll re-create it later if we have to). - // We must do this because the semaphore cannot be reused if it's in a signaled state - // (which happens if vkAcquireNextImageKHR returned VK_ERROR_OUT_OF_DATE_KHR or VK_SUBOPTIMAL_KHR) - // The only way to reset it would be to present the swapchain... the one we just destroyed. - // And the API has no way to "unsignal" the semaphore. - vkDestroySemaphore(device, window->image_acquired_semaphores[i], nullptr); - window->image_acquired_semaphores[i] = 0; - } - - return OK; -} - -Error VulkanContext::_update_swap_chain(Window *window) { - VkResult err; - - if (window->swapchain) { - _clean_up_swap_chain(window); - } - - // Check the surface capabilities and formats. 
- VkSurfaceCapabilitiesKHR surfCapabilities; - err = fpGetPhysicalDeviceSurfaceCapabilitiesKHR(gpu, window->surface, &surfCapabilities); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - { - VkBool32 supports = VK_FALSE; - err = vkGetPhysicalDeviceSurfaceSupportKHR( - gpu, present_queue_family_index, window->surface, &supports); - ERR_FAIL_COND_V_MSG(err != VK_SUCCESS || supports == false, ERR_CANT_CREATE, - "Window's surface is not supported by device. Did the GPU go offline? Was the window " - "created on another monitor? Check previous errors & try launching with " - "--gpu-validation."); - } - - uint32_t presentModeCount; - err = fpGetPhysicalDeviceSurfacePresentModesKHR(gpu, window->surface, &presentModeCount, nullptr); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - VkPresentModeKHR *presentModes = (VkPresentModeKHR *)malloc(presentModeCount * sizeof(VkPresentModeKHR)); - ERR_FAIL_NULL_V(presentModes, ERR_CANT_CREATE); - err = fpGetPhysicalDeviceSurfacePresentModesKHR(gpu, window->surface, &presentModeCount, presentModes); - if (err) { - free(presentModes); - ERR_FAIL_V(ERR_CANT_CREATE); - } - - VkExtent2D swapchainExtent = _compute_swapchain_extent(surfCapabilities, &window->width, &window->height); - - if (window->width == 0 || window->height == 0) { - free(presentModes); - // Likely window minimized, no swapchain created. - return ERR_SKIP; - } - // The FIFO present mode is guaranteed by the spec to be supported - // and to have no tearing. It's a great default present mode to use. - - // There are times when you may wish to use another present mode. The - // following code shows how to select them, and the comments provide some - // reasons you may wish to use them. - // - // It should be noted that Vulkan 1.0 doesn't provide a method for - // synchronizing rendering with the presentation engine's display. There - // is a method provided for throttling rendering with the display, but - // there are some presentation engines for which this method will not work. 
- // If an application doesn't throttle its rendering, and if it renders much - // faster than the refresh rate of the display, this can waste power on - // mobile devices. That is because power is being spent rendering images - // that may never be seen. - - // VK_PRESENT_MODE_IMMEDIATE_KHR is for applications that don't care about - // tearing, or have some way of synchronizing their rendering with the - // display. - // VK_PRESENT_MODE_MAILBOX_KHR may be useful for applications that - // generally render a new presentable image every refresh cycle, but are - // occasionally early. In this case, the application wants the new image - // to be displayed instead of the previously-queued-for-presentation image - // that has not yet been displayed. - // VK_PRESENT_MODE_FIFO_RELAXED_KHR is for applications that generally - // render a new presentable image every refresh cycle, but are occasionally - // late. In this case (perhaps because of stuttering/latency concerns), - // the application wants the late image to be immediately displayed, even - // though that may mean some tearing. - - VkPresentModeKHR requested_present_mode = VkPresentModeKHR::VK_PRESENT_MODE_FIFO_KHR; - switch (window->vsync_mode) { - case DisplayServer::VSYNC_MAILBOX: - requested_present_mode = VkPresentModeKHR::VK_PRESENT_MODE_MAILBOX_KHR; - break; - case DisplayServer::VSYNC_ADAPTIVE: - requested_present_mode = VkPresentModeKHR::VK_PRESENT_MODE_FIFO_RELAXED_KHR; - break; - case DisplayServer::VSYNC_ENABLED: - requested_present_mode = VkPresentModeKHR::VK_PRESENT_MODE_FIFO_KHR; - break; - case DisplayServer::VSYNC_DISABLED: - requested_present_mode = VkPresentModeKHR::VK_PRESENT_MODE_IMMEDIATE_KHR; - break; - } - - // Check if the requested mode is available. 
- bool present_mode_available = false; - for (uint32_t i = 0; i < presentModeCount; i++) { - if (presentModes[i] == requested_present_mode) { - present_mode_available = true; - } - } - - // Set the windows present mode if it is available, otherwise FIFO is used (guaranteed supported). - if (present_mode_available) { - if (window->presentMode != requested_present_mode) { - window->presentMode = requested_present_mode; - print_verbose("Using present mode: " + String(string_VkPresentModeKHR(window->presentMode))); - } - } else { - String present_mode_string; - switch (window->vsync_mode) { - case DisplayServer::VSYNC_MAILBOX: - present_mode_string = "Mailbox"; - break; - case DisplayServer::VSYNC_ADAPTIVE: - present_mode_string = "Adaptive"; - break; - case DisplayServer::VSYNC_ENABLED: - present_mode_string = "Enabled"; - break; - case DisplayServer::VSYNC_DISABLED: - present_mode_string = "Disabled"; - break; - } - WARN_PRINT(vformat("The requested V-Sync mode %s is not available. Falling back to V-Sync mode Enabled.", present_mode_string)); - window->vsync_mode = DisplayServer::VSYNC_ENABLED; // Set to default. - } - - free(presentModes); - - // Determine the number of VkImages to use in the swap chain. - // Application desires to acquire 3 images at a time for triple - // buffering. - uint32_t desiredNumOfSwapchainImages = 3; - if (desiredNumOfSwapchainImages < surfCapabilities.minImageCount) { - desiredNumOfSwapchainImages = surfCapabilities.minImageCount; - } - // If maxImageCount is 0, we can ask for as many images as we want; - // otherwise we're limited to maxImageCount. - if ((surfCapabilities.maxImageCount > 0) && (desiredNumOfSwapchainImages > surfCapabilities.maxImageCount)) { - // Application must settle for fewer images than desired. 
- desiredNumOfSwapchainImages = surfCapabilities.maxImageCount; - } - - VkSurfaceTransformFlagsKHR preTransform; - if (surfCapabilities.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR) { - preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; - } else { - preTransform = surfCapabilities.currentTransform; - } - - VkCompositeAlphaFlagBitsKHR compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - - if (OS::get_singleton()->is_layered_allowed() || !(surfCapabilities.supportedCompositeAlpha & compositeAlpha)) { - // Find a supported composite alpha mode - one of these is guaranteed to be set. - VkCompositeAlphaFlagBitsKHR compositeAlphaFlags[4] = { - VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR, - VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR, - VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR, - VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, - }; - - for (uint32_t i = 0; i < ARRAY_SIZE(compositeAlphaFlags); i++) { - if (surfCapabilities.supportedCompositeAlpha & compositeAlphaFlags[i]) { - compositeAlpha = compositeAlphaFlags[i]; - break; - } - } - } - - VkSwapchainCreateInfoKHR swapchain_ci = { - /*sType*/ VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, - /*pNext*/ nullptr, - /*flags*/ 0, - /*surface*/ window->surface, - /*minImageCount*/ desiredNumOfSwapchainImages, - /*imageFormat*/ format, - /*imageColorSpace*/ color_space, - /*imageExtent*/ { - /*width*/ swapchainExtent.width, - /*height*/ swapchainExtent.height, - }, - /*imageArrayLayers*/ 1, - /*imageUsage*/ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - /*imageSharingMode*/ VK_SHARING_MODE_EXCLUSIVE, - /*queueFamilyIndexCount*/ 0, - /*pQueueFamilyIndices*/ nullptr, - /*preTransform*/ (VkSurfaceTransformFlagBitsKHR)preTransform, - /*compositeAlpha*/ compositeAlpha, - /*presentMode*/ window->presentMode, - /*clipped*/ true, - /*oldSwapchain*/ VK_NULL_HANDLE, - }; - - err = fpCreateSwapchainKHR(device, &swapchain_ci, nullptr, &window->swapchain); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - uint32_t sp_image_count; - err = 
fpGetSwapchainImagesKHR(device, window->swapchain, &sp_image_count, nullptr); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - if (swapchainImageCount == 0) { - // Assign here for the first time. - swapchainImageCount = sp_image_count; - } else { - ERR_FAIL_COND_V(swapchainImageCount != sp_image_count, ERR_BUG); - } - - VkImage *swapchainImages = (VkImage *)malloc(swapchainImageCount * sizeof(VkImage)); - ERR_FAIL_NULL_V(swapchainImages, ERR_CANT_CREATE); - err = fpGetSwapchainImagesKHR(device, window->swapchain, &swapchainImageCount, swapchainImages); - if (err) { - free(swapchainImages); - ERR_FAIL_V(ERR_CANT_CREATE); - } - - window->swapchain_image_resources = - (SwapchainImageResources *)malloc(sizeof(SwapchainImageResources) * swapchainImageCount); - if (!window->swapchain_image_resources) { - free(swapchainImages); - ERR_FAIL_V(ERR_CANT_CREATE); - } - - for (uint32_t i = 0; i < swapchainImageCount; i++) { - VkImageViewCreateInfo color_image_view = { - /*sType*/ VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - /*pNext*/ nullptr, - /*flags*/ 0, - /*image*/ swapchainImages[i], - /*viewType*/ VK_IMAGE_VIEW_TYPE_2D, - /*format*/ format, - /*components*/ { - /*r*/ VK_COMPONENT_SWIZZLE_R, - /*g*/ VK_COMPONENT_SWIZZLE_G, - /*b*/ VK_COMPONENT_SWIZZLE_B, - /*a*/ VK_COMPONENT_SWIZZLE_A, - }, - /*subresourceRange*/ { /*aspectMask*/ VK_IMAGE_ASPECT_COLOR_BIT, - /*baseMipLevel*/ 0, - /*levelCount*/ 1, - /*baseArrayLayer*/ 0, - /*layerCount*/ 1 }, - }; - - window->swapchain_image_resources[i].image = swapchainImages[i]; - - color_image_view.image = window->swapchain_image_resources[i].image; - - err = vkCreateImageView(device, &color_image_view, nullptr, &window->swapchain_image_resources[i].view); - if (err) { - free(swapchainImages); - ERR_FAIL_V(ERR_CANT_CREATE); - } - } - - free(swapchainImages); - - /******** FRAMEBUFFER ************/ - - { - const VkAttachmentDescription2KHR attachment = { - /*sType*/ VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR, - /*pNext*/ nullptr, - 
/*flags*/ 0, - /*format*/ format, - /*samples*/ VK_SAMPLE_COUNT_1_BIT, - /*loadOp*/ VK_ATTACHMENT_LOAD_OP_CLEAR, - /*storeOp*/ VK_ATTACHMENT_STORE_OP_STORE, - /*stencilLoadOp*/ VK_ATTACHMENT_LOAD_OP_DONT_CARE, - /*stencilStoreOp*/ VK_ATTACHMENT_STORE_OP_DONT_CARE, - /*initialLayout*/ VK_IMAGE_LAYOUT_UNDEFINED, - /*finalLayout*/ VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, - - }; - const VkAttachmentReference2KHR color_reference = { - /*sType*/ VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR, - /*pNext*/ nullptr, - /*attachment*/ 0, - /*layout*/ VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - /*aspectMask*/ 0, - }; - - const VkSubpassDescription2KHR subpass = { - /*sType*/ VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR, - /*pNext*/ nullptr, - /*flags*/ 0, - /*pipelineBindPoint*/ VK_PIPELINE_BIND_POINT_GRAPHICS, - /*viewMask*/ 0, - /*inputAttachmentCount*/ 0, - /*pInputAttachments*/ nullptr, - /*colorAttachmentCount*/ 1, - /*pColorAttachments*/ &color_reference, - /*pResolveAttachments*/ nullptr, - /*pDepthStencilAttachment*/ nullptr, - /*preserveAttachmentCount*/ 0, - /*pPreserveAttachments*/ nullptr, - }; - - const VkRenderPassCreateInfo2KHR pass_info = { - /*sType*/ VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR, - /*pNext*/ nullptr, - /*flags*/ 0, - /*attachmentCount*/ 1, - /*pAttachments*/ &attachment, - /*subpassCount*/ 1, - /*pSubpasses*/ &subpass, - /*dependencyCount*/ 0, - /*pDependencies*/ nullptr, - /*correlatedViewMaskCount*/ 0, - /*pCorrelatedViewMasks*/ nullptr, - }; - - err = vkCreateRenderPass2KHR(device, &pass_info, nullptr, &window->render_pass); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - for (uint32_t i = 0; i < swapchainImageCount; i++) { - const VkFramebufferCreateInfo fb_info = { - /*sType*/ VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - /*pNext*/ nullptr, - /*flags*/ 0, - /*renderPass*/ window->render_pass, - /*attachmentCount*/ 1, - /*pAttachments*/ &window->swapchain_image_resources[i].view, - /*width*/ (uint32_t)window->width, - /*height*/ 
(uint32_t)window->height, - /*layers*/ 1, - }; - - err = vkCreateFramebuffer(device, &fb_info, nullptr, &window->swapchain_image_resources[i].framebuffer); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - } - } - - /******** SEPARATE PRESENT QUEUE ************/ - - if (separate_present_queue) { - const VkCommandPoolCreateInfo present_cmd_pool_info = { - /*sType*/ VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - /*pNext*/ nullptr, - /*flags*/ 0, - /*queueFamilyIndex*/ present_queue_family_index, - }; - err = vkCreateCommandPool(device, &present_cmd_pool_info, nullptr, &window->present_cmd_pool); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - const VkCommandBufferAllocateInfo present_cmd_info = { - /*sType*/ VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - /*pNext*/ nullptr, - /*commandPool*/ window->present_cmd_pool, - /*level*/ VK_COMMAND_BUFFER_LEVEL_PRIMARY, - /*commandBufferCount*/ 1, - }; - for (uint32_t i = 0; i < swapchainImageCount; i++) { - err = vkAllocateCommandBuffers(device, &present_cmd_info, - &window->swapchain_image_resources[i].graphics_to_present_cmd); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - const VkCommandBufferBeginInfo cmd_buf_info = { - /*sType*/ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - /*pNext*/ nullptr, - /*flags*/ VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT, - /*pInheritanceInfo*/ nullptr, - }; - err = vkBeginCommandBuffer(window->swapchain_image_resources[i].graphics_to_present_cmd, &cmd_buf_info); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - - VkImageMemoryBarrier image_ownership_barrier = { - /*sType*/ VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - /*pNext*/ nullptr, - /*srcAccessMask*/ 0, - /*dstAccessMask*/ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, - /*oldLayout*/ VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, - /*newLayout*/ VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, - /*srcQueueFamilyIndex*/ graphics_queue_family_index, - /*dstQueueFamilyIndex*/ present_queue_family_index, - /*image*/ window->swapchain_image_resources[i].image, - /*subresourceRange*/ { 
VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 } - }; - - vkCmdPipelineBarrier(window->swapchain_image_resources[i].graphics_to_present_cmd, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_ownership_barrier); - err = vkEndCommandBuffer(window->swapchain_image_resources[i].graphics_to_present_cmd); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - } - } - - // Reset current buffer. - window->current_buffer = 0; - - VkSemaphoreCreateInfo semaphoreCreateInfo = { - /*sType*/ VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - /*pNext*/ nullptr, - /*flags*/ 0, - }; - - for (uint32_t i = 0; i < FRAME_LAG; i++) { - VkResult vkerr = vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &window->image_acquired_semaphores[i]); - ERR_FAIL_COND_V(vkerr, ERR_CANT_CREATE); - } - - return OK; -} - -Error VulkanContext::initialize() { -#ifdef USE_VOLK - if (volkInitialize() != VK_SUCCESS) { - return FAILED; - } -#endif - - Error err = _create_instance(); - if (err != OK) { - return err; - } - - // Headless? Complete setup now. - if (!_get_platform_surface_extension()) { - err = _create_physical_device(VK_NULL_HANDLE); - if (err != OK) { - return err; - } - - err = _initialize_queues(VK_NULL_HANDLE); - if (err != OK) { - return err; - } - } - - return OK; -} - -void VulkanContext::set_setup_buffer(RDD::CommandBufferID p_command_buffer) { - command_buffer_queue[0] = (VkCommandBuffer)p_command_buffer.id; -} - -void VulkanContext::append_command_buffer(RDD::CommandBufferID p_command_buffer) { - if (command_buffer_queue.size() <= command_buffer_count) { - command_buffer_queue.resize(command_buffer_count + 1); - } - - command_buffer_queue[command_buffer_count] = (VkCommandBuffer)p_command_buffer.id; - command_buffer_count++; -} - -void VulkanContext::flush(bool p_flush_setup, bool p_flush_pending) { - // Ensure everything else pending is executed. - vkDeviceWaitIdle(device); - - // Flush the pending setup buffer. 
- - bool setup_flushable = p_flush_setup && command_buffer_queue[0]; - bool pending_flushable = p_flush_pending && command_buffer_count > 1; - - if (setup_flushable) { - // Use a fence to wait for everything done. - VkSubmitInfo submit_info; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = nullptr; - submit_info.pWaitDstStageMask = nullptr; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = nullptr; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = command_buffer_queue.ptr(); - submit_info.signalSemaphoreCount = pending_flushable ? 1 : 0; - submit_info.pSignalSemaphores = pending_flushable ? &draw_complete_semaphores[frame_index] : nullptr; - VkResult err = vkQueueSubmit(graphics_queue, 1, &submit_info, VK_NULL_HANDLE); - command_buffer_queue[0] = nullptr; - ERR_FAIL_COND(err); - } - - if (pending_flushable) { - // Use a fence to wait for everything to finish. - - VkSubmitInfo submit_info; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = nullptr; - VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - submit_info.pWaitDstStageMask = setup_flushable ? &wait_stage_mask : nullptr; - submit_info.waitSemaphoreCount = setup_flushable ? 1 : 0; - submit_info.pWaitSemaphores = setup_flushable ? &draw_complete_semaphores[frame_index] : nullptr; - submit_info.commandBufferCount = command_buffer_count - 1; - submit_info.pCommandBuffers = command_buffer_queue.ptr() + 1; - submit_info.signalSemaphoreCount = 0; - submit_info.pSignalSemaphores = nullptr; - VkResult err = vkQueueSubmit(graphics_queue, 1, &submit_info, VK_NULL_HANDLE); - command_buffer_count = 1; - ERR_FAIL_COND(err); - } - - vkDeviceWaitIdle(device); -} - -Error VulkanContext::prepare_buffers(RDD::CommandBufferID p_command_buffer) { - if (!queues_initialized) { - return OK; - } - - VkResult err; - - // Ensure no more than FRAME_LAG renderings are outstanding. 
- vkWaitForFences(device, 1, &fences[frame_index], VK_TRUE, UINT64_MAX); - vkResetFences(device, 1, &fences[frame_index]); - - for (KeyValue<int, Window> &E : windows) { - Window *w = &E.value; - - w->semaphore_acquired = false; - - if (w->swapchain == VK_NULL_HANDLE) { - continue; - } - - do { - // Get the index of the next available swapchain image. - err = - fpAcquireNextImageKHR(device, w->swapchain, UINT64_MAX, - w->image_acquired_semaphores[frame_index], VK_NULL_HANDLE, &w->current_buffer); - - if (err == VK_ERROR_OUT_OF_DATE_KHR) { - // Swapchain is out of date (e.g. the window was resized) and - // must be recreated. - print_verbose("Vulkan: Early out of date swapchain, recreating."); - // resize_notify(); - _update_swap_chain(w); - } else if (err == VK_SUBOPTIMAL_KHR) { - // Swapchain is not as optimal as it could be, but the platform's - // presentation engine will still present the image correctly. - print_verbose("Vulkan: Early suboptimal swapchain, recreating."); - Error swap_chain_err = _update_swap_chain(w); - if (swap_chain_err == ERR_SKIP) { - break; - } - } else if (err != VK_SUCCESS) { - ERR_BREAK_MSG(err != VK_SUCCESS, "Vulkan: Did not create swapchain successfully. Error code: " + String(string_VkResult(err))); - } else { - w->semaphore_acquired = true; - } - } while (err != VK_SUCCESS); - } - - buffers_prepared = true; - - return OK; -} - -void VulkanContext::postpare_buffers(RDD::CommandBufferID p_command_buffer) { -} - -Error VulkanContext::swap_buffers() { - if (!queues_initialized) { - return OK; - } - - // print_line("swapbuffers?"); - VkResult err; - -#if 0 - if (is_device_extension_enabled(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME)) { - // Look at what happened to previous presents, and make appropriate - // adjustments in timing. - DemoUpdateTargetIPD(demo); - - // Note: a real application would position its geometry to that it's in - // the correct location for when the next image is presented. 
It might - // also wait, so that there's less latency between any input and when - // the next image is rendered/presented. This demo program is so - // simple that it doesn't do either of those. - } -#endif - // Wait for the image acquired semaphore to be signaled to ensure - // that the image won't be rendered to until the presentation - // engine has fully released ownership to the application, and it is - // okay to render to the image. - - const VkCommandBuffer *commands_ptr = nullptr; - uint32_t commands_to_submit = 0; - - if (command_buffer_queue[0] == nullptr) { - // No setup command, but commands to submit, submit from the first and skip command. - if (command_buffer_count > 1) { - commands_ptr = command_buffer_queue.ptr() + 1; - commands_to_submit = command_buffer_count - 1; - } - } else { - commands_ptr = command_buffer_queue.ptr(); - commands_to_submit = command_buffer_count; - } - - VkSemaphore *semaphores_to_acquire = (VkSemaphore *)alloca(windows.size() * sizeof(VkSemaphore)); - VkPipelineStageFlags *pipe_stage_flags = (VkPipelineStageFlags *)alloca(windows.size() * sizeof(VkPipelineStageFlags)); - uint32_t semaphores_to_acquire_count = 0; - - for (KeyValue<int, Window> &E : windows) { - Window *w = &E.value; - - if (w->semaphore_acquired) { - semaphores_to_acquire[semaphores_to_acquire_count] = w->image_acquired_semaphores[frame_index]; - pipe_stage_flags[semaphores_to_acquire_count] = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - semaphores_to_acquire_count++; - } - } - - VkSubmitInfo submit_info; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = nullptr; - submit_info.waitSemaphoreCount = semaphores_to_acquire_count; - submit_info.pWaitSemaphores = semaphores_to_acquire; - submit_info.pWaitDstStageMask = pipe_stage_flags; - submit_info.commandBufferCount = commands_to_submit; - submit_info.pCommandBuffers = commands_ptr; - submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = 
&draw_complete_semaphores[frame_index]; - err = vkQueueSubmit(graphics_queue, 1, &submit_info, fences[frame_index]); - ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "Vulkan: Cannot submit graphics queue. Error code: " + String(string_VkResult(err))); - - command_buffer_queue[0] = nullptr; - command_buffer_count = 1; - - if (separate_present_queue) { - // If we are using separate queues, change image ownership to the - // present queue before presenting, waiting for the draw complete - // semaphore and signaling the ownership released semaphore when finished. - VkFence nullFence = VK_NULL_HANDLE; - pipe_stage_flags[0] = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - submit_info.waitSemaphoreCount = 1; - submit_info.pWaitSemaphores = &draw_complete_semaphores[frame_index]; - submit_info.commandBufferCount = 0; - - VkCommandBuffer *cmdbufptr = (VkCommandBuffer *)alloca(sizeof(VkCommandBuffer *) * windows.size()); - submit_info.pCommandBuffers = cmdbufptr; - - for (KeyValue<int, Window> &E : windows) { - Window *w = &E.value; - - if (w->swapchain == VK_NULL_HANDLE) { - continue; - } - cmdbufptr[submit_info.commandBufferCount] = w->swapchain_image_resources[w->current_buffer].graphics_to_present_cmd; - submit_info.commandBufferCount++; - } - - submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = &image_ownership_semaphores[frame_index]; - err = vkQueueSubmit(present_queue, 1, &submit_info, nullFence); - ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "Vulkan: Cannot submit present queue. Error code: " + String(string_VkResult(err))); - } - - // If we are using separate queues, we have to wait for image ownership, - // otherwise wait for draw complete. - VkPresentInfoKHR present = { - /*sType*/ VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, - /*pNext*/ nullptr, - /*waitSemaphoreCount*/ 1, - /*pWaitSemaphores*/ (separate_present_queue) ? 
&image_ownership_semaphores[frame_index] : &draw_complete_semaphores[frame_index], - /*swapchainCount*/ 0, - /*pSwapchain*/ nullptr, - /*pImageIndices*/ nullptr, - /*pResults*/ nullptr, - }; - - VkSwapchainKHR *pSwapchains = (VkSwapchainKHR *)alloca(sizeof(VkSwapchainKHR *) * windows.size()); - uint32_t *pImageIndices = (uint32_t *)alloca(sizeof(uint32_t *) * windows.size()); - - present.pSwapchains = pSwapchains; - present.pImageIndices = pImageIndices; - - for (KeyValue<int, Window> &E : windows) { - Window *w = &E.value; - - if (w->swapchain == VK_NULL_HANDLE) { - continue; - } - pSwapchains[present.swapchainCount] = w->swapchain; - pImageIndices[present.swapchainCount] = w->current_buffer; - present.swapchainCount++; - } - -#if 0 - if (is_device_extension_enabled(VK_KHR_incremental_present_enabled)) { - // If using VK_KHR_incremental_present, we provide a hint of the region - // that contains changed content relative to the previously-presented - // image. The implementation can use this hint in order to save - // work/power (by only copying the region in the hint). The - // implementation is free to ignore the hint though, and so we must - // ensure that the entire image has the correctly-drawn content. 
- uint32_t eighthOfWidth = width / 8; - uint32_t eighthOfHeight = height / 8; - VkRectLayerKHR rect = { - /*offset.x*/ eighthOfWidth, - /*offset.y*/ eighthOfHeight, - /*extent.width*/ eighthOfWidth * 6, - /*extent.height*/ eighthOfHeight * 6, - /*layer*/ 0, - }; - VkPresentRegionKHR region = { - /*rectangleCount*/ 1, - /*pRectangles*/ &rect, - }; - VkPresentRegionsKHR regions = { - /*sType*/ VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR, - /*pNext*/ present.pNext, - /*swapchainCount*/ present.swapchainCount, - /*pRegions*/ &region, - }; - present.pNext = &regions; - } -#endif - -#if 0 - if (is_device_extension_enabled(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME)) { - VkPresentTimeGOOGLE ptime; - if (prev_desired_present_time == 0) { - // This must be the first present for this swapchain. - // - // We don't know where we are relative to the presentation engine's - // display's refresh cycle. We also don't know how long rendering - // takes. Let's make a grossly-simplified assumption that the - // desiredPresentTime should be half way between now and - // now+target_IPD. We will adjust over time. - uint64_t curtime = getTimeInNanoseconds(); - if (curtime == 0) { - // Since we didn't find out the current time, don't give a - // desiredPresentTime. 
- ptime.desiredPresentTime = 0; - } else { - ptime.desiredPresentTime = curtime + (target_IPD >> 1); - } - } else { - ptime.desiredPresentTime = (prev_desired_present_time + target_IPD); - } - ptime.presentID = next_present_id++; - prev_desired_present_time = ptime.desiredPresentTime; - - VkPresentTimesInfoGOOGLE present_time = { - /*sType*/ VK_STRUCTURE_TYPE_PRESENT_TIMES_INFO_GOOGLE, - /*pNext*/ present.pNext, - /*swapchainCount*/ present.swapchainCount, - /*pTimes*/ &ptime, - }; - if (is_device_extension_enabled(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME)) { - present.pNext = &present_time; - } - } -#endif - // print_line("current buffer: " + itos(current_buffer)); - err = fpQueuePresentKHR(present_queue, &present); - - frame_index += 1; - frame_index %= FRAME_LAG; - - if (err == VK_ERROR_OUT_OF_DATE_KHR) { - // Swapchain is out of date (e.g. the window was resized) and - // must be recreated. - print_verbose("Vulkan queue submit: Swapchain is out of date, recreating."); - resize_notify(); - } else if (err == VK_SUBOPTIMAL_KHR) { - // Swapchain is not as optimal as it could be, but the platform's - // presentation engine will still present the image correctly. 
- print_verbose("Vulkan queue submit: Swapchain is suboptimal."); - } else { - ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "Error code: " + String(string_VkResult(err))); - } - - buffers_prepared = false; - return OK; -} - -void VulkanContext::resize_notify() { -} - -RenderingDevice::Capabilities VulkanContext::get_device_capabilities() const { - RenderingDevice::Capabilities c; - c.device_family = RenderingDevice::DEVICE_VULKAN; - c.version_major = VK_API_VERSION_MAJOR(device_api_version); - c.version_minor = VK_API_VERSION_MINOR(device_api_version); - return c; -} - -VkDevice VulkanContext::get_device() { - return device; -} - -VkPhysicalDevice VulkanContext::get_physical_device() { - return gpu; -} - -int VulkanContext::get_swapchain_image_count() const { - return swapchainImageCount; -} - -VkQueue VulkanContext::get_graphics_queue() const { - return graphics_queue; -} - -uint32_t VulkanContext::get_graphics_queue_family_index() const { - return graphics_queue_family_index; -} - -VkFormat VulkanContext::get_screen_format() const { - return format; -} - -const VkPhysicalDeviceLimits &VulkanContext::get_device_limits() const { - return gpu_props.limits; -} - -RID VulkanContext::local_device_create() { - LocalDevice ld; - - Error err = _create_device(ld.device); - ERR_FAIL_COND_V(err, RID()); - - { // Create graphics queue. 
- - vkGetDeviceQueue(ld.device, graphics_queue_family_index, 0, &ld.queue); - } - - ld.driver = memnew(RenderingDeviceDriverVulkan(this, ld.device)); - - return local_device_owner.make_rid(ld); -} - -void VulkanContext::local_device_push_command_buffers(RID p_local_device, const RDD::CommandBufferID *p_buffers, int p_count) { - LocalDevice *ld = local_device_owner.get_or_null(p_local_device); - ERR_FAIL_COND(ld->waiting); - - VkSubmitInfo submit_info; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = nullptr; - submit_info.pWaitDstStageMask = nullptr; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = nullptr; - submit_info.commandBufferCount = p_count; - submit_info.pCommandBuffers = (const VkCommandBuffer *)p_buffers; - submit_info.signalSemaphoreCount = 0; - submit_info.pSignalSemaphores = nullptr; - - VkResult err = vkQueueSubmit(ld->queue, 1, &submit_info, VK_NULL_HANDLE); - if (err == VK_ERROR_OUT_OF_HOST_MEMORY) { - print_line("Vulkan: Out of host memory!"); - } - if (err == VK_ERROR_OUT_OF_DEVICE_MEMORY) { - print_line("Vulkan: Out of device memory!"); - } - if (err == VK_ERROR_DEVICE_LOST) { - print_line("Vulkan: Device lost!"); - } - ERR_FAIL_COND(err); - - ld->waiting = true; -} - -void VulkanContext::local_device_sync(RID p_local_device) { - LocalDevice *ld = local_device_owner.get_or_null(p_local_device); - ERR_FAIL_COND(!ld->waiting); - - vkDeviceWaitIdle(ld->device); - ld->waiting = false; -} - -void VulkanContext::local_device_free(RID p_local_device) { - LocalDevice *ld = local_device_owner.get_or_null(p_local_device); - memdelete(ld->driver); - vkDestroyDevice(ld->device, nullptr); - local_device_owner.free(p_local_device); -} - -void VulkanContext::command_begin_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) { - if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { - return; - } - - CharString cs = p_label_name.utf8(); - VkDebugUtilsLabelEXT 
label; - label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; - label.pNext = nullptr; - label.pLabelName = cs.get_data(); - label.color[0] = p_color[0]; - label.color[1] = p_color[1]; - label.color[2] = p_color[2]; - label.color[3] = p_color[3]; - CmdBeginDebugUtilsLabelEXT((VkCommandBuffer)p_command_buffer.id, &label); -} - -void VulkanContext::command_insert_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) { - if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { - return; - } - CharString cs = p_label_name.utf8(); - VkDebugUtilsLabelEXT label; - label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; - label.pNext = nullptr; - label.pLabelName = cs.get_data(); - label.color[0] = p_color[0]; - label.color[1] = p_color[1]; - label.color[2] = p_color[2]; - label.color[3] = p_color[3]; - CmdInsertDebugUtilsLabelEXT((VkCommandBuffer)p_command_buffer.id, &label); -} - -void VulkanContext::command_end_label(RDD::CommandBufferID p_command_buffer) { - if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { - return; - } - CmdEndDebugUtilsLabelEXT((VkCommandBuffer)p_command_buffer.id); -} - -void VulkanContext::set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name) { - if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { - return; - } - CharString obj_data = p_object_name.utf8(); - VkDebugUtilsObjectNameInfoEXT name_info; - name_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; - name_info.pNext = nullptr; - name_info.objectType = p_object_type; - name_info.objectHandle = p_object_handle; - name_info.pObjectName = obj_data.get_data(); - SetDebugUtilsObjectNameEXT(device, &name_info); -} - -String VulkanContext::get_device_vendor_name() const { - return device_vendor; -} - -String VulkanContext::get_device_name() const { - return device_name; -} - -RenderingDevice::DeviceType VulkanContext::get_device_type() const { - return 
RenderingDevice::DeviceType(device_type); -} - -String VulkanContext::get_device_api_version() const { - return vformat("%d.%d.%d", VK_API_VERSION_MAJOR(device_api_version), VK_API_VERSION_MINOR(device_api_version), VK_API_VERSION_PATCH(device_api_version)); -} - -String VulkanContext::get_device_pipeline_cache_uuid() const { - return pipeline_cache_id; -} - -DisplayServer::VSyncMode VulkanContext::get_vsync_mode(DisplayServer::WindowID p_window) const { - ERR_FAIL_COND_V_MSG(!windows.has(p_window), DisplayServer::VSYNC_ENABLED, "Could not get V-Sync mode for window with WindowID " + itos(p_window) + " because it does not exist."); - return windows[p_window].vsync_mode; -} - -void VulkanContext::set_vsync_mode(DisplayServer::WindowID p_window, DisplayServer::VSyncMode p_mode) { - ERR_FAIL_COND_MSG(!windows.has(p_window), "Could not set V-Sync mode for window with WindowID " + itos(p_window) + " because it does not exist."); - windows[p_window].vsync_mode = p_mode; - _update_swap_chain(&windows[p_window]); -} - -RenderingDeviceDriver *VulkanContext::get_driver(RID p_local_device) { - if (p_local_device.is_valid()) { - LocalDevice *ld = local_device_owner.get_or_null(p_local_device); - ERR_FAIL_NULL_V(ld, nullptr); - return ld->driver; - } else { - return driver; - } -} - -VulkanContext::VulkanContext() { - command_buffer_queue.resize(1); // First one is always the setup command. 
- command_buffer_queue[0] = nullptr; -} - -VulkanContext::~VulkanContext() { - if (driver) { - memdelete(driver); - } - if (queue_props) { - free(queue_props); - } - if (device_initialized) { - for (uint32_t i = 0; i < FRAME_LAG; i++) { - vkDestroyFence(device, fences[i], nullptr); - vkDestroySemaphore(device, draw_complete_semaphores[i], nullptr); - if (separate_present_queue) { - vkDestroySemaphore(device, image_ownership_semaphores[i], nullptr); - } - } - if (inst_initialized && is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { - DestroyDebugUtilsMessengerEXT(inst, dbg_messenger, nullptr); - } - if (inst_initialized && dbg_debug_report != VK_NULL_HANDLE) { - DestroyDebugReportCallbackEXT(inst, dbg_debug_report, nullptr); - } - vkDestroyDevice(device, nullptr); - } - if (inst_initialized) { - vkDestroyInstance(inst, nullptr); - } -} diff --git a/drivers/vulkan/vulkan_context.h b/drivers/vulkan/vulkan_context.h deleted file mode 100644 index ce1299a559..0000000000 --- a/drivers/vulkan/vulkan_context.h +++ /dev/null @@ -1,350 +0,0 @@ -/**************************************************************************/ -/* vulkan_context.h */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/**************************************************************************/ - -#ifndef VULKAN_CONTEXT_H -#define VULKAN_CONTEXT_H - -#include "core/error/error_list.h" -#include "core/os/mutex.h" -#include "core/string/ustring.h" -#include "core/templates/hash_map.h" -#include "core/templates/rb_map.h" -#include "core/templates/rid_owner.h" -#include "rendering_device_driver_vulkan.h" -#include "servers/display_server.h" -#include "servers/rendering/renderer_rd/api_context_rd.h" - -#ifdef USE_VOLK -#include <volk.h> -#else -#include <vulkan/vulkan.h> -#endif - -#include "vulkan_hooks.h" - -class VulkanContext : public ApiContextRD { -public: - struct SubgroupCapabilities { - uint32_t size; - uint32_t min_size; - uint32_t max_size; - VkShaderStageFlags supportedStages; - VkSubgroupFeatureFlags supportedOperations; - VkBool32 quadOperationsInAllStages; - bool size_control_is_supported; - - uint32_t supported_stages_flags_rd() const; - String supported_stages_desc() const; - uint32_t supported_operations_flags_rd() const; - String supported_operations_desc() const; - }; - - struct VRSCapabilities { - bool pipeline_vrs_supported; // We can specify our fragment rate on a pipeline level. - bool primitive_vrs_supported; // We can specify our fragment rate on each drawcall. - bool attachment_vrs_supported; // We can provide a density map attachment on our framebuffer. 
- - Size2i min_texel_size; - Size2i max_texel_size; - - Size2i texel_size; // The texel size we'll use - }; - - struct ShaderCapabilities { - bool shader_float16_is_supported; - bool shader_int8_is_supported; - }; - - struct StorageBufferCapabilities { - bool storage_buffer_16_bit_access_is_supported; - bool uniform_and_storage_buffer_16_bit_access_is_supported; - bool storage_push_constant_16_is_supported; - bool storage_input_output_16; - }; - -private: - enum { - MAX_EXTENSIONS = 128, - MAX_LAYERS = 64, - FRAME_LAG = 2 - }; - - static VulkanHooks *vulkan_hooks; - VkInstance inst = VK_NULL_HANDLE; - VkPhysicalDevice gpu = VK_NULL_HANDLE; - VkPhysicalDeviceProperties gpu_props; - uint32_t queue_family_count = 0; - VkQueueFamilyProperties *queue_props = nullptr; - VkDevice device = VK_NULL_HANDLE; - bool device_initialized = false; - bool inst_initialized = false; - - uint32_t instance_api_version = VK_API_VERSION_1_0; - SubgroupCapabilities subgroup_capabilities; - RDD::MultiviewCapabilities multiview_capabilities; - VRSCapabilities vrs_capabilities; - ShaderCapabilities shader_capabilities; - StorageBufferCapabilities storage_buffer_capabilities; - bool pipeline_cache_control_support = false; - - String device_vendor; - String device_name; - VkPhysicalDeviceType device_type; - String pipeline_cache_id; - uint32_t device_api_version = 0; - - bool buffers_prepared = false; - - // Present queue. 
- bool queues_initialized = false; - uint32_t graphics_queue_family_index = UINT32_MAX; - uint32_t present_queue_family_index = UINT32_MAX; - bool separate_present_queue = false; - VkQueue graphics_queue = VK_NULL_HANDLE; - VkQueue present_queue = VK_NULL_HANDLE; - VkColorSpaceKHR color_space; - VkFormat format; - VkSemaphore draw_complete_semaphores[FRAME_LAG]; - VkSemaphore image_ownership_semaphores[FRAME_LAG]; - int frame_index = 0; - VkFence fences[FRAME_LAG]; - VkPhysicalDeviceMemoryProperties memory_properties; - VkPhysicalDeviceFeatures physical_device_features; - - typedef struct { - VkImage image; - VkCommandBuffer graphics_to_present_cmd; - VkImageView view; - VkFramebuffer framebuffer; - } SwapchainImageResources; - - struct Window { - VkSurfaceKHR surface = VK_NULL_HANDLE; - VkSwapchainKHR swapchain = VK_NULL_HANDLE; - SwapchainImageResources *swapchain_image_resources = VK_NULL_HANDLE; - VkPresentModeKHR presentMode = VK_PRESENT_MODE_FIFO_KHR; - VkSemaphore image_acquired_semaphores[FRAME_LAG]; - bool semaphore_acquired = false; - uint32_t current_buffer = 0; - int width = 0; - int height = 0; - DisplayServer::VSyncMode vsync_mode = DisplayServer::VSYNC_ENABLED; - VkCommandPool present_cmd_pool = VK_NULL_HANDLE; // For separate present queue. - VkRenderPass render_pass = VK_NULL_HANDLE; - }; - - struct LocalDevice { - bool waiting = false; - VkDevice device = VK_NULL_HANDLE; - VkQueue queue = VK_NULL_HANDLE; - RenderingDeviceDriverVulkan *driver = nullptr; - }; - - RID_Owner<LocalDevice, true> local_device_owner; - - RenderingDeviceDriverVulkan *driver = nullptr; - - HashMap<DisplayServer::WindowID, Window> windows; - uint32_t swapchainImageCount = 0; - - // Commands. - - bool prepared = false; - - LocalVector<VkCommandBuffer> command_buffer_queue; - uint32_t command_buffer_count = 1; - - // Extensions. 
- static bool instance_extensions_initialized; - static HashMap<CharString, bool> requested_instance_extensions; - HashSet<CharString> enabled_instance_extension_names; - - static bool device_extensions_initialized; - static HashMap<CharString, bool> requested_device_extensions; - HashSet<CharString> enabled_device_extension_names; - bool VK_KHR_incremental_present_enabled = true; - bool VK_GOOGLE_display_timing_enabled = true; - - PFN_vkCreateDebugUtilsMessengerEXT CreateDebugUtilsMessengerEXT = nullptr; - PFN_vkDestroyDebugUtilsMessengerEXT DestroyDebugUtilsMessengerEXT = nullptr; - PFN_vkSubmitDebugUtilsMessageEXT SubmitDebugUtilsMessageEXT = nullptr; - PFN_vkCmdBeginDebugUtilsLabelEXT CmdBeginDebugUtilsLabelEXT = nullptr; - PFN_vkCmdEndDebugUtilsLabelEXT CmdEndDebugUtilsLabelEXT = nullptr; - PFN_vkCmdInsertDebugUtilsLabelEXT CmdInsertDebugUtilsLabelEXT = nullptr; - PFN_vkSetDebugUtilsObjectNameEXT SetDebugUtilsObjectNameEXT = nullptr; - PFN_vkCreateDebugReportCallbackEXT CreateDebugReportCallbackEXT = nullptr; - PFN_vkDebugReportMessageEXT DebugReportMessageEXT = nullptr; - PFN_vkDestroyDebugReportCallbackEXT DestroyDebugReportCallbackEXT = nullptr; - PFN_vkGetPhysicalDeviceSurfaceSupportKHR fpGetPhysicalDeviceSurfaceSupportKHR = nullptr; - PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR fpGetPhysicalDeviceSurfaceCapabilitiesKHR = nullptr; - PFN_vkGetPhysicalDeviceSurfaceFormatsKHR fpGetPhysicalDeviceSurfaceFormatsKHR = nullptr; - PFN_vkGetPhysicalDeviceSurfacePresentModesKHR fpGetPhysicalDeviceSurfacePresentModesKHR = nullptr; - PFN_vkCreateSwapchainKHR fpCreateSwapchainKHR = nullptr; - PFN_vkDestroySwapchainKHR fpDestroySwapchainKHR = nullptr; - PFN_vkGetSwapchainImagesKHR fpGetSwapchainImagesKHR = nullptr; - PFN_vkAcquireNextImageKHR fpAcquireNextImageKHR = nullptr; - PFN_vkQueuePresentKHR fpQueuePresentKHR = nullptr; - PFN_vkGetRefreshCycleDurationGOOGLE fpGetRefreshCycleDurationGOOGLE = nullptr; - PFN_vkGetPastPresentationTimingGOOGLE 
fpGetPastPresentationTimingGOOGLE = nullptr; - PFN_vkCreateRenderPass2KHR fpCreateRenderPass2KHR = nullptr; - - VkDebugUtilsMessengerEXT dbg_messenger = VK_NULL_HANDLE; - VkDebugReportCallbackEXT dbg_debug_report = VK_NULL_HANDLE; - - Error _obtain_vulkan_version(); - Error _initialize_instance_extensions(); - Error _initialize_device_extensions(); - Error _check_capabilities(); - - VkBool32 _check_layers(uint32_t check_count, const char *const *check_names, uint32_t layer_count, VkLayerProperties *layers); - static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_messenger_callback( - VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, - VkDebugUtilsMessageTypeFlagsEXT messageType, - const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, - void *pUserData); - - static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_report_callback( - VkDebugReportFlagsEXT flags, - VkDebugReportObjectTypeEXT objectType, - uint64_t object, - size_t location, - int32_t messageCode, - const char *pLayerPrefix, - const char *pMessage, - void *pUserData); - - Error _create_instance(); - - Error _create_physical_device(VkSurfaceKHR p_surface); - - Error _initialize_queues(VkSurfaceKHR p_surface); - - Error _create_device(VkDevice &r_vk_device); - - Error _clean_up_swap_chain(Window *window); - - Error _update_swap_chain(Window *window); - - Error _create_swap_chain(); - Error _create_semaphores(); - - Vector<VkAttachmentReference> _convert_VkAttachmentReference2(uint32_t p_count, const VkAttachmentReference2 *p_refs); - -protected: - virtual const char *_get_platform_surface_extension() const { return nullptr; } - - virtual Error _window_create(DisplayServer::WindowID p_window_id, DisplayServer::VSyncMode p_vsync_mode, VkSurfaceKHR p_surface, int p_width, int p_height); - - virtual bool _use_validation_layers(); - - Error _get_preferred_validation_layers(uint32_t *count, const char *const **names); - - virtual VkExtent2D _compute_swapchain_extent(const VkSurfaceCapabilitiesKHR &p_surf_capabilities, 
int *p_window_width, int *p_window_height) const; - -public: - // Extension calls. - bool supports_renderpass2() const { return is_device_extension_enabled(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME); } - VkResult vkCreateRenderPass2KHR(VkDevice p_device, const VkRenderPassCreateInfo2 *p_create_info, const VkAllocationCallbacks *p_allocator, VkRenderPass *p_render_pass); - - virtual const char *get_api_name() const override final { return "Vulkan"; }; - virtual RenderingDevice::Capabilities get_device_capabilities() const override final; - const SubgroupCapabilities &get_subgroup_capabilities() const { return subgroup_capabilities; }; - virtual const RDD::MultiviewCapabilities &get_multiview_capabilities() const override final { return multiview_capabilities; }; - const VRSCapabilities &get_vrs_capabilities() const { return vrs_capabilities; }; - const ShaderCapabilities &get_shader_capabilities() const { return shader_capabilities; }; - const StorageBufferCapabilities &get_storage_buffer_capabilities() const { return storage_buffer_capabilities; }; - const VkPhysicalDeviceFeatures &get_physical_device_features() const { return physical_device_features; }; - bool get_pipeline_cache_control_support() const { return pipeline_cache_control_support; }; - - VkDevice get_device(); - VkPhysicalDevice get_physical_device(); - VkInstance get_instance() { return inst; } - virtual int get_swapchain_image_count() const override final; - VkQueue get_graphics_queue() const; - uint32_t get_graphics_queue_family_index() const; - - static void set_vulkan_hooks(VulkanHooks *p_vulkan_hooks) { vulkan_hooks = p_vulkan_hooks; }; - - static void register_requested_instance_extension(const CharString &extension_name, bool p_required); - bool is_instance_extension_enabled(const CharString &extension_name) const { - return enabled_instance_extension_names.has(extension_name); - } - - static void register_requested_device_extension(const CharString &extension_name, bool p_required); - bool 
is_device_extension_enabled(const CharString &extension_name) const { - return enabled_device_extension_names.has(extension_name); - } - - virtual void window_resize(DisplayServer::WindowID p_window_id, int p_width, int p_height) override final; - virtual int window_get_width(DisplayServer::WindowID p_window = 0) override final; - virtual int window_get_height(DisplayServer::WindowID p_window = 0) override final; - virtual bool window_is_valid_swapchain(DisplayServer::WindowID p_window = 0) override final; - virtual void window_destroy(DisplayServer::WindowID p_window_id) override final; - virtual RDD::RenderPassID window_get_render_pass(DisplayServer::WindowID p_window = 0) override final; - virtual RDD::FramebufferID window_get_framebuffer(DisplayServer::WindowID p_window = 0) override final; - - virtual RID local_device_create() override final; - virtual void local_device_push_command_buffers(RID p_local_device, const RDD::CommandBufferID *p_buffers, int p_count) override final; - virtual void local_device_sync(RID p_local_device) override final; - virtual void local_device_free(RID p_local_device) override final; - - VkFormat get_screen_format() const; - const VkPhysicalDeviceLimits &get_device_limits() const; - - virtual void set_setup_buffer(RDD::CommandBufferID p_command_buffer) override final; - virtual void append_command_buffer(RDD::CommandBufferID p_command_buffer) override final; - void resize_notify(); - virtual void flush(bool p_flush_setup = false, bool p_flush_pending = false) override final; - virtual Error prepare_buffers(RDD::CommandBufferID p_command_buffer) override final; - virtual void postpare_buffers(RDD::CommandBufferID p_command_buffer) override final; - virtual Error swap_buffers() override final; - virtual Error initialize() override final; - - virtual void command_begin_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) override final; - virtual void command_insert_label(RDD::CommandBufferID 
p_command_buffer, String p_label_name, const Color &p_color) override final; - virtual void command_end_label(RDD::CommandBufferID p_command_buffer) override final; - void set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name); - - virtual String get_device_vendor_name() const override final; - virtual String get_device_name() const override final; - virtual RDD::DeviceType get_device_type() const override final; - virtual String get_device_api_version() const override final; - virtual String get_device_pipeline_cache_uuid() const override final; - - virtual void set_vsync_mode(DisplayServer::WindowID p_window, DisplayServer::VSyncMode p_mode) override final; - virtual DisplayServer::VSyncMode get_vsync_mode(DisplayServer::WindowID p_window = 0) const override final; - - virtual RenderingDeviceDriver *get_driver(RID p_local_device = RID()) override final; - - VulkanContext(); - virtual ~VulkanContext(); -}; - -#endif // VULKAN_CONTEXT_H diff --git a/drivers/vulkan/vulkan_hooks.cpp b/drivers/vulkan/vulkan_hooks.cpp new file mode 100644 index 0000000000..416efcae80 --- /dev/null +++ b/drivers/vulkan/vulkan_hooks.cpp @@ -0,0 +1,45 @@ +/**************************************************************************/ +/* vulkan_hooks.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#include "vulkan_hooks.h" + +VulkanHooks *VulkanHooks::singleton = nullptr; + +VulkanHooks::VulkanHooks() { + if (singleton == nullptr) { + singleton = this; + } +} + +VulkanHooks::~VulkanHooks() { + if (singleton == this) { + singleton = nullptr; + } +} diff --git a/drivers/vulkan/vulkan_hooks.h b/drivers/vulkan/vulkan_hooks.h index eaa52658e4..bb30b29cec 100644 --- a/drivers/vulkan/vulkan_hooks.h +++ b/drivers/vulkan/vulkan_hooks.h @@ -38,11 +38,17 @@ #endif class VulkanHooks { +private: + static VulkanHooks *singleton; + public: - virtual bool create_vulkan_instance(const VkInstanceCreateInfo *p_vulkan_create_info, VkInstance *r_instance) { return false; }; - virtual bool get_physical_device(VkPhysicalDevice *r_device) { return false; }; - virtual bool create_vulkan_device(const VkDeviceCreateInfo *p_device_create_info, VkDevice *r_device) { return false; }; - virtual ~VulkanHooks(){}; + VulkanHooks(); + virtual ~VulkanHooks(); + virtual bool create_vulkan_instance(const VkInstanceCreateInfo *p_vulkan_create_info, VkInstance *r_instance) = 0; + virtual bool get_physical_device(VkPhysicalDevice *r_device) = 0; + virtual bool create_vulkan_device(const VkDeviceCreateInfo *p_device_create_info, VkDevice *r_device) = 0; + virtual void set_direct_queue_family_and_index(uint32_t p_queue_family_index, uint32_t p_queue_index) = 0; + static VulkanHooks *get_singleton() { return singleton; } }; #endif // VULKAN_HOOKS_H diff --git a/drivers/wasapi/audio_driver_wasapi.cpp b/drivers/wasapi/audio_driver_wasapi.cpp index e39373e7a0..64f2d1f203 100644 --- a/drivers/wasapi/audio_driver_wasapi.cpp +++ b/drivers/wasapi/audio_driver_wasapi.cpp @@ -557,13 +557,11 @@ Error AudioDriverWASAPI::init() { target_latency_ms = Engine::get_singleton()->get_audio_output_latency(); - Error err = init_output_device(); - if (err != OK) { - ERR_PRINT("WASAPI: init_output_device error"); - } - exit_thread.clear(); 
+ Error err = init_output_device(); + ERR_FAIL_COND_V_MSG(err != OK, err, "WASAPI: init_output_device error."); + thread.start(thread_func, this); return OK; diff --git a/drivers/windows/dir_access_windows.cpp b/drivers/windows/dir_access_windows.cpp index 8bf83823a0..43dd62cdf6 100644 --- a/drivers/windows/dir_access_windows.cpp +++ b/drivers/windows/dir_access_windows.cpp @@ -31,6 +31,7 @@ #if defined(WINDOWS_ENABLED) #include "dir_access_windows.h" +#include "file_access_windows.h" #include "core/config/project_settings.h" #include "core/os/memory.h" @@ -67,7 +68,7 @@ struct DirAccessWindowsPrivate { WIN32_FIND_DATAW fu; // Unicode version. }; -String DirAccessWindows::fix_path(String p_path) const { +String DirAccessWindows::fix_path(const String &p_path) const { String r_path = DirAccess::fix_path(p_path); if (r_path.is_absolute_path() && !r_path.is_network_share_path() && r_path.length() > MAX_PATH) { r_path = "\\\\?\\" + r_path.replace("/", "\\"); @@ -177,6 +178,13 @@ Error DirAccessWindows::make_dir(String p_dir) { p_dir = fix_path(p_dir); } + if (FileAccessWindows::is_path_invalid(p_dir)) { +#ifdef DEBUG_ENABLED + WARN_PRINT("The path :" + p_dir + " is a reserved Windows system pipe, so it can't be used for creating directories."); +#endif + return ERR_INVALID_PARAMETER; + } + p_dir = p_dir.simplify_path().replace("/", "\\"); bool success; diff --git a/drivers/windows/dir_access_windows.h b/drivers/windows/dir_access_windows.h index 1dcab84c9d..576ba18d9a 100644 --- a/drivers/windows/dir_access_windows.h +++ b/drivers/windows/dir_access_windows.h @@ -54,7 +54,7 @@ class DirAccessWindows : public DirAccess { bool _cishidden = false; protected: - virtual String fix_path(String p_path) const override; + virtual String fix_path(const String &p_path) const override; public: virtual Error list_dir_begin() override; ///< This starts dir listing diff --git a/drivers/windows/file_access_windows.cpp b/drivers/windows/file_access_windows.cpp index 
9d21073f19..aae06505cd 100644 --- a/drivers/windows/file_access_windows.cpp +++ b/drivers/windows/file_access_windows.cpp @@ -60,12 +60,12 @@ void FileAccessWindows::check_errors() const { bool FileAccessWindows::is_path_invalid(const String &p_path) { // Check for invalid operating system file. - String fname = p_path; + String fname = p_path.get_file().to_lower(); + int dot = fname.find("."); if (dot != -1) { fname = fname.substr(0, dot); } - fname = fname.to_lower(); return invalid_files.has(fname); } @@ -284,6 +284,72 @@ uint8_t FileAccessWindows::get_8() const { return b; } +uint16_t FileAccessWindows::get_16() const { + ERR_FAIL_NULL_V(f, 0); + + if (flags == READ_WRITE || flags == WRITE_READ) { + if (prev_op == WRITE) { + fflush(f); + } + prev_op = READ; + } + + uint16_t b = 0; + if (fread(&b, 1, 2, f) != 2) { + check_errors(); + } + + if (big_endian) { + b = BSWAP16(b); + } + + return b; +} + +uint32_t FileAccessWindows::get_32() const { + ERR_FAIL_NULL_V(f, 0); + + if (flags == READ_WRITE || flags == WRITE_READ) { + if (prev_op == WRITE) { + fflush(f); + } + prev_op = READ; + } + + uint32_t b = 0; + if (fread(&b, 1, 4, f) != 4) { + check_errors(); + } + + if (big_endian) { + b = BSWAP32(b); + } + + return b; +} + +uint64_t FileAccessWindows::get_64() const { + ERR_FAIL_NULL_V(f, 0); + + if (flags == READ_WRITE || flags == WRITE_READ) { + if (prev_op == WRITE) { + fflush(f); + } + prev_op = READ; + } + + uint64_t b = 0; + if (fread(&b, 1, 8, f) != 8) { + check_errors(); + } + + if (big_endian) { + b = BSWAP64(b); + } + + return b; +} + uint64_t FileAccessWindows::get_buffer(uint8_t *p_dst, uint64_t p_length) const { ERR_FAIL_COND_V(!p_dst && p_length > 0, -1); ERR_FAIL_NULL_V(f, -1); @@ -326,6 +392,63 @@ void FileAccessWindows::store_8(uint8_t p_dest) { fwrite(&p_dest, 1, 1, f); } +void FileAccessWindows::store_16(uint16_t p_dest) { + ERR_FAIL_NULL(f); + + if (flags == READ_WRITE || flags == WRITE_READ) { + if (prev_op == READ) { + if (last_error != 
ERR_FILE_EOF) { + fseek(f, 0, SEEK_CUR); + } + } + prev_op = WRITE; + } + + if (big_endian) { + p_dest = BSWAP16(p_dest); + } + + fwrite(&p_dest, 1, 2, f); +} + +void FileAccessWindows::store_32(uint32_t p_dest) { + ERR_FAIL_NULL(f); + + if (flags == READ_WRITE || flags == WRITE_READ) { + if (prev_op == READ) { + if (last_error != ERR_FILE_EOF) { + fseek(f, 0, SEEK_CUR); + } + } + prev_op = WRITE; + } + + if (big_endian) { + p_dest = BSWAP32(p_dest); + } + + fwrite(&p_dest, 1, 4, f); +} + +void FileAccessWindows::store_64(uint64_t p_dest) { + ERR_FAIL_NULL(f); + + if (flags == READ_WRITE || flags == WRITE_READ) { + if (prev_op == READ) { + if (last_error != ERR_FILE_EOF) { + fseek(f, 0, SEEK_CUR); + } + } + prev_op = WRITE; + } + + if (big_endian) { + p_dest = BSWAP64(p_dest); + } + + fwrite(&p_dest, 1, 8, f); +} + void FileAccessWindows::store_buffer(const uint8_t *p_src, uint64_t p_length) { ERR_FAIL_NULL(f); ERR_FAIL_COND(!p_src && p_length > 0); diff --git a/drivers/windows/file_access_windows.h b/drivers/windows/file_access_windows.h index 73143009fc..173423fb06 100644 --- a/drivers/windows/file_access_windows.h +++ b/drivers/windows/file_access_windows.h @@ -50,10 +50,11 @@ class FileAccessWindows : public FileAccess { void _close(); - static bool is_path_invalid(const String &p_path); static HashSet<String> invalid_files; public: + static bool is_path_invalid(const String &p_path); + virtual String fix_path(const String &p_path) const override; virtual Error open_internal(const String &p_path, int p_mode_flags) override; ///< open a file virtual bool is_open() const override; ///< true when file is open @@ -69,12 +70,18 @@ public: virtual bool eof_reached() const override; ///< reading passed EOF virtual uint8_t get_8() const override; ///< get a byte + virtual uint16_t get_16() const override; + virtual uint32_t get_32() const override; + virtual uint64_t get_64() const override; virtual uint64_t get_buffer(uint8_t *p_dst, uint64_t p_length) const override; 
virtual Error get_error() const override; ///< get last error virtual void flush() override; virtual void store_8(uint8_t p_dest) override; ///< store a byte + virtual void store_16(uint16_t p_dest) override; + virtual void store_32(uint32_t p_dest) override; + virtual void store_64(uint64_t p_dest) override; virtual void store_buffer(const uint8_t *p_src, uint64_t p_length) override; ///< store an array of bytes virtual bool file_exists(const String &p_name) override; ///< return true if a file exists |