diff options
Diffstat (limited to 'drivers')
47 files changed, 2220 insertions, 767 deletions
diff --git a/drivers/alsamidi/midi_driver_alsamidi.cpp b/drivers/alsamidi/midi_driver_alsamidi.cpp index 6b35987f70..b87be69cc5 100644 --- a/drivers/alsamidi/midi_driver_alsamidi.cpp +++ b/drivers/alsamidi/midi_driver_alsamidi.cpp @@ -82,13 +82,13 @@ size_t MIDIDriverALSAMidi::msg_expected_data(uint8_t status_byte) { } void MIDIDriverALSAMidi::InputConnection::parse_byte(uint8_t byte, MIDIDriverALSAMidi &driver, - uint64_t timestamp) { + uint64_t timestamp, int device_index) { switch (msg_category(byte)) { case MessageCategory::RealTime: // Real-Time messages are single byte messages that can // occur at any point. // We pass them straight through. - driver.receive_input_packet(timestamp, &byte, 1); + driver.receive_input_packet(device_index, timestamp, &byte, 1); break; case MessageCategory::Data: @@ -100,7 +100,7 @@ void MIDIDriverALSAMidi::InputConnection::parse_byte(uint8_t byte, MIDIDriverALS // Forward a complete message and reset relevant state. if (received_data == expected_data) { - driver.receive_input_packet(timestamp, buffer, received_data + 1); + driver.receive_input_packet(device_index, timestamp, buffer, received_data + 1); received_data = 0; if (msg_category(buffer[0]) != MessageCategory::Voice) { @@ -130,13 +130,13 @@ void MIDIDriverALSAMidi::InputConnection::parse_byte(uint8_t byte, MIDIDriverALS expected_data = msg_expected_data(byte); skipping_sys_ex = false; if (expected_data == 0) { - driver.receive_input_packet(timestamp, &byte, 1); + driver.receive_input_packet(device_index, timestamp, &byte, 1); } break; } } -int MIDIDriverALSAMidi::InputConnection::read_in(MIDIDriverALSAMidi &driver, uint64_t timestamp) { +int MIDIDriverALSAMidi::InputConnection::read_in(MIDIDriverALSAMidi &driver, uint64_t timestamp, int device_index) { int ret; do { uint8_t byte = 0; @@ -147,7 +147,7 @@ int MIDIDriverALSAMidi::InputConnection::read_in(MIDIDriverALSAMidi &driver, uin ERR_PRINT("snd_rawmidi_read error: " + String(snd_strerror(ret))); } } else { - 
parse_byte(byte, driver, timestamp); + parse_byte(byte, driver, timestamp, device_index); } } while (ret > 0); @@ -165,7 +165,7 @@ void MIDIDriverALSAMidi::thread_func(void *p_udata) { size_t connection_count = md->connected_inputs.size(); for (size_t i = 0; i < connection_count; i++) { - connections[i].read_in(*md, timestamp); + connections[i].read_in(*md, timestamp, (int)i); } md->unlock(); diff --git a/drivers/alsamidi/midi_driver_alsamidi.h b/drivers/alsamidi/midi_driver_alsamidi.h index 80cc96310f..95ded3b1c9 100644 --- a/drivers/alsamidi/midi_driver_alsamidi.h +++ b/drivers/alsamidi/midi_driver_alsamidi.h @@ -58,7 +58,7 @@ class MIDIDriverALSAMidi : public MIDIDriver { rawmidi_ptr{ midi_in } {} // Read in and parse available data, forwarding any complete messages through the driver. - int read_in(MIDIDriverALSAMidi &driver, uint64_t timestamp); + int read_in(MIDIDriverALSAMidi &driver, uint64_t timestamp, int device_index); snd_rawmidi_t *rawmidi_ptr = nullptr; @@ -68,7 +68,7 @@ class MIDIDriverALSAMidi : public MIDIDriver { size_t expected_data = 0; size_t received_data = 0; bool skipping_sys_ex = false; - void parse_byte(uint8_t byte, MIDIDriverALSAMidi &driver, uint64_t timestamp); + void parse_byte(uint8_t byte, MIDIDriverALSAMidi &driver, uint64_t timestamp, int device_index); }; Vector<InputConnection> connected_inputs; diff --git a/drivers/coremidi/midi_driver_coremidi.cpp b/drivers/coremidi/midi_driver_coremidi.cpp index ed991b3d9d..87fc7612f7 100644 --- a/drivers/coremidi/midi_driver_coremidi.cpp +++ b/drivers/coremidi/midi_driver_coremidi.cpp @@ -39,8 +39,9 @@ void MIDIDriverCoreMidi::read(const MIDIPacketList *packet_list, void *read_proc_ref_con, void *src_conn_ref_con) { MIDIPacket *packet = const_cast<MIDIPacket *>(packet_list->packet); + int *device_index = static_cast<int *>(src_conn_ref_con); for (UInt32 i = 0; i < packet_list->numPackets; i++) { - receive_input_packet(packet->timeStamp, packet->data, packet->length); + 
receive_input_packet(*device_index, packet->timeStamp, packet->data, packet->length); packet = MIDIPacketNext(packet); } } @@ -64,7 +65,7 @@ Error MIDIDriverCoreMidi::open() { for (int i = 0; i < sources; i++) { MIDIEndpointRef source = MIDIGetSource(i); if (source) { - MIDIPortConnectSource(port_in, source, (void *)this); + MIDIPortConnectSource(port_in, source, static_cast<void *>(&i)); connected_sources.insert(i, source); } } diff --git a/drivers/d3d12/d3d12ma.cpp b/drivers/d3d12/d3d12ma.cpp index ad7b4e570d..51171141de 100644 --- a/drivers/d3d12/d3d12ma.cpp +++ b/drivers/d3d12/d3d12ma.cpp @@ -42,6 +42,7 @@ #pragma GCC diagnostic ignored "-Wimplicit-fallthrough" #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wnonnull-compare" +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #endif #if defined(_MSC_VER) diff --git a/drivers/d3d12/rendering_context_driver_d3d12.cpp b/drivers/d3d12/rendering_context_driver_d3d12.cpp index 0eb627dde3..726be064bd 100644 --- a/drivers/d3d12/rendering_context_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_context_driver_d3d12.cpp @@ -84,17 +84,28 @@ RenderingContextDriverD3D12::RenderingContextDriverD3D12() { } RenderingContextDriverD3D12::~RenderingContextDriverD3D12() { + if (lib_d3d12) { + FreeLibrary(lib_d3d12); + } + if (lib_dxgi) { + FreeLibrary(lib_dxgi); + } } Error RenderingContextDriverD3D12::_init_device_factory() { uint32_t agility_sdk_version = GLOBAL_GET("rendering/rendering_device/d3d12/agility_sdk_version"); String agility_sdk_path = String(".\\") + Engine::get_singleton()->get_architecture_name(); + lib_d3d12 = LoadLibraryW(L"D3D12.dll"); + ERR_FAIL_NULL_V(lib_d3d12, ERR_CANT_CREATE); + + lib_dxgi = LoadLibraryW(L"DXGI.dll"); + ERR_FAIL_NULL_V(lib_dxgi, ERR_CANT_CREATE); + // Note: symbol is not available in MinGW import library. 
- PFN_D3D12_GET_INTERFACE d3d_D3D12GetInterface = (PFN_D3D12_GET_INTERFACE)GetProcAddress(LoadLibraryW(L"D3D12.dll"), "D3D12GetInterface"); - if (d3d_D3D12GetInterface == nullptr) { - // FIXME: Is it intended for this to silently return when it fails to find the symbol? - return OK; + PFN_D3D12_GET_INTERFACE d3d_D3D12GetInterface = (PFN_D3D12_GET_INTERFACE)(void *)GetProcAddress(lib_d3d12, "D3D12GetInterface"); + if (!d3d_D3D12GetInterface) { + return OK; // Fallback to the system loader. } ID3D12SDKConfiguration *sdk_config = nullptr; @@ -110,18 +121,22 @@ Error RenderingContextDriverD3D12::_init_device_factory() { } sdk_config->Release(); } - return OK; } Error RenderingContextDriverD3D12::_initialize_debug_layers() { ComPtr<ID3D12Debug> debug_controller; HRESULT res; + if (device_factory) { res = device_factory->GetConfigurationInterface(CLSID_D3D12DebugGodot, IID_PPV_ARGS(&debug_controller)); } else { - res = D3D12GetDebugInterface(IID_PPV_ARGS(&debug_controller)); + PFN_D3D12_GET_DEBUG_INTERFACE d3d_D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)(void *)GetProcAddress(lib_d3d12, "D3D12GetDebugInterface"); + ERR_FAIL_NULL_V(d3d_D3D12GetDebugInterface, ERR_CANT_CREATE); + + res = d3d_D3D12GetDebugInterface(IID_PPV_ARGS(&debug_controller)); } + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_QUERY_FAILED); debug_controller->EnableDebugLayer(); return OK; @@ -129,7 +144,12 @@ Error RenderingContextDriverD3D12::_initialize_debug_layers() { Error RenderingContextDriverD3D12::_initialize_devices() { const UINT dxgi_factory_flags = use_validation_layers() ? 
DXGI_CREATE_FACTORY_DEBUG : 0; - HRESULT res = CreateDXGIFactory2(dxgi_factory_flags, IID_PPV_ARGS(&dxgi_factory)); + + typedef HRESULT(WINAPI * PFN_DXGI_CREATE_DXGI_FACTORY2)(UINT, REFIID, void **); + PFN_DXGI_CREATE_DXGI_FACTORY2 dxgi_CreateDXGIFactory2 = (PFN_DXGI_CREATE_DXGI_FACTORY2)(void *)GetProcAddress(lib_dxgi, "CreateDXGIFactory2"); + ERR_FAIL_NULL_V(dxgi_CreateDXGIFactory2, ERR_CANT_CREATE); + + HRESULT res = dxgi_CreateDXGIFactory2(dxgi_factory_flags, IID_PPV_ARGS(&dxgi_factory)); ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); // Enumerate all possible adapters. diff --git a/drivers/d3d12/rendering_context_driver_d3d12.h b/drivers/d3d12/rendering_context_driver_d3d12.h index 694d0b3e4c..f74105ed3d 100644 --- a/drivers/d3d12/rendering_context_driver_d3d12.h +++ b/drivers/d3d12/rendering_context_driver_d3d12.h @@ -107,6 +107,9 @@ public: bool needs_resize = false; }; + HMODULE lib_d3d12 = nullptr; + HMODULE lib_dxgi = nullptr; + IDXGIAdapter1 *create_adapter(uint32_t p_adapter_index) const; ID3D12DeviceFactory *device_factory_get() const; IDXGIFactory2 *dxgi_factory_get() const; diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index 6517b4e91b..287726f4db 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -530,7 +530,7 @@ static const D3D12_RESOURCE_DIMENSION RD_TEXTURE_TYPE_TO_D3D12_RESOURCE_DIMENSIO D3D12_RESOURCE_DIMENSION_TEXTURE2D, }; -void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state, ID3D12Resource *p_resource_override) { +void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state) { DEV_ASSERT(p_subresource != UINT32_MAX); // We don't support an "all-resources" command here. 
#ifdef DEBUG_COUNT_BARRIERS @@ -538,9 +538,16 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso #endif ResourceInfo::States *res_states = p_resource->states_ptr; - D3D12_RESOURCE_STATES *curr_state = &res_states->subresource_states[p_subresource]; - ID3D12Resource *res_to_transition = p_resource_override ? p_resource_override : p_resource->resource; + if (p_new_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { + if (unlikely(!res_states->xfamily_fallback.subresources_dirty.is_empty())) { + uint32_t subres_qword = p_subresource / 64; + uint64_t subres_mask = (uint64_t(1) << (p_subresource % 64)); + res_states->xfamily_fallback.subresources_dirty[subres_qword] |= subres_mask; + } + } + + D3D12_RESOURCE_STATES *curr_state = &res_states->subresource_states[p_subresource]; // Transitions can be considered redundant if the current state has all the bits of the new state. // This check does not apply to the common state however, which must resort to checking if the state is the same (0). 
@@ -553,7 +560,7 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso if (res_barriers.size() < res_barriers_count + 1) { res_barriers.resize(res_barriers_count + 1); } - res_barriers[res_barriers_count] = CD3DX12_RESOURCE_BARRIER::UAV(res_to_transition); + res_barriers[res_barriers_count] = CD3DX12_RESOURCE_BARRIER::UAV(p_resource->resource); res_barriers_count++; res_states->last_batch_with_uav_barrier = res_barriers_batch; } @@ -563,7 +570,7 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso if (res_barriers_requests.has(res_states)) { BarrierRequest &br = res_barriers_requests.get(res_states); - DEV_ASSERT(br.dx_resource == res_to_transition); + DEV_ASSERT(br.dx_resource == p_resource->resource); DEV_ASSERT(br.subres_mask_qwords == STEPIFY(res_states->subresource_states.size(), 64) / 64); DEV_ASSERT(br.planes == p_num_planes); @@ -681,7 +688,7 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso } } else { BarrierRequest &br = res_barriers_requests[res_states]; - br.dx_resource = res_to_transition; + br.dx_resource = p_resource->resource; br.subres_mask_qwords = STEPIFY(p_resource->states_ptr->subresource_states.size(), 64) / 64; CRASH_COND(p_resource->states_ptr->subresource_states.size() > BarrierRequest::MAX_SUBRESOURCES); br.planes = p_num_planes; @@ -697,10 +704,6 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso } } - if (p_new_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { - res_states->last_batch_transitioned_to_uav = res_barriers_batch; - } - #ifdef DEBUG_COUNT_BARRIERS frame_barriers_cpu_time += OS::get_singleton()->get_ticks_usec() - start; #endif @@ -1135,19 +1138,13 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p // If views of different families are wanted, special setup is needed for proper sharing among them. // Two options here: - // 1. 
If ID3DDevice10 is present and driver reports relaxed casting is, leverage its new extended resource creation API (via D3D12MA). - // 2. Otherwise, fall back to an approach based on abusing aliasing, hoping for the best. [[CROSS_FAMILY_ALIASING]] - if (p_format.shareable_formats.size()) { - if (format_capabilities.relaxed_casting_supported) { - ComPtr<ID3D12Device10> device_10; - device->QueryInterface(device_10.GetAddressOf()); - if (device_10) { - relaxed_casting_available = true; - relaxed_casting_formats = ALLOCA_ARRAY(DXGI_FORMAT, p_format.shareable_formats.size()); - relaxed_casting_formats[0] = RD_TO_D3D12_FORMAT[p_format.format].general_format; - relaxed_casting_format_count++; - } - } + // 1. If the driver reports relaxed casting is supported, leverage its new extended resource creation API (via D3D12MA). + // 2. Otherwise, fall back to an approach based on having multiple versions of the resource and copying as needed. [[CROSS_FAMILY_FALLBACK]] + if (p_format.shareable_formats.size() && format_capabilities.relaxed_casting_supported) { + relaxed_casting_available = true; + relaxed_casting_formats = ALLOCA_ARRAY(DXGI_FORMAT, p_format.shareable_formats.size()); + relaxed_casting_formats[0] = RD_TO_D3D12_FORMAT[p_format.format].general_format; + relaxed_casting_format_count++; } HashMap<DataFormat, D3D12_RESOURCE_FLAGS> aliases_forbidden_flags; @@ -1168,9 +1165,6 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p } if (cross_family_sharing && !relaxed_casting_available) { - // At least guarantee the same layout among aliases. - resource_desc.Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; - // Per https://docs.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_texture_layout.
if (p_format.texture_type == TEXTURE_TYPE_1D) { ERR_FAIL_V_MSG(TextureID(), "This texture's views require aliasing, but that's not supported for a 1D texture."); @@ -1221,9 +1215,6 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p // Create. D3D12MA::ALLOCATION_DESC allocation_desc = {}; - if (cross_family_sharing && !relaxed_casting_available) { - allocation_desc.Flags = D3D12MA::ALLOCATION_FLAG_CAN_ALIAS; - } allocation_desc.HeapType = (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) ? D3D12_HEAP_TYPE_READBACK : D3D12_HEAP_TYPE_DEFAULT; if ((resource_desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))) { allocation_desc.ExtraHeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES; @@ -1343,53 +1334,6 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = main_uav_desc; uav_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format; - // Create aliases if needed. 
[[CROSS_FAMILY_ALIASING]] - - using AliasEntry = Pair<DXGI_FORMAT, ID3D12Resource *>; - AliasEntry *aliases = nullptr; - uint32_t alias_count = 0; - if (cross_family_sharing && !relaxed_casting_available) { - aliases = ALLOCA_ARRAY(AliasEntry, p_format.shareable_formats.size()); - - for (int i = 0; i < p_format.shareable_formats.size(); i++) { - DataFormat curr_format = p_format.shareable_formats[i]; - - DXGI_FORMAT format_family = RD_TO_D3D12_FORMAT[curr_format].family; - if (format_family == RD_TO_D3D12_FORMAT[p_format.format].family) { - continue; - } - - D3D12_RESOURCE_DESC alias_resource_desc = *(D3D12_RESOURCE_DESC *)&resource_desc; - alias_resource_desc.Format = format_family; - clear_value.Format = format_family; - if ((alias_resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)) { - if (!texture_get_usages_supported_by_format(curr_format, false).has_flag(TEXTURE_USAGE_STORAGE_BIT)) { - alias_resource_desc.Flags &= ~D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - } - } - ID3D12Resource *alias = nullptr; - HRESULT res = allocator->CreateAliasingResource( - allocation.Get(), - 0, - &alias_resource_desc, - initial_state, - (alias_resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? clear_value_ptr : nullptr, - IID_PPV_ARGS(&alias)); - if (!SUCCEEDED(res)) { - for (uint32_t j = 0; j < alias_count; j++) { - aliases[j].second->Release(); - } - ERR_FAIL_V_MSG(TextureID(), "CreateAliasingResource failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - } - aliases[alias_count] = AliasEntry(format_family, alias); - alias_count++; - - if (curr_format == p_view.format) { - texture = alias; - } - } - } - // Bookkeep. 
TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator); @@ -1409,12 +1353,8 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p tex_info->mipmaps = resource_desc.MipLevels; tex_info->view_descs.srv = srv_desc; tex_info->view_descs.uav = uav_desc; - tex_info->main_texture = main_texture.Get(); - tex_info->aliasing_hack.main_uav_desc = main_uav_desc; - if (alias_count) { - for (uint32_t i = 0; i < alias_count; i++) { - tex_info->aliasing_hack.owner_info.aliases.insert(aliases[i].first, aliases[i].second); - } + if ((p_format.usage_bits & (TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_COLOR_ATTACHMENT_BIT))) { + textures_pending_clear.add(&tex_info->pending_clear); } return TextureID(tex_info); @@ -1425,75 +1365,59 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create_from_extension(uint64_ } RDD::TextureID RenderingDeviceDriverD3D12::texture_create_shared(TextureID p_original_texture, const TextureView &p_view) { - const TextureInfo *owner_tex_info = (const TextureInfo *)p_original_texture.id; -#ifdef DEBUG_ENABLED - ERR_FAIL_COND_V(!owner_tex_info->owner_info.allocation, TextureID()); -#endif - - ID3D12Resource *texture = nullptr; - if (owner_tex_info->aliasing_hack.owner_info.aliases.is_empty()) { - texture = owner_tex_info->resource; - } else { - texture = owner_tex_info->main_texture; - for (const KeyValue<DXGI_FORMAT, ComPtr<ID3D12Resource>> &E : owner_tex_info->aliasing_hack.owner_info.aliases) { - if (E.key == RD_TO_D3D12_FORMAT[p_view.format].family) { - texture = E.value.Get(); - break; - } - } - } - - // Describe views. 
- - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = owner_tex_info->view_descs.srv; - { - srv_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format; - srv_desc.Shader4ComponentMapping = _compute_component_mapping(p_view); - } - - D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = owner_tex_info->view_descs.uav; - { - uav_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format; - } - - // Bookkeep. - - TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator); - tex_info->resource = texture; - tex_info->states_ptr = owner_tex_info->states_ptr; - tex_info->format = p_view.format; - tex_info->desc = owner_tex_info->desc; - tex_info->base_layer = owner_tex_info->base_layer; - tex_info->layers = owner_tex_info->layers; - tex_info->base_mip = owner_tex_info->base_mip; - tex_info->mipmaps = owner_tex_info->mipmaps; - tex_info->view_descs.srv = srv_desc; - tex_info->view_descs.uav = uav_desc; - tex_info->main_texture = owner_tex_info->main_texture; - tex_info->aliasing_hack.main_uav_desc = owner_tex_info->aliasing_hack.main_uav_desc; - - return TextureID(tex_info); + return _texture_create_shared_from_slice(p_original_texture, p_view, (TextureSliceType)-1, 0, 0, 0, 0); } RDD::TextureID RenderingDeviceDriverD3D12::texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) { - const TextureInfo *owner_tex_info = (const TextureInfo *)p_original_texture.id; + return _texture_create_shared_from_slice(p_original_texture, p_view, p_slice_type, p_layer, p_layers, p_mipmap, p_mipmaps); +} + +RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) { + TextureInfo *owner_tex_info = (TextureInfo *)p_original_texture.id; #ifdef 
DEBUG_ENABLED ERR_FAIL_COND_V(!owner_tex_info->owner_info.allocation, TextureID()); #endif - // Find appropriate resource instance. + ComPtr<ID3D12Resource> new_texture; + ComPtr<D3D12MA::Allocation> new_allocation; + ID3D12Resource *resource = nullptr; + CD3DX12_RESOURCE_DESC new_tex_resource_desc = owner_tex_info->desc; + bool cross_family = RD_TO_D3D12_FORMAT[p_view.format].family != RD_TO_D3D12_FORMAT[owner_tex_info->format].family; + if (cross_family && !format_capabilities.relaxed_casting_supported) { + // [[CROSS_FAMILY_FALLBACK]]. + // We have to create a new texture of the alternative format. - ID3D12Resource *texture = nullptr; - if (owner_tex_info->aliasing_hack.owner_info.aliases.is_empty()) { - texture = owner_tex_info->resource; - } else { - texture = owner_tex_info->main_texture; - for (const KeyValue<DXGI_FORMAT, ComPtr<ID3D12Resource>> &E : owner_tex_info->aliasing_hack.owner_info.aliases) { - if (E.key == RD_TO_D3D12_FORMAT[p_view.format].family) { - texture = E.value.Get(); - break; + D3D12MA::ALLOCATION_DESC allocation_desc = {}; + allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; + allocation_desc.ExtraHeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES; + + if (p_slice_type != -1) { +#ifdef DEV_ENABLED + // Actual slicing is not contemplated. If ever needed, let's at least realize. + if (p_slice_type != -1) { + uint32_t new_texture_subresorce_count = owner_tex_info->mipmaps * owner_tex_info->layers; + uint32_t slice_subresorce_count = p_mipmaps * p_layers; + DEV_ASSERT(new_texture_subresorce_count == slice_subresorce_count); } +#endif + new_tex_resource_desc.DepthOrArraySize = p_layers; + new_tex_resource_desc.MipLevels = p_mipmaps; } + new_tex_resource_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].family; + new_tex_resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE; // Alternative formats can only be used as SRVs. 
+ + HRESULT res = allocator->CreateResource( + &allocation_desc, + &new_tex_resource_desc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + new_allocation.GetAddressOf(), + IID_PPV_ARGS(new_texture.GetAddressOf())); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), TextureID(), vformat("D3D12MA::CreateResource failed with error 0x%08ux.", (uint64_t)res)); + + resource = new_texture.Get(); + } else { + resource = owner_tex_info->resource; } // Describe views. @@ -1509,103 +1433,169 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create_shared_from_slice(Text uav_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format; } - // Complete description with slicing. - // (Leveraging aliasing in members of the union as much as possible.) - - srv_desc.Texture1D.MostDetailedMip = p_mipmap; - srv_desc.Texture1D.MipLevels = 1; - - uav_desc.Texture1D.MipSlice = p_mipmap; - - switch (p_slice_type) { - case TEXTURE_SLICE_2D: { - if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2D && p_layer == 0) { - DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2D); - } else if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMS && p_layer == 0) { - DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_UNKNOWN); - } else if ((srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DARRAY || (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2D && p_layer)) || srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY) { - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + if (p_slice_type != -1) { + // Complete description with slicing. 
+ + switch (p_slice_type) { + case TEXTURE_SLICE_2D: { + if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2D && p_layer == 0) { + srv_desc.Texture2D.MostDetailedMip = p_mipmap; + srv_desc.Texture2D.MipLevels = p_mipmaps; + + DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2D); + uav_desc.Texture1D.MipSlice = p_mipmap; + } else if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMS && p_layer == 0) { + DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_UNKNOWN); + } else if ((srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DARRAY || (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2D && p_layer)) || srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY) { + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + srv_desc.Texture2DArray.MostDetailedMip = p_mipmap; + srv_desc.Texture2DArray.MipLevels = p_mipmaps; + srv_desc.Texture2DArray.FirstArraySlice = p_layer; + srv_desc.Texture2DArray.ArraySize = 1; + srv_desc.Texture2DArray.PlaneSlice = 0; + srv_desc.Texture2DArray.ResourceMinLODClamp = 0.0f; + + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + uav_desc.Texture2DArray.MipSlice = p_mipmap; + uav_desc.Texture2DArray.FirstArraySlice = p_layer; + uav_desc.Texture2DArray.ArraySize = 1; + uav_desc.Texture2DArray.PlaneSlice = 0; + } else if ((srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY || (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMS && p_layer))) { + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + srv_desc.Texture2DMSArray.FirstArraySlice = p_layer; + srv_desc.Texture2DMSArray.ArraySize = 1; + + uav_desc.ViewDimension = D3D12_UAV_DIMENSION_UNKNOWN; + } else { + DEV_ASSERT(false); + } + } break; + case TEXTURE_SLICE_CUBEMAP: { + if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || p_layer == 0) { + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + 
srv_desc.TextureCube.MostDetailedMip = p_mipmap; + srv_desc.TextureCube.MipLevels = p_mipmaps; + + DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY); + uav_desc.Texture2DArray.MipSlice = p_mipmap; + uav_desc.Texture2DArray.FirstArraySlice = p_layer; + uav_desc.Texture2DArray.ArraySize = 6; + uav_desc.Texture2DArray.PlaneSlice = 0; + } else if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY || p_layer != 0) { + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + srv_desc.TextureCubeArray.MostDetailedMip = p_mipmap; + srv_desc.TextureCubeArray.MipLevels = p_mipmaps; + srv_desc.TextureCubeArray.First2DArrayFace = p_layer; + srv_desc.TextureCubeArray.NumCubes = 1; + srv_desc.TextureCubeArray.ResourceMinLODClamp = 0.0f; + + DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY); + uav_desc.Texture2DArray.MipSlice = p_mipmap; + uav_desc.Texture2DArray.FirstArraySlice = p_layer; + uav_desc.Texture2DArray.ArraySize = 6; + uav_desc.Texture2DArray.PlaneSlice = 0; + } else { + DEV_ASSERT(false); + } + } break; + case TEXTURE_SLICE_3D: { + DEV_ASSERT(srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE3D); + srv_desc.Texture3D.MostDetailedMip = p_mipmap; + srv_desc.Texture3D.MipLevels = p_mipmaps; + + DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE3D); + uav_desc.Texture3D.MipSlice = p_mipmap; + uav_desc.Texture3D.WSize = -1; + } break; + case TEXTURE_SLICE_2D_ARRAY: { + DEV_ASSERT(srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DARRAY); + srv_desc.Texture2DArray.MostDetailedMip = p_mipmap; + srv_desc.Texture2DArray.MipLevels = p_mipmaps; srv_desc.Texture2DArray.FirstArraySlice = p_layer; - srv_desc.Texture2DArray.ArraySize = 1; - srv_desc.Texture2DArray.PlaneSlice = 0; - srv_desc.Texture2DArray.ResourceMinLODClamp = 0.0f; - - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; - uav_desc.Texture2DArray.FirstArraySlice = p_layer; - uav_desc.Texture2DArray.ArraySize = 1; - 
uav_desc.Texture2DArray.PlaneSlice = 0; - } else if ((srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY || (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMS && p_layer))) { - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; - srv_desc.Texture2DMSArray.FirstArraySlice = p_layer; - srv_desc.Texture2DMSArray.ArraySize = 1; - - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_UNKNOWN; - } else { - DEV_ASSERT(false); - } - } break; - case TEXTURE_SLICE_CUBEMAP: { - if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || p_layer == 0) { - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; - srv_desc.TextureCube.MostDetailedMip = p_mipmap; - srv_desc.TextureCube.MipLevels = 1; - - DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY); - uav_desc.Texture2DArray.MipSlice = p_mipmap; - uav_desc.Texture2DArray.FirstArraySlice = 0; - uav_desc.Texture2DArray.ArraySize = 6; - uav_desc.Texture2DArray.PlaneSlice = 0; - } else if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY || p_layer != 0) { - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; - srv_desc.TextureCubeArray.MostDetailedMip = p_mipmap; - srv_desc.TextureCubeArray.MipLevels = 1; - srv_desc.TextureCubeArray.First2DArrayFace = p_layer; - srv_desc.TextureCubeArray.NumCubes = 1; - srv_desc.TextureCubeArray.ResourceMinLODClamp = 0.0f; + srv_desc.Texture2DArray.ArraySize = p_layers; DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY); uav_desc.Texture2DArray.MipSlice = p_mipmap; uav_desc.Texture2DArray.FirstArraySlice = p_layer; - uav_desc.Texture2DArray.ArraySize = 6; - uav_desc.Texture2DArray.PlaneSlice = 0; - } else { - DEV_ASSERT(false); - } - } break; - case TEXTURE_SLICE_3D: { - DEV_ASSERT(srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE3D); - - DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE3D); - uav_desc.Texture3D.WSize = -1; - } break; - case TEXTURE_SLICE_2D_ARRAY: { - 
DEV_ASSERT(srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DARRAY); - srv_desc.Texture2DArray.FirstArraySlice = p_layer; - srv_desc.Texture2DArray.ArraySize = p_layers; - - DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY); - uav_desc.Texture2DArray.FirstArraySlice = p_layer; - uav_desc.Texture2DArray.ArraySize = p_layers; - } break; - default: - break; + uav_desc.Texture2DArray.ArraySize = p_layers; + } break; + default: + break; + } } // Bookkeep. TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator); - tex_info->resource = texture; - tex_info->states_ptr = owner_tex_info->states_ptr; + tex_info->resource = resource; + if (new_texture.Get()) { + // [[CROSS_FAMILY_FALLBACK]]. + + DEV_ASSERT(cross_family && !format_capabilities.relaxed_casting_supported); + + uint32_t new_texture_subresorce_count = owner_tex_info->mipmaps * owner_tex_info->layers; +#ifdef DEV_ENABLED + // Actual slicing is not contemplated. If ever needed, let's at least realize. 
+ if (p_slice_type != -1) { + uint32_t slice_subresorce_count = p_mipmaps * p_layers; + DEV_ASSERT(new_texture_subresorce_count == slice_subresorce_count); + } +#endif + + tex_info->owner_info.resource = new_texture; + tex_info->owner_info.allocation = new_allocation; + tex_info->owner_info.states.subresource_states.resize(new_texture_subresorce_count); + for (uint32_t i = 0; i < tex_info->owner_info.states.subresource_states.size(); i++) { + tex_info->owner_info.states.subresource_states[i] = D3D12_RESOURCE_STATE_COPY_DEST; + } + tex_info->states_ptr = &tex_info->owner_info.states; + + ResourceInfo::States::CrossFamillyFallback &xfamily = owner_tex_info->owner_info.states.xfamily_fallback; + if (xfamily.subresources_dirty.is_empty()) { + uint32_t items_required = STEPIFY(new_texture_subresorce_count, sizeof(uint64_t)) / sizeof(uint64_t); + xfamily.subresources_dirty.resize(items_required); + memset(xfamily.subresources_dirty.ptr(), 255, sizeof(uint64_t) * xfamily.subresources_dirty.size()); + + // Create buffer for non-direct copy if it's a format not supporting reinterpret-copy. + DEV_ASSERT(!xfamily.interim_buffer.Get()); + if (owner_tex_info->format == DATA_FORMAT_R16_UINT && p_view.format == DATA_FORMAT_R4G4B4A4_UNORM_PACK16) { + uint32_t row_pitch = STEPIFY(owner_tex_info->desc.Width * sizeof(uint16_t), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + uint32_t buffer_size = sizeof(uint16_t) * row_pitch * owner_tex_info->desc.Height * owner_tex_info->desc.Depth(); + CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(STEPIFY(buffer_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)); + resource_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + + D3D12MA::ALLOCATION_DESC allocation_desc = {}; + allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; + + HRESULT res = allocator->CreateResource( + &allocation_desc, + &resource_desc, + D3D12_RESOURCE_STATE_COPY_SOURCE, // Makes the code that makes the copy easier. 
+ nullptr, + xfamily.interim_buffer_alloc.GetAddressOf(), + IID_PPV_ARGS(xfamily.interim_buffer.GetAddressOf())); + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), TextureID(), "D3D12MA::CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + } + } + } else { + tex_info->states_ptr = owner_tex_info->states_ptr; + } tex_info->format = p_view.format; - tex_info->desc = owner_tex_info->desc; - tex_info->base_layer = p_layer; - tex_info->layers = p_layers; - tex_info->base_mip = p_mipmap; - tex_info->mipmaps = p_mipmaps; + tex_info->desc = new_tex_resource_desc; + if (p_slice_type == -1) { + tex_info->base_layer = owner_tex_info->base_layer; + tex_info->layers = owner_tex_info->layers; + tex_info->base_mip = owner_tex_info->base_mip; + tex_info->mipmaps = owner_tex_info->mipmaps; + } else { + tex_info->base_layer = p_layer; + tex_info->layers = p_layers; + tex_info->base_mip = p_mipmap; + tex_info->mipmaps = p_mipmaps; + } tex_info->view_descs.srv = srv_desc; tex_info->view_descs.uav = uav_desc; - tex_info->main_texture = owner_tex_info->main_texture; - tex_info->aliasing_hack.main_uav_desc = owner_tex_info->aliasing_hack.main_uav_desc; + tex_info->main_texture = owner_tex_info; return TextureID(tex_info); } @@ -1860,7 +1850,7 @@ void RenderingDeviceDriverD3D12::command_pipeline_barrier( VectorView<RDD::BufferBarrier> p_buffer_barriers, VectorView<RDD::TextureBarrier> p_texture_barriers) { if (p_src_stages.has_flag(PIPELINE_STAGE_ALL_COMMANDS_BIT) && p_dst_stages.has_flag(PIPELINE_STAGE_ALL_COMMANDS_BIT)) { - // Looks like the intent is a a full barrier. + // Looks like the intent is a full barrier. // In the resource barriers world, we can force a full barrier by discarding some resource, as per // https://microsoft.github.io/DirectX-Specs/d3d/D3D12EnhancedBarriers.html#synchronous-copy-discard-and-resolve. 
const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; @@ -1955,48 +1945,44 @@ RDD::CommandQueueID RenderingDeviceDriverD3D12::command_queue_create(CommandQueu return CommandQueueID(command_queue); } -Error RenderingDeviceDriverD3D12::command_queue_execute(CommandQueueID p_cmd_queue, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) { +Error RenderingDeviceDriverD3D12::command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) { CommandQueueInfo *command_queue = (CommandQueueInfo *)(p_cmd_queue.id); for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) { const SemaphoreInfo *semaphore = (const SemaphoreInfo *)(p_wait_semaphores[i].id); command_queue->d3d_queue->Wait(semaphore->d3d_fence.Get(), semaphore->fence_value); } - thread_local LocalVector<ID3D12CommandList *> command_lists; - command_lists.resize(p_cmd_buffers.size()); - for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) { - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)(p_cmd_buffers[i].id); - command_lists[i] = cmd_buf_info->cmd_list.Get(); - } + if (p_cmd_buffers.size() > 0) { + thread_local LocalVector<ID3D12CommandList *> command_lists; + command_lists.resize(p_cmd_buffers.size()); + for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) { + const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)(p_cmd_buffers[i].id); + command_lists[i] = cmd_buf_info->cmd_list.Get(); + } - command_queue->d3d_queue->ExecuteCommandLists(command_lists.size(), command_lists.ptr()); + command_queue->d3d_queue->ExecuteCommandLists(command_lists.size(), command_lists.ptr()); - for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) { - SemaphoreInfo *semaphore = 
(SemaphoreInfo *)(p_signal_semaphores[i].id); - semaphore->fence_value++; - command_queue->d3d_queue->Signal(semaphore->d3d_fence.Get(), semaphore->fence_value); - } + for (uint32_t i = 0; i < p_cmd_semaphores.size(); i++) { + SemaphoreInfo *semaphore = (SemaphoreInfo *)(p_cmd_semaphores[i].id); + semaphore->fence_value++; + command_queue->d3d_queue->Signal(semaphore->d3d_fence.Get(), semaphore->fence_value); + } - if (p_signal_fence) { - FenceInfo *fence = (FenceInfo *)(p_signal_fence.id); - fence->fence_value++; - command_queue->d3d_queue->Signal(fence->d3d_fence.Get(), fence->fence_value); - fence->d3d_fence->SetEventOnCompletion(fence->fence_value, fence->event_handle); + if (p_cmd_fence) { + FenceInfo *fence = (FenceInfo *)(p_cmd_fence.id); + fence->fence_value++; + command_queue->d3d_queue->Signal(fence->d3d_fence.Get(), fence->fence_value); + fence->d3d_fence->SetEventOnCompletion(fence->fence_value, fence->event_handle); + } } - return OK; -} - -Error RenderingDeviceDriverD3D12::command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) { - // D3D12 does not require waiting for the command queue's semaphores to handle presentation. - // We just present the swap chains that were specified and ignore the command queue and the semaphores. 
HRESULT res; bool any_present_failed = false; for (uint32_t i = 0; i < p_swap_chains.size(); i++) { SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id); res = swap_chain->d3d_swap_chain->Present(swap_chain->sync_interval, swap_chain->present_flags); if (!SUCCEEDED(res)) { - print_verbose("D3D12: Presenting swapchain failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + print_verbose(vformat("D3D12: Presenting swapchain failed with error 0x%08ux.", (uint64_t)res)); any_present_failed = true; } } @@ -2356,6 +2342,14 @@ D3D12_RENDER_TARGET_VIEW_DESC RenderingDeviceDriverD3D12::_make_rtv_for_texture( rtv_desc.Texture3D.FirstWSlice = 0; rtv_desc.Texture3D.WSize = -1; } break; + case D3D12_SRV_DIMENSION_TEXTURECUBE: + case D3D12_SRV_DIMENSION_TEXTURECUBEARRAY: { + rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + rtv_desc.Texture2DArray.MipSlice = (p_add_bases ? p_texture_info->base_mip : 0) + p_mipmap_offset; + rtv_desc.Texture2DArray.FirstArraySlice = (p_add_bases ? p_texture_info->base_layer : 0) + p_layer_offset; + rtv_desc.Texture2DArray.ArraySize = p_layers == UINT32_MAX ? p_texture_info->layers : p_layers; + rtv_desc.Texture2DArray.PlaneSlice = 0; + } break; default: { DEV_ASSERT(false); } @@ -2364,6 +2358,36 @@ D3D12_RENDER_TARGET_VIEW_DESC RenderingDeviceDriverD3D12::_make_rtv_for_texture( return rtv_desc; } +D3D12_UNORDERED_ACCESS_VIEW_DESC RenderingDeviceDriverD3D12::_make_ranged_uav_for_texture(const TextureInfo *p_texture_info, uint32_t p_mipmap_offset, uint32_t p_layer_offset, uint32_t p_layers, bool p_add_bases) { + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = p_texture_info->view_descs.uav; + + uint32_t mip = (p_add_bases ? 
p_texture_info->base_mip : 0) + p_mipmap_offset; + switch (p_texture_info->view_descs.uav.ViewDimension) { + case D3D12_UAV_DIMENSION_TEXTURE1D: { + uav_desc.Texture1DArray.MipSlice = mip; + } break; + case D3D12_UAV_DIMENSION_TEXTURE1DARRAY: { + uav_desc.Texture1DArray.MipSlice = mip; + uav_desc.Texture1DArray.FirstArraySlice = mip; + uav_desc.Texture1DArray.ArraySize = p_layers; + } break; + case D3D12_UAV_DIMENSION_TEXTURE2D: { + uav_desc.Texture2D.MipSlice = mip; + } break; + case D3D12_UAV_DIMENSION_TEXTURE2DARRAY: { + uav_desc.Texture2DArray.MipSlice = mip; + uav_desc.Texture2DArray.FirstArraySlice = (p_add_bases ? p_texture_info->base_layer : 0) + p_layer_offset; + uav_desc.Texture2DArray.ArraySize = p_layers; + } break; + case D3D12_UAV_DIMENSION_TEXTURE3D: { + uav_desc.Texture3D.MipSlice = mip; + uav_desc.Texture3D.WSize >>= p_mipmap_offset; + } break; + } + + return uav_desc; +} + D3D12_DEPTH_STENCIL_VIEW_DESC RenderingDeviceDriverD3D12::_make_dsv_for_texture(const TextureInfo *p_texture_info) { D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = {}; dsv_desc.Format = RD_TO_D3D12_FORMAT[p_texture_info->format].dsv_format; @@ -2889,7 +2913,7 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec uint32_t binding = (p_register % GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER) / GODOT_NIR_BINDING_MULTIPLIER; DEV_ASSERT(set < (uint32_t)shader_data_in.sets_bindings.size()); - bool found = false; + [[maybe_unused]] bool found = false; for (int j = 0; j < shader_data_in.sets_bindings[set].size(); j++) { if (shader_data_in.sets_bindings[set][j].binding != binding) { continue; @@ -2907,7 +2931,6 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec } else { CRASH_NOW(); } - found = true; break; } @@ -2917,8 +2940,7 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec godot_nir_callbacks.report_sc_bit_offset_fn = [](uint32_t p_sc_id, uint64_t p_bit_offset, void *p_data) { ShaderData &shader_data_in 
= *(ShaderData *)p_data; - - bool found = false; + [[maybe_unused]] bool found = false; for (int j = 0; j < shader_data_in.specialization_constants.size(); j++) { if (shader_data_in.specialization_constants[j].constant_id != p_sc_id) { continue; @@ -2927,7 +2949,6 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec uint32_t offset_idx = SHADER_STAGES_BIT_OFFSET_INDICES[shader_data_in.stage]; DEV_ASSERT(shader_data_in.specialization_constants.write[j].stages_bit_offsets[offset_idx] == 0); shader_data_in.specialization_constants.write[j].stages_bit_offsets[offset_idx] = p_bit_offset; - found = true; break; } @@ -3201,7 +3222,7 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec root_sig_desc.Init_1_1(root_params.size(), root_params.ptr(), 0, nullptr, root_sig_flags); ComPtr<ID3DBlob> error_blob; - HRESULT res = D3DX12SerializeVersionedRootSignature(&root_sig_desc, D3D_ROOT_SIGNATURE_VERSION_1_1, root_sig_blob.GetAddressOf(), error_blob.GetAddressOf()); + HRESULT res = D3DX12SerializeVersionedRootSignature(context_driver->lib_d3d12, &root_sig_desc, D3D_ROOT_SIGNATURE_VERSION_1_1, root_sig_blob.GetAddressOf(), error_blob.GetAddressOf()); ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), Vector<uint8_t>(), "Serialization of root signature failed with error " + vformat("0x%08ux", (uint64_t)res) + " and the following message:\n" + String((char *)error_blob->GetBufferPointer(), error_blob->GetBufferSize())); @@ -3462,7 +3483,10 @@ RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_bytecode(const Vect const uint8_t *root_sig_data_ptr = binptr + read_offset; - HRESULT res = D3D12CreateRootSignatureDeserializer(root_sig_data_ptr, binary_data.root_signature_len, IID_PPV_ARGS(shader_info_in.root_signature_deserializer.GetAddressOf())); + PFN_D3D12_CREATE_ROOT_SIGNATURE_DESERIALIZER d3d_D3D12CreateRootSignatureDeserializer = (PFN_D3D12_CREATE_ROOT_SIGNATURE_DESERIALIZER)(void *)GetProcAddress(context_driver->lib_d3d12, 
"D3D12CreateRootSignatureDeserializer"); + ERR_FAIL_NULL_V(d3d_D3D12CreateRootSignatureDeserializer, ShaderID()); + + HRESULT res = d3d_D3D12CreateRootSignatureDeserializer(root_sig_data_ptr, binary_data.root_signature_len, IID_PPV_ARGS(shader_info_in.root_signature_deserializer.GetAddressOf())); ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ShaderID(), "D3D12CreateRootSignatureDeserializer failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); read_offset += binary_data.root_signature_len; @@ -3786,6 +3810,21 @@ void RenderingDeviceDriverD3D12::uniform_set_free(UniformSetID p_uniform_set) { // ----- COMMANDS ----- void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + // Perform pending blackouts. + { + SelfList<TextureInfo> *E = textures_pending_clear.first(); + while (E) { + TextureSubresourceRange subresources; + subresources.layer_count = E->self()->layers; + subresources.mipmap_count = E->self()->mipmaps; + command_clear_color_texture(p_cmd_buffer, TextureID(E->self()), TEXTURE_LAYOUT_GENERAL, Color(), subresources); + + SelfList<TextureInfo> *next = E->next(); + E->remove_from_list(); + E = next; + } + } + const UniformSetInfo *uniform_set_info = (const UniformSetInfo *)p_uniform_set.id; const ShaderInfo *shader_info_in = (const ShaderInfo *)p_shader.id; const ShaderInfo::UniformSet &shader_set = shader_info_in->sets[p_set_index]; @@ -3832,13 +3871,6 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff } DEV_ASSERT((wanted_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) == (bool)(wanted_state & D3D12_RESOURCE_STATE_UNORDERED_ACCESS)); - - if (wanted_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS || wanted_state == D3D12_RESOURCE_STATE_RENDER_TARGET) { - if (!sr.is_buffer) { - TextureInfo *tex_info = (TextureInfo *)sr.resource; - CRASH_COND_MSG(tex_info->resource != tex_info->main_texture, "The texture format used 
for UAV or RTV must be the main one."); - } - } } } #endif @@ -3918,7 +3950,35 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff for (uint32_t i = 0; i < tex_info->layers; i++) { for (uint32_t j = 0; j < tex_info->mipmaps; j++) { uint32_t subresource = D3D12CalcSubresource(tex_info->base_mip + j, tex_info->base_layer + i, 0, tex_info->desc.MipLevels, tex_info->desc.ArraySize()); - _resource_transition_batch(tex_info, subresource, planes, wanted_state, tex_info->main_texture); + + if ((wanted_state & D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE)) { + // [[CROSS_FAMILY_FALLBACK]]. + if (tex_info->owner_info.resource && tex_info->main_texture && tex_info->main_texture != tex_info) { + uint32_t subres_qword = subresource / 64; + uint64_t subres_mask = (uint64_t(1) << (subresource % 64)); + if ((tex_info->main_texture->states_ptr->xfamily_fallback.subresources_dirty[subres_qword] & subres_mask)) { + // Prepare for copying the write-to texture to this one, if out-of-date. 
+ _resource_transition_batch(tex_info->main_texture, subresource, planes, D3D12_RESOURCE_STATE_COPY_SOURCE); + _resource_transition_batch(tex_info, subresource, planes, D3D12_RESOURCE_STATE_COPY_DEST); + + CommandBufferInfo::FamilyFallbackCopy ffc; + ffc.texture = tex_info; + ffc.subresource = subresource; + ffc.mipmap = j; + ffc.dst_wanted_state = wanted_state; + + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; + cmd_buf_info->family_fallback_copies.resize(cmd_buf_info->family_fallback_copies.size() + 1); + cmd_buf_info->family_fallback_copies[cmd_buf_info->family_fallback_copy_count] = ffc; + cmd_buf_info->family_fallback_copy_count++; + + tex_info->main_texture->states_ptr->xfamily_fallback.subresources_dirty[subres_qword] &= ~subres_mask; + } + continue; + } + } + + _resource_transition_batch(tex_info, subresource, planes, wanted_state); } } } @@ -3927,7 +3987,56 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff } if (p_set_index == shader_info_in->sets.size() - 1) { - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + + // [[CROSS_FAMILY_FALLBACK]]. 
+ for (uint32_t i = 0; i < cmd_buf_info->family_fallback_copy_count; i++) { + const CommandBufferInfo::FamilyFallbackCopy &ffc = cmd_buf_info->family_fallback_copies[i]; + + D3D12_TEXTURE_COPY_LOCATION dst_tex = {}; + dst_tex.pResource = ffc.texture->resource; + dst_tex.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst_tex.SubresourceIndex = ffc.subresource; + + D3D12_TEXTURE_COPY_LOCATION src_tex = {}; + src_tex.pResource = ffc.texture->main_texture->resource; + src_tex.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src_tex.SubresourceIndex = ffc.subresource; + + const ResourceInfo::States::CrossFamillyFallback &xfamily = ffc.texture->main_texture->owner_info.states.xfamily_fallback; + if (xfamily.interim_buffer.Get()) { + // Must copy via a buffer due to reinterpret-copy known not to be available for these data types. + D3D12_TEXTURE_COPY_LOCATION buf_loc = {}; + buf_loc.pResource = xfamily.interim_buffer.Get(); + buf_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + buf_loc.PlacedFootprint.Offset = 0; + buf_loc.PlacedFootprint.Footprint.Format = ffc.texture->main_texture->desc.Format; + buf_loc.PlacedFootprint.Footprint.Width = MAX(1u, ffc.texture->main_texture->desc.Width >> ffc.mipmap); + buf_loc.PlacedFootprint.Footprint.Height = MAX(1u, ffc.texture->main_texture->desc.Height >> ffc.mipmap); + buf_loc.PlacedFootprint.Footprint.Depth = MAX(1u, (uint32_t)ffc.texture->main_texture->desc.Depth() >> ffc.mipmap); + buf_loc.PlacedFootprint.Footprint.RowPitch = STEPIFY(buf_loc.PlacedFootprint.Footprint.Width * sizeof(uint16_t), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + + D3D12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(xfamily.interim_buffer.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COPY_DEST); + cmd_buf_info->cmd_list->ResourceBarrier(1, &barrier); + + cmd_buf_info->cmd_list->CopyTextureRegion(&buf_loc, 0, 0, 0, &src_tex, nullptr); + + barrier = CD3DX12_RESOURCE_BARRIER::Transition(xfamily.interim_buffer.Get(), 
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE); + cmd_buf_info->cmd_list->ResourceBarrier(1, &barrier); + + buf_loc.PlacedFootprint.Footprint.Format = ffc.texture->desc.Format; + cmd_buf_info->cmd_list->CopyTextureRegion(&dst_tex, 0, 0, 0, &buf_loc, nullptr); + } else { + // Direct copy is possible. + cmd_buf_info->cmd_list->CopyTextureRegion(&dst_tex, 0, 0, 0, &src_tex, nullptr); + } + + // Set the specific SRV state we wanted from the beginning to the alternative version of the texture. + _resource_transition_batch(ffc.texture, ffc.subresource, 1, ffc.dst_wanted_state); + } + cmd_buf_info->family_fallback_copy_count = 0; + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); } } @@ -4241,17 +4350,17 @@ void RenderingDeviceDriverD3D12::command_clear_buffer(CommandBufferID p_cmd_buff frames[frame_idx].desc_heap_walkers.aux.advance(); } -void RenderingDeviceDriverD3D12::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView<BufferCopyRegion> p_regions) { +void RenderingDeviceDriverD3D12::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_buf_locfer, VectorView<BufferCopyRegion> p_regions) { const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; BufferInfo *src_buf_info = (BufferInfo *)p_src_buffer.id; - BufferInfo *dst_buf_info = (BufferInfo *)p_dst_buffer.id; + BufferInfo *buf_loc_info = (BufferInfo *)p_buf_locfer.id; _resource_transition_batch(src_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); - _resource_transition_batch(dst_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); + _resource_transition_batch(buf_loc_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); for (uint32_t i = 0; i < p_regions.size(); i++) { - cmd_buf_info->cmd_list->CopyBufferRegion(dst_buf_info->resource, p_regions[i].dst_offset, src_buf_info->resource, p_regions[i].src_offset, p_regions[i].size); + 
cmd_buf_info->cmd_list->CopyBufferRegion(buf_loc_info->resource, p_regions[i].dst_offset, src_buf_info->resource, p_regions[i].src_offset, p_regions[i].size); } } @@ -4319,12 +4428,29 @@ void RenderingDeviceDriverD3D12::command_resolve_texture(CommandBufferID p_cmd_b void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) { const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; TextureInfo *tex_info = (TextureInfo *)p_texture.id; + if (tex_info->main_texture) { + tex_info = tex_info->main_texture; + } + + auto _transition_subresources = [&](D3D12_RESOURCE_STATES p_new_state) { + for (uint32_t i = 0; i < p_subresources.layer_count; i++) { + for (uint32_t j = 0; j < p_subresources.mipmap_count; j++) { + UINT subresource = D3D12CalcSubresource( + p_subresources.base_mipmap + j, + p_subresources.base_layer + i, + 0, + tex_info->desc.MipLevels, + tex_info->desc.ArraySize()); + _resource_transition_batch(tex_info, subresource, 1, p_new_state); + } + } + _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + }; - ID3D12Resource *res_to_clear = tex_info->main_texture ? tex_info->main_texture : tex_info->resource; if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { // Clear via RTV. 
- if (frames[frame_idx].desc_heap_walkers.rtv.is_at_eof()) { + if (frames[frame_idx].desc_heap_walkers.rtv.get_free_handles() < p_subresources.mipmap_count) { if (!frames[frame_idx].desc_heaps_exhausted_reported.rtv) { frames[frame_idx].desc_heaps_exhausted_reported.rtv = true; ERR_FAIL_MSG( @@ -4335,37 +4461,29 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c } } - D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = _make_rtv_for_texture(tex_info, p_subresources.base_mipmap, p_subresources.base_layer, p_subresources.layer_count, false); - rtv_desc.Format = tex_info->aliasing_hack.main_uav_desc.Format; + _transition_subresources(D3D12_RESOURCE_STATE_RENDER_TARGET); - for (uint32_t i = 0; i < p_subresources.layer_count; i++) { - for (uint32_t j = 0; j < p_subresources.mipmap_count; j++) { - UINT subresource = D3D12CalcSubresource( - p_subresources.base_mipmap + j, - p_subresources.base_layer + i, - 0, - tex_info->desc.MipLevels, - tex_info->desc.ArraySize()); - _resource_transition_batch(tex_info, subresource, 1, D3D12_RESOURCE_STATE_RENDER_TARGET, tex_info->main_texture); - } - } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + for (uint32_t i = 0; i < p_subresources.mipmap_count; i++) { + D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = _make_rtv_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, p_subresources.layer_count, false); + rtv_desc.Format = tex_info->view_descs.uav.Format; + device->CreateRenderTargetView( + tex_info->resource, + &rtv_desc, + frames[frame_idx].desc_heap_walkers.rtv.get_curr_cpu_handle()); - device->CreateRenderTargetView( - res_to_clear, - &rtv_desc, - frames[frame_idx].desc_heap_walkers.rtv.get_curr_cpu_handle()); - cmd_buf_info->cmd_list->ClearRenderTargetView( - frames[frame_idx].desc_heap_walkers.rtv.get_curr_cpu_handle(), - p_color.components, - 0, - nullptr); - frames[frame_idx].desc_heap_walkers.rtv.advance(); + cmd_buf_info->cmd_list->ClearRenderTargetView( + 
frames[frame_idx].desc_heap_walkers.rtv.get_curr_cpu_handle(), + p_color.components, + 0, + nullptr); + + frames[frame_idx].desc_heap_walkers.rtv.advance(); + } } else { // Clear via UAV. _command_check_descriptor_sets(p_cmd_buffer); - if (frames[frame_idx].desc_heap_walkers.resources.is_at_eof()) { + if (frames[frame_idx].desc_heap_walkers.resources.get_free_handles() < p_subresources.mipmap_count) { if (!frames[frame_idx].desc_heaps_exhausted_reported.resources) { frames[frame_idx].desc_heaps_exhausted_reported.resources = true; ERR_FAIL_MSG( @@ -4375,7 +4493,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c return; } } - if (frames[frame_idx].desc_heap_walkers.aux.is_at_eof()) { + if (frames[frame_idx].desc_heap_walkers.aux.get_free_handles() < p_subresources.mipmap_count) { if (!frames[frame_idx].desc_heaps_exhausted_reported.aux) { frames[frame_idx].desc_heaps_exhausted_reported.aux = true; ERR_FAIL_MSG( @@ -4386,47 +4504,38 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c } } - for (uint32_t i = 0; i < p_subresources.layer_count; i++) { - for (uint32_t j = 0; j < p_subresources.mipmap_count; j++) { - UINT subresource = D3D12CalcSubresource( - p_subresources.base_mipmap + j, - p_subresources.base_layer + i, - 0, - tex_info->desc.MipLevels, - tex_info->desc.ArraySize()); - _resource_transition_batch(tex_info, subresource, 1, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, tex_info->main_texture); - } - } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); - - device->CreateUnorderedAccessView( - res_to_clear, - nullptr, - &tex_info->aliasing_hack.main_uav_desc, - frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle()); - - device->CopyDescriptorsSimple( - 1, - frames[frame_idx].desc_heap_walkers.resources.get_curr_cpu_handle(), - frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle(), - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - - UINT values[4] = { - (UINT)p_color.get_r8(), - 
(UINT)p_color.get_g8(), - (UINT)p_color.get_b8(), - (UINT)p_color.get_a8(), - }; - cmd_buf_info->cmd_list->ClearUnorderedAccessViewUint( - frames[frame_idx].desc_heap_walkers.resources.get_curr_gpu_handle(), - frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle(), - res_to_clear, - values, - 0, - nullptr); + _transition_subresources(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + + for (uint32_t i = 0; i < p_subresources.mipmap_count; i++) { + D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = _make_ranged_uav_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, p_subresources.layer_count, false); + device->CreateUnorderedAccessView( + tex_info->resource, + nullptr, + &uav_desc, + frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle()); + device->CopyDescriptorsSimple( + 1, + frames[frame_idx].desc_heap_walkers.resources.get_curr_cpu_handle(), + frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle(), + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + UINT values[4] = { + (UINT)p_color.get_r8(), + (UINT)p_color.get_g8(), + (UINT)p_color.get_b8(), + (UINT)p_color.get_a8(), + }; + cmd_buf_info->cmd_list->ClearUnorderedAccessViewUint( + frames[frame_idx].desc_heap_walkers.resources.get_curr_gpu_handle(), + frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle(), + tex_info->resource, + values, + 0, + nullptr); - frames[frame_idx].desc_heap_walkers.resources.advance(); - frames[frame_idx].desc_heap_walkers.aux.advance(); + frames[frame_idx].desc_heap_walkers.resources.advance(); + frames[frame_idx].desc_heap_walkers.aux.advance(); + } } } @@ -4436,7 +4545,7 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID TextureInfo *tex_info = (TextureInfo *)p_dst_texture.id; if (buf_info->flags.is_for_upload) { - _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE, nullptr); + _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); } uint32_t pixel_size = 
get_image_format_pixel_size(tex_info->format); @@ -4472,7 +4581,7 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID tex_info->desc.ArraySize()); CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource); - _resource_transition_batch(tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST, tex_info->main_texture); + _resource_transition_batch(tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); } _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); @@ -4497,12 +4606,12 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID } } -void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView<BufferTextureCopyRegion> p_regions) { +void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_buf_locfer, VectorView<BufferTextureCopyRegion> p_regions) { const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; TextureInfo *tex_info = (TextureInfo *)p_src_texture.id; - BufferInfo *buf_info = (BufferInfo *)p_dst_buffer.id; + BufferInfo *buf_info = (BufferInfo *)p_buf_locfer.id; - _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST, nullptr); + _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); uint32_t block_w = 0, block_h = 0; get_compressed_image_format_block_dimensions(tex_info->format, block_w, block_h); @@ -4516,7 +4625,7 @@ void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID tex_info->desc.MipLevels, tex_info->desc.ArraySize()); - _resource_transition_batch(tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE, tex_info->main_texture); + _resource_transition_batch(tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); } 
_resource_transitions_flush(cmd_buf_info->cmd_list.Get()); @@ -4664,7 +4773,7 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd 0, p_texture_info->desc.MipLevels, p_texture_info->desc.ArraySize()); - _resource_transition_batch(p_texture_info, subresource, planes, p_states, nullptr); + _resource_transition_batch(p_texture_info, subresource, planes, p_states); } } }; @@ -5927,17 +6036,23 @@ Error RenderingDeviceDriverD3D12::_initialize_device() { HRESULT res; if (is_in_developer_mode()) { + typedef HRESULT(WINAPI * PFN_D3D12_ENABLE_EXPERIMENTAL_FEATURES)(_In_ UINT, _In_count_(NumFeatures) const IID *, _In_opt_count_(NumFeatures) void *, _In_opt_count_(NumFeatures) UINT *); + PFN_D3D12_ENABLE_EXPERIMENTAL_FEATURES d3d_D3D12EnableExperimentalFeatures = (PFN_D3D12_ENABLE_EXPERIMENTAL_FEATURES)(void *)GetProcAddress(context_driver->lib_d3d12, "D3D12EnableExperimentalFeatures"); + ERR_FAIL_NULL_V(d3d_D3D12EnableExperimentalFeatures, ERR_CANT_CREATE); + UUID experimental_features[] = { D3D12ExperimentalShaderModels }; - D3D12EnableExperimentalFeatures(1, experimental_features, nullptr, nullptr); + d3d_D3D12EnableExperimentalFeatures(1, experimental_features, nullptr, nullptr); } ID3D12DeviceFactory *device_factory = context_driver->device_factory_get(); if (device_factory != nullptr) { res = device_factory->CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(device.GetAddressOf())); } else { - res = D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(device.GetAddressOf())); - } + PFN_D3D12_CREATE_DEVICE d3d_D3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)(void *)GetProcAddress(context_driver->lib_d3d12, "D3D12CreateDevice"); + ERR_FAIL_NULL_V(d3d_D3D12CreateDevice, ERR_CANT_CREATE); + res = d3d_D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(device.GetAddressOf())); + } ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "D3D12CreateDevice failed with error " + vformat("0x%08ux", 
(uint64_t)res) + "."); if (context_driver->use_validation_layers()) { @@ -6036,20 +6151,44 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { multiview_capabilities.is_supported = false; subgroup_capabilities.size = 0; subgroup_capabilities.wave_ops_supported = false; - shader_capabilities.shader_model = D3D_SHADER_MODEL_6_0; + shader_capabilities.shader_model = (D3D_SHADER_MODEL)0; shader_capabilities.native_16bit_ops = false; storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = false; format_capabilities.relaxed_casting_supported = false; - // Check shader model. - D3D12_FEATURE_DATA_SHADER_MODEL shader_model = {}; - shader_model.HighestShaderModel = MIN(D3D_HIGHEST_SHADER_MODEL, D3D_SHADER_MODEL_6_6); - res = device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model)); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + { + static const D3D_SHADER_MODEL SMS_TO_CHECK[] = { + D3D_SHADER_MODEL_6_6, + D3D_SHADER_MODEL_6_5, + D3D_SHADER_MODEL_6_4, + D3D_SHADER_MODEL_6_3, + D3D_SHADER_MODEL_6_2, + D3D_SHADER_MODEL_6_1, + D3D_SHADER_MODEL_6_0, // Determined by NIR (dxil_min_shader_model). + }; + + D3D12_FEATURE_DATA_SHADER_MODEL shader_model = {}; + for (uint32_t i = 0; i < ARRAY_SIZE(SMS_TO_CHECK); i++) { + shader_model.HighestShaderModel = SMS_TO_CHECK[i]; + res = device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model)); + if (SUCCEEDED(res)) { + shader_capabilities.shader_model = shader_model.HighestShaderModel; + break; + } + if (res == E_INVALIDARG) { + continue; // Must assume the device doesn't know about the SM just checked. 
+ } + ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + } + +#define D3D_SHADER_MODEL_TO_STRING(m_sm) vformat("%d.%d", (m_sm >> 4), (m_sm & 0xf)) - shader_capabilities.shader_model = shader_model.HighestShaderModel; - print_verbose("- Shader:"); - print_verbose(" model: " + itos(shader_capabilities.shader_model >> 4) + "." + itos(shader_capabilities.shader_model & 0xf)); + ERR_FAIL_COND_V_MSG(!shader_capabilities.shader_model, ERR_UNAVAILABLE, + vformat("No support for any of the suitable shader models (%s-%s) has been found.", D3D_SHADER_MODEL_TO_STRING(SMS_TO_CHECK[ARRAY_SIZE(SMS_TO_CHECK) - 1]), D3D_SHADER_MODEL_TO_STRING(SMS_TO_CHECK[0]))); + + print_verbose("- Shader:"); + print_verbose(" model: " + D3D_SHADER_MODEL_TO_STRING(shader_capabilities.shader_model)); + } D3D12_FEATURE_DATA_D3D12_OPTIONS options = {}; res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)); @@ -6132,7 +6271,15 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { } if (format_capabilities.relaxed_casting_supported) { +#if 0 print_verbose("- Relaxed casting supported"); +#else + // Certain configurations (Windows 11 with an updated NVIDIA driver) crash when using relaxed casting. + // Therefore, we disable it temporarily until we can ensure that it's reliable. + // There are fallbacks in place that work in every case, if less efficient. 
+ format_capabilities.relaxed_casting_supported = false; + print_verbose("- Relaxed casting supported (but disabled for now)"); +#endif } else { print_verbose("- Relaxed casting not supported"); } @@ -6178,6 +6325,7 @@ Error RenderingDeviceDriverD3D12::_initialize_allocator() { D3D12MA::ALLOCATOR_DESC allocator_desc = {}; allocator_desc.pDevice = device.Get(); allocator_desc.pAdapter = adapter.Get(); + allocator_desc.Flags = D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED; HRESULT res = D3D12MA::CreateAllocator(&allocator_desc, &allocator); ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "D3D12MA::CreateAllocator failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index 595ee30966..852cb9db0e 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -33,6 +33,7 @@ #include "core/templates/hash_map.h" #include "core/templates/paged_allocator.h" +#include "core/templates/self_list.h" #include "servers/rendering/rendering_device_driver.h" #if defined(__GNUC__) && !defined(__clang__) @@ -215,17 +216,21 @@ private: struct States { // As many subresources as mipmaps * layers; planes (for depth-stencil) are tracked together. TightLocalVector<D3D12_RESOURCE_STATES> subresource_states; // Used only if not a view. - uint32_t last_batch_transitioned_to_uav = 0; uint32_t last_batch_with_uav_barrier = 0; + struct CrossFamillyFallback { + TightLocalVector<uint64_t> subresources_dirty; + ComPtr<ID3D12Resource> interim_buffer; + ComPtr<D3D12MA::Allocation> interim_buffer_alloc; + } xfamily_fallback; // [[CROSS_FAMILY_FALLBACK]]. }; - ID3D12Resource *resource = nullptr; // Non-null even if a view. + ID3D12Resource *resource = nullptr; // Non-null even if not owned. struct { ComPtr<ID3D12Resource> resource; ComPtr<D3D12MA::Allocation> allocation; States states; - } owner_info; // All empty if a view. 
- States *states_ptr = nullptr; // Own or from another if a view. + } owner_info; // All empty if the resource is not owned. + States *states_ptr = nullptr; // Own or from another if it doesn't own the D3D12 resource. }; struct BarrierRequest { @@ -257,7 +262,7 @@ private: uint64_t frame_barriers_cpu_time = 0; #endif - void _resource_transition_batch(ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state, ID3D12Resource *p_resource_override = nullptr); + void _resource_transition_batch(ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state); void _resource_transitions_flush(ID3D12GraphicsCommandList *p_cmd_list); /*****************/ @@ -298,16 +303,12 @@ private: D3D12_UNORDERED_ACCESS_VIEW_DESC uav; } view_descs = {}; - ID3D12Resource *main_texture = nullptr; - struct { - D3D12_UNORDERED_ACCESS_VIEW_DESC main_uav_desc; - struct { - HashMap<DXGI_FORMAT, ComPtr<ID3D12Resource>> aliases; // Key is the DXGI format family. 
- } owner_info = {}; - } aliasing_hack = {}; // [[CROSS_FAMILY_ALIASING]] + TextureInfo *main_texture = nullptr; UINT mapped_subresource = UINT_MAX; + SelfList<TextureInfo> pending_clear{ this }; }; + SelfList<TextureInfo>::List textures_pending_clear; HashMap<DXGI_FORMAT, uint32_t> format_sample_counts_mask_cache; @@ -331,6 +332,10 @@ public: virtual void texture_unmap(TextureID p_texture) override final; virtual BitField<TextureUsageBits> texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final; +private: + TextureID _texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps); + +public: /*****************/ /**** SAMPLER ****/ /*****************/ @@ -413,8 +418,7 @@ private: public: virtual CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) override; - virtual Error command_queue_execute(CommandQueueID p_cmd_queue, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) override; - virtual Error command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) override; + virtual Error command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) override; virtual void command_queue_free(CommandQueueID p_cmd_queue) override; private: @@ -460,6 +464,16 @@ private: RenderPassState render_pass_state; bool descriptor_heaps_set = false; + + // [[CROSS_FAMILY_FALLBACK]]. 
+ struct FamilyFallbackCopy { + TextureInfo *texture = nullptr; + uint32_t subresource = 0; + uint32_t mipmap = 0; + D3D12_RESOURCE_STATES dst_wanted_state = {}; + }; + LocalVector<FamilyFallbackCopy> family_fallback_copies; + uint32_t family_fallback_copy_count = 0; }; public: @@ -514,6 +528,7 @@ private: }; D3D12_RENDER_TARGET_VIEW_DESC _make_rtv_for_texture(const TextureInfo *p_texture_info, uint32_t p_mipmap_offset, uint32_t p_layer_offset, uint32_t p_layers, bool p_add_bases = true); + D3D12_UNORDERED_ACCESS_VIEW_DESC _make_ranged_uav_for_texture(const TextureInfo *p_texture_info, uint32_t p_mipmap_offset, uint32_t p_layer_offset, uint32_t p_layers, bool p_add_bases = true); D3D12_DEPTH_STENCIL_VIEW_DESC _make_dsv_for_texture(const TextureInfo *p_texture_info); FramebufferID _framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height, bool p_is_screen); @@ -759,6 +774,7 @@ public: virtual void command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) override final; virtual void command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) override final; +public: virtual void command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<BufferTextureCopyRegion> p_regions) override final; virtual void command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView<BufferTextureCopyRegion> p_regions) override final; diff --git a/drivers/gles3/effects/copy_effects.cpp 
b/drivers/gles3/effects/copy_effects.cpp index 29e7de873b..6e64652982 100644 --- a/drivers/gles3/effects/copy_effects.cpp +++ b/drivers/gles3/effects/copy_effects.cpp @@ -155,12 +155,14 @@ void CopyEffects::copy_to_and_from_rect(const Rect2 &p_rect) { draw_screen_quad(); } -void CopyEffects::copy_screen() { - bool success = copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_DEFAULT); +void CopyEffects::copy_screen(float p_multiply) { + bool success = copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_SCREEN); if (!success) { return; } + copy.shader.version_set_uniform(CopyShaderGLES3::MULTIPLY, p_multiply, copy.shader_version, CopyShaderGLES3::MODE_SCREEN); + draw_screen_triangle(); } @@ -205,8 +207,8 @@ void CopyEffects::bilinear_blur(GLuint p_source_texture, int p_mipmap_count, con glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffers[i % 2]); source_region = dest_region; } - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + glBindFramebuffer(GL_READ_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); glDeleteFramebuffers(2, framebuffers); } @@ -272,7 +274,7 @@ void CopyEffects::gaussian_blur(GLuint p_source_texture, int p_mipmap_count, con source_region = dest_region; normalized_source_region = normalized_dest_region; } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); glDeleteFramebuffers(1, &framebuffer); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); diff --git a/drivers/gles3/effects/copy_effects.h b/drivers/gles3/effects/copy_effects.h index e65ebbce03..1f7b3ee689 100644 --- a/drivers/gles3/effects/copy_effects.h +++ b/drivers/gles3/effects/copy_effects.h @@ -33,7 +33,7 @@ #ifdef GLES3_ENABLED -#include "drivers/gles3/shaders/copy.glsl.gen.h" +#include "drivers/gles3/shaders/effects/copy.glsl.gen.h" namespace GLES3 { @@ 
-64,7 +64,7 @@ public: void copy_to_rect(const Rect2 &p_rect); void copy_to_rect_3d(const Rect2 &p_rect, float p_layer, int p_type, float p_lod = 0.0f); void copy_to_and_from_rect(const Rect2 &p_rect); - void copy_screen(); + void copy_screen(float p_multiply = 1.0); void copy_cube_to_rect(const Rect2 &p_rect); void copy_cube_to_panorama(float p_mip_level); void bilinear_blur(GLuint p_source_texture, int p_mipmap_count, const Rect2i &p_region); diff --git a/drivers/gles3/effects/glow.cpp b/drivers/gles3/effects/glow.cpp new file mode 100644 index 0000000000..9728b089aa --- /dev/null +++ b/drivers/gles3/effects/glow.cpp @@ -0,0 +1,173 @@ +/**************************************************************************/ +/* glow.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifdef GLES3_ENABLED + +#include "glow.h" +#include "../storage/texture_storage.h" + +using namespace GLES3; + +Glow *Glow::singleton = nullptr; + +Glow *Glow::get_singleton() { + return singleton; +} + +Glow::Glow() { + singleton = this; + + glow.shader.initialize(); + glow.shader_version = glow.shader.version_create(); + + { // Screen Triangle. + glGenBuffers(1, &screen_triangle); + glBindBuffer(GL_ARRAY_BUFFER, screen_triangle); + + const float qv[6] = { + -1.0f, + -1.0f, + 3.0f, + -1.0f, + -1.0f, + 3.0f, + }; + + glBufferData(GL_ARRAY_BUFFER, sizeof(float) * 6, qv, GL_STATIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, 0); //unbind + + glGenVertexArrays(1, &screen_triangle_array); + glBindVertexArray(screen_triangle_array); + glBindBuffer(GL_ARRAY_BUFFER, screen_triangle); + glVertexAttribPointer(RS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(float) * 2, nullptr); + glEnableVertexAttribArray(RS::ARRAY_VERTEX); + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER, 0); //unbind + } +} + +Glow::~Glow() { + glDeleteBuffers(1, &screen_triangle); + glDeleteVertexArrays(1, &screen_triangle_array); + + glow.shader.version_free(glow.shader_version); + + singleton = nullptr; +} + +void Glow::_draw_screen_triangle() { + glBindVertexArray(screen_triangle_array); + glDrawArrays(GL_TRIANGLES, 0, 3); + glBindVertexArray(0); +} + +void Glow::process_glow(GLuint p_source_color, Size2i p_size, const Glow::GLOWLEVEL *p_glow_buffers, uint32_t p_view, bool p_use_multiview) { + ERR_FAIL_COND(p_source_color == 0); + ERR_FAIL_COND(p_glow_buffers[3].color == 0); + + // Reset some OpenGL state... 
+ glDisable(GL_BLEND); + glDisable(GL_DEPTH_TEST); + glDepthMask(GL_FALSE); + + // Start with our filter pass + { + glBindFramebuffer(GL_FRAMEBUFFER, p_glow_buffers[0].fbo); + glViewport(0, 0, p_glow_buffers[0].size.x, p_glow_buffers[0].size.y); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(p_use_multiview ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D, p_source_color); + + uint64_t specialization = p_use_multiview ? GlowShaderGLES3::USE_MULTIVIEW : 0; + bool success = glow.shader.version_bind_shader(glow.shader_version, GlowShaderGLES3::MODE_FILTER, specialization); + if (!success) { + return; + } + + glow.shader.version_set_uniform(GlowShaderGLES3::PIXEL_SIZE, 1.0 / p_glow_buffers[0].size.x, 1.0 / p_glow_buffers[0].size.y, glow.shader_version, GlowShaderGLES3::MODE_FILTER, specialization); + glow.shader.version_set_uniform(GlowShaderGLES3::VIEW, float(p_view), glow.shader_version, GlowShaderGLES3::MODE_FILTER, specialization); + glow.shader.version_set_uniform(GlowShaderGLES3::LUMINANCE_MULTIPLIER, luminance_multiplier, glow.shader_version, GlowShaderGLES3::MODE_FILTER, specialization); + glow.shader.version_set_uniform(GlowShaderGLES3::GLOW_BLOOM, glow_bloom, glow.shader_version, GlowShaderGLES3::MODE_FILTER, specialization); + glow.shader.version_set_uniform(GlowShaderGLES3::GLOW_HDR_THRESHOLD, glow_hdr_bleed_threshold, glow.shader_version, GlowShaderGLES3::MODE_FILTER, specialization); + glow.shader.version_set_uniform(GlowShaderGLES3::GLOW_HDR_SCALE, glow_hdr_bleed_scale, glow.shader_version, GlowShaderGLES3::MODE_FILTER, specialization); + glow.shader.version_set_uniform(GlowShaderGLES3::GLOW_LUMINANCE_CAP, glow_hdr_luminance_cap, glow.shader_version, GlowShaderGLES3::MODE_FILTER, specialization); + + _draw_screen_triangle(); + } + + // Continue with downsampling + { + bool success = glow.shader.version_bind_shader(glow.shader_version, GlowShaderGLES3::MODE_DOWNSAMPLE, 0); + if (!success) { + return; + } + + for (int i = 1; i < 4; i++) { + 
glBindFramebuffer(GL_FRAMEBUFFER, p_glow_buffers[i].fbo); + glViewport(0, 0, p_glow_buffers[i].size.x, p_glow_buffers[i].size.y); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, p_glow_buffers[i - 1].color); + + glow.shader.version_set_uniform(GlowShaderGLES3::PIXEL_SIZE, 1.0 / p_glow_buffers[i].size.x, 1.0 / p_glow_buffers[i].size.y, glow.shader_version, GlowShaderGLES3::MODE_DOWNSAMPLE); + + _draw_screen_triangle(); + } + } + + // Now upsample + { + bool success = glow.shader.version_bind_shader(glow.shader_version, GlowShaderGLES3::MODE_UPSAMPLE, 0); + if (!success) { + return; + } + + for (int i = 2; i >= 0; i--) { + glBindFramebuffer(GL_FRAMEBUFFER, p_glow_buffers[i].fbo); + glViewport(0, 0, p_glow_buffers[i].size.x, p_glow_buffers[i].size.y); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, p_glow_buffers[i + 1].color); + + glow.shader.version_set_uniform(GlowShaderGLES3::PIXEL_SIZE, 1.0 / p_glow_buffers[i].size.x, 1.0 / p_glow_buffers[i].size.y, glow.shader_version, GlowShaderGLES3::MODE_UPSAMPLE); + + _draw_screen_triangle(); + } + } + + glDisable(GL_BLEND); + glEnable(GL_DEPTH_TEST); + glDepthMask(GL_TRUE); + glUseProgram(0); + glBindTexture(GL_TEXTURE_2D, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); +} + +#endif // GLES3_ENABLED diff --git a/drivers/gles3/effects/glow.h b/drivers/gles3/effects/glow.h new file mode 100644 index 0000000000..a1be6e1f4c --- /dev/null +++ b/drivers/gles3/effects/glow.h @@ -0,0 +1,89 @@ +/**************************************************************************/ +/* glow.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef GLOW_GLES3_H +#define GLOW_GLES3_H + +#ifdef GLES3_ENABLED + +#include "drivers/gles3/shaders/effects/glow.glsl.gen.h" + +namespace GLES3 { + +class Glow { +private: + static Glow *singleton; + + struct GLOW { + GlowShaderGLES3 shader; + RID shader_version; + } glow; + + float luminance_multiplier = 1.0; + + float glow_intensity = 1.0; + float glow_bloom = 0.0; + float glow_hdr_bleed_threshold = 1.0; + float glow_hdr_bleed_scale = 2.0; + float glow_hdr_luminance_cap = 12.0; + + // Use for full-screen effects. Slightly more efficient than screen_quad as this eliminates pixel overdraw along the diagonal. 
+ GLuint screen_triangle = 0; + GLuint screen_triangle_array = 0; + + void _draw_screen_triangle(); + +public: + struct GLOWLEVEL { + Size2i size; + GLuint color = 0; + GLuint fbo = 0; + }; + + static Glow *get_singleton(); + + Glow(); + ~Glow(); + + void set_intensity(float p_value) { glow_intensity = p_value; } + void set_luminance_multiplier(float p_luminance_multiplier) { luminance_multiplier = p_luminance_multiplier; } + void set_glow_bloom(float p_bloom) { glow_bloom = p_bloom; } + void set_glow_hdr_bleed_threshold(float p_threshold) { glow_hdr_bleed_threshold = p_threshold; } + void set_glow_hdr_bleed_scale(float p_scale) { glow_hdr_bleed_scale = p_scale; } + void set_glow_hdr_luminance_cap(float p_cap) { glow_hdr_luminance_cap = p_cap; } + + void process_glow(GLuint p_source_color, Size2i p_size, const GLOWLEVEL *p_glow_buffers, uint32_t p_view = 0, bool p_use_multiview = false); +}; + +} //namespace GLES3 + +#endif // GLES3_ENABLED + +#endif // GLOW_GLES3_H diff --git a/drivers/gles3/effects/post_effects.cpp b/drivers/gles3/effects/post_effects.cpp new file mode 100644 index 0000000000..8ad872f319 --- /dev/null +++ b/drivers/gles3/effects/post_effects.cpp @@ -0,0 +1,153 @@ +/**************************************************************************/ +/* post_effects.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifdef GLES3_ENABLED + +#include "post_effects.h" +#include "../storage/texture_storage.h" + +using namespace GLES3; + +PostEffects *PostEffects::singleton = nullptr; + +PostEffects *PostEffects::get_singleton() { + return singleton; +} + +PostEffects::PostEffects() { + singleton = this; + + post.shader.initialize(); + post.shader_version = post.shader.version_create(); + post.shader.version_bind_shader(post.shader_version, PostShaderGLES3::MODE_DEFAULT); + + { // Screen Triangle. 
+ glGenBuffers(1, &screen_triangle); + glBindBuffer(GL_ARRAY_BUFFER, screen_triangle); + + const float qv[6] = { + -1.0f, + -1.0f, + 3.0f, + -1.0f, + -1.0f, + 3.0f, + }; + + glBufferData(GL_ARRAY_BUFFER, sizeof(float) * 6, qv, GL_STATIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, 0); //unbind + + glGenVertexArrays(1, &screen_triangle_array); + glBindVertexArray(screen_triangle_array); + glBindBuffer(GL_ARRAY_BUFFER, screen_triangle); + glVertexAttribPointer(RS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(float) * 2, nullptr); + glEnableVertexAttribArray(RS::ARRAY_VERTEX); + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER, 0); //unbind + } +} + +PostEffects::~PostEffects() { + singleton = nullptr; + glDeleteBuffers(1, &screen_triangle); + glDeleteVertexArrays(1, &screen_triangle_array); + post.shader.version_free(post.shader_version); +} + +void PostEffects::_draw_screen_triangle() { + glBindVertexArray(screen_triangle_array); + glDrawArrays(GL_TRIANGLES, 0, 3); + glBindVertexArray(0); +} + +void PostEffects::post_copy(GLuint p_dest_framebuffer, Size2i p_dest_size, GLuint p_source_color, Size2i p_source_size, float p_luminance_multiplier, const Glow::GLOWLEVEL *p_glow_buffers, float p_glow_intensity, uint32_t p_view, bool p_use_multiview) { + glDisable(GL_DEPTH_TEST); + glDepthMask(GL_FALSE); + glDisable(GL_BLEND); + + glBindFramebuffer(GL_FRAMEBUFFER, p_dest_framebuffer); + glViewport(0, 0, p_dest_size.x, p_dest_size.y); + + PostShaderGLES3::ShaderVariant mode = PostShaderGLES3::MODE_DEFAULT; + uint64_t flags = 0; + if (p_use_multiview) { + flags |= PostShaderGLES3::USE_MULTIVIEW; + } + if (p_glow_buffers != nullptr) { + flags |= PostShaderGLES3::USE_GLOW; + } + if (p_luminance_multiplier != 1.0) { + flags |= PostShaderGLES3::USE_LUMINANCE_MULTIPLIER; + } + + bool success = post.shader.version_bind_shader(post.shader_version, mode, flags); + if (!success) { + return; + } + + GLenum texture_target = p_use_multiview ? 
GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; + glActiveTexture(GL_TEXTURE0); + glBindTexture(texture_target, p_source_color); + glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + + if (p_glow_buffers != nullptr) { + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, p_glow_buffers[0].color); + + post.shader.version_set_uniform(PostShaderGLES3::PIXEL_SIZE, 1.0 / p_source_size.x, 1.0 / p_source_size.y, post.shader_version, mode, flags); + post.shader.version_set_uniform(PostShaderGLES3::GLOW_INTENSITY, p_glow_intensity, post.shader_version, mode, flags); + } + + post.shader.version_set_uniform(PostShaderGLES3::VIEW, float(p_view), post.shader_version, mode, flags); + post.shader.version_set_uniform(PostShaderGLES3::LUMINANCE_MULTIPLIER, p_luminance_multiplier, post.shader_version, mode, flags); + + _draw_screen_triangle(); + + // Reset state + if (p_glow_buffers != nullptr) { + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, 0); + } + + // Return back to nearest + glActiveTexture(GL_TEXTURE0); + glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glBindTexture(texture_target, 0); + + glDisable(GL_BLEND); + glEnable(GL_DEPTH_TEST); + glDepthMask(GL_TRUE); + glUseProgram(0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); +} + +#endif // GLES3_ENABLED diff --git a/drivers/gles3/effects/post_effects.h b/drivers/gles3/effects/post_effects.h new file mode 100644 index 0000000000..b90c77d6c7 --- /dev/null +++ b/drivers/gles3/effects/post_effects.h @@ -0,0 +1,69 @@ +/**************************************************************************/ +/* post_effects.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ 
+/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef POST_EFFECTS_GLES3_H +#define POST_EFFECTS_GLES3_H + +#ifdef GLES3_ENABLED + +#include "drivers/gles3/shaders/effects/post.glsl.gen.h" +#include "glow.h" + +namespace GLES3 { + +class PostEffects { +private: + struct Post { + PostShaderGLES3 shader; + RID shader_version; + } post; + + static PostEffects *singleton; + + // Use for full-screen effects. Slightly more efficient than screen_quad as this eliminates pixel overdraw along the diagonal. 
+ GLuint screen_triangle = 0; + GLuint screen_triangle_array = 0; + + void _draw_screen_triangle(); + +public: + static PostEffects *get_singleton(); + + PostEffects(); + ~PostEffects(); + + void post_copy(GLuint p_dest_framebuffer, Size2i p_dest_size, GLuint p_source_color, Size2i p_source_size, float p_luminance_multiplier, const Glow::GLOWLEVEL *p_glow_buffers, float p_glow_intensity, uint32_t p_view = 0, bool p_use_multiview = false); +}; + +} //namespace GLES3 + +#endif // GLES3_ENABLED + +#endif // POST_EFFECTS_GLES3_H diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index 80daa9a907..de990a4222 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -629,14 +629,28 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou state.canvas_instance_batches[state.current_batch_index].material_data = material_data; if (shader_data_cache) { state.canvas_instance_batches[state.current_batch_index].vertex_input_mask = shader_data_cache->vertex_input_mask; - } else { - state.canvas_instance_batches[state.current_batch_index].vertex_input_mask = RS::ARRAY_FORMAT_VERTEX | RS::ARRAY_COLOR | RS::ARRAY_TEX_UV; } } GLES3::CanvasShaderData::BlendMode blend_mode = shader_data_cache ? 
shader_data_cache->blend_mode : GLES3::CanvasShaderData::BLEND_MODE_MIX; - _record_item_commands(ci, p_to_render_target, p_canvas_transform_inverse, current_clip, blend_mode, p_lights, index, batch_broken, r_sdf_used); + if (!ci->repeat_size.x && !ci->repeat_size.y) { + _record_item_commands(ci, p_to_render_target, p_canvas_transform_inverse, current_clip, blend_mode, p_lights, index, batch_broken, r_sdf_used, Point2()); + } else { + Point2 start_pos = ci->repeat_size * -(ci->repeat_times / 2); + Point2 end_pos = ci->repeat_size * ci->repeat_times + ci->repeat_size + start_pos; + Point2 pos = start_pos; + + do { + do { + _record_item_commands(ci, p_to_render_target, p_canvas_transform_inverse, current_clip, blend_mode, p_lights, index, batch_broken, r_sdf_used, pos); + pos.y += ci->repeat_size.y; + } while (pos.y < end_pos.y); + + pos.x += ci->repeat_size.x; + pos.y = start_pos.y; + } while (pos.x < end_pos.x); + } } if (index == 0) { @@ -786,7 +800,7 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou state.last_item_index += index; } -void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used) { +void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, const Point2 &p_offset) { RenderingServer::CanvasItemTextureFilter texture_filter = p_item->texture_filter == RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT ?
state.default_filter : p_item->texture_filter; if (texture_filter != state.canvas_instance_batches[state.current_batch_index].filter) { @@ -804,6 +818,11 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, RID p_rend } Transform2D base_transform = p_canvas_transform_inverse * p_item->final_transform; + + if (p_offset.x || p_offset.y) { + base_transform *= Transform2D(0, p_offset / p_item->xform.get_scale()); + } + Transform2D draw_transform; // Used by transform command Color base_color = p_item->final_modulate; @@ -1689,7 +1708,7 @@ void RasterizerCanvasGLES3::light_update_shadow(RID p_rid, int p_shadow_index, c } glBindVertexArray(0); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); glDepthMask(GL_FALSE); glDisable(GL_DEPTH_TEST); glDisable(GL_SCISSOR_TEST); @@ -1797,7 +1816,7 @@ void RasterizerCanvasGLES3::light_update_directional_shadow(RID p_rid, int p_sha cl->shadow.directional_xform = to_shadow * to_light_xform; glBindVertexArray(0); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); glDepthMask(GL_FALSE); glDisable(GL_DEPTH_TEST); glDisable(GL_SCISSOR_TEST); @@ -1911,7 +1930,7 @@ void RasterizerCanvasGLES3::render_sdf(RID p_render_target, LightOccluderInstanc texture_storage->render_target_sdf_process(p_render_target); //done rendering, process it glBindVertexArray(0); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); } RID RasterizerCanvasGLES3::occluder_polygon_create() { diff --git a/drivers/gles3/rasterizer_canvas_gles3.h b/drivers/gles3/rasterizer_canvas_gles3.h index 2b70df3238..a3762e828e 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.h +++ b/drivers/gles3/rasterizer_canvas_gles3.h @@ -269,7 +269,7 @@ public: RID material; GLES3::CanvasMaterialData *material_data = nullptr; CanvasShaderGLES3::ShaderVariant shader_variant = 
CanvasShaderGLES3::MODE_QUAD; - uint64_t vertex_input_mask; + uint64_t vertex_input_mask = RS::ARRAY_FORMAT_VERTEX | RS::ARRAY_FORMAT_COLOR | RS::ARRAY_FORMAT_TEX_UV; const Item::Command *command = nullptr; Item::Command::Type command_type = Item::Command::TYPE_ANIMATION_SLICE; // Can default to any type that doesn't form a batch. @@ -357,7 +357,7 @@ public: void canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used, RenderingMethod::RenderInfo *r_render_info = nullptr) override; void _render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer = false, RenderingMethod::RenderInfo *r_render_info = nullptr); - void _record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_break_batch, bool &r_sdf_used); + void _record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_break_batch, bool &r_sdf_used, const Point2 &p_offset); void _render_batch(Light *p_lights, uint32_t p_index, RenderingMethod::RenderInfo *r_render_info = nullptr); bool _bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant, uint64_t p_specialization); void _new_batch(bool &r_batch_broken); diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp index f5296f969f..73ee277074 100644 --- a/drivers/gles3/rasterizer_gles3.cpp +++
b/drivers/gles3/rasterizer_gles3.cpp @@ -194,6 +194,11 @@ typedef void(GLAPIENTRY *DebugMessageCallbackARB)(DEBUGPROCARB callback, const v void RasterizerGLES3::initialize() { Engine::get_singleton()->print_header(vformat("OpenGL API %s - Compatibility - Using Device: %s - %s", RS::get_singleton()->get_video_adapter_api_version(), RS::get_singleton()->get_video_adapter_vendor(), RS::get_singleton()->get_video_adapter_name())); + + // FLIP XY Bug: Are more devices affected? + // Confirmed so far: all Adreno 3xx + // ok on some tested Adreno devices: 4xx, 5xx and 6xx + flip_xy_bugfix = GLES3::Config::get_singleton()->adreno_3xx_compatibility; } void RasterizerGLES3::finalize() { @@ -201,6 +206,8 @@ void RasterizerGLES3::finalize() { memdelete(canvas); memdelete(gi); memdelete(fog); + memdelete(post_effects); + memdelete(glow); memdelete(copy_effects); memdelete(light_storage); memdelete(particles_storage); @@ -347,6 +354,8 @@ RasterizerGLES3::RasterizerGLES3() { particles_storage = memnew(GLES3::ParticlesStorage); light_storage = memnew(GLES3::LightStorage); copy_effects = memnew(GLES3::CopyEffects); + glow = memnew(GLES3::Glow); + post_effects = memnew(GLES3::PostEffects); gi = memnew(GLES3::GI); fog = memnew(GLES3::Fog); canvas = memnew(RasterizerCanvasGLES3()); @@ -394,12 +403,22 @@ void RasterizerGLES3::_blit_render_target_to_screen(RID p_render_target, Display } Vector2i screen_rect_end = p_screen_rect.get_end(); + + // Adreno (TM) 3xx devices have a bug that create wrong Landscape rotation of 180 degree + // Reversing both the X and Y axis is equivalent to rotating 180 degrees + bool flip_x = false; + if (flip_xy_bugfix && screen_rect_end.x > screen_rect_end.y) { + flip_y = !flip_y; + flip_x = !flip_x; + } + glBlitFramebuffer(0, 0, rt->size.x, rt->size.y, - p_screen_rect.position.x, flip_y ? screen_rect_end.y : p_screen_rect.position.y, screen_rect_end.x, flip_y ? p_screen_rect.position.y : screen_rect_end.y, + flip_x ? 
screen_rect_end.x : p_screen_rect.position.x, flip_y ? screen_rect_end.y : p_screen_rect.position.y, + flip_x ? p_screen_rect.position.x : screen_rect_end.x, flip_y ? p_screen_rect.position.y : screen_rect_end.y, GL_COLOR_BUFFER_BIT, GL_NEAREST); if (read_fbo != 0) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + glBindFramebuffer(GL_READ_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); glDeleteFramebuffers(1, &read_fbo); } } @@ -423,7 +442,7 @@ void RasterizerGLES3::set_boot_image(const Ref<Image> &p_image, const Color &p_c Size2i win_size = DisplayServer::get_singleton()->window_get_size(); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); glViewport(0, 0, win_size.width, win_size.height); glEnable(GL_BLEND); glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ZERO, GL_ONE); diff --git a/drivers/gles3/rasterizer_gles3.h b/drivers/gles3/rasterizer_gles3.h index cf3cedfea1..8d52dc2365 100644 --- a/drivers/gles3/rasterizer_gles3.h +++ b/drivers/gles3/rasterizer_gles3.h @@ -34,6 +34,8 @@ #ifdef GLES3_ENABLED #include "effects/copy_effects.h" +#include "effects/glow.h" +#include "effects/post_effects.h" #include "environment/fog.h" #include "environment/gi.h" #include "rasterizer_canvas_gles3.h" @@ -53,6 +55,7 @@ private: float delta = 0; double time_total = 0.0; + bool flip_xy_bugfix = false; static bool gles_over_gl; @@ -67,6 +70,8 @@ protected: GLES3::GI *gi = nullptr; GLES3::Fog *fog = nullptr; GLES3::CopyEffects *copy_effects = nullptr; + GLES3::Glow *glow = nullptr; + GLES3::PostEffects *post_effects = nullptr; RasterizerCanvasGLES3 *canvas = nullptr; RasterizerSceneGLES3 *scene = nullptr; static RasterizerGLES3 *singleton; diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index efd554eac9..f9af86e19b 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -764,7 +764,7 @@ void 
RasterizerSceneGLES3::_setup_sky(const RenderDataGLES3 *p_render_data, cons } } -void RasterizerSceneGLES3::_draw_sky(RID p_env, const Projection &p_projection, const Transform3D &p_transform, float p_luminance_multiplier, bool p_use_multiview, bool p_flip_y) { +void RasterizerSceneGLES3::_draw_sky(RID p_env, const Projection &p_projection, const Transform3D &p_transform, float p_sky_energy_multiplier, float p_luminance_multiplier, bool p_use_multiview, bool p_flip_y, bool p_apply_color_adjustments_in_post) { GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton(); ERR_FAIL_COND(p_env.is_null()); @@ -778,6 +778,10 @@ void RasterizerSceneGLES3::_draw_sky(RID p_env, const Projection &p_projection, if (p_flip_y) { spec_constants |= SkyShaderGLES3::USE_INVERTED_Y; } + if (!p_apply_color_adjustments_in_post) { + spec_constants |= SkyShaderGLES3::APPLY_TONEMAPPING; + // TODO add BCS and color corrections once supported. + } RS::EnvironmentBG background = environment_get_background(p_env); @@ -832,6 +836,7 @@ void RasterizerSceneGLES3::_draw_sky(RID p_env, const Projection &p_projection, material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::PROJECTION, camera.columns[2][0], camera.columns[0][0], camera.columns[2][1], camera.columns[1][1], shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::POSITION, p_transform.origin, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::TIME, time, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::SKY_ENERGY_MULTIPLIER, p_sky_energy_multiplier, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::LUMINANCE_MULTIPLIER, 
p_luminance_multiplier, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); if (p_use_multiview) { @@ -843,7 +848,7 @@ void RasterizerSceneGLES3::_draw_sky(RID p_env, const Projection &p_projection, glDrawArrays(GL_TRIANGLES, 0, 3); } -void RasterizerSceneGLES3::_update_sky_radiance(RID p_env, const Projection &p_projection, const Transform3D &p_transform, float p_luminance_multiplier) { +void RasterizerSceneGLES3::_update_sky_radiance(RID p_env, const Projection &p_projection, const Transform3D &p_transform, float p_sky_energy_multiplier) { GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton(); ERR_FAIL_COND(p_env.is_null()); @@ -939,20 +944,17 @@ void RasterizerSceneGLES3::_update_sky_radiance(RID p_env, const Projection &p_p material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::POSITION, p_transform.origin, shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::TIME, time, shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::PROJECTION, cm.columns[2][0], cm.columns[0][0], cm.columns[2][1], cm.columns[1][1], shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); - material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::LUMINANCE_MULTIPLIER, p_luminance_multiplier, shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::SKY_ENERGY_MULTIPLIER, p_sky_energy_multiplier, shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::LUMINANCE_MULTIPLIER, 1.0, shader_data->version, SkyShaderGLES3::MODE_CUBEMAP); glBindVertexArray(sky_globals.screen_triangle_array); glViewport(0, 0, sky->radiance_size, sky->radiance_size); glBindFramebuffer(GL_FRAMEBUFFER, sky->radiance_framebuffer); - glDisable(GL_BLEND); - 
glDepthMask(GL_FALSE); - glDisable(GL_DEPTH_TEST); - scene_state.current_depth_test = GLES3::SceneShaderData::DEPTH_TEST_DISABLED; - glDisable(GL_SCISSOR_TEST); - glDisable(GL_CULL_FACE); - scene_state.cull_mode = GLES3::SceneShaderData::CULL_DISABLED; + scene_state.reset_gl_state(); + scene_state.set_gl_cull_mode(GLES3::SceneShaderData::CULL_DISABLED); + scene_state.enable_gl_blend(false); for (int i = 0; i < 6; i++) { Basis local_view = Basis::looking_at(view_normals[i], view_up[i]); @@ -969,17 +971,13 @@ void RasterizerSceneGLES3::_update_sky_radiance(RID p_env, const Projection &p_p _filter_sky_radiance(sky, 0); //Just copy over the first mipmap } sky->processing_layer = 1; - sky->baked_exposure = p_luminance_multiplier; + sky->baked_exposure = p_sky_energy_multiplier; sky->reflection_dirty = false; } else { if (sky_mode == RS::SKY_MODE_INCREMENTAL && sky->processing_layer < max_processing_layer) { - glDisable(GL_BLEND); - glDepthMask(GL_FALSE); - glDisable(GL_DEPTH_TEST); - scene_state.current_depth_test = GLES3::SceneShaderData::DEPTH_TEST_DISABLED; - glDisable(GL_SCISSOR_TEST); - glDisable(GL_CULL_FACE); - scene_state.cull_mode = GLES3::SceneShaderData::CULL_DISABLED; + scene_state.reset_gl_state(); + scene_state.set_gl_cull_mode(GLES3::SceneShaderData::CULL_DISABLED); + scene_state.enable_gl_blend(false); _filter_sky_radiance(sky, sky->processing_layer); sky->processing_layer++; @@ -1109,7 +1107,7 @@ void RasterizerSceneGLES3::_filter_sky_radiance(Sky *p_sky, int p_base_layer) { } glBindVertexArray(0); glViewport(0, 0, p_sky->screen_size.x, p_sky->screen_size.y); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); } Ref<Image> RasterizerSceneGLES3::sky_bake_panorama(RID p_sky, float p_energy, bool p_bake_irradiance, const Size2i &p_size) { @@ -1150,7 +1148,7 @@ Ref<Image> RasterizerSceneGLES3::sky_bake_panorama(RID p_sky, float p_energy, bo copy_effects->copy_cube_to_panorama(p_bake_irradiance ? 
float(sky->mipmap_count) : 0.0); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); glDeleteFramebuffers(1, &rad_fbo); // Create a dummy texture so we can use texture_2d_get. RID tex_rid = GLES3::TextureStorage::get_singleton()->texture_allocate(); @@ -1584,6 +1582,8 @@ void RasterizerSceneGLES3::_setup_environment(const RenderDataGLES3 *p_render_da scene_state.ubo.screen_pixel_size[0] = screen_pixel_size.x; scene_state.ubo.screen_pixel_size[1] = screen_pixel_size.y; + scene_state.ubo.luminance_multiplier = p_render_data->luminance_multiplier; + scene_state.ubo.shadow_bias = p_shadow_bias; scene_state.ubo.pancake_shadows = p_pancake_shadows; @@ -2271,14 +2271,10 @@ void RasterizerSceneGLES3::_render_shadow_pass(RID p_light, RID p_shadow_atlas, glBindBufferBase(GL_UNIFORM_BUFFER, SCENE_GLOBALS_UNIFORM_LOCATION, global_buffer); glBindBuffer(GL_UNIFORM_BUFFER, 0); - glDisable(GL_BLEND); - glDepthMask(GL_TRUE); - glEnable(GL_DEPTH_TEST); + scene_state.reset_gl_state(); + scene_state.enable_gl_depth_test(true); + scene_state.enable_gl_depth_draw(true); glDepthFunc(GL_LESS); - glDisable(GL_SCISSOR_TEST); - glCullFace(GL_BACK); - glEnable(GL_CULL_FACE); - scene_state.cull_mode = GLES3::SceneShaderData::CULL_BACK; glColorMask(0, 0, 0, 0); glDrawBuffers(0, nullptr); @@ -2303,27 +2299,44 @@ void RasterizerSceneGLES3::_render_shadow_pass(RID p_light, RID p_shadow_atlas, _render_list_template<PASS_MODE_SHADOW>(&render_list_params, &render_data, 0, render_list[RENDER_LIST_SECONDARY].elements.size()); glColorMask(1, 1, 1, 1); - glDisable(GL_DEPTH_TEST); - glDepthMask(GL_FALSE); + scene_state.enable_gl_depth_test(false); + scene_state.enable_gl_depth_draw(true); glDisable(GL_CULL_FACE); scene_state.cull_mode = GLES3::SceneShaderData::CULL_DISABLED; - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); } -void RasterizerSceneGLES3::render_scene(const 
Ref<RenderSceneBuffers> &p_render_buffers, const CameraData *p_camera_data, const CameraData *p_prev_camera_data, const PagedArray<RenderGeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_voxel_gi_instances, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, const PagedArray<RID> &p_fog_volumes, RID p_environment, RID p_camera_attributes, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data, RenderingMethod::RenderInfo *r_render_info) { +void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_buffers, const CameraData *p_camera_data, const CameraData *p_prev_camera_data, const PagedArray<RenderGeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_voxel_gi_instances, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, const PagedArray<RID> &p_fog_volumes, RID p_environment, RID p_camera_attributes, RID p_compositor, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data, RenderingMethod::RenderInfo *r_render_info) { GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); GLES3::Config *config = GLES3::Config::get_singleton(); RENDER_TIMESTAMP("Setup 3D Scene"); + bool apply_color_adjustments_in_post = false; + 
Ref<RenderSceneBuffersGLES3> rb; if (p_render_buffers.is_valid()) { rb = p_render_buffers; ERR_FAIL_COND(rb.is_null()); + + if (rb->get_scaling_3d_mode() != RS::VIEWPORT_SCALING_3D_MODE_OFF) { + // If we're scaling, we apply tonemapping etc. in post, so disable it during rendering + apply_color_adjustments_in_post = true; + } } GLES3::RenderTarget *rt = texture_storage->get_render_target(rb->render_target); ERR_FAIL_NULL(rt); + bool glow_enabled = false; + if (p_environment.is_valid() && rb.is_valid()) { + glow_enabled = environment_get_glow_enabled(p_environment); + rb->set_glow_enabled(glow_enabled); // ensure our intermediate buffer is available if glow is enabled + if (glow_enabled) { + // If glow is enabled, we apply tonemapping etc. in post, so disable it during rendering + apply_color_adjustments_in_post = true; + } + } + // Assign render data // Use the format from rendererRD RenderDataGLES3 render_data; @@ -2359,6 +2372,13 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ // this should be the same for all cameras.. render_data.lod_distance_multiplier = p_camera_data->main_projection.get_lod_multiplier(); + if (rt->color_type == GL_UNSIGNED_INT_2_10_10_10_REV && glow_enabled) { + // As our output is in sRGB and we're using 10bit color space, we can fake a little HDR to do glow... 
+ render_data.luminance_multiplier = 0.25; + } else { + render_data.luminance_multiplier = 1.0; + } + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) { render_data.screen_mesh_lod_threshold = 0.0; } else { @@ -2519,9 +2539,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ glBindFramebuffer(GL_FRAMEBUFFER, fbo); glViewport(0, 0, rb->internal_size.x, rb->internal_size.y); - glCullFace(GL_BACK); - glEnable(GL_CULL_FACE); - scene_state.cull_mode = GLES3::SceneShaderData::CULL_BACK; + scene_state.reset_gl_state(); // Do depth prepass if it's explicitly enabled bool use_depth_prepass = config->use_depth_prepass; @@ -2533,11 +2551,11 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ RENDER_TIMESTAMP("Depth Prepass"); //pre z pass - glDisable(GL_BLEND); - glDepthMask(GL_TRUE); - glEnable(GL_DEPTH_TEST); + scene_state.enable_gl_depth_test(true); + scene_state.enable_gl_depth_draw(true); + scene_state.enable_gl_blend(false); glDepthFunc(GL_LEQUAL); - glDisable(GL_SCISSOR_TEST); + scene_state.enable_gl_scissor_test(false); glColorMask(0, 0, 0, 0); RasterizerGLES3::clear_depth(1.0); @@ -2560,21 +2578,19 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ } glBlendEquation(GL_FUNC_ADD); - if (render_data.transparent_bg) { glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); - glEnable(GL_BLEND); + scene_state.enable_gl_blend(true); } else { glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ZERO, GL_ONE); - glDisable(GL_BLEND); + scene_state.enable_gl_blend(false); } scene_state.current_blend_mode = GLES3::SceneShaderData::BLEND_MODE_MIX; - glEnable(GL_DEPTH_TEST); + scene_state.enable_gl_scissor_test(false); + scene_state.enable_gl_depth_test(true); + scene_state.enable_gl_depth_draw(true); glDepthFunc(GL_LEQUAL); - glDepthMask(GL_TRUE); - scene_state.current_depth_test = 
GLES3::SceneShaderData::DEPTH_TEST_ENABLED; - scene_state.current_depth_draw = GLES3::SceneShaderData::DEPTH_DRAW_ALWAYS; { GLuint db = GL_COLOR_ATTACHMENT0; @@ -2589,7 +2605,19 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ if (!keep_color) { clear_color.a = render_data.transparent_bg ? 0.0f : 1.0f; glClearBufferfv(GL_COLOR, 0, clear_color.components); + } else if (fbo != rt->fbo) { + // Need to copy our current contents to our intermediate/MSAA buffer + GLES3::CopyEffects *copy_effects = GLES3::CopyEffects::get_singleton(); + + scene_state.enable_gl_depth_test(false); + scene_state.enable_gl_depth_draw(false); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(rt->view_count > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D, rt->color); + + copy_effects->copy_screen(render_data.luminance_multiplier); } + RENDER_TIMESTAMP("Render Opaque Pass"); uint64_t spec_constant_base_flags = 0; @@ -2606,26 +2634,28 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ if (render_data.environment.is_valid() && environment_get_fog_mode(render_data.environment) == RS::EnvironmentFogMode::ENV_FOG_MODE_DEPTH) { spec_constant_base_flags |= SceneShaderGLES3::USE_DEPTH_FOG; } + + if (!apply_color_adjustments_in_post) { + spec_constant_base_flags |= SceneShaderGLES3::APPLY_TONEMAPPING; + + // TODO add BCS and Color corrections here once supported. + } } // Render Opaque Objects. 
RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, spec_constant_base_flags, use_wireframe); _render_list_template<PASS_MODE_COLOR>(&render_list_params, &render_data, 0, render_list[RENDER_LIST_OPAQUE].elements.size()); - glDepthMask(GL_FALSE); - scene_state.current_depth_draw = GLES3::SceneShaderData::DEPTH_DRAW_DISABLED; + scene_state.enable_gl_depth_draw(false); if (draw_sky) { RENDER_TIMESTAMP("Render Sky"); - glEnable(GL_DEPTH_TEST); - glDisable(GL_BLEND); - glEnable(GL_CULL_FACE); - glCullFace(GL_BACK); - scene_state.current_depth_test = GLES3::SceneShaderData::DEPTH_TEST_ENABLED; - scene_state.cull_mode = GLES3::SceneShaderData::CULL_BACK; + scene_state.enable_gl_depth_test(true); + scene_state.enable_gl_blend(false); + scene_state.set_gl_cull_mode(GLES3::SceneShaderData::CULL_BACK); - _draw_sky(render_data.environment, render_data.cam_projection, render_data.cam_transform, sky_energy_multiplier, p_camera_data->view_count > 1, flip_y); + _draw_sky(render_data.environment, render_data.cam_projection, render_data.cam_transform, sky_energy_multiplier, render_data.luminance_multiplier, p_camera_data->view_count > 1, flip_y, apply_color_adjustments_in_post); } if (scene_state.used_screen_texture || scene_state.used_depth_texture) { @@ -2674,7 +2704,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ } RENDER_TIMESTAMP("Render 3D Transparent Pass"); - glEnable(GL_BLEND); + scene_state.enable_gl_blend(true); //Render transparent pass RenderListParameters render_list_params_alpha(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), reverse_cull, spec_constant_base_flags, use_wireframe); @@ -2689,7 +2719,10 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ if (rb.is_valid()) { _render_buffers_debug_draw(rb, p_shadow_atlas, fbo); } - glDisable(GL_BLEND); + + // 
Reset stuff that may trip up the next process. + scene_state.reset_gl_state(); + glUseProgram(0); _render_post_processing(&render_data); @@ -2700,6 +2733,9 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ void RasterizerSceneGLES3::_render_post_processing(const RenderDataGLES3 *p_render_data) { GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); + GLES3::Glow *glow = GLES3::Glow::get_singleton(); + GLES3::PostEffects *post_effects = GLES3::PostEffects::get_singleton(); + Ref<RenderSceneBuffersGLES3> rb = p_render_data->render_buffers; ERR_FAIL_COND(rb.is_null()); @@ -2714,6 +2750,26 @@ void RasterizerSceneGLES3::_render_post_processing(const RenderDataGLES3 *p_rend GLuint fbo_int = rb->get_internal_fbo(); GLuint fbo_rt = texture_storage->render_target_get_fbo(render_target); // TODO if MSAA 2D is enabled and we're not using rt_msaa, get 2D render target here. + // Check if we have glow enabled and if so, check if our buffers were allocated + bool glow_enabled = false; + float glow_intensity = 1.0; + float glow_bloom = 0.0; + float glow_hdr_bleed_threshold = 1.0; + float glow_hdr_bleed_scale = 2.0; + float glow_hdr_luminance_cap = 12.0; + if (p_render_data->environment.is_valid()) { + glow_enabled = environment_get_glow_enabled(p_render_data->environment); + glow_intensity = environment_get_glow_intensity(p_render_data->environment); + glow_bloom = environment_get_glow_bloom(p_render_data->environment); + glow_hdr_bleed_threshold = environment_get_glow_hdr_bleed_threshold(p_render_data->environment); + glow_hdr_bleed_scale = environment_get_glow_hdr_bleed_scale(p_render_data->environment); + glow_hdr_luminance_cap = environment_get_glow_hdr_luminance_cap(p_render_data->environment); + } + + if (glow_enabled) { + rb->check_glow_buffers(); + } + if (view_count == 1) { // Resolve if needed. 
if (fbo_msaa_3d != 0 && msaa3d_needs_resolve) { @@ -2729,23 +2785,41 @@ void RasterizerSceneGLES3::_render_post_processing(const RenderDataGLES3 *p_rend glBlitFramebuffer(0, 0, internal_size.x, internal_size.y, 0, 0, internal_size.x, internal_size.y, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST); } + // Rendered to intermediate buffer, must copy to our render target if (fbo_int != 0) { - // TODO If we have glow or other post processing, we upscale only depth here, post processing will also do scaling. + // Apply glow/bloom if requested? then populate our glow buffers + GLuint color = fbo_int != 0 ? rb->get_internal_color() : texture_storage->render_target_get_color(render_target); + const GLES3::Glow::GLOWLEVEL *glow_buffers = nullptr; + if (glow_enabled) { + glow_buffers = rb->get_glow_buffers(); + + glow->set_luminance_multiplier(p_render_data->luminance_multiplier); + + glow->set_intensity(glow_intensity); + glow->set_glow_bloom(glow_bloom); + glow->set_glow_hdr_bleed_threshold(glow_hdr_bleed_threshold); + glow->set_glow_hdr_bleed_scale(glow_hdr_bleed_scale); + glow->set_glow_hdr_luminance_cap(glow_hdr_luminance_cap); + + glow->process_glow(color, internal_size, glow_buffers); + } + + // Copy color buffer + post_effects->post_copy(fbo_rt, target_size, color, internal_size, p_render_data->luminance_multiplier, glow_buffers, glow_intensity); + + // Copy depth buffer glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo_int); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo_rt); - glBlitFramebuffer(0, 0, internal_size.x, internal_size.y, 0, 0, target_size.x, target_size.y, GL_COLOR_BUFFER_BIT, GL_LINEAR); glBlitFramebuffer(0, 0, internal_size.x, internal_size.y, 0, 0, target_size.x, target_size.y, GL_DEPTH_BUFFER_BIT, GL_NEAREST); } glBindFramebuffer(GL_FRAMEBUFFER, fbo_rt); } else if ((fbo_msaa_3d != 0 && msaa3d_needs_resolve) || (fbo_int != 0)) { // TODO investigate if it's smarter to cache these FBOs - GLuint fbos[2]; // read and write - glGenFramebuffers(2, fbos); - - 
glBindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]); + GLuint fbos[3]; // read, write and post + glGenFramebuffers(3, fbos); + // Resolve if needed. if (fbo_msaa_3d != 0 && msaa3d_needs_resolve) { GLuint read_color = rb->get_msaa3d_color(); GLuint read_depth = rb->get_msaa3d_depth(); @@ -2760,6 +2834,9 @@ void RasterizerSceneGLES3::_render_post_processing(const RenderDataGLES3 *p_rend write_depth = texture_storage->render_target_get_depth(render_target); } + glBindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]); + for (uint32_t v = 0; v < view_count; v++) { glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, read_color, 0, v); glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, read_depth, 0, v); @@ -2769,25 +2846,53 @@ void RasterizerSceneGLES3::_render_post_processing(const RenderDataGLES3 *p_rend } } + // Rendered to intermediate buffer, must copy to our render target if (fbo_int != 0) { - GLuint read_color = rb->get_internal_color(); - GLuint read_depth = rb->get_internal_depth(); + // Apply glow/bloom if requested? then populate our glow buffers + const GLES3::Glow::GLOWLEVEL *glow_buffers = nullptr; + GLuint source_color = fbo_int != 0 ? 
rb->get_internal_color() : texture_storage->render_target_get_color(render_target); + + if (glow_enabled) { + glow_buffers = rb->get_glow_buffers(); + + glow->set_luminance_multiplier(p_render_data->luminance_multiplier); + + glow->set_intensity(glow_intensity); + glow->set_glow_bloom(glow_bloom); + glow->set_glow_hdr_bleed_threshold(glow_hdr_bleed_threshold); + glow->set_glow_hdr_bleed_scale(glow_hdr_bleed_scale); + glow->set_glow_hdr_luminance_cap(glow_hdr_luminance_cap); + } + GLuint write_color = texture_storage->render_target_get_color(render_target); + + for (uint32_t v = 0; v < view_count; v++) { + if (glow_enabled) { + glow->process_glow(source_color, internal_size, glow_buffers, v, true); + } + + glBindFramebuffer(GL_FRAMEBUFFER, fbos[2]); + glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, write_color, 0, v); + post_effects->post_copy(fbos[2], target_size, source_color, internal_size, p_render_data->luminance_multiplier, glow_buffers, glow_intensity, v, true); + } + + // Copy depth + GLuint read_depth = rb->get_internal_depth(); GLuint write_depth = texture_storage->render_target_get_depth(render_target); + glBindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]); + for (uint32_t v = 0; v < view_count; v++) { - glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, read_color, 0, v); glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, read_depth, 0, v); - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, write_color, 0, v); glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, write_depth, 0, v); - glBlitFramebuffer(0, 0, internal_size.x, internal_size.y, 0, 0, target_size.x, target_size.y, GL_COLOR_BUFFER_BIT, GL_LINEAR); glBlitFramebuffer(0, 0, internal_size.x, internal_size.y, 0, 0, target_size.x, target_size.y, GL_DEPTH_BUFFER_BIT, GL_NEAREST); } } glBindFramebuffer(GL_FRAMEBUFFER, fbo_rt); - glDeleteFramebuffers(2, fbos); + 
glDeleteFramebuffers(3, fbos); } } @@ -2884,33 +2989,15 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, } if constexpr (p_pass_mode == PASS_MODE_COLOR_TRANSPARENT) { - if (scene_state.current_depth_test != shader->depth_test) { - if (shader->depth_test == GLES3::SceneShaderData::DEPTH_TEST_DISABLED) { - glDisable(GL_DEPTH_TEST); - } else { - glEnable(GL_DEPTH_TEST); - } - scene_state.current_depth_test = shader->depth_test; - } + scene_state.enable_gl_depth_test(shader->depth_test == GLES3::SceneShaderData::DEPTH_TEST_ENABLED); } if constexpr (p_pass_mode != PASS_MODE_SHADOW) { - if (scene_state.current_depth_draw != shader->depth_draw) { - switch (shader->depth_draw) { - case GLES3::SceneShaderData::DEPTH_DRAW_OPAQUE: { - glDepthMask((p_pass_mode == PASS_MODE_COLOR && !GLES3::Config::get_singleton()->use_depth_prepass) || - p_pass_mode == PASS_MODE_DEPTH); - } break; - case GLES3::SceneShaderData::DEPTH_DRAW_ALWAYS: { - glDepthMask(GL_TRUE); - } break; - case GLES3::SceneShaderData::DEPTH_DRAW_DISABLED: { - glDepthMask(GL_FALSE); - } break; - } + if (shader->depth_draw == GLES3::SceneShaderData::DEPTH_DRAW_OPAQUE) { + scene_state.enable_gl_depth_draw((p_pass_mode == PASS_MODE_COLOR && !GLES3::Config::get_singleton()->use_depth_prepass) || p_pass_mode == PASS_MODE_DEPTH); + } else { + scene_state.enable_gl_depth_draw(shader->depth_draw == GLES3::SceneShaderData::DEPTH_DRAW_ALWAYS); } - - scene_state.current_depth_draw = shader->depth_draw; } bool uses_additive_lighting = (inst->light_passes.size() + p_render_data->directional_shadow_count) > 0; @@ -2937,7 +3024,7 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, } if (uses_additive_lighting && pass == 1 && !p_render_data->transparent_bg) { // Enable blending if in opaque pass and not already enabled. 
- glEnable(GL_BLEND); + scene_state.enable_gl_blend(true); } if (pass < int32_t(inst->light_passes.size())) { RID light_instance_rid = inst->light_passes[pass].light_instance_rid; @@ -3017,18 +3104,7 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, } } - if (scene_state.cull_mode != cull_mode) { - if (cull_mode == GLES3::SceneShaderData::CULL_DISABLED) { - glDisable(GL_CULL_FACE); - } else { - if (scene_state.cull_mode == GLES3::SceneShaderData::CULL_DISABLED) { - // Last time was disabled, so enable and set proper face. - glEnable(GL_CULL_FACE); - } - glCullFace(cull_mode == GLES3::SceneShaderData::CULL_FRONT ? GL_FRONT : GL_BACK); - } - scene_state.cull_mode = cull_mode; - } + scene_state.set_gl_cull_mode(cull_mode); RS::PrimitiveType primitive = surf->primitive; if (shader->uses_point_size) { @@ -3417,7 +3493,7 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, if constexpr (p_pass_mode == PASS_MODE_COLOR) { if (uses_additive_lighting && !p_render_data->transparent_bg) { // Disable additive blending if enabled for additive lights. 
- glDisable(GL_BLEND); + scene_state.enable_gl_blend(false); } } } @@ -3480,14 +3556,10 @@ void RasterizerSceneGLES3::render_particle_collider_heightfield(RID p_collider, glBindBufferBase(GL_UNIFORM_BUFFER, SCENE_GLOBALS_UNIFORM_LOCATION, global_buffer); glBindBuffer(GL_UNIFORM_BUFFER, 0); - glDisable(GL_BLEND); - glDepthMask(GL_TRUE); - glEnable(GL_DEPTH_TEST); + scene_state.reset_gl_state(); + scene_state.enable_gl_depth_test(true); + scene_state.enable_gl_depth_draw(true); glDepthFunc(GL_LESS); - glDisable(GL_SCISSOR_TEST); - glCullFace(GL_BACK); - glEnable(GL_CULL_FACE); - scene_state.cull_mode = GLES3::SceneShaderData::CULL_BACK; glDrawBuffers(0, nullptr); @@ -3501,7 +3573,7 @@ void RasterizerSceneGLES3::render_particle_collider_heightfield(RID p_collider, _render_list_template<PASS_MODE_SHADOW>(&render_list_params, &render_data, 0, render_list[RENDER_LIST_SECONDARY].elements.size()); glColorMask(1, 1, 1, 1); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); } void RasterizerSceneGLES3::_render_uv2(const PagedArray<RenderGeometryInstance *> &p_instances, GLuint p_framebuffer, const Rect2i &p_region) { @@ -3530,14 +3602,10 @@ void RasterizerSceneGLES3::_render_uv2(const PagedArray<RenderGeometryInstance * glBindBufferBase(GL_UNIFORM_BUFFER, SCENE_GLOBALS_UNIFORM_LOCATION, global_buffer); glBindBuffer(GL_UNIFORM_BUFFER, 0); - glDisable(GL_BLEND); - glDepthMask(GL_TRUE); - glEnable(GL_DEPTH_TEST); + scene_state.reset_gl_state(); + scene_state.enable_gl_depth_test(true); + scene_state.enable_gl_depth_draw(true); glDepthFunc(GL_LESS); - glDisable(GL_SCISSOR_TEST); - glCullFace(GL_BACK); - glEnable(GL_CULL_FACE); - scene_state.cull_mode = GLES3::SceneShaderData::CULL_BACK; TightLocalVector<GLenum> draw_buffers; draw_buffers.push_back(GL_COLOR_ATTACHMENT0); @@ -3587,7 +3655,7 @@ void RasterizerSceneGLES3::_render_uv2(const PagedArray<RenderGeometryInstance * GLuint db = GL_COLOR_ATTACHMENT0; 
glDrawBuffers(1, &db); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); } } @@ -3629,10 +3697,9 @@ void RasterizerSceneGLES3::_render_buffers_debug_draw(Ref<RenderSceneBuffersGLES glBindFramebuffer(GL_FRAMEBUFFER, shadow_atlas_fb); glViewport(0, 0, shadow_atlas_size, shadow_atlas_size); glActiveTexture(GL_TEXTURE0); - glDepthMask(GL_TRUE); + scene_state.enable_gl_depth_draw(true); glDepthFunc(GL_ALWAYS); - glDisable(GL_CULL_FACE); - scene_state.cull_mode = GLES3::SceneShaderData::CULL_DISABLED; + scene_state.set_gl_cull_mode(GLES3::SceneShaderData::CULL_DISABLED); // Loop through quadrants and copy shadows over. for (int quadrant = 0; quadrant < 4; quadrant++) { @@ -3692,7 +3759,7 @@ void RasterizerSceneGLES3::_render_buffers_debug_draw(Ref<RenderSceneBuffersGLES copy_effects->copy_to_rect(Rect2(Vector2(), Vector2(0.5, 0.5))); glBindTexture(GL_TEXTURE_2D, 0); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); } } if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_DIRECTIONAL_SHADOW_ATLAS) { @@ -3706,8 +3773,8 @@ void RasterizerSceneGLES3::_render_buffers_debug_draw(Ref<RenderSceneBuffersGLES glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, GL_RED); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_A, GL_ONE); - glDisable(GL_DEPTH_TEST); - glDepthMask(GL_FALSE); + scene_state.enable_gl_depth_test(false); + scene_state.enable_gl_depth_draw(false); copy_effects->copy_to_rect(Rect2(Vector2(), Vector2(0.5, 0.5))); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_R, GL_RED); @@ -3902,6 +3969,10 @@ bool RasterizerSceneGLES3::free(RID p_rid) { } else if (RSG::camera_attributes->owns_camera_attributes(p_rid)) { //not much to delete, just free it RSG::camera_attributes->camera_attributes_free(p_rid); + } else if (is_compositor(p_rid)) { + compositor_free(p_rid); + } else if (is_compositor_effect(p_rid)) { + compositor_effect_free(p_rid); } else { return 
false; } diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h index ed59aba266..ce0876972a 100644 --- a/drivers/gles3/rasterizer_scene_gles3.h +++ b/drivers/gles3/rasterizer_scene_gles3.h @@ -127,6 +127,8 @@ struct RenderDataGLES3 { uint32_t spot_light_count = 0; uint32_t omni_light_count = 0; + float luminance_multiplier = 1.0; + RenderingMethod::RenderInfo *render_info = nullptr; /* Shadow data */ @@ -404,15 +406,14 @@ private: float fog_height_density; float fog_depth_curve; - float pad; + float fog_sun_scatter; float fog_depth_begin; float fog_light_color[3]; float fog_depth_end; - float fog_sun_scatter; - float shadow_bias; + float luminance_multiplier; uint32_t camera_visible_layers; bool pancake_shadows; }; @@ -442,10 +443,85 @@ private: bool used_depth_prepass = false; GLES3::SceneShaderData::BlendMode current_blend_mode = GLES3::SceneShaderData::BLEND_MODE_MIX; - GLES3::SceneShaderData::DepthDraw current_depth_draw = GLES3::SceneShaderData::DEPTH_DRAW_OPAQUE; - GLES3::SceneShaderData::DepthTest current_depth_test = GLES3::SceneShaderData::DEPTH_TEST_DISABLED; GLES3::SceneShaderData::Cull cull_mode = GLES3::SceneShaderData::CULL_BACK; + bool current_blend_enabled = false; + bool current_depth_draw_enabled = false; + bool current_depth_test_enabled = false; + bool current_scissor_test_enabled = false; + + void reset_gl_state() { + glDisable(GL_BLEND); + current_blend_enabled = false; + + glDisable(GL_SCISSOR_TEST); + current_scissor_test_enabled = false; + + glCullFace(GL_BACK); + glEnable(GL_CULL_FACE); + cull_mode = GLES3::SceneShaderData::CULL_BACK; + + glDepthMask(GL_FALSE); + current_depth_draw_enabled = false; + glDisable(GL_DEPTH_TEST); + current_depth_test_enabled = false; + } + + void set_gl_cull_mode(GLES3::SceneShaderData::Cull p_mode) { + if (cull_mode != p_mode) { + if (p_mode == GLES3::SceneShaderData::CULL_DISABLED) { + glDisable(GL_CULL_FACE); + } else { + if (cull_mode == 
GLES3::SceneShaderData::CULL_DISABLED) { + // Last time was disabled, so enable and set proper face. + glEnable(GL_CULL_FACE); + } + glCullFace(p_mode == GLES3::SceneShaderData::CULL_FRONT ? GL_FRONT : GL_BACK); + } + cull_mode = p_mode; + } + } + + void enable_gl_blend(bool p_enabled) { + if (current_blend_enabled != p_enabled) { + if (p_enabled) { + glEnable(GL_BLEND); + } else { + glDisable(GL_BLEND); + } + current_blend_enabled = p_enabled; + } + } + + void enable_gl_scissor_test(bool p_enabled) { + if (current_scissor_test_enabled != p_enabled) { + if (p_enabled) { + glEnable(GL_SCISSOR_TEST); + } else { + glDisable(GL_SCISSOR_TEST); + } + current_scissor_test_enabled = p_enabled; + } + } + + void enable_gl_depth_draw(bool p_enabled) { + if (current_depth_draw_enabled != p_enabled) { + glDepthMask(p_enabled ? GL_TRUE : GL_FALSE); + current_depth_draw_enabled = p_enabled; + } + } + + void enable_gl_depth_test(bool p_enabled) { + if (current_depth_test_enabled != p_enabled) { + if (p_enabled) { + glEnable(GL_DEPTH_TEST); + } else { + glDisable(GL_DEPTH_TEST); + } + current_depth_test_enabled = p_enabled; + } + } + bool texscreen_copied = false; bool used_screen_texture = false; bool used_normal_texture = false; @@ -656,9 +732,9 @@ protected: void _setup_sky(const RenderDataGLES3 *p_render_data, const PagedArray<RID> &p_lights, const Projection &p_projection, const Transform3D &p_transform, const Size2i p_screen_size); void _invalidate_sky(Sky *p_sky); void _update_dirty_skys(); - void _update_sky_radiance(RID p_env, const Projection &p_projection, const Transform3D &p_transform, float p_luminance_multiplier); + void _update_sky_radiance(RID p_env, const Projection &p_projection, const Transform3D &p_transform, float p_sky_energy_multiplier); void _filter_sky_radiance(Sky *p_sky, int p_base_layer); - void _draw_sky(RID p_env, const Projection &p_projection, const Transform3D &p_transform, float p_luminance_multiplier, bool p_use_multiview, bool p_flip_y); + void 
_draw_sky(RID p_env, const Projection &p_projection, const Transform3D &p_transform, float p_sky_energy_multiplier, float p_luminance_multiplier, bool p_use_multiview, bool p_flip_y, bool p_apply_color_adjustments_in_post); void _free_sky_data(Sky *p_sky); // Needed for a single argument calls (material and uv2). @@ -737,7 +813,7 @@ public: void voxel_gi_set_quality(RS::VoxelGIQuality) override; - void render_scene(const Ref<RenderSceneBuffers> &p_render_buffers, const CameraData *p_camera_data, const CameraData *p_prev_camera_data, const PagedArray<RenderGeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_voxel_gi_instances, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, const PagedArray<RID> &p_fog_volumes, RID p_environment, RID p_camera_attributes, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr, RenderingMethod::RenderInfo *r_render_info = nullptr) override; + void render_scene(const Ref<RenderSceneBuffers> &p_render_buffers, const CameraData *p_camera_data, const CameraData *p_prev_camera_data, const PagedArray<RenderGeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_voxel_gi_instances, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, const PagedArray<RID> &p_fog_volumes, RID p_environment, RID p_camera_attributes, RID p_compositor, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int 
p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr, RenderingMethod::RenderInfo *r_render_info = nullptr) override; void render_material(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, const PagedArray<RenderGeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) override; void render_particle_collider_heightfield(RID p_collider, const Transform3D &p_transform, const PagedArray<RenderGeometryInstance *> &p_instances) override; diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp index 551136ce36..43b146152d 100644 --- a/drivers/gles3/shader_gles3.cpp +++ b/drivers/gles3/shader_gles3.cpp @@ -585,6 +585,19 @@ bool ShaderGLES3::_load_from_cache(Version *p_version) { Version::Specialization specialization; specialization.id = glCreateProgram(); + if (feedback_count) { + Vector<const char *> feedback; + for (int feedback_index = 0; feedback_index < feedback_count; feedback_index++) { + if (feedbacks[feedback_index].specialization == 0 || (feedbacks[feedback_index].specialization & specialization_key)) { + // Specialization for this feedback is enabled. 
+ feedback.push_back(feedbacks[feedback_index].name); + } + } + + if (!feedback.is_empty()) { + glTransformFeedbackVaryings(specialization.id, feedback.size(), feedback.ptr(), GL_INTERLEAVED_ATTRIBS); + } + } glProgramBinary(specialization.id, variant_format, variant_bytes.ptr(), variant_bytes.size()); GLint link_status = 0; diff --git a/drivers/gles3/shaders/SCsub b/drivers/gles3/shaders/SCsub index 34713e7e29..0292b5d519 100644 --- a/drivers/gles3/shaders/SCsub +++ b/drivers/gles3/shaders/SCsub @@ -12,8 +12,10 @@ if "GLES3_GLSL" in env["BUILDERS"]: # make sure we recompile shaders if include files change env.Depends([f + ".gen.h" for f in glsl_files], gl_include_files + ["#gles3_builders.py"]) + # compile shaders + + # as we have a few, not yet, converted files we name the ones we want to include: env.GLES3_GLSL("canvas.glsl") - env.GLES3_GLSL("copy.glsl") env.GLES3_GLSL("scene.glsl") env.GLES3_GLSL("sky.glsl") env.GLES3_GLSL("cubemap_filter.glsl") @@ -22,3 +24,10 @@ if "GLES3_GLSL" in env["BUILDERS"]: env.GLES3_GLSL("particles.glsl") env.GLES3_GLSL("particles_copy.glsl") env.GLES3_GLSL("skeleton.glsl") + + # once we finish conversion we can introduce this to cover all files: + # for glsl_file in glsl_files: + # env.GLES3_GLSL(glsl_file) + + +SConscript("effects/SCsub") diff --git a/drivers/gles3/shaders/canvas.glsl b/drivers/gles3/shaders/canvas.glsl index 80e28cf9fc..8da7d7dc80 100644 --- a/drivers/gles3/shaders/canvas.glsl +++ b/drivers/gles3/shaders/canvas.glsl @@ -187,8 +187,31 @@ void main() { #endif // !USE_INSTANCING #else // !USE_ATTRIBUTES - vec2 vertex_base_arr[6] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0), vec2(0.0, 0.0), vec2(1.0, 1.0)); - vec2 vertex_base = vertex_base_arr[gl_VertexID % 6]; + + // crash on Adreno 320/330 + //vec2 vertex_base_arr[6] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0), vec2(0.0, 0.0), vec2(1.0, 1.0)); + //vec2 vertex_base = vertex_base_arr[gl_VertexID % 6]; + 
//----------------------------------------- + // ID | 0 | 1 | 2 | 3 | 4 | 5 | + //----------------------------------------- + // X | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | + // Y | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | + //----------------------------------------- + // no crash or freeze on all Adreno 3xx with 'if / else if' and slightly faster! + int vertex_id = gl_VertexID % 6; + vec2 vertex_base; + if (vertex_id == 0) + vertex_base = vec2(0.0, 0.0); + else if (vertex_id == 1) + vertex_base = vec2(0.0, 1.0); + else if (vertex_id == 2) + vertex_base = vec2(1.0, 1.0); + else if (vertex_id == 3) + vertex_base = vec2(1.0, 0.0); + else if (vertex_id == 4) + vertex_base = vec2(0.0, 0.0); + else if (vertex_id == 5) + vertex_base = vec2(1.0, 1.0); vec2 uv = read_draw_data_src_rect.xy + abs(read_draw_data_src_rect.zw) * ((read_draw_data_flags & FLAGS_TRANSPOSE_RECT) != uint(0) ? vertex_base.yx : vertex_base.xy); vec4 color = read_draw_data_modulation; @@ -475,16 +498,12 @@ vec4 light_shadow_compute(uint light_base, vec4 light_color, vec4 shadow_uv void light_blend_compute(uint light_base, vec4 light_color, inout vec3 color) { uint blend_mode = light_array[light_base].flags & LIGHT_FLAGS_BLEND_MASK; - switch (blend_mode) { - case LIGHT_FLAGS_BLEND_MODE_ADD: { - color.rgb += light_color.rgb * light_color.a; - } break; - case LIGHT_FLAGS_BLEND_MODE_SUB: { - color.rgb -= light_color.rgb * light_color.a; - } break; - case LIGHT_FLAGS_BLEND_MODE_MIX: { - color.rgb = mix(color.rgb, light_color.rgb, light_color.a); - } break; + if (blend_mode == LIGHT_FLAGS_BLEND_MODE_ADD) { + color.rgb += light_color.rgb * light_color.a; + } else if (blend_mode == LIGHT_FLAGS_BLEND_MODE_SUB) { + color.rgb -= light_color.rgb * light_color.a; + } else if (blend_mode == LIGHT_FLAGS_BLEND_MODE_MIX) { + color.rgb = mix(color.rgb, light_color.rgb, light_color.a); } } diff --git a/drivers/gles3/shaders/effects/SCsub b/drivers/gles3/shaders/effects/SCsub new file mode 100644 index 0000000000..38b185ed88 --- 
/dev/null +++ b/drivers/gles3/shaders/effects/SCsub @@ -0,0 +1,17 @@ +#!/usr/bin/env python + +Import("env") + +if "GLES3_GLSL" in env["BUILDERS"]: + # find all include files + gl_include_files = [str(f) for f in Glob("*_inc.glsl")] + + # find all shader code(all glsl files excluding our include files) + glsl_files = [str(f) for f in Glob("*.glsl") if str(f) not in gl_include_files] + + # make sure we recompile shaders if include files change + env.Depends([f + ".gen.h" for f in glsl_files], gl_include_files + ["#gles3_builders.py"]) + + # compile shaders + for glsl_file in glsl_files: + env.GLES3_GLSL(glsl_file) diff --git a/drivers/gles3/shaders/copy.glsl b/drivers/gles3/shaders/effects/copy.glsl index db63b5d348..06f63ba629 100644 --- a/drivers/gles3/shaders/copy.glsl +++ b/drivers/gles3/shaders/effects/copy.glsl @@ -6,6 +6,7 @@ mode_copy_section = #define USE_COPY_SECTION \n#define MODE_SIMPLE_COPY mode_copy_section_source = #define USE_COPY_SECTION \n#define MODE_SIMPLE_COPY \n#define MODE_COPY_FROM mode_copy_section_3d = #define USE_COPY_SECTION \n#define MODE_SIMPLE_COPY \n#define USE_TEXTURE_3D mode_copy_section_2d_array = #define USE_COPY_SECTION \n#define MODE_SIMPLE_COPY \n#define USE_TEXTURE_2D_ARRAY +mode_screen = #define MODE_SIMPLE_COPY \n#define MODE_MULTIPLY mode_gaussian_blur = #define MODE_GAUSSIAN_BLUR mode_mipmap = #define MODE_MIPMAP mode_simple_color = #define MODE_SIMPLE_COLOR \n#define USE_COPY_SECTION @@ -55,6 +56,10 @@ uniform float lod; uniform vec4 color_in; #endif +#ifdef MODE_MULTIPLY +uniform float multiply; +#endif + #ifdef MODE_GAUSSIAN_BLUR // Defined in 0-1 coords. 
uniform highp vec2 pixel_size; @@ -105,10 +110,14 @@ void main() { vec4 color = textureLod(source_2d_array, vec3(uv_interp, layer), lod); #else vec4 color = texture(source, uv_interp); -#endif +#endif // USE_TEXTURE_3D + +#ifdef MODE_MULTIPLY + color *= multiply; +#endif // MODE_MULTIPLY frag_color = color; -#endif +#endif // MODE_SIMPLE_COPY #ifdef MODE_SIMPLE_COLOR frag_color = color_in; diff --git a/drivers/gles3/shaders/effects/glow.glsl b/drivers/gles3/shaders/effects/glow.glsl new file mode 100644 index 0000000000..b43691c737 --- /dev/null +++ b/drivers/gles3/shaders/effects/glow.glsl @@ -0,0 +1,113 @@ +/* clang-format off */ +#[modes] + +// Based on Dual filtering glow as explained in Marius Bjørge presentation at Siggraph 2015 "Bandwidth-Efficient Rendering" + +mode_filter = #define MODE_FILTER +mode_downsample = #define MODE_DOWNSAMPLE +mode_upsample = #define MODE_UPSAMPLE + +#[specializations] + +USE_MULTIVIEW = false + +#[vertex] +layout(location = 0) in vec2 vertex_attrib; + +/* clang-format on */ + +out vec2 uv_interp; + +void main() { + uv_interp = vertex_attrib * 0.5 + 0.5; + gl_Position = vec4(vertex_attrib, 1.0, 1.0); +} + +/* clang-format off */ +#[fragment] +/* clang-format on */ + +#ifdef MODE_FILTER +#ifdef USE_MULTIVIEW +uniform sampler2DArray source_color; // texunit:0 +#else +uniform sampler2D source_color; // texunit:0 +#endif // USE_MULTIVIEW +uniform float view; +uniform vec2 pixel_size; +uniform float luminance_multiplier; +uniform float glow_bloom; +uniform float glow_hdr_threshold; +uniform float glow_hdr_scale; +uniform float glow_luminance_cap; +#endif // MODE_FILTER + +#ifdef MODE_DOWNSAMPLE +uniform sampler2D source_color; // texunit:0 +uniform vec2 pixel_size; +#endif // MODE_DOWNSAMPLE + +#ifdef MODE_UPSAMPLE +uniform sampler2D source_color; // texunit:0 +uniform vec2 pixel_size; +#endif // MODE_UPSAMPLE + +in vec2 uv_interp; + +layout(location = 0) out vec4 frag_color; + +void main() { +#ifdef MODE_FILTER + // Note, we read 
from an image with double resolution, so we average those out +#ifdef USE_MULTIVIEW + vec2 half_pixel = pixel_size * 0.5; + vec3 uv = vec3(uv_interp, view); + vec3 color = textureLod(source_color, uv, 0.0).rgb * 4.0; + color += textureLod(source_color, uv - vec3(half_pixel, 0.0), 0.0).rgb; + color += textureLod(source_color, uv + vec3(half_pixel, 0.0), 0.0).rgb; + color += textureLod(source_color, uv - vec3(half_pixel.x, -half_pixel.y, 0.0), 0.0).rgb; + color += textureLod(source_color, uv + vec3(half_pixel.x, -half_pixel.y, 0.0), 0.0).rgb; +#else + vec2 half_pixel = pixel_size * 0.5; + vec2 uv = uv_interp; + vec3 color = textureLod(source_color, uv, 0.0).rgb * 4.0; + color += textureLod(source_color, uv - half_pixel, 0.0).rgb; + color += textureLod(source_color, uv + half_pixel, 0.0).rgb; + color += textureLod(source_color, uv - vec2(half_pixel.x, -half_pixel.y), 0.0).rgb; + color += textureLod(source_color, uv + vec2(half_pixel.x, -half_pixel.y), 0.0).rgb; +#endif // USE_MULTIVIEW + color /= luminance_multiplier * 8.0; + + float feedback_factor = max(color.r, max(color.g, color.b)); + float feedback = max(smoothstep(glow_hdr_threshold, glow_hdr_threshold + glow_hdr_scale, feedback_factor), glow_bloom); + + color = min(color * feedback, vec3(glow_luminance_cap)); + + frag_color = vec4(luminance_multiplier * color, 1.0); +#endif // MODE_FILTER + +#ifdef MODE_DOWNSAMPLE + vec2 half_pixel = pixel_size * 0.5; + vec4 color = textureLod(source_color, uv_interp, 0.0) * 4.0; + color += textureLod(source_color, uv_interp - half_pixel, 0.0); + color += textureLod(source_color, uv_interp + half_pixel, 0.0); + color += textureLod(source_color, uv_interp - vec2(half_pixel.x, -half_pixel.y), 0.0); + color += textureLod(source_color, uv_interp + vec2(half_pixel.x, -half_pixel.y), 0.0); + frag_color = color / 8.0; +#endif // MODE_DOWNSAMPLE + +#ifdef MODE_UPSAMPLE + vec2 half_pixel = pixel_size * 0.5; + + vec4 color = textureLod(source_color, uv_interp + vec2(-half_pixel.x * 2.0, 
0.0), 0.0); + color += textureLod(source_color, uv_interp + vec2(-half_pixel.x, half_pixel.y), 0.0) * 2.0; + color += textureLod(source_color, uv_interp + vec2(0.0, half_pixel.y * 2.0), 0.0); + color += textureLod(source_color, uv_interp + vec2(half_pixel.x, half_pixel.y), 0.0) * 2.0; + color += textureLod(source_color, uv_interp + vec2(half_pixel.x * 2.0, 0.0), 0.0); + color += textureLod(source_color, uv_interp + vec2(half_pixel.x, -half_pixel.y), 0.0) * 2.0; + color += textureLod(source_color, uv_interp + vec2(0.0, -half_pixel.y * 2.0), 0.0); + color += textureLod(source_color, uv_interp + vec2(-half_pixel.x, -half_pixel.y), 0.0) * 2.0; + + frag_color = color / 12.0; +#endif // MODE_UPSAMPLE +} diff --git a/drivers/gles3/shaders/effects/post.glsl b/drivers/gles3/shaders/effects/post.glsl new file mode 100644 index 0000000000..e61171c92a --- /dev/null +++ b/drivers/gles3/shaders/effects/post.glsl @@ -0,0 +1,96 @@ +/* clang-format off */ +#[modes] +mode_default = #define MODE_DEFAULT +// mode_glow = #define MODE_GLOW + +#[specializations] + +USE_MULTIVIEW = false +USE_GLOW = false +USE_LUMINANCE_MULTIPLIER = false + +#[vertex] +layout(location = 0) in vec2 vertex_attrib; + +/* clang-format on */ + +out vec2 uv_interp; + +void main() { + uv_interp = vertex_attrib * 0.5 + 0.5; + gl_Position = vec4(vertex_attrib, 1.0, 1.0); +} + +/* clang-format off */ +#[fragment] +/* clang-format on */ + +#include "../tonemap_inc.glsl" + +#ifdef USE_MULTIVIEW +uniform sampler2DArray source_color; // texunit:0 +#else +uniform sampler2D source_color; // texunit:0 +#endif // USE_MULTIVIEW + +uniform float view; +uniform float luminance_multiplier; + +#ifdef USE_GLOW +uniform sampler2D glow_color; // texunit:1 +uniform vec2 pixel_size; +uniform float glow_intensity; + +vec4 get_glow_color(vec2 uv) { + vec2 half_pixel = pixel_size * 0.5; + + vec4 color = textureLod(glow_color, uv + vec2(-half_pixel.x * 2.0, 0.0), 0.0); + color += textureLod(glow_color, uv + vec2(-half_pixel.x, 
half_pixel.y), 0.0) * 2.0; + color += textureLod(glow_color, uv + vec2(0.0, half_pixel.y * 2.0), 0.0); + color += textureLod(glow_color, uv + vec2(half_pixel.x, half_pixel.y), 0.0) * 2.0; + color += textureLod(glow_color, uv + vec2(half_pixel.x * 2.0, 0.0), 0.0); + color += textureLod(glow_color, uv + vec2(half_pixel.x, -half_pixel.y), 0.0) * 2.0; + color += textureLod(glow_color, uv + vec2(0.0, -half_pixel.y * 2.0), 0.0); + color += textureLod(glow_color, uv + vec2(-half_pixel.x, -half_pixel.y), 0.0) * 2.0; + + return color / 12.0; +} +#endif // USE_GLOW + +in vec2 uv_interp; + +layout(location = 0) out vec4 frag_color; + +void main() { +#ifdef USE_MULTIVIEW + vec4 color = texture(source_color, vec3(uv_interp, view)); +#else + vec4 color = texture(source_color, uv_interp); +#endif + +#ifdef USE_GLOW + vec4 glow = get_glow_color(uv_interp) * glow_intensity; + + // Just use softlight... + glow.rgb = clamp(glow.rgb, vec3(0.0f), vec3(1.0f)); + color.rgb = max((color.rgb + glow.rgb) - (color.rgb * glow.rgb), vec3(0.0)); +#endif // USE_GLOW + +#ifdef USE_LUMINANCE_MULTIPLIER + color = color / luminance_multiplier; +#endif + + color.rgb = srgb_to_linear(color.rgb); + color.rgb = apply_tonemapping(color.rgb, white); + color.rgb = linear_to_srgb(color.rgb); + +#ifdef USE_BCS + color.rgb = apply_bcs(color.rgb, bcs); +#endif + +#ifdef USE_COLOR_CORRECTION + color.rgb = apply_color_correction(color.rgb, color_correction); +#endif + + frag_color = color; +} diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index 667cbb3d90..a6db90c3f5 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -28,6 +28,7 @@ LIGHT_USE_PSSM4 = false LIGHT_USE_PSSM_BLEND = false BASE_PASS = true USE_ADDITIVE_LIGHTING = false +APPLY_TONEMAPPING = true // We can only use one type of light per additive pass. This means that if USE_ADDITIVE_LIGHTING is defined, and // these are false, we are doing a directional light pass. 
ADDITIVE_OMNI = false @@ -185,18 +186,17 @@ layout(std140) uniform SceneData { // ubo:2 uint fog_mode; float fog_density; float fog_height; - float fog_height_density; + float fog_height_density; float fog_depth_curve; - float pad; + float fog_sun_scatter; float fog_depth_begin; vec3 fog_light_color; float fog_depth_end; - float fog_sun_scatter; - float shadow_bias; + float luminance_multiplier; uint camera_visible_layers; bool pancake_shadows; } @@ -676,18 +676,17 @@ layout(std140) uniform SceneData { // ubo:2 uint fog_mode; float fog_density; float fog_height; - float fog_height_density; + float fog_height_density; float fog_depth_curve; - float pad; + float fog_sun_scatter; float fog_depth_begin; vec3 fog_light_color; float fog_depth_end; - float fog_sun_scatter; - float shadow_bias; + float luminance_multiplier; uint camera_visible_layers; bool pancake_shadows; } @@ -1201,7 +1200,10 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 f vec3 spot_dir = spot_lights[idx].direction; float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights[idx].cone_angle); float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights[idx].cone_angle)); - spot_attenuation *= 1.0 - pow(spot_rim, spot_lights[idx].cone_attenuation); + + mediump float cone_attenuation = spot_lights[idx].cone_attenuation; + spot_attenuation *= 1.0 - pow(spot_rim, cone_attenuation); + vec3 color = spot_lights[idx].color; float size_A = 0.0; @@ -1269,7 +1271,7 @@ vec4 fog_process(vec3 vertex) { float fog_z = smoothstep(scene_data.fog_depth_begin, scene_data.fog_depth_end, length(vertex)); fog_amount = pow(fog_z, scene_data.fog_depth_curve) * scene_data.fog_density; #else - fog_amount = 1 - exp(min(0.0, -length(vertex) * scene_data.fog_density)); + fog_amount = 1.0 - exp(min(0.0, -length(vertex) * scene_data.fog_density)); #endif // USE_DEPTH_FOG if (abs(scene_data.fog_height_density) >= 0.0001) { @@ -1758,7 +1760,9 @@ void main() { // Tonemap before writing as 
we are writing to an sRGB framebuffer frag_color.rgb *= exposure; +#ifdef APPLY_TONEMAPPING frag_color.rgb = apply_tonemapping(frag_color.rgb, white); +#endif frag_color.rgb = linear_to_srgb(frag_color.rgb); #ifdef USE_BCS @@ -1973,7 +1977,9 @@ void main() { // Tonemap before writing as we are writing to an sRGB framebuffer additive_light_color *= exposure; +#ifdef APPLY_TONEMAPPING additive_light_color = apply_tonemapping(additive_light_color, white); +#endif additive_light_color = linear_to_srgb(additive_light_color); #ifdef USE_BCS @@ -1986,6 +1992,9 @@ void main() { frag_color.rgb += additive_light_color; #endif // USE_ADDITIVE_LIGHTING + + frag_color.rgb *= scene_data.luminance_multiplier; + #endif // !RENDER_MATERIAL -#endif //!MODE_RENDER_DEPTH +#endif // !MODE_RENDER_DEPTH } diff --git a/drivers/gles3/shaders/sky.glsl b/drivers/gles3/shaders/sky.glsl index 9f9c22cf6d..b10ea12e6e 100644 --- a/drivers/gles3/shaders/sky.glsl +++ b/drivers/gles3/shaders/sky.glsl @@ -12,6 +12,7 @@ mode_cubemap_quarter_res = #define USE_CUBEMAP_PASS \n#define USE_QUARTER_RES_PA USE_MULTIVIEW = false USE_INVERTED_Y = true +APPLY_TONEMAPPING = true #[vertex] @@ -103,6 +104,7 @@ uniform mat4 orientation; uniform vec4 projection; uniform vec3 position; uniform float time; +uniform float sky_energy_multiplier; uniform float luminance_multiplier; uniform float fog_aerial_perspective; @@ -195,12 +197,14 @@ void main() { } - color *= luminance_multiplier; + color *= sky_energy_multiplier; // Convert to Linear for tonemapping so color matches scene shader better color = srgb_to_linear(color); color *= exposure; +#ifdef APPLY_TONEMAPPING color = apply_tonemapping(color, white); +#endif color = linear_to_srgb(color); #ifdef USE_BCS @@ -211,10 +215,10 @@ void main() { color = apply_color_correction(color, color_correction); #endif - frag_color.rgb = color; + frag_color.rgb = color * luminance_multiplier; frag_color.a = alpha; #ifdef USE_DEBANDING - frag_color.rgb += 
interleaved_gradient_noise(gl_FragCoord.xy) * luminance_multiplier; + frag_color.rgb += interleaved_gradient_noise(gl_FragCoord.xy) * sky_energy_multiplier * luminance_multiplier; #endif } diff --git a/drivers/gles3/storage/config.cpp b/drivers/gles3/storage/config.cpp index 5d01ab0346..1a41b60836 100644 --- a/drivers/gles3/storage/config.cpp +++ b/drivers/gles3/storage/config.cpp @@ -166,6 +166,11 @@ Config::Config() { max_renderable_elements = GLOBAL_GET("rendering/limits/opengl/max_renderable_elements"); max_renderable_lights = GLOBAL_GET("rendering/limits/opengl/max_renderable_lights"); max_lights_per_object = GLOBAL_GET("rendering/limits/opengl/max_lights_per_object"); + + //Adreno 3xx Compatibility + const String rendering_device_name = String::utf8((const char *)glGetString(GL_RENDERER)); + //TODO: Check the number between 300 and 399(?) + adreno_3xx_compatibility = (rendering_device_name.left(13) == "Adreno (TM) 3"); } Config::~Config() { diff --git a/drivers/gles3/storage/config.h b/drivers/gles3/storage/config.h index 1c0a5178bd..c3ab65f0bc 100644 --- a/drivers/gles3/storage/config.h +++ b/drivers/gles3/storage/config.h @@ -91,6 +91,8 @@ public: bool rt_msaa_multiview_supported = false; bool multiview_supported = false; + bool adreno_3xx_compatibility = false; + #ifdef ANDROID_ENABLED PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC eglFramebufferTextureMultiviewOVR = nullptr; PFNGLTEXSTORAGE3DMULTISAMPLEPROC eglTexStorage3DMultisample = nullptr; diff --git a/drivers/gles3/storage/light_storage.cpp b/drivers/gles3/storage/light_storage.cpp index 5421f57646..2259c61e5b 100644 --- a/drivers/gles3/storage/light_storage.cpp +++ b/drivers/gles3/storage/light_storage.cpp @@ -1044,7 +1044,7 @@ bool LightStorage::_shadow_atlas_find_shadow(ShadowAtlas *shadow_atlas, int *p_i glBindTexture(GL_TEXTURE_2D, 0); } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); r_quadrant = qidx; r_shadow = 
shadow_atlas->quadrants[qidx].textures.size(); @@ -1135,7 +1135,7 @@ void LightStorage::update_directional_shadow_atlas() { glClear(GL_DEPTH_BUFFER_BIT); glBindTexture(GL_TEXTURE_2D, 0); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); } void LightStorage::directional_shadow_atlas_set_size(int p_size, bool p_16_bits) { diff --git a/drivers/gles3/storage/light_storage.h b/drivers/gles3/storage/light_storage.h index 96e6200219..a6b236f3ec 100644 --- a/drivers/gles3/storage/light_storage.h +++ b/drivers/gles3/storage/light_storage.h @@ -686,7 +686,7 @@ public: glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, atlas->debug_texture, 0); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); return atlas->debug_fbo; } diff --git a/drivers/gles3/storage/material_storage.cpp b/drivers/gles3/storage/material_storage.cpp index 5600449d00..23376b4381 100644 --- a/drivers/gles3/storage/material_storage.cpp +++ b/drivers/gles3/storage/material_storage.cpp @@ -2607,7 +2607,7 @@ void CanvasShaderData::set_code(const String &p_code) { MaterialStorage::get_singleton()->shaders.canvas_shader.version_set_code(version, gen_code.code, gen_code.uniforms, gen_code.stage_globals[ShaderCompiler::STAGE_VERTEX], gen_code.stage_globals[ShaderCompiler::STAGE_FRAGMENT], gen_code.defines, texture_uniform_data); ERR_FAIL_COND(!MaterialStorage::get_singleton()->shaders.canvas_shader.version_is_valid(version)); - vertex_input_mask = RS::ARRAY_FORMAT_VERTEX | RS::ARRAY_COLOR | RS::ARRAY_TEX_UV; + vertex_input_mask = RS::ARRAY_FORMAT_VERTEX | RS::ARRAY_FORMAT_COLOR | RS::ARRAY_FORMAT_TEX_UV; vertex_input_mask |= uses_custom0 << RS::ARRAY_CUSTOM0; vertex_input_mask |= uses_custom1 << RS::ARRAY_CUSTOM1; diff --git a/drivers/gles3/storage/mesh_storage.cpp b/drivers/gles3/storage/mesh_storage.cpp index 8ab66e2bc6..e073db3cfd 100644 --- 
a/drivers/gles3/storage/mesh_storage.cpp +++ b/drivers/gles3/storage/mesh_storage.cpp @@ -33,6 +33,7 @@ #include "mesh_storage.h" #include "config.h" #include "material_storage.h" +#include "texture_storage.h" #include "utilities.h" using namespace GLES3; @@ -219,7 +220,7 @@ void MeshStorage::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) glGenBuffers(1, &s->vertex_buffer); glBindBuffer(GL_ARRAY_BUFFER, s->vertex_buffer); // If we have an uncompressed surface that contains normals, but not tangents, we need to differentiate the array - // from a compressed array in the shader. To do so, we allow the the normal to read 4 components out of the buffer + // from a compressed array in the shader. To do so, we allow the normal to read 4 components out of the buffer // But only give it 2 components per normal. So essentially, each vertex reads the next normal in normal.zw. // This allows us to avoid adding a shader permutation, and avoid passing dummy tangents. Since the stride is kept small // this should still be a net win for bandwidth. 
@@ -1248,7 +1249,7 @@ void MeshStorage::update_mesh_instances() { } glEnable(GL_RASTERIZER_DISCARD); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); // Process skeletons and blend shapes using transform feedback while (dirty_mesh_instance_arrays.first()) { MeshInstance *mi = dirty_mesh_instance_arrays.first()->self(); diff --git a/drivers/gles3/storage/particles_storage.cpp b/drivers/gles3/storage/particles_storage.cpp index e263acf88b..4d563ab28b 100644 --- a/drivers/gles3/storage/particles_storage.cpp +++ b/drivers/gles3/storage/particles_storage.cpp @@ -395,7 +395,7 @@ AABB ParticlesStorage::particles_get_current_aabb(RID p_particles) { bool first = true; const uint8_t *data_ptr = (const uint8_t *)buffer.ptr(); - uint32_t particle_data_size = sizeof(ParticleInstanceData3D) + sizeof(float) * particles->userdata_count; + uint32_t particle_data_size = sizeof(ParticleInstanceData3D); for (int i = 0; i < total_amount; i++) { const ParticleInstanceData3D &particle_data = *(const ParticleInstanceData3D *)&data_ptr[particle_data_size * i]; @@ -818,7 +818,7 @@ void ParticlesStorage::particles_set_view_axis(RID p_particles, const Vector3 &p } glEnable(GL_RASTERIZER_DISCARD); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); _particles_update_instance_buffer(particles, axis, p_up_axis); glDisable(GL_RASTERIZER_DISCARD); } @@ -1002,7 +1002,7 @@ void ParticlesStorage::_particles_update_instance_buffer(Particles *particles, c void ParticlesStorage::update_particles() { glEnable(GL_RASTERIZER_DISCARD); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); GLuint global_buffer = GLES3::MaterialStorage::get_singleton()->global_shader_parameters_get_uniform_buffer(); @@ -1262,7 +1262,7 @@ GLuint ParticlesStorage::particles_collision_get_heightfield_framebuffer(RID p_p 
particles_collision->heightfield_fb_size = size; glBindTexture(GL_TEXTURE_2D, 0); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); } return particles_collision->heightfield_fb; diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.cpp b/drivers/gles3/storage/render_scene_buffers_gles3.cpp index 33bb808856..de0a64f5fe 100644 --- a/drivers/gles3/storage/render_scene_buffers_gles3.cpp +++ b/drivers/gles3/storage/render_scene_buffers_gles3.cpp @@ -47,6 +47,13 @@ #define GL_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9102 #endif +RenderSceneBuffersGLES3::RenderSceneBuffersGLES3() { + for (int i = 0; i < 4; i++) { + glow.levels[i].color = 0; + glow.levels[i].fbo = 0; + } +} + RenderSceneBuffersGLES3::~RenderSceneBuffersGLES3() { free_render_buffer_data(); } @@ -114,7 +121,7 @@ GLuint RenderSceneBuffersGLES3::_rt_get_cached_fbo(GLuint p_color, GLuint p_dept msaa3d.cached_fbos.push_back(new_fbo); } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); #endif return new_fbo.fbo; @@ -137,9 +144,22 @@ void RenderSceneBuffersGLES3::configure(const RenderSceneBuffersConfiguration *p //use_debanding = p_config->get_use_debanding(); view_count = config->multiview_supported ? p_config->get_view_count() : 1; - ERR_FAIL_COND(view_count == 0); bool use_multiview = view_count > 1; + // Get color format data from our render target so we match those + if (render_target.is_valid()) { + color_internal_format = texture_storage->render_target_get_color_internal_format(render_target); + color_format = texture_storage->render_target_get_color_format(render_target); + color_type = texture_storage->render_target_get_color_type(render_target); + color_format_size = texture_storage->render_target_get_color_format_size(render_target); + } else { + // reflection probe? or error? 
+ color_internal_format = GL_RGBA8; + color_format = GL_RGBA; + color_type = GL_UNSIGNED_BYTE; + color_format_size = 4; + } + // Check our scaling mode if (scaling_3d_mode != RS::VIEWPORT_SCALING_3D_MODE_OFF && internal_size.x == 0 && internal_size.y == 0) { // Disable, no size set. @@ -153,14 +173,38 @@ void RenderSceneBuffersGLES3::configure(const RenderSceneBuffersConfiguration *p scaling_3d_mode = RS::VIEWPORT_SCALING_3D_MODE_BILINEAR; } - bool use_internal_buffer = scaling_3d_mode != RS::VIEWPORT_SCALING_3D_MODE_OFF; // TODO also need this if doing post processing like glow + // Check if we support MSAA. + if (msaa3d.mode != RS::VIEWPORT_MSAA_DISABLED && internal_size.x == 0 && internal_size.y == 0) { + // Disable, no size set. + msaa3d.mode = RS::VIEWPORT_MSAA_DISABLED; + } else if (!use_multiview && msaa3d.mode != RS::VIEWPORT_MSAA_DISABLED && !config->msaa_supported && !config->rt_msaa_supported) { + WARN_PRINT_ONCE("MSAA is not supported on this device."); + msaa3d.mode = RS::VIEWPORT_MSAA_DISABLED; + } else if (use_multiview && msaa3d.mode != RS::VIEWPORT_MSAA_DISABLED && !config->msaa_multiview_supported && !config->rt_msaa_multiview_supported) { + WARN_PRINT_ONCE("Multiview MSAA is not supported on this device."); + msaa3d.mode = RS::VIEWPORT_MSAA_DISABLED; + } + + // We don't create our buffers right away because post effects can be made active at any time and change our buffer configuration. +} + +void RenderSceneBuffersGLES3::_check_render_buffers() { + GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); + GLES3::Config *config = GLES3::Config::get_singleton(); + + ERR_FAIL_COND(view_count == 0); + + bool use_internal_buffer = scaling_3d_mode != RS::VIEWPORT_SCALING_3D_MODE_OFF || glow.glow_enabled; + uint32_t depth_format_size = 3; + bool use_multiview = view_count > 1; + + if ((!use_internal_buffer || internal3d.color != 0) && (msaa3d.mode == RS::VIEWPORT_MSAA_DISABLED || msaa3d.color != 0)) { + // already setup! 
+ return; + } + if (use_internal_buffer) { // Setup our internal buffer. - bool is_transparent = texture_storage->render_target_get_transparent(render_target); - GLuint color_internal_format = is_transparent ? GL_RGBA8 : GL_RGB10_A2; - GLuint color_format = GL_RGBA; - GLuint color_type = is_transparent ? GL_UNSIGNED_BYTE : GL_UNSIGNED_INT_2_10_10_10_REV; - GLenum texture_target = use_multiview ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; // Create our color buffer. @@ -178,7 +222,7 @@ void RenderSceneBuffersGLES3::configure(const RenderSceneBuffersConfiguration *p glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - GLES3::Utilities::get_singleton()->texture_allocated_data(internal3d.color, internal_size.x * internal_size.y * view_count * 4, "3D color texture"); + GLES3::Utilities::get_singleton()->texture_allocated_data(internal3d.color, internal_size.x * internal_size.y * view_count * color_format_size, "3D color texture"); // Create our depth buffer. glGenTextures(1, &internal3d.depth); @@ -195,7 +239,7 @@ void RenderSceneBuffersGLES3::configure(const RenderSceneBuffersConfiguration *p glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - GLES3::Utilities::get_singleton()->texture_allocated_data(internal3d.depth, internal_size.x * internal_size.y * view_count * 3, "3D depth texture"); + GLES3::Utilities::get_singleton()->texture_allocated_data(internal3d.depth, internal_size.x * internal_size.y * view_count * depth_format_size, "3D depth texture"); // Create our internal 3D FBO. // Note that if MSAA is used and our rt_msaa_* extensions are available, this is only used for blitting and effects. 
@@ -221,19 +265,7 @@ void RenderSceneBuffersGLES3::configure(const RenderSceneBuffersConfiguration *p } glBindTexture(texture_target, 0); - glBindFramebuffer(GL_FRAMEBUFFER, 0); - } - - // Check if we support MSAA. - if (msaa3d.mode != RS::VIEWPORT_MSAA_DISABLED && internal_size.x == 0 && internal_size.y == 0) { - // Disable, no size set. - msaa3d.mode = RS::VIEWPORT_MSAA_DISABLED; - } else if (!use_multiview && msaa3d.mode != RS::VIEWPORT_MSAA_DISABLED && !config->msaa_supported && !config->rt_msaa_supported) { - WARN_PRINT_ONCE("MSAA is not supported on this device."); - msaa3d.mode = RS::VIEWPORT_MSAA_DISABLED; - } else if (use_multiview && msaa3d.mode != RS::VIEWPORT_MSAA_DISABLED && !config->msaa_multiview_supported && !config->rt_msaa_multiview_supported) { - WARN_PRINT_ONCE("Multiview MSAA is not supported on this device."); - msaa3d.mode = RS::VIEWPORT_MSAA_DISABLED; + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); } if (msaa3d.mode != RS::VIEWPORT_MSAA_DISABLED) { @@ -255,9 +287,6 @@ void RenderSceneBuffersGLES3::configure(const RenderSceneBuffersConfiguration *p msaa3d.needs_resolve = true; msaa3d.check_fbo_cache = false; - bool is_transparent = texture_storage->render_target_get_transparent(render_target); - GLuint color_internal_format = is_transparent ? GL_RGBA8 : GL_RGB10_A2; - // Create our color buffer. 
glGenRenderbuffers(1, &msaa3d.color); glBindRenderbuffer(GL_RENDERBUFFER, msaa3d.color); @@ -282,20 +311,18 @@ void RenderSceneBuffersGLES3::configure(const RenderSceneBuffersConfiguration *p GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); if (status != GL_FRAMEBUFFER_COMPLETE) { _clear_msaa3d_buffers(); + msaa3d.mode = RS::VIEWPORT_MSAA_DISABLED; WARN_PRINT("Could not create 3D MSAA buffers, status: " + texture_storage->get_framebuffer_error(status)); } glBindRenderbuffer(GL_RENDERBUFFER, 0); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); #if !defined(IOS_ENABLED) && !defined(WEB_ENABLED) } else if (use_multiview && !config->rt_msaa_multiview_supported) { // Render to texture extensions not supported? fall back to MSAA textures through GL_EXT_multiview_texture_multisample. msaa3d.needs_resolve = true; msaa3d.check_fbo_cache = false; - bool is_transparent = texture_storage->render_target_get_transparent(render_target); - GLuint color_internal_format = is_transparent ? GL_RGBA8 : GL_RGB10_A2; - // Create our color buffer. glGenTextures(1, &msaa3d.color); glBindTexture(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, msaa3d.color); @@ -306,7 +333,7 @@ void RenderSceneBuffersGLES3::configure(const RenderSceneBuffersConfiguration *p glTexImage3DMultisample(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, msaa3d.samples, color_internal_format, internal_size.x, internal_size.y, view_count, GL_TRUE); #endif - GLES3::Utilities::get_singleton()->texture_allocated_data(msaa3d.color, internal_size.x * internal_size.y * view_count * 4 * msaa3d.samples, "MSAA 3D color texture"); + GLES3::Utilities::get_singleton()->texture_allocated_data(msaa3d.color, internal_size.x * internal_size.y * view_count * color_format_size * msaa3d.samples, "MSAA 3D color texture"); // Create our depth buffer. 
glGenTextures(1, &msaa3d.depth); @@ -318,7 +345,7 @@ void RenderSceneBuffersGLES3::configure(const RenderSceneBuffersConfiguration *p glTexImage3DMultisample(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, msaa3d.samples, GL_DEPTH_COMPONENT24, internal_size.x, internal_size.y, view_count, GL_TRUE); #endif - GLES3::Utilities::get_singleton()->texture_allocated_data(msaa3d.depth, internal_size.x * internal_size.y * view_count * msaa3d.samples, "MSAA 3D depth texture"); + GLES3::Utilities::get_singleton()->texture_allocated_data(msaa3d.depth, internal_size.x * internal_size.y * view_count * depth_format_size * msaa3d.samples, "MSAA 3D depth texture"); // Create our MSAA 3D FBO. glGenFramebuffers(1, &msaa3d.fbo); @@ -330,11 +357,12 @@ void RenderSceneBuffersGLES3::configure(const RenderSceneBuffersConfiguration *p GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); if (status != GL_FRAMEBUFFER_COMPLETE) { _clear_msaa3d_buffers(); + msaa3d.mode = RS::VIEWPORT_MSAA_DISABLED; WARN_PRINT("Could not create 3D MSAA buffers, status: " + texture_storage->get_framebuffer_error(status)); } glBindTexture(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, 0); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); #endif #if defined(ANDROID_ENABLED) || defined(WEB_ENABLED) // Only supported on OpenGLES! } else if (!use_internal_buffer) { @@ -358,10 +386,11 @@ void RenderSceneBuffersGLES3::configure(const RenderSceneBuffersConfiguration *p GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); if (status != GL_FRAMEBUFFER_COMPLETE) { _clear_msaa3d_buffers(); + msaa3d.mode = RS::VIEWPORT_MSAA_DISABLED; WARN_PRINT("Could not create 3D MSAA framebuffer, status: " + texture_storage->get_framebuffer_error(status)); } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); #endif } else { // HUH? how did we get here? 
@@ -435,13 +464,9 @@ void RenderSceneBuffersGLES3::check_backbuffer(bool p_need_color, bool p_need_de glBindFramebuffer(GL_FRAMEBUFFER, backbuffer3d.fbo); - bool is_transparent = texture_storage->render_target_get_transparent(render_target); - GLuint color_internal_format = is_transparent ? GL_RGBA8 : GL_RGB10_A2; - GLuint color_format = GL_RGBA; - GLuint color_type = is_transparent ? GL_UNSIGNED_BYTE : GL_UNSIGNED_INT_2_10_10_10_REV; - bool use_multiview = view_count > 1 && GLES3::Config::get_singleton()->multiview_supported; GLenum texture_target = use_multiview ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; + uint32_t depth_format_size = 3; if (backbuffer3d.color == 0 && p_need_color) { glGenTextures(1, &backbuffer3d.color); @@ -458,7 +483,7 @@ void RenderSceneBuffersGLES3::check_backbuffer(bool p_need_color, bool p_need_de glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - GLES3::Utilities::get_singleton()->texture_allocated_data(backbuffer3d.color, internal_size.x * internal_size.y * view_count * 4, "3D Back buffer color texture"); + GLES3::Utilities::get_singleton()->texture_allocated_data(backbuffer3d.color, internal_size.x * internal_size.y * view_count * color_format_size, "3D Back buffer color texture"); #ifndef IOS_ENABLED if (use_multiview) { @@ -486,7 +511,7 @@ void RenderSceneBuffersGLES3::check_backbuffer(bool p_need_color, bool p_need_de glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - GLES3::Utilities::get_singleton()->texture_allocated_data(backbuffer3d.depth, internal_size.x * internal_size.y * view_count * 3, "3D back buffer depth texture"); + GLES3::Utilities::get_singleton()->texture_allocated_data(backbuffer3d.depth, internal_size.x * internal_size.y * view_count * depth_format_size, "3D back buffer depth texture"); #ifndef IOS_ENABLED if (use_multiview) { @@ -506,7 
+531,7 @@ void RenderSceneBuffersGLES3::check_backbuffer(bool p_need_color, bool p_need_de } glBindTexture(texture_target, 0); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); } void RenderSceneBuffersGLES3::_clear_back_buffers() { @@ -526,21 +551,101 @@ void RenderSceneBuffersGLES3::_clear_back_buffers() { } } +void RenderSceneBuffersGLES3::set_glow_enabled(bool p_glow_enabled) { + if (glow.glow_enabled != p_glow_enabled) { + glow.glow_enabled = p_glow_enabled; + + // Clear our main buffers, this can impact them. + _clear_msaa3d_buffers(); + _clear_intermediate_buffers(); + } +} + +void RenderSceneBuffersGLES3::check_glow_buffers() { + if (glow.levels[0].color != 0) { + // already have these setup.. + return; + } + + GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); + Size2i level_size = internal_size; + for (int i = 0; i < 4; i++) { + level_size.x = MAX(level_size.x >> 1, 4); + level_size.y = MAX(level_size.y >> 1, 4); + + glow.levels[i].size = level_size; + + // Create our texture + glGenTextures(1, &glow.levels[i].color); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, glow.levels[i].color); + + glTexImage2D(GL_TEXTURE_2D, 0, color_internal_format, level_size.x, level_size.y, 0, color_format, color_type, nullptr); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + + GLES3::Utilities::get_singleton()->texture_allocated_data(glow.levels[i].color, level_size.x * level_size.y * color_format_size, String("Glow buffer ") + String::num_int64(i)); + + // Create our FBO + glGenFramebuffers(1, &glow.levels[i].fbo); + 
glBindFramebuffer(GL_FRAMEBUFFER, glow.levels[i].fbo); + + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, glow.levels[i].color, 0); + + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + if (status != GL_FRAMEBUFFER_COMPLETE) { + WARN_PRINT("Could not create glow buffers, status: " + texture_storage->get_framebuffer_error(status)); + _clear_glow_buffers(); + break; + } + } + + glBindTexture(GL_TEXTURE_2D, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); +} + +void RenderSceneBuffersGLES3::_clear_glow_buffers() { + for (int i = 0; i < 4; i++) { + if (glow.levels[i].fbo != 0) { + glDeleteFramebuffers(1, &glow.levels[i].fbo); + glow.levels[i].fbo = 0; + } + + if (glow.levels[i].color != 0) { + GLES3::Utilities::get_singleton()->texture_free_data(glow.levels[i].color); + glow.levels[i].color = 0; + } + } +} + void RenderSceneBuffersGLES3::free_render_buffer_data() { _clear_msaa3d_buffers(); _clear_intermediate_buffers(); _clear_back_buffers(); + _clear_glow_buffers(); } GLuint RenderSceneBuffersGLES3::get_render_fbo() { GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); GLuint rt_fbo = 0; + _check_render_buffers(); + if (msaa3d.check_fbo_cache) { GLuint color = texture_storage->render_target_get_color(render_target); GLuint depth = texture_storage->render_target_get_depth(render_target); rt_fbo = _rt_get_cached_fbo(color, depth, msaa3d.samples, view_count); + if (rt_fbo == 0) { + // Somehow couldn't obtain this? Just render without MSAA. 
+ rt_fbo = texture_storage->render_target_get_fbo(render_target); + } } else if (msaa3d.fbo != 0) { // We have an MSAA fbo, render to our MSAA buffer return msaa3d.fbo; diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.h b/drivers/gles3/storage/render_scene_buffers_gles3.h index 39aa1fb973..8d03d3438d 100644 --- a/drivers/gles3/storage/render_scene_buffers_gles3.h +++ b/drivers/gles3/storage/render_scene_buffers_gles3.h @@ -33,6 +33,7 @@ #ifdef GLES3_ENABLED +#include "drivers/gles3/effects/glow.h" #include "servers/rendering/storage/render_scene_buffers.h" #include "platform_gl.h" @@ -52,6 +53,12 @@ public: RID render_target; + // Color format details from our render target + GLuint color_internal_format = GL_RGBA8; + GLuint color_format = GL_RGBA; + GLuint color_type = GL_UNSIGNED_BYTE; + uint32_t color_format_size = 4; + struct FBDEF { GLuint color = 0; GLuint depth = 0; @@ -74,31 +81,24 @@ public: FBDEF backbuffer3d; // our back buffer - // Built-in textures used for ping pong image processing and blurring. 
- struct Blur { - RID texture; - - struct Mipmap { - RID texture; - int width; - int height; - GLuint fbo; - }; - - Vector<Mipmap> mipmaps; - }; - - Blur blur[2]; //the second one starts from the first mipmap + // Buffers for our glow implementation + struct GLOW { + bool glow_enabled = false; + GLES3::Glow::GLOWLEVEL levels[4]; + } glow; private: + void _check_render_buffers(); void _clear_msaa3d_buffers(); void _clear_intermediate_buffers(); void _clear_back_buffers(); + void _clear_glow_buffers(); void _rt_attach_textures(GLuint p_color, GLuint p_depth, GLsizei p_samples, uint32_t p_view_count); GLuint _rt_get_cached_fbo(GLuint p_color, GLuint p_depth, GLsizei p_samples, uint32_t p_view_count); public: + RenderSceneBuffersGLES3(); virtual ~RenderSceneBuffersGLES3(); virtual void configure(const RenderSceneBuffersConfiguration *p_config) override; @@ -109,19 +109,45 @@ public: void free_render_buffer_data(); void check_backbuffer(bool p_need_color, bool p_need_depth); // Check if we need to initialize our backbuffer. + void check_glow_buffers(); // Check if we need to initialise our glow buffers. 
GLuint get_render_fbo(); - GLuint get_msaa3d_fbo() const { return msaa3d.fbo; } - GLuint get_msaa3d_color() const { return msaa3d.color; } - GLuint get_msaa3d_depth() const { return msaa3d.depth; } - bool get_msaa_needs_resolve() const { return msaa3d.needs_resolve; } - GLuint get_internal_fbo() const { return internal3d.fbo; } - GLuint get_internal_color() const { return internal3d.color; } - GLuint get_internal_depth() const { return internal3d.depth; } + GLuint get_msaa3d_fbo() { + _check_render_buffers(); + return msaa3d.fbo; + } + GLuint get_msaa3d_color() { + _check_render_buffers(); + return msaa3d.color; + } + GLuint get_msaa3d_depth() { + _check_render_buffers(); + return msaa3d.depth; + } + bool get_msaa_needs_resolve() { + _check_render_buffers(); + return msaa3d.needs_resolve; + } + GLuint get_internal_fbo() { + _check_render_buffers(); + return internal3d.fbo; + } + GLuint get_internal_color() { + _check_render_buffers(); + return internal3d.color; + } + GLuint get_internal_depth() { + _check_render_buffers(); + return internal3d.depth; + } GLuint get_backbuffer_fbo() const { return backbuffer3d.fbo; } GLuint get_backbuffer() const { return backbuffer3d.color; } GLuint get_backbuffer_depth() const { return backbuffer3d.depth; } + bool get_glow_enabled() const { return glow.glow_enabled; } + void set_glow_enabled(bool p_glow_enabled); + const GLES3::Glow::GLOWLEVEL *get_glow_buffers() const { return &glow.levels[0]; } + // Getters _FORCE_INLINE_ RID get_render_target() const { return render_target; } diff --git a/drivers/gles3/storage/texture_storage.cpp b/drivers/gles3/storage/texture_storage.cpp index bd4793f4dc..ffbad4c83b 100644 --- a/drivers/gles3/storage/texture_storage.cpp +++ b/drivers/gles3/storage/texture_storage.cpp @@ -1090,7 +1090,7 @@ Ref<Image> TextureStorage::texture_2d_get(RID p_texture) const { glReadPixels(0, 0, texture->alloc_width, texture->alloc_height, GL_RGBA, GL_UNSIGNED_BYTE, &w[0]); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + 
glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); glDeleteTextures(1, &temp_color_texture); glDeleteFramebuffers(1, &temp_framebuffer); @@ -1162,7 +1162,7 @@ Ref<Image> TextureStorage::texture_2d_layer_get(RID p_texture, int p_layer) cons glReadPixels(0, 0, texture->alloc_width, texture->alloc_height, GL_RGBA, GL_UNSIGNED_BYTE, &w[0]); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); glDeleteTextures(1, &temp_color_texture); glDeleteFramebuffers(1, &temp_framebuffer); @@ -1265,7 +1265,7 @@ Vector<Ref<Image>> TextureStorage::texture_3d_get(RID p_texture) const { Vector<Ref<Image>> ret = _texture_3d_read_framebuffer(texture); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); glDeleteTextures(1, &temp_color_texture); glDeleteFramebuffers(1, &temp_framebuffer); @@ -1920,7 +1920,7 @@ void TextureStorage::update_texture_atlas() { copy_effects->copy_to_rect(t->uv_rect); } } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo); } /* DECAL API */ @@ -1981,10 +1981,25 @@ void TextureStorage::_update_render_target(RenderTarget *rt) { Config *config = Config::get_singleton(); - rt->color_internal_format = rt->is_transparent ? GL_RGBA8 : GL_RGB10_A2; - rt->color_format = GL_RGBA; - rt->color_type = rt->is_transparent ? 
GL_UNSIGNED_BYTE : GL_UNSIGNED_INT_2_10_10_10_REV; - rt->image_format = Image::FORMAT_RGBA8; + if (rt->hdr) { + rt->color_internal_format = GL_RGBA16F; + rt->color_format = GL_RGBA; + rt->color_type = GL_FLOAT; + rt->color_format_size = 8; + rt->image_format = Image::FORMAT_RGBAF; + } else if (rt->is_transparent) { + rt->color_internal_format = GL_RGBA8; + rt->color_format = GL_RGBA; + rt->color_type = GL_UNSIGNED_BYTE; + rt->color_format_size = 4; + rt->image_format = Image::FORMAT_RGBA8; + } else { + rt->color_internal_format = GL_RGB10_A2; + rt->color_format = GL_RGBA; + rt->color_type = GL_UNSIGNED_INT_2_10_10_10_REV; + rt->color_format_size = 4; + rt->image_format = Image::FORMAT_RGBA8; + } glDisable(GL_SCISSOR_TEST); glColorMask(1, 1, 1, 1); @@ -2023,7 +2038,7 @@ void TextureStorage::_update_render_target(RenderTarget *rt) { texture->gl_set_filter(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST); texture->gl_set_repeat(RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); - GLES3::Utilities::get_singleton()->texture_allocated_data(rt->color, rt->size.x * rt->size.y * rt->view_count * 4, "Render target color texture"); + GLES3::Utilities::get_singleton()->texture_allocated_data(rt->color, rt->size.x * rt->size.y * rt->view_count * rt->color_format_size, "Render target color texture"); } #ifndef IOS_ENABLED if (use_multiview) { @@ -2194,7 +2209,7 @@ void GLES3::TextureStorage::check_backbuffer(RenderTarget *rt, const bool uses_s } else { glTexImage2D(texture_target, 0, rt->color_internal_format, rt->size.x, rt->size.y, 0, rt->color_format, rt->color_type, nullptr); } - GLES3::Utilities::get_singleton()->texture_allocated_data(rt->backbuffer, rt->size.x * rt->size.y * rt->view_count * 4, "Render target backbuffer color texture (3D)"); + GLES3::Utilities::get_singleton()->texture_allocated_data(rt->backbuffer, rt->size.x * rt->size.y * rt->view_count * rt->color_format_size, "Render target backbuffer color texture (3D)"); glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER, 
GL_NEAREST); glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); @@ -2548,6 +2563,54 @@ RS::ViewportMSAA TextureStorage::render_target_get_msaa(RID p_render_target) con return rt->msaa; } +void TextureStorage::render_target_set_use_hdr(RID p_render_target, bool p_use_hdr_2d) { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_NULL(rt); + ERR_FAIL_COND(rt->direct_to_screen); + if (p_use_hdr_2d == rt->hdr) { + return; + } + + _clear_render_target(rt); + rt->hdr = p_use_hdr_2d; + _update_render_target(rt); +} + +bool TextureStorage::render_target_is_using_hdr(RID p_render_target) const { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_NULL_V(rt, false); + + return rt->hdr; +} + +GLuint TextureStorage::render_target_get_color_internal_format(RID p_render_target) const { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_NULL_V(rt, GL_RGBA8); + + return rt->color_internal_format; +} + +GLuint TextureStorage::render_target_get_color_format(RID p_render_target) const { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_NULL_V(rt, GL_RGBA); + + return rt->color_format; +} + +GLuint TextureStorage::render_target_get_color_type(RID p_render_target) const { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_NULL_V(rt, GL_UNSIGNED_BYTE); + + return rt->color_type; +} + +uint32_t TextureStorage::render_target_get_color_format_size(RID p_render_target) const { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_NULL_V(rt, 4); + + return rt->color_format_size; +} + void TextureStorage::render_target_request_clear(RID p_render_target, const Color &p_clear_color) { RenderTarget *rt = render_target_owner.get_or_null(p_render_target); ERR_FAIL_NULL(rt); diff --git a/drivers/gles3/storage/texture_storage.h 
b/drivers/gles3/storage/texture_storage.h index 91bb676711..ef310262c7 100644 --- a/drivers/gles3/storage/texture_storage.h +++ b/drivers/gles3/storage/texture_storage.h @@ -347,9 +347,11 @@ struct RenderTarget { GLuint backbuffer = 0; GLuint backbuffer_depth = 0; + bool hdr = false; // For Compatibility this effects both 2D and 3D rendering! GLuint color_internal_format = GL_RGBA8; GLuint color_format = GL_RGBA; GLuint color_type = GL_UNSIGNED_BYTE; + uint32_t color_format_size = 4; Image::Format image_format = Image::FORMAT_RGBA8; GLuint sdf_texture_write = 0; @@ -631,14 +633,19 @@ public: virtual void render_target_set_msaa_needs_resolve(RID p_render_target, bool p_needs_resolve) override {} virtual bool render_target_get_msaa_needs_resolve(RID p_render_target) const override { return false; } virtual void render_target_do_msaa_resolve(RID p_render_target) override {} - virtual void render_target_set_use_hdr(RID p_render_target, bool p_use_hdr_2d) override {} - virtual bool render_target_is_using_hdr(RID p_render_target) const override { return false; } + virtual void render_target_set_use_hdr(RID p_render_target, bool p_use_hdr_2d) override; + virtual bool render_target_is_using_hdr(RID p_render_target) const override; // new void render_target_set_as_unused(RID p_render_target) override { render_target_clear_used(p_render_target); } + GLuint render_target_get_color_internal_format(RID p_render_target) const; + GLuint render_target_get_color_format(RID p_render_target) const; + GLuint render_target_get_color_type(RID p_render_target) const; + uint32_t render_target_get_color_format_size(RID p_render_target) const; + void render_target_request_clear(RID p_render_target, const Color &p_clear_color) override; bool render_target_is_clear_requested(RID p_render_target) override; Color render_target_get_clear_request_color(RID p_render_target) override; diff --git a/drivers/gles3/storage/utilities.cpp b/drivers/gles3/storage/utilities.cpp index 793b3f64f0..c4fbe098cd 
100644 --- a/drivers/gles3/storage/utilities.cpp +++ b/drivers/gles3/storage/utilities.cpp @@ -160,6 +160,8 @@ RS::InstanceType Utilities::get_base_type(RID p_rid) const { return RS::INSTANCE_PARTICLES; } else if (GLES3::ParticlesStorage::get_singleton()->owns_particles_collision(p_rid)) { return RS::INSTANCE_PARTICLES_COLLISION; + } else if (owns_visibility_notifier(p_rid)) { + return RS::INSTANCE_VISIBLITY_NOTIFIER; } return RS::INSTANCE_NONE; } @@ -207,6 +209,9 @@ bool Utilities::free(RID p_rid) { } else if (GLES3::MeshStorage::get_singleton()->owns_skeleton(p_rid)) { GLES3::MeshStorage::get_singleton()->skeleton_free(p_rid); return true; + } else if (owns_visibility_notifier(p_rid)) { + visibility_notifier_free(p_rid); + return true; } else { return false; } @@ -233,32 +238,69 @@ void Utilities::base_update_dependency(RID p_base, DependencyTracker *p_instance } else if (ParticlesStorage::get_singleton()->owns_particles_collision(p_base)) { Dependency *dependency = ParticlesStorage::get_singleton()->particles_collision_get_dependency(p_base); p_instance->update_dependency(dependency); + } else if (owns_visibility_notifier(p_base)) { + VisibilityNotifier *vn = get_visibility_notifier(p_base); + p_instance->update_dependency(&vn->dependency); } } /* VISIBILITY NOTIFIER */ RID Utilities::visibility_notifier_allocate() { - return RID(); + return visibility_notifier_owner.allocate_rid(); } void Utilities::visibility_notifier_initialize(RID p_notifier) { + visibility_notifier_owner.initialize_rid(p_notifier, VisibilityNotifier()); } void Utilities::visibility_notifier_free(RID p_notifier) { + VisibilityNotifier *vn = visibility_notifier_owner.get_or_null(p_notifier); + vn->dependency.deleted_notify(p_notifier); + visibility_notifier_owner.free(p_notifier); } void Utilities::visibility_notifier_set_aabb(RID p_notifier, const AABB &p_aabb) { + VisibilityNotifier *vn = visibility_notifier_owner.get_or_null(p_notifier); + ERR_FAIL_NULL(vn); + vn->aabb = p_aabb; + 
vn->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); } void Utilities::visibility_notifier_set_callbacks(RID p_notifier, const Callable &p_enter_callbable, const Callable &p_exit_callable) { + VisibilityNotifier *vn = visibility_notifier_owner.get_or_null(p_notifier); + ERR_FAIL_NULL(vn); + vn->enter_callback = p_enter_callbable; + vn->exit_callback = p_exit_callable; } AABB Utilities::visibility_notifier_get_aabb(RID p_notifier) const { - return AABB(); + const VisibilityNotifier *vn = visibility_notifier_owner.get_or_null(p_notifier); + ERR_FAIL_NULL_V(vn, AABB()); + return vn->aabb; } void Utilities::visibility_notifier_call(RID p_notifier, bool p_enter, bool p_deferred) { + VisibilityNotifier *vn = visibility_notifier_owner.get_or_null(p_notifier); + ERR_FAIL_NULL(vn); + + if (p_enter) { + if (!vn->enter_callback.is_null()) { + if (p_deferred) { + vn->enter_callback.call_deferred(); + } else { + vn->enter_callback.call(); + } + } + } else { + if (!vn->exit_callback.is_null()) { + if (p_deferred) { + vn->exit_callback.call_deferred(); + } else { + vn->exit_callback.call(); + } + } + } } /* TIMING */ diff --git a/drivers/gles3/storage/utilities.h b/drivers/gles3/storage/utilities.h index b9603b972e..7c3b08717e 100644 --- a/drivers/gles3/storage/utilities.h +++ b/drivers/gles3/storage/utilities.h @@ -39,10 +39,25 @@ namespace GLES3 { +/* VISIBILITY NOTIFIER */ + +struct VisibilityNotifier { + AABB aabb; + Callable enter_callback; + Callable exit_callback; + Dependency dependency; +}; + class Utilities : public RendererUtilities { private: static Utilities *singleton; + /* VISIBILITY NOTIFIER */ + + mutable RID_Owner<VisibilityNotifier> visibility_notifier_owner; + + /* MISC */ + struct ResourceAllocation { #ifdef DEV_ENABLED String name; @@ -149,6 +164,10 @@ public: virtual void base_update_dependency(RID p_base, DependencyTracker *p_instance) override; /* VISIBILITY NOTIFIER */ + + VisibilityNotifier *get_visibility_notifier(RID p_rid) { return 
visibility_notifier_owner.get_or_null(p_rid); }; + bool owns_visibility_notifier(RID p_rid) const { return visibility_notifier_owner.owns(p_rid); }; + virtual RID visibility_notifier_allocate() override; virtual void visibility_notifier_initialize(RID p_notifier) override; virtual void visibility_notifier_free(RID p_notifier) override; diff --git a/drivers/unix/file_access_unix.cpp b/drivers/unix/file_access_unix.cpp index d1e4d207e7..a35d8bfdde 100644 --- a/drivers/unix/file_access_unix.cpp +++ b/drivers/unix/file_access_unix.cpp @@ -395,7 +395,7 @@ Error FileAccessUnix::_set_unix_permissions(const String &p_file, BitField<FileA } bool FileAccessUnix::_get_hidden_attribute(const String &p_file) { -#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__APPLE__) +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__APPLE__) String file = fix_path(p_file); struct stat st = {}; @@ -409,7 +409,7 @@ bool FileAccessUnix::_get_hidden_attribute(const String &p_file) { } Error FileAccessUnix::_set_hidden_attribute(const String &p_file, bool p_hidden) { -#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__APPLE__) +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__APPLE__) String file = fix_path(p_file); struct stat st = {}; diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index f48e6eb7ed..21cf54b4be 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -1113,12 +1113,12 @@ void RenderingDeviceDriverVulkan::_set_object_name(VkObjectType p_object_type, u } Error RenderingDeviceDriverVulkan::initialize(uint32_t p_device_index, uint32_t p_frame_count) { - // Frame count is not required for the Vulkan driver, so we just ignore it. 
- context_device = context_driver->device_get(p_device_index); physical_device = context_driver->physical_device_get(p_device_index); vkGetPhysicalDeviceProperties(physical_device, &physical_device_properties); + frame_count = p_frame_count; + // Copy the queue family properties the context already retrieved. uint32_t queue_family_count = context_driver->queue_family_get_count(p_device_index); queue_family_properties.resize(queue_family_count); @@ -2131,21 +2131,18 @@ RDD::CommandQueueID RenderingDeviceDriverVulkan::command_queue_create(CommandQue return CommandQueueID(command_queue); } -Error RenderingDeviceDriverVulkan::command_queue_execute(CommandQueueID p_cmd_queue, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) { +Error RenderingDeviceDriverVulkan::command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) { DEV_ASSERT(p_cmd_queue.id != 0); + VkResult err; CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id); Queue &device_queue = queue_families[command_queue->queue_family][command_queue->queue_index]; - Fence *fence = (Fence *)(p_signal_fence.id); + Fence *fence = (Fence *)(p_cmd_fence.id); VkFence vk_fence = (fence != nullptr) ? 
fence->vk_fence : VK_NULL_HANDLE; - thread_local LocalVector<VkCommandBuffer> command_buffers; thread_local LocalVector<VkSemaphore> wait_semaphores; - thread_local LocalVector<VkSemaphore> signal_semaphores; thread_local LocalVector<VkPipelineStageFlags> wait_semaphores_stages; - command_buffers.clear(); wait_semaphores.clear(); - signal_semaphores.clear(); wait_semaphores_stages.clear(); if (!command_queue->pending_semaphores_for_execute.is_empty()) { @@ -2158,117 +2155,142 @@ Error RenderingDeviceDriverVulkan::command_queue_execute(CommandQueueID p_cmd_qu command_queue->pending_semaphores_for_execute.clear(); } - for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) { - command_buffers.push_back(VkCommandBuffer(p_cmd_buffers[i].id)); - } - for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) { // FIXME: Allow specifying the stage mask in more detail. wait_semaphores.push_back(VkSemaphore(p_wait_semaphores[i].id)); wait_semaphores_stages.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); } - for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) { - signal_semaphores.push_back(VkSemaphore(p_signal_semaphores[i].id)); - } - - VkSubmitInfo submit_info = {}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.waitSemaphoreCount = wait_semaphores.size(); - submit_info.pWaitSemaphores = wait_semaphores.ptr(); - submit_info.pWaitDstStageMask = wait_semaphores_stages.ptr(); - submit_info.commandBufferCount = command_buffers.size(); - submit_info.pCommandBuffers = command_buffers.ptr(); - submit_info.signalSemaphoreCount = signal_semaphores.size(); - submit_info.pSignalSemaphores = signal_semaphores.ptr(); + if (p_cmd_buffers.size() > 0) { + thread_local LocalVector<VkCommandBuffer> command_buffers; + thread_local LocalVector<VkSemaphore> signal_semaphores; + command_buffers.clear(); + signal_semaphores.clear(); - device_queue.submit_mutex.lock(); - VkResult err = vkQueueSubmit(device_queue.queue, 1, &submit_info, vk_fence); - 
device_queue.submit_mutex.unlock(); - ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED); - - if (fence != nullptr && !command_queue->pending_semaphores_for_fence.is_empty()) { - fence->queue_signaled_from = command_queue; + for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) { + command_buffers.push_back(VkCommandBuffer(p_cmd_buffers[i].id)); + } - // Indicate to the fence that it should release the semaphores that were waited on this submission the next time the fence is waited on. - for (uint32_t i = 0; i < command_queue->pending_semaphores_for_fence.size(); i++) { - command_queue->image_semaphores_for_fences.push_back({ fence, command_queue->pending_semaphores_for_fence[i] }); + for (uint32_t i = 0; i < p_cmd_semaphores.size(); i++) { + signal_semaphores.push_back(VkSemaphore(p_cmd_semaphores[i].id)); } - command_queue->pending_semaphores_for_fence.clear(); - } + VkSemaphore present_semaphore = VK_NULL_HANDLE; + if (p_swap_chains.size() > 0) { + if (command_queue->present_semaphores.is_empty()) { + // Create the semaphores used for presentation if they haven't been created yet. + VkSemaphore semaphore = VK_NULL_HANDLE; + VkSemaphoreCreateInfo create_info = {}; + create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + + for (uint32_t i = 0; i < frame_count; i++) { + err = vkCreateSemaphore(vk_device, &create_info, nullptr, &semaphore); + ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED); + command_queue->present_semaphores.push_back(semaphore); + } + } - return OK; -} + // If a presentation semaphore is required, cycle across the ones available on the queue. It is technically possible + // and valid to reuse the same semaphore for this particular operation, but we create multiple ones anyway in case + // some hardware expects multiple semaphores to be used. 
+ present_semaphore = command_queue->present_semaphores[command_queue->present_semaphore_index]; + signal_semaphores.push_back(present_semaphore); + command_queue->present_semaphore_index = (command_queue->present_semaphore_index + 1) % command_queue->present_semaphores.size(); + } -Error RenderingDeviceDriverVulkan::command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) { - DEV_ASSERT(p_cmd_queue.id != 0); + VkSubmitInfo submit_info = {}; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.waitSemaphoreCount = wait_semaphores.size(); + submit_info.pWaitSemaphores = wait_semaphores.ptr(); + submit_info.pWaitDstStageMask = wait_semaphores_stages.ptr(); + submit_info.commandBufferCount = command_buffers.size(); + submit_info.pCommandBuffers = command_buffers.ptr(); + submit_info.signalSemaphoreCount = signal_semaphores.size(); + submit_info.pSignalSemaphores = signal_semaphores.ptr(); + + device_queue.submit_mutex.lock(); + err = vkQueueSubmit(device_queue.queue, 1, &submit_info, vk_fence); + device_queue.submit_mutex.unlock(); + ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED); + + if (fence != nullptr && !command_queue->pending_semaphores_for_fence.is_empty()) { + fence->queue_signaled_from = command_queue; + + // Indicate to the fence that it should release the semaphores that were waited on this submission the next time the fence is waited on. 
+ for (uint32_t i = 0; i < command_queue->pending_semaphores_for_fence.size(); i++) { + command_queue->image_semaphores_for_fences.push_back({ fence, command_queue->pending_semaphores_for_fence[i] }); + } - CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id); - Queue &device_queue = queue_families[command_queue->queue_family][command_queue->queue_index]; + command_queue->pending_semaphores_for_fence.clear(); + } - thread_local LocalVector<VkSwapchainKHR> swapchains; - thread_local LocalVector<uint32_t> image_indices; - thread_local LocalVector<VkSemaphore> wait_semaphores; - thread_local LocalVector<VkResult> results; - swapchains.clear(); - image_indices.clear(); - for (uint32_t i = 0; i < p_swap_chains.size(); i++) { - SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id); - swapchains.push_back(swap_chain->vk_swapchain); - DEV_ASSERT(swap_chain->image_index < swap_chain->images.size()); - image_indices.push_back(swap_chain->image_index); + if (present_semaphore != VK_NULL_HANDLE) { + // If command buffers were executed, swap chains must wait on the present semaphore used by the command queue. 
+ wait_semaphores.clear(); + wait_semaphores.push_back(present_semaphore); + } } - wait_semaphores.clear(); - for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) { - wait_semaphores.push_back(VkSemaphore(p_wait_semaphores[i].id)); - } + if (p_swap_chains.size() > 0) { + thread_local LocalVector<VkSwapchainKHR> swapchains; + thread_local LocalVector<uint32_t> image_indices; + thread_local LocalVector<VkResult> results; + swapchains.clear(); + image_indices.clear(); - results.resize(swapchains.size()); - - VkPresentInfoKHR present_info = {}; - present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; - present_info.waitSemaphoreCount = wait_semaphores.size(); - present_info.pWaitSemaphores = wait_semaphores.ptr(); - present_info.swapchainCount = swapchains.size(); - present_info.pSwapchains = swapchains.ptr(); - present_info.pImageIndices = image_indices.ptr(); - present_info.pResults = results.ptr(); - device_queue.submit_mutex.lock(); - VkResult err = device_functions.QueuePresentKHR(device_queue.queue, &present_info); - device_queue.submit_mutex.unlock(); - - // Set the index to an invalid value. If any of the swap chains returned out of date, indicate it should be resized the next time it's acquired. 
- bool any_result_is_out_of_date = false; - for (uint32_t i = 0; i < p_swap_chains.size(); i++) { - SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id); - swap_chain->image_index = UINT_MAX; - if (results[i] == VK_ERROR_OUT_OF_DATE_KHR) { - context_driver->surface_set_needs_resize(swap_chain->surface, true); - any_result_is_out_of_date = true; + for (uint32_t i = 0; i < p_swap_chains.size(); i++) { + SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id); + swapchains.push_back(swap_chain->vk_swapchain); + DEV_ASSERT(swap_chain->image_index < swap_chain->images.size()); + image_indices.push_back(swap_chain->image_index); } - } - if (any_result_is_out_of_date || err == VK_ERROR_OUT_OF_DATE_KHR) { - // It is possible for presentation to fail with out of date while acquire might've succeeded previously. This case - // will be considered a silent failure as it can be triggered easily by resizing a window in the OS natively. - return FAILED; - } + results.resize(swapchains.size()); + + VkPresentInfoKHR present_info = {}; + present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + present_info.waitSemaphoreCount = wait_semaphores.size(); + present_info.pWaitSemaphores = wait_semaphores.ptr(); + present_info.swapchainCount = swapchains.size(); + present_info.pSwapchains = swapchains.ptr(); + present_info.pImageIndices = image_indices.ptr(); + present_info.pResults = results.ptr(); + + device_queue.submit_mutex.lock(); + err = device_functions.QueuePresentKHR(device_queue.queue, &present_info); + device_queue.submit_mutex.unlock(); + + // Set the index to an invalid value. If any of the swap chains returned out of date, indicate it should be resized the next time it's acquired. 
+ bool any_result_is_out_of_date = false; + for (uint32_t i = 0; i < p_swap_chains.size(); i++) { + SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id); + swap_chain->image_index = UINT_MAX; + if (results[i] == VK_ERROR_OUT_OF_DATE_KHR) { + context_driver->surface_set_needs_resize(swap_chain->surface, true); + any_result_is_out_of_date = true; + } + } - // Handling VK_SUBOPTIMAL_KHR the same as VK_SUCCESS is completely intentional. - // - // Godot does not currently support native rotation in Android when creating the swap chain. It intentionally uses - // VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR instead of the current transform bits available in the surface capabilities. - // Choosing the transform that leads to optimal presentation leads to distortion that makes the application unusable, - // as the rotation of all the content is not handled at the moment. - // - // VK_SUBOPTIMAL_KHR is accepted as a successful case even if it's not the most efficient solution to work around this - // problem. This behavior should not be changed unless the swap chain recreation uses the current transform bits, as - // it'll lead to very low performance in Android by entering an endless loop where it'll always resize the swap chain - // every frame. + if (any_result_is_out_of_date || err == VK_ERROR_OUT_OF_DATE_KHR) { + // It is possible for presentation to fail with out of date while acquire might've succeeded previously. This case + // will be considered a silent failure as it can be triggered easily by resizing a window in the OS natively. + return FAILED; + } - ERR_FAIL_COND_V(err != VK_SUCCESS && err != VK_SUBOPTIMAL_KHR, FAILED); + // Handling VK_SUBOPTIMAL_KHR the same as VK_SUCCESS is completely intentional. + // + // Godot does not currently support native rotation in Android when creating the swap chain. It intentionally uses + // VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR instead of the current transform bits available in the surface capabilities. 
+ // Choosing the transform that leads to optimal presentation leads to distortion that makes the application unusable, + // as the rotation of all the content is not handled at the moment. + // + // VK_SUBOPTIMAL_KHR is accepted as a successful case even if it's not the most efficient solution to work around this + // problem. This behavior should not be changed unless the swap chain recreation uses the current transform bits, as + // it'll lead to very low performance in Android by entering an endless loop where it'll always resize the swap chain + // every frame. + + ERR_FAIL_COND_V(err != VK_SUCCESS && err != VK_SUBOPTIMAL_KHR, FAILED); + } return OK; } @@ -2278,6 +2300,11 @@ void RenderingDeviceDriverVulkan::command_queue_free(CommandQueueID p_cmd_queue) CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id); + // Erase all the semaphores used for presentation. + for (VkSemaphore semaphore : command_queue->present_semaphores) { + vkDestroySemaphore(vk_device, semaphore, nullptr); + } + // Erase all the semaphores used for image acquisition. 
for (VkSemaphore semaphore : command_queue->image_semaphores) { vkDestroySemaphore(vk_device, semaphore, nullptr); diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 4abaeecd11..70c4cebba5 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -115,6 +115,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { VkDevice vk_device = VK_NULL_HANDLE; RenderingContextDriverVulkan *context_driver = nullptr; RenderingContextDriver::Device context_device = {}; + uint32_t frame_count = 1; VkPhysicalDevice physical_device = VK_NULL_HANDLE; VkPhysicalDeviceProperties physical_device_properties = {}; VkPhysicalDeviceFeatures physical_device_features = {}; @@ -276,6 +277,7 @@ public: // ----- QUEUE ----- private: struct CommandQueue { + LocalVector<VkSemaphore> present_semaphores; LocalVector<VkSemaphore> image_semaphores; LocalVector<SwapChain *> image_semaphores_swap_chains; LocalVector<uint32_t> pending_semaphores_for_execute; @@ -284,12 +286,12 @@ private: LocalVector<Pair<Fence *, uint32_t>> image_semaphores_for_fences; uint32_t queue_family = 0; uint32_t queue_index = 0; + uint32_t present_semaphore_index = 0; }; public: virtual CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) override final; - virtual Error command_queue_execute(CommandQueueID p_cmd_queue, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) override final; - virtual Error command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) override final; + virtual Error command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, 
VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) override final; virtual void command_queue_free(CommandQueueID p_cmd_queue) override final; private: diff --git a/drivers/windows/file_access_windows.cpp b/drivers/windows/file_access_windows.cpp index aae06505cd..dd8bceb573 100644 --- a/drivers/windows/file_access_windows.cpp +++ b/drivers/windows/file_access_windows.cpp @@ -47,7 +47,7 @@ #include <wchar.h> #ifdef _MSC_VER -#define S_ISREG(m) ((m)&_S_IFREG) +#define S_ISREG(m) ((m) & _S_IFREG) #endif void FileAccessWindows::check_errors() const { diff --git a/drivers/winmidi/midi_driver_winmidi.cpp b/drivers/winmidi/midi_driver_winmidi.cpp index cdbab489c4..07f0226c5d 100644 --- a/drivers/winmidi/midi_driver_winmidi.cpp +++ b/drivers/winmidi/midi_driver_winmidi.cpp @@ -36,7 +36,7 @@ void MIDIDriverWinMidi::read(HMIDIIN hMidiIn, UINT wMsg, DWORD_PTR dwInstance, DWORD_PTR dwParam1, DWORD_PTR dwParam2) { if (wMsg == MIM_DATA) { - receive_input_packet((uint64_t)dwParam2, (uint8_t *)&dwParam1, 3); + receive_input_packet((int)dwInstance, (uint64_t)dwParam2, (uint8_t *)&dwParam1, 3); } } @@ -44,7 +44,7 @@ Error MIDIDriverWinMidi::open() { for (UINT i = 0; i < midiInGetNumDevs(); i++) { HMIDIIN midi_in; - MMRESULT res = midiInOpen(&midi_in, i, (DWORD_PTR)read, (DWORD_PTR)this, CALLBACK_FUNCTION); + MMRESULT res = midiInOpen(&midi_in, i, (DWORD_PTR)read, (DWORD_PTR)i, CALLBACK_FUNCTION); if (res == MMSYSERR_NOERROR) { midiInStart(midi_in); connected_sources.insert(i, midi_in); |