-rw-r--r--  drivers/d3d12/rendering_device_driver_d3d12.cpp  922
-rw-r--r--  drivers/d3d12/rendering_device_driver_d3d12.h  31
-rw-r--r--  drivers/vulkan/rendering_device_driver_vulkan.cpp  121
-rw-r--r--  drivers/vulkan/rendering_device_driver_vulkan.h  1
-rw-r--r--  servers/rendering/rendering_device.cpp  354
-rw-r--r--  servers/rendering/rendering_device.h  39
-rw-r--r--  servers/rendering/rendering_device_driver.cpp  2
-rw-r--r--  servers/rendering/rendering_device_driver.h  26
-rw-r--r--  servers/rendering/rendering_device_graph.cpp  141
-rw-r--r--  servers/rendering/rendering_device_graph.h  26
10 files changed, 1207 insertions, 456 deletions
diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp
index 9407826ebf..08ee12991a 100644
--- a/drivers/d3d12/rendering_device_driver_d3d12.cpp
+++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp
@@ -538,15 +538,6 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso
#endif
ResourceInfo::States *res_states = p_resource->states_ptr;
-
- if (p_new_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
- if (unlikely(!res_states->xfamily_fallback.subresources_dirty.is_empty())) {
- uint32_t subres_qword = p_subresource / 64;
- uint64_t subres_mask = (uint64_t(1) << (p_subresource % 64));
- res_states->xfamily_fallback.subresources_dirty[subres_qword] |= subres_mask;
- }
- }
-
D3D12_RESOURCE_STATES *curr_state = &res_states->subresource_states[p_subresource];
// Transitions can be considered redundant if the current state has all the bits of the new state.
@@ -869,7 +860,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
// but also if you give a rounded size at that point because it will extend beyond the
// memory of the resource. Therefore, it seems the only way is to create it with a
// rounded size.
- CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(STEPIFY(p_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT));
+ CD3DX12_RESOURCE_DESC1 resource_desc = CD3DX12_RESOURCE_DESC1::Buffer(STEPIFY(p_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT));
if (p_usage.has_flag(RDD::BUFFER_USAGE_STORAGE_BIT)) {
resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
} else {
@@ -878,7 +869,6 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
D3D12MA::ALLOCATION_DESC allocation_desc = {};
allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
- D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
switch (p_allocation_type) {
case MEMORY_ALLOCATION_TYPE_CPU: {
bool is_src = p_usage.has_flag(BUFFER_USAGE_TRANSFER_FROM_BIT);
@@ -886,7 +876,6 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
if (is_src && !is_dst) {
// Looks like a staging buffer: CPU maps, writes sequentially, then GPU copies to VRAM.
allocation_desc.HeapType = D3D12_HEAP_TYPE_UPLOAD;
- initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;
}
if (is_dst && !is_src) {
// Looks like a readback buffer: GPU copies from VRAM, then CPU maps and reads.
@@ -904,13 +893,27 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
ComPtr<ID3D12Resource> buffer;
ComPtr<D3D12MA::Allocation> allocation;
- HRESULT res = allocator->CreateResource(
- &allocation_desc,
- &resource_desc,
- initial_state,
- nullptr,
- allocation.GetAddressOf(),
- IID_PPV_ARGS(buffer.GetAddressOf()));
+ HRESULT res;
+ if (barrier_capabilities.enhanced_barriers_supported) {
+ res = allocator->CreateResource3(
+ &allocation_desc,
+ &resource_desc,
+ D3D12_BARRIER_LAYOUT_UNDEFINED,
+ nullptr,
+ 0,
+ nullptr,
+ allocation.GetAddressOf(),
+ IID_PPV_ARGS(buffer.GetAddressOf()));
+ } else {
+ res = allocator->CreateResource(
+ &allocation_desc,
+ reinterpret_cast<const D3D12_RESOURCE_DESC *>(&resource_desc),
+ D3D12_RESOURCE_STATE_COMMON,
+ nullptr,
+ allocation.GetAddressOf(),
+ IID_PPV_ARGS(buffer.GetAddressOf()));
+ }
+
ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), BufferID(), "Can't create buffer of size: " + itos(p_size) + ", error " + vformat("0x%08ux", (uint64_t)res) + ".");
// Bookkeep.
@@ -919,11 +922,10 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
buf_info->resource = buffer.Get();
buf_info->owner_info.resource = buffer;
buf_info->owner_info.allocation = allocation;
- buf_info->owner_info.states.subresource_states.push_back(initial_state);
+ buf_info->owner_info.states.subresource_states.push_back(D3D12_RESOURCE_STATE_COMMON);
buf_info->states_ptr = &buf_info->owner_info.states;
buf_info->size = p_size;
buf_info->flags.usable_as_uav = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
- buf_info->flags.is_for_upload = allocation_desc.HeapType == D3D12_HEAP_TYPE_UPLOAD;
return BufferID(buf_info);
}
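The `barrier_capabilities.enhanced_barriers_supported` flag this hunk branches on is a device capability established at driver initialization, outside this diff. As a point of reference, a minimal sketch of how both capabilities used by this patch can be queried (`D3D12_FEATURE_D3D12_OPTIONS12` is the documented feature slot; the surrounding function is illustrative, not the driver's actual code):

```cpp
#include <d3d12.h>

// Sketch: query enhanced barriers and relaxed format casting support for a
// created device. D3D12_FEATURE_DATA_D3D12_OPTIONS12 requires a recent SDK.
static bool query_enhanced_barriers_support(ID3D12Device *p_device, bool &r_relaxed_casting) {
	D3D12_FEATURE_DATA_D3D12_OPTIONS12 options12 = {};
	if (FAILED(p_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12)))) {
		r_relaxed_casting = false;
		return false; // Older runtime: neither feature is available.
	}
	r_relaxed_casting = options12.RelaxedFormatCastingSupported;
	return options12.EnhancedBarriersSupported;
}
```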
@@ -1052,8 +1054,7 @@ UINT RenderingDeviceDriverD3D12::_compute_plane_slice(DataFormat p_format, BitFi
if (p_aspect_bits.has_flag(TEXTURE_ASPECT_DEPTH_BIT)) {
DEV_ASSERT(aspect == TEXTURE_ASPECT_MAX);
aspect = TEXTURE_ASPECT_DEPTH;
- }
- if (p_aspect_bits.has_flag(TEXTURE_ASPECT_STENCIL_BIT)) {
+ } else if (p_aspect_bits.has_flag(TEXTURE_ASPECT_STENCIL_BIT)) {
DEV_ASSERT(aspect == TEXTURE_ASPECT_MAX);
aspect = TEXTURE_ASPECT_STENCIL;
}
@@ -1080,6 +1081,10 @@ UINT RenderingDeviceDriverD3D12::_compute_plane_slice(DataFormat p_format, Textu
}
}
+UINT RenderingDeviceDriverD3D12::_compute_subresource_from_layers(TextureInfo *p_texture, const TextureSubresourceLayers &p_layers, uint32_t p_layer_offset) {
+ return D3D12CalcSubresource(p_layers.mipmap, p_layers.base_layer + p_layer_offset, _compute_plane_slice(p_texture->format, p_layers.aspect), p_texture->desc.MipLevels, p_texture->desc.ArraySize());
+}
+
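`D3D12CalcSubresource` (from `d3dx12.h`) flattens a (mip, array layer, plane) triple into the single index D3D12 uses to address subresources. A standalone restatement of its formula, for reference:

```cpp
#include <cstdint>
#include <cstdio>

// Subresources are ordered mip-major within each array slice, and array
// slices within each plane: index = mip + layer * mips + plane * mips * layers.
static uint32_t calc_subresource(uint32_t p_mip, uint32_t p_layer, uint32_t p_plane, uint32_t p_mip_levels, uint32_t p_array_size) {
	return p_mip + p_layer * p_mip_levels + p_plane * p_mip_levels * p_array_size;
}

int main() {
	// Depth-stencil texture with 4 mips and 2 layers: plane 0 is depth, plane 1 is stencil.
	// The stencil plane of mip 1, layer 1 is subresource 1 + 1*4 + 1*4*2 = 13.
	printf("%u\n", calc_subresource(1, 1, 1, 4, 2));
	return 0;
}
```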
void RenderingDeviceDriverD3D12::_discard_texture_subresources(const TextureInfo *p_tex_info, const CommandBufferInfo *p_cmd_buf_info) {
uint32_t planes = 1;
if ((p_tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {
@@ -1117,6 +1122,64 @@ void RenderingDeviceDriverD3D12::_discard_texture_subresources(const TextureInfo
}
}
+bool RenderingDeviceDriverD3D12::_unordered_access_supported_by_format(DataFormat p_format) {
+ switch (p_format) {
+ case DATA_FORMAT_R4G4_UNORM_PACK8:
+ case DATA_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case DATA_FORMAT_B4G4R4A4_UNORM_PACK16:
+ case DATA_FORMAT_R5G6B5_UNORM_PACK16:
+ case DATA_FORMAT_B5G6R5_UNORM_PACK16:
+ case DATA_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case DATA_FORMAT_B5G5R5A1_UNORM_PACK16:
+ case DATA_FORMAT_A1R5G5B5_UNORM_PACK16:
+ case DATA_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case DATA_FORMAT_A8B8G8R8_SNORM_PACK32:
+ case DATA_FORMAT_A8B8G8R8_USCALED_PACK32:
+ case DATA_FORMAT_A8B8G8R8_SSCALED_PACK32:
+ case DATA_FORMAT_A8B8G8R8_UINT_PACK32:
+ case DATA_FORMAT_A8B8G8R8_SINT_PACK32:
+ case DATA_FORMAT_A8B8G8R8_SRGB_PACK32:
+ case DATA_FORMAT_A2R10G10B10_UNORM_PACK32:
+ case DATA_FORMAT_A2R10G10B10_SNORM_PACK32:
+ case DATA_FORMAT_A2R10G10B10_USCALED_PACK32:
+ case DATA_FORMAT_A2R10G10B10_SSCALED_PACK32:
+ case DATA_FORMAT_A2R10G10B10_UINT_PACK32:
+ case DATA_FORMAT_A2R10G10B10_SINT_PACK32:
+ case DATA_FORMAT_A2B10G10R10_UNORM_PACK32:
+ case DATA_FORMAT_A2B10G10R10_SNORM_PACK32:
+ case DATA_FORMAT_A2B10G10R10_USCALED_PACK32:
+ case DATA_FORMAT_A2B10G10R10_SSCALED_PACK32:
+ case DATA_FORMAT_A2B10G10R10_UINT_PACK32:
+ case DATA_FORMAT_A2B10G10R10_SINT_PACK32:
+ case DATA_FORMAT_B10G11R11_UFLOAT_PACK32:
+ case DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32:
+ case DATA_FORMAT_X8_D24_UNORM_PACK32:
+ case DATA_FORMAT_R10X6_UNORM_PACK16:
+ case DATA_FORMAT_R10X6G10X6_UNORM_2PACK16:
+ case DATA_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
+ case DATA_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
+ case DATA_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
+ case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
+ case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
+ case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
+ case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
+ case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
+ case DATA_FORMAT_R12X4_UNORM_PACK16:
+ case DATA_FORMAT_R12X4G12X4_UNORM_2PACK16:
+ case DATA_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
+ case DATA_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
+ case DATA_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
+ case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
+ case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
+ case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
+ case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
+ case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
+ return false;
+ default:
+ return true;
+ }
+}
+
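The table above hardcodes the formats for which D3D12 offers no typed UAV support, so `D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS` must not be requested for them. The same information can also be obtained per format from the driver at runtime; a hedged sketch using the standard feature query (`D3D12_FEATURE_FORMAT_SUPPORT` is the documented API, the wrapper function is illustrative):

```cpp
#include <d3d12.h>

// Sketch: ask the device whether a DXGI format supports typed UAV views,
// which is what D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS ultimately needs.
static bool format_supports_typed_uav(ID3D12Device *p_device, DXGI_FORMAT p_format) {
	D3D12_FEATURE_DATA_FORMAT_SUPPORT support = { p_format, D3D12_FORMAT_SUPPORT1_NONE, D3D12_FORMAT_SUPPORT2_NONE };
	if (FAILED(p_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &support, sizeof(support)))) {
		return false;
	}
	return (support.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW) != 0;
}
```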
RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p_format, const TextureView &p_view) {
// Using D3D12_RESOURCE_DESC1. Thanks to the layout, it's sliceable down to D3D12_RESOURCE_DESC if needed.
CD3DX12_RESOURCE_DESC1 resource_desc = {};
@@ -1137,12 +1200,10 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p
resource_desc.Format = RD_TO_D3D12_FORMAT[p_format.format].family;
// If views of different families are wanted, special setup is needed for proper sharing among them.
- // Two options here:
- // 1. If the driver reports relaxed casting is, leverage its new extended resource creation API (via D3D12MA).
- // 2. Otherwise, fall back to an approach based on having multiple versions of the resource and copying as needed. [[CROSS_FAMILY_FALLBACK]]
+ // If the driver reports relaxed casting is supported, leverage its new extended resource creation API (via D3D12MA).
if (p_format.shareable_formats.size() && format_capabilities.relaxed_casting_supported) {
relaxed_casting_available = true;
- relaxed_casting_formats = ALLOCA_ARRAY(DXGI_FORMAT, p_format.shareable_formats.size());
+ relaxed_casting_formats = ALLOCA_ARRAY(DXGI_FORMAT, p_format.shareable_formats.size() + 1);
relaxed_casting_formats[0] = RD_TO_D3D12_FORMAT[p_format.format].general_format;
relaxed_casting_format_count++;
}
@@ -1156,9 +1217,9 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p
if (RD_TO_D3D12_FORMAT[curr_format].family != RD_TO_D3D12_FORMAT[p_format.format].family) {
cross_family_sharing = true;
- if (!relaxed_casting_available) {
- break;
- }
+ }
+
+ if (relaxed_casting_available) {
relaxed_casting_formats[relaxed_casting_format_count] = RD_TO_D3D12_FORMAT[curr_format].general_format;
relaxed_casting_format_count++;
}
@@ -1185,7 +1246,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p
if ((p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) {
resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
} else {
- if ((p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_TO_BIT)) {
+ if ((p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_TO_BIT) && _unordered_access_supported_by_format(p_format.format)) {
resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; // For clearing via UAV.
}
}
@@ -1242,17 +1303,19 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p
D3D12_CLEAR_VALUE *clear_value_ptr = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? &clear_value : nullptr;
{
HRESULT res = E_FAIL;
- if (cross_family_sharing && relaxed_casting_available) {
+ if (barrier_capabilities.enhanced_barriers_supported || (cross_family_sharing && relaxed_casting_available)) {
+ // Create with undefined layout if enhanced barriers are supported. Leave as common otherwise for interop with legacy barriers.
+ D3D12_BARRIER_LAYOUT initial_layout = barrier_capabilities.enhanced_barriers_supported ? D3D12_BARRIER_LAYOUT_UNDEFINED : D3D12_BARRIER_LAYOUT_COMMON;
res = allocator->CreateResource3(
&allocation_desc,
&resource_desc,
- D3D12_BARRIER_LAYOUT_COMMON, // Needed for barrier interop.
+ initial_layout,
clear_value_ptr,
relaxed_casting_format_count,
relaxed_casting_formats,
allocation.GetAddressOf(),
IID_PPV_ARGS(main_texture.GetAddressOf()));
- initial_state = D3D12_RESOURCE_STATE_COMMON; // Needed for barrier interop.
+ initial_state = D3D12_RESOURCE_STATE_COMMON;
} else {
res = allocator->CreateResource(
&allocation_desc,
@@ -1353,7 +1416,10 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p
tex_info->mipmaps = resource_desc.MipLevels;
tex_info->view_descs.srv = srv_desc;
tex_info->view_descs.uav = uav_desc;
- if ((p_format.usage_bits & (TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_COLOR_ATTACHMENT_BIT))) {
+
+ if (!barrier_capabilities.enhanced_barriers_supported && (p_format.usage_bits & (TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_COLOR_ATTACHMENT_BIT))) {
+ // Fall back to clearing resources when they're first used in a uniform set. Not necessary if enhanced barriers
+ // are supported, as the discard flag will be used instead when transitioning from an undefined layout.
textures_pending_clear.add(&tex_info->pending_clear);
}
@@ -1380,45 +1446,8 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex
ComPtr<ID3D12Resource> new_texture;
ComPtr<D3D12MA::Allocation> new_allocation;
- ID3D12Resource *resource = nullptr;
+ ID3D12Resource *resource = owner_tex_info->resource;
CD3DX12_RESOURCE_DESC new_tex_resource_desc = owner_tex_info->desc;
- bool cross_family = RD_TO_D3D12_FORMAT[p_view.format].family != RD_TO_D3D12_FORMAT[owner_tex_info->format].family;
- if (cross_family && !format_capabilities.relaxed_casting_supported) {
- // [[CROSS_FAMILY_FALLBACK]].
- // We have to create a new texture of the alternative format.
-
- D3D12MA::ALLOCATION_DESC allocation_desc = {};
- allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
- allocation_desc.ExtraHeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;
-
- if (p_slice_type != -1) {
-#ifdef DEV_ENABLED
- // Actual slicing is not contemplated. If ever needed, let's at least realize.
- if (p_slice_type != -1) {
- uint32_t new_texture_subresorce_count = owner_tex_info->mipmaps * owner_tex_info->layers;
- uint32_t slice_subresorce_count = p_mipmaps * p_layers;
- DEV_ASSERT(new_texture_subresorce_count == slice_subresorce_count);
- }
-#endif
- new_tex_resource_desc.DepthOrArraySize = p_layers;
- new_tex_resource_desc.MipLevels = p_mipmaps;
- }
- new_tex_resource_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].family;
- new_tex_resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE; // Alternative formats can only be used as SRVs.
-
- HRESULT res = allocator->CreateResource(
- &allocation_desc,
- &new_tex_resource_desc,
- D3D12_RESOURCE_STATE_COPY_DEST,
- nullptr,
- new_allocation.GetAddressOf(),
- IID_PPV_ARGS(new_texture.GetAddressOf()));
- ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), TextureID(), vformat("D3D12MA::CreateResource failed with error 0x%08ux.", (uint64_t)res));
-
- resource = new_texture.Get();
- } else {
- resource = owner_tex_info->resource;
- }
// Describe views.
@@ -1528,58 +1557,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex
TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator);
tex_info->resource = resource;
- if (new_texture.Get()) {
- // [[CROSS_FAMILY_FALLBACK]].
-
- DEV_ASSERT(cross_family && !format_capabilities.relaxed_casting_supported);
-
- uint32_t new_texture_subresorce_count = owner_tex_info->mipmaps * owner_tex_info->layers;
-#ifdef DEV_ENABLED
- // Actual slicing is not contemplated. If ever needed, let's at least realize.
- if (p_slice_type != -1) {
- uint32_t slice_subresorce_count = p_mipmaps * p_layers;
- DEV_ASSERT(new_texture_subresorce_count == slice_subresorce_count);
- }
-#endif
-
- tex_info->owner_info.resource = new_texture;
- tex_info->owner_info.allocation = new_allocation;
- tex_info->owner_info.states.subresource_states.resize(new_texture_subresorce_count);
- for (uint32_t i = 0; i < tex_info->owner_info.states.subresource_states.size(); i++) {
- tex_info->owner_info.states.subresource_states[i] = D3D12_RESOURCE_STATE_COPY_DEST;
- }
- tex_info->states_ptr = &tex_info->owner_info.states;
-
- ResourceInfo::States::CrossFamillyFallback &xfamily = owner_tex_info->owner_info.states.xfamily_fallback;
- if (xfamily.subresources_dirty.is_empty()) {
- uint32_t items_required = STEPIFY(new_texture_subresorce_count, sizeof(uint64_t)) / sizeof(uint64_t);
- xfamily.subresources_dirty.resize(items_required);
- memset(xfamily.subresources_dirty.ptr(), 255, sizeof(uint64_t) * xfamily.subresources_dirty.size());
-
- // Create buffer for non-direct copy if it's a format not supporting reinterpret-copy.
- DEV_ASSERT(!xfamily.interim_buffer.Get());
- if (owner_tex_info->format == DATA_FORMAT_R16_UINT && p_view.format == DATA_FORMAT_R4G4B4A4_UNORM_PACK16) {
- uint32_t row_pitch = STEPIFY(owner_tex_info->desc.Width * sizeof(uint16_t), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
- uint32_t buffer_size = sizeof(uint16_t) * row_pitch * owner_tex_info->desc.Height * owner_tex_info->desc.Depth();
- CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(STEPIFY(buffer_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT));
- resource_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
-
- D3D12MA::ALLOCATION_DESC allocation_desc = {};
- allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
-
- HRESULT res = allocator->CreateResource(
- &allocation_desc,
- &resource_desc,
- D3D12_RESOURCE_STATE_COPY_SOURCE, // Makes the code that makes the copy easier.
- nullptr,
- xfamily.interim_buffer_alloc.GetAddressOf(),
- IID_PPV_ARGS(xfamily.interim_buffer.GetAddressOf()));
- ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), TextureID(), "D3D12MA::CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");
- }
- }
- } else {
- tex_info->states_ptr = owner_tex_info->states_ptr;
- }
+ tex_info->states_ptr = owner_tex_info->states_ptr;
tex_info->format = p_view.format;
tex_info->desc = new_tex_resource_desc;
if (p_slice_type == -1) {
@@ -1710,6 +1688,28 @@ BitField<RDD::TextureUsageBits> RenderingDeviceDriverD3D12::texture_get_usages_s
return supported;
}
+bool RenderingDeviceDriverD3D12::texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) {
+ r_raw_reinterpretation = false;
+
+ if (format_capabilities.relaxed_casting_supported) {
+ // Relaxed casting is supported; there should be no need to check for format family compatibility.
+ return true;
+ } else {
+ TextureInfo *tex_info = (TextureInfo *)p_texture.id;
+ if (tex_info->format == DATA_FORMAT_R16_UINT && p_format == DATA_FORMAT_R4G4B4A4_UNORM_PACK16) {
+ // Specific cases that require buffer reinterpretation.
+ r_raw_reinterpretation = true;
+ return false;
+ } else if (RD_TO_D3D12_FORMAT[tex_info->format].family != RD_TO_D3D12_FORMAT[p_format].family) {
+ // Format family is different but copying resources directly is possible.
+ return false;
+ } else {
+ // Format family is the same and the view can just cast the format.
+ return true;
+ }
+ }
+}
+
/*****************/
/**** SAMPLER ****/
/*****************/
@@ -1842,20 +1842,328 @@ void RenderingDeviceDriverD3D12::vertex_format_free(VertexFormatID p_vertex_form
/**** BARRIERS ****/
/******************/
-void RenderingDeviceDriverD3D12::command_pipeline_barrier(
- CommandBufferID p_cmd_buffer,
- BitField<RDD::PipelineStageBits> p_src_stages,
- BitField<RDD::PipelineStageBits> p_dst_stages,
+static D3D12_BARRIER_ACCESS _rd_texture_layout_access_mask(RDD::TextureLayout p_texture_layout) {
+ switch (p_texture_layout) {
+ case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
+ case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_RENDER_TARGET;
+ case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
+ case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
+ case RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
+ case RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_COPY_SOURCE;
+ case RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_COPY_DEST;
+ case RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
+ case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_RESOLVE_DEST;
+ case RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
+ default:
+ return D3D12_BARRIER_ACCESS_NO_ACCESS;
+ }
+}
+
+static void _rd_access_to_d3d12_and_mask(BitField<RDD::BarrierAccessBits> p_access, RDD::TextureLayout p_texture_layout, D3D12_BARRIER_ACCESS &r_access, D3D12_BARRIER_SYNC &r_sync_mask) {
+ r_access = D3D12_BARRIER_ACCESS_COMMON;
+ r_sync_mask = D3D12_BARRIER_SYNC_NONE;
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT;
+ r_sync_mask |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_INDEX_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_INDEX_BUFFER;
+ r_sync_mask |= D3D12_BARRIER_SYNC_INDEX_INPUT | D3D12_BARRIER_SYNC_DRAW;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_VERTEX_BUFFER;
+ r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_UNIFORM_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER;
+ r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING |
+ D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_INPUT_ATTACHMENT_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_RENDER_TARGET;
+ r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_RENDER_TARGET;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_COPY_SOURCE;
+ r_sync_mask |= D3D12_BARRIER_SYNC_COPY;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_WRITE_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_COPY_DEST;
+ r_sync_mask |= D3D12_BARRIER_SYNC_COPY;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
+ r_sync_mask |= D3D12_BARRIER_SYNC_RESOLVE;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_RESOLVE_DEST;
+ r_sync_mask |= D3D12_BARRIER_SYNC_RESOLVE;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
+ r_sync_mask |= D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_ALL_SHADING;
+ }
+
+ const D3D12_BARRIER_SYNC unordered_access_mask = D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING |
+ D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING | D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW;
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_STORAGE_CLEAR_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
+ r_sync_mask |= unordered_access_mask;
+ }
+
+ // These access bits are only compatible with certain layouts, unlike in Vulkan, where they imply specific operations in the same layout.
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
+ r_sync_mask |= unordered_access_mask;
+ } else if (p_access.has_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT)) {
+ if (p_texture_layout == RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL) {
+ // Unordered access must be enforced if the texture is using the storage layout.
+ r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
+ r_sync_mask |= unordered_access_mask;
+ } else {
+ r_access |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
+ r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING | D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING;
+ }
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT) || p_access.has_flag(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_RENDER_TARGET;
+ r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_RENDER_TARGET;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
+ r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_DEPTH_STENCIL;
+ } else if (p_access.has_flag(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
+ r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_DEPTH_STENCIL;
+ }
+}
+
+static void _rd_stages_to_d3d12(BitField<RDD::PipelineStageBits> p_stages, D3D12_BARRIER_SYNC &r_sync) {
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
+ r_sync = D3D12_BARRIER_SYNC_ALL;
+ } else {
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_INDEX_INPUT;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) {
+ // There's no granularity for tessellation or geometry stages. The specification defines it as part of vertex shading.
+ r_sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT)) {
+ // Covers both read and write operations for depth stencil.
+ r_sync |= D3D12_BARRIER_SYNC_DEPTH_STENCIL;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_RENDER_TARGET;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_COPY_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_COPY;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_RESOLVE;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_DRAW;
+ }
+ }
+}
+
+static void _rd_stages_and_access_to_d3d12(BitField<RDD::PipelineStageBits> p_stages, RDD::TextureLayout p_texture_layout, BitField<RDD::BarrierAccessBits> p_access, D3D12_BARRIER_SYNC &r_sync, D3D12_BARRIER_ACCESS &r_access) {
+ D3D12_BARRIER_SYNC sync_mask;
+ r_sync = D3D12_BARRIER_SYNC_NONE;
+
+ if (p_texture_layout == RDD::TEXTURE_LAYOUT_UNDEFINED) {
+ // Undefined texture layouts are a special case where no access bits or synchronization scopes are allowed.
+ r_access = D3D12_BARRIER_ACCESS_NO_ACCESS;
+ return;
+ }
+
+ // Convert access bits to the D3D12 barrier access bits.
+ _rd_access_to_d3d12_and_mask(p_access, p_texture_layout, r_access, sync_mask);
+
+ if (p_texture_layout != RDD::TEXTURE_LAYOUT_MAX) {
+ // Only allow the access bits compatible with the texture layout.
+ r_access &= _rd_texture_layout_access_mask(p_texture_layout);
+ }
+
+ // Convert stage bits to the D3D12 synchronization scope bits.
+ _rd_stages_to_d3d12(p_stages, r_sync);
+
+ // Only enable synchronization stages compatible with the access bits that were used.
+ r_sync &= sync_mask;
+
+ if (r_sync == D3D12_BARRIER_SYNC_NONE) {
+ if (p_access.is_empty()) {
+ // No valid synchronization scope was defined and no access in particular is required.
+ r_access = D3D12_BARRIER_ACCESS_NO_ACCESS;
+ } else {
+ // Access is required but the synchronization scope wasn't compatible. We fall back to the global synchronization scope and access.
+ r_sync = D3D12_BARRIER_SYNC_ALL;
+ r_access = D3D12_BARRIER_ACCESS_COMMON;
+ }
+ }
+}
+
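The net effect of the helpers above: each requested access bit contributes both a D3D12 access flag and a mask of synchronization stages it is legal with; the requested stages are then clipped to that mask, and a required access whose scope clips to nothing falls back to the global `D3D12_BARRIER_SYNC_ALL`/`D3D12_BARRIER_ACCESS_COMMON` pair. A toy restatement of that clipping logic with plain bitmasks (not the driver's enums):

```cpp
#include <cstdint>
#include <cstdio>

enum : uint32_t { SYNC_NONE = 0, SYNC_COPY = 1u << 0, SYNC_COMPUTE = 1u << 1, SYNC_ALL = 0xFFFFFFFFu };

// Clip the requested stages to the stages compatible with the access; if an
// access is required but no compatible stage remains, widen to the global scope.
static uint32_t clip_sync(uint32_t p_requested_sync, uint32_t p_access_sync_mask, bool p_access_required) {
	uint32_t sync = p_requested_sync & p_access_sync_mask;
	if (sync == SYNC_NONE && p_access_required) {
		sync = SYNC_ALL;
	}
	return sync;
}

int main() {
	// A copy access paired with a compute-only scope is incompatible, so the
	// conversion falls back to the global synchronization scope.
	printf("0x%x\n", clip_sync(SYNC_COMPUTE, SYNC_COPY, true)); // Prints 0xffffffff.
	return 0;
}
```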
+static D3D12_BARRIER_LAYOUT _rd_texture_layout_to_d3d12_barrier_layout(RDD::TextureLayout p_texture_layout) {
+ switch (p_texture_layout) {
+ case RDD::TEXTURE_LAYOUT_UNDEFINED:
+ return D3D12_BARRIER_LAYOUT_UNDEFINED;
+ case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS;
+ case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_RENDER_TARGET;
+ case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE;
+ case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ;
+ case RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_SHADER_RESOURCE;
+ case RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_COPY_SOURCE;
+ case RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_COPY_DEST;
+ case RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE;
+ case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_RESOLVE_DEST;
+ case RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE;
+ default:
+ DEV_ASSERT(false && "Unknown texture layout.");
+ return D3D12_BARRIER_LAYOUT_UNDEFINED;
+ }
+}
+
+void RenderingDeviceDriverD3D12::command_pipeline_barrier(CommandBufferID p_cmd_buffer,
+ BitField<PipelineStageBits> p_src_stages,
+ BitField<PipelineStageBits> p_dst_stages,
VectorView<RDD::MemoryBarrier> p_memory_barriers,
VectorView<RDD::BufferBarrier> p_buffer_barriers,
VectorView<RDD::TextureBarrier> p_texture_barriers) {
- if (p_src_stages.has_flag(PIPELINE_STAGE_ALL_COMMANDS_BIT) && p_dst_stages.has_flag(PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
- // Looks like the intent is a full barrier.
- // In the resource barriers world, we can force a full barrier by discarding some resource, as per
- // https://microsoft.github.io/DirectX-Specs/d3d/D3D12EnhancedBarriers.html#synchronous-copy-discard-and-resolve.
- const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;
- cmd_buf_info->cmd_list->DiscardResource(frames[frame_idx].aux_resource->GetResource(), nullptr);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ // Enhanced barriers are a requirement for this function.
+ return;
+ }
+
+ if (p_memory_barriers.size() == 0 && p_buffer_barriers.size() == 0 && p_texture_barriers.size() == 0) {
+ // At least one barrier must be present in the arguments.
+ return;
}
+
+ // The command list must support the required interface.
+ const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)(p_cmd_buffer.id);
+ ID3D12GraphicsCommandList7 *cmd_list_7 = nullptr;
+ HRESULT res = cmd_buf_info->cmd_list->QueryInterface(IID_PPV_ARGS(&cmd_list_7));
+ ERR_FAIL_COND(FAILED(res));
+
+ // Convert the RDD barriers to D3D12 enhanced barriers.
+ thread_local LocalVector<D3D12_GLOBAL_BARRIER> global_barriers;
+ thread_local LocalVector<D3D12_BUFFER_BARRIER> buffer_barriers;
+ thread_local LocalVector<D3D12_TEXTURE_BARRIER> texture_barriers;
+ global_barriers.clear();
+ buffer_barriers.clear();
+ texture_barriers.clear();
+
+ D3D12_GLOBAL_BARRIER global_barrier = {};
+ for (uint32_t i = 0; i < p_memory_barriers.size(); i++) {
+ const MemoryBarrier &memory_barrier = p_memory_barriers[i];
+ _rd_stages_and_access_to_d3d12(p_src_stages, RDD::TEXTURE_LAYOUT_MAX, memory_barrier.src_access, global_barrier.SyncBefore, global_barrier.AccessBefore);
+ _rd_stages_and_access_to_d3d12(p_dst_stages, RDD::TEXTURE_LAYOUT_MAX, memory_barrier.dst_access, global_barrier.SyncAfter, global_barrier.AccessAfter);
+ global_barriers.push_back(global_barrier);
+ }
+
+ D3D12_BUFFER_BARRIER buffer_barrier_d3d12 = {};
+ buffer_barrier_d3d12.Offset = 0;
+ buffer_barrier_d3d12.Size = UINT64_MAX; // The specification says this must be either the full size of the buffer or UINT64_MAX.
+ for (uint32_t i = 0; i < p_buffer_barriers.size(); i++) {
+ const BufferBarrier &buffer_barrier_rd = p_buffer_barriers[i];
+ const BufferInfo *buffer_info = (const BufferInfo *)(buffer_barrier_rd.buffer.id);
+ _rd_stages_and_access_to_d3d12(p_src_stages, RDD::TEXTURE_LAYOUT_MAX, buffer_barrier_rd.src_access, buffer_barrier_d3d12.SyncBefore, buffer_barrier_d3d12.AccessBefore);
+ _rd_stages_and_access_to_d3d12(p_dst_stages, RDD::TEXTURE_LAYOUT_MAX, buffer_barrier_rd.dst_access, buffer_barrier_d3d12.SyncAfter, buffer_barrier_d3d12.AccessAfter);
+ buffer_barrier_d3d12.pResource = buffer_info->resource;
+ buffer_barriers.push_back(buffer_barrier_d3d12);
+ }
+
+ D3D12_TEXTURE_BARRIER texture_barrier_d3d12 = {};
+ for (uint32_t i = 0; i < p_texture_barriers.size(); i++) {
+ const TextureBarrier &texture_barrier_rd = p_texture_barriers[i];
+ const TextureInfo *texture_info = (const TextureInfo *)(texture_barrier_rd.texture.id);
+ _rd_stages_and_access_to_d3d12(p_src_stages, texture_barrier_rd.prev_layout, texture_barrier_rd.src_access, texture_barrier_d3d12.SyncBefore, texture_barrier_d3d12.AccessBefore);
+ _rd_stages_and_access_to_d3d12(p_dst_stages, texture_barrier_rd.next_layout, texture_barrier_rd.dst_access, texture_barrier_d3d12.SyncAfter, texture_barrier_d3d12.AccessAfter);
+ texture_barrier_d3d12.LayoutBefore = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.prev_layout);
+ texture_barrier_d3d12.LayoutAfter = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.next_layout);
+ texture_barrier_d3d12.pResource = texture_info->resource;
+ texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = texture_barrier_rd.subresources.base_mipmap;
+ texture_barrier_d3d12.Subresources.NumMipLevels = texture_barrier_rd.subresources.mipmap_count;
+ texture_barrier_d3d12.Subresources.FirstArraySlice = texture_barrier_rd.subresources.base_layer;
+ texture_barrier_d3d12.Subresources.NumArraySlices = texture_barrier_rd.subresources.layer_count;
+ texture_barrier_d3d12.Subresources.FirstPlane = _compute_plane_slice(texture_info->format, texture_barrier_rd.subresources.aspect);
+ texture_barrier_d3d12.Subresources.NumPlanes = format_get_plane_count(texture_info->format);
+ texture_barrier_d3d12.Flags = (texture_barrier_rd.prev_layout == RDD::TEXTURE_LAYOUT_UNDEFINED) ? D3D12_TEXTURE_BARRIER_FLAG_DISCARD : D3D12_TEXTURE_BARRIER_FLAG_NONE;
+ texture_barriers.push_back(texture_barrier_d3d12);
+ }
+
+ // Define the barrier groups and execute.
+ D3D12_BARRIER_GROUP barrier_groups[3] = {};
+ barrier_groups[0].Type = D3D12_BARRIER_TYPE_GLOBAL;
+ barrier_groups[1].Type = D3D12_BARRIER_TYPE_BUFFER;
+ barrier_groups[2].Type = D3D12_BARRIER_TYPE_TEXTURE;
+ barrier_groups[0].NumBarriers = global_barriers.size();
+ barrier_groups[1].NumBarriers = buffer_barriers.size();
+ barrier_groups[2].NumBarriers = texture_barriers.size();
+ barrier_groups[0].pGlobalBarriers = global_barriers.ptr();
+ barrier_groups[1].pBufferBarriers = buffer_barriers.ptr();
+ barrier_groups[2].pTextureBarriers = texture_barriers.ptr();
+ cmd_list_7->Barrier(ARRAY_SIZE(barrier_groups), barrier_groups);
}
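For reference, the single `Barrier()` call built above takes one `D3D12_BARRIER_GROUP` per barrier type. A minimal sketch of issuing one texture barrier directly (assuming an already-obtained `ID3D12GraphicsCommandList7` and a resource; the discard flag mirrors how the function handles `TEXTURE_LAYOUT_UNDEFINED`):

```cpp
#include <d3d12.h>

static void discard_to_render_target(ID3D12GraphicsCommandList7 *p_cmd_list, ID3D12Resource *p_texture) {
	D3D12_TEXTURE_BARRIER barrier = {};
	barrier.SyncBefore = D3D12_BARRIER_SYNC_NONE; // Nothing to wait on: the contents are undefined.
	barrier.AccessBefore = D3D12_BARRIER_ACCESS_NO_ACCESS;
	barrier.SyncAfter = D3D12_BARRIER_SYNC_RENDER_TARGET;
	barrier.AccessAfter = D3D12_BARRIER_ACCESS_RENDER_TARGET;
	barrier.LayoutBefore = D3D12_BARRIER_LAYOUT_UNDEFINED;
	barrier.LayoutAfter = D3D12_BARRIER_LAYOUT_RENDER_TARGET;
	barrier.pResource = p_texture;
	barrier.Subresources.IndexOrFirstMipLevel = 0xFFFFFFFF; // With NumMipLevels == 0, this selects all subresources.
	barrier.Flags = D3D12_TEXTURE_BARRIER_FLAG_DISCARD; // Valid because LayoutBefore is UNDEFINED.

	D3D12_BARRIER_GROUP group = {};
	group.Type = D3D12_BARRIER_TYPE_TEXTURE;
	group.NumBarriers = 1;
	group.pTextureBarriers = &barrier;
	p_cmd_list->Barrier(1, &group);
}
```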
/****************/
@@ -3464,6 +3772,8 @@ RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_bytecode(const Vect
zstd_size = STEPIFY(zstd_size, 4);
read_offset += zstd_size;
ERR_FAIL_COND_V(read_offset > binsize, ShaderID());
+
+ r_shader_desc.stages.push_back(ShaderStage(stage));
}
const uint8_t *root_sig_data_ptr = binptr + read_offset;
@@ -3795,6 +4105,10 @@ void RenderingDeviceDriverD3D12::uniform_set_free(UniformSetID p_uniform_set) {
// ----- COMMANDS -----
void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
+ if (barrier_capabilities.enhanced_barriers_supported) {
+ return;
+ }
+
// Perform pending blackouts.
{
SelfList<TextureInfo> *E = textures_pending_clear.first();
@@ -3802,7 +4116,7 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff
TextureSubresourceRange subresources;
subresources.layer_count = E->self()->layers;
subresources.mipmap_count = E->self()->mipmaps;
- command_clear_color_texture(p_cmd_buffer, TextureID(E->self()), TEXTURE_LAYOUT_GENERAL, Color(), subresources);
+ command_clear_color_texture(p_cmd_buffer, TextureID(E->self()), TEXTURE_LAYOUT_UNDEFINED, Color(), subresources);
SelfList<TextureInfo> *next = E->next();
E->remove_from_list();
@@ -3935,34 +4249,6 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff
for (uint32_t i = 0; i < tex_info->layers; i++) {
for (uint32_t j = 0; j < tex_info->mipmaps; j++) {
uint32_t subresource = D3D12CalcSubresource(tex_info->base_mip + j, tex_info->base_layer + i, 0, tex_info->desc.MipLevels, tex_info->desc.ArraySize());
-
- if ((wanted_state & D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE)) {
- // [[CROSS_FAMILY_FALLBACK]].
- if (tex_info->owner_info.resource && tex_info->main_texture && tex_info->main_texture != tex_info) {
- uint32_t subres_qword = subresource / 64;
- uint64_t subres_mask = (uint64_t(1) << (subresource % 64));
- if ((tex_info->main_texture->states_ptr->xfamily_fallback.subresources_dirty[subres_qword] & subres_mask)) {
- // Prepare for copying the write-to texture to this one, if out-of-date.
- _resource_transition_batch(tex_info->main_texture, subresource, planes, D3D12_RESOURCE_STATE_COPY_SOURCE);
- _resource_transition_batch(tex_info, subresource, planes, D3D12_RESOURCE_STATE_COPY_DEST);
-
- CommandBufferInfo::FamilyFallbackCopy ffc;
- ffc.texture = tex_info;
- ffc.subresource = subresource;
- ffc.mipmap = j;
- ffc.dst_wanted_state = wanted_state;
-
- CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
- cmd_buf_info->family_fallback_copies.resize(cmd_buf_info->family_fallback_copies.size() + 1);
- cmd_buf_info->family_fallback_copies[cmd_buf_info->family_fallback_copy_count] = ffc;
- cmd_buf_info->family_fallback_copy_count++;
-
- tex_info->main_texture->states_ptr->xfamily_fallback.subresources_dirty[subres_qword] &= ~subres_mask;
- }
- continue;
- }
- }
-
_resource_transition_batch(tex_info, subresource, planes, wanted_state);
}
}
@@ -3974,55 +4260,6 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff
if (p_set_index == shader_info_in->sets.size() - 1) {
CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
_resource_transitions_flush(cmd_buf_info->cmd_list.Get());
-
- // [[CROSS_FAMILY_FALLBACK]].
- for (uint32_t i = 0; i < cmd_buf_info->family_fallback_copy_count; i++) {
- const CommandBufferInfo::FamilyFallbackCopy &ffc = cmd_buf_info->family_fallback_copies[i];
-
- D3D12_TEXTURE_COPY_LOCATION dst_tex = {};
- dst_tex.pResource = ffc.texture->resource;
- dst_tex.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
- dst_tex.SubresourceIndex = ffc.subresource;
-
- D3D12_TEXTURE_COPY_LOCATION src_tex = {};
- src_tex.pResource = ffc.texture->main_texture->resource;
- src_tex.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
- src_tex.SubresourceIndex = ffc.subresource;
-
- const ResourceInfo::States::CrossFamillyFallback &xfamily = ffc.texture->main_texture->owner_info.states.xfamily_fallback;
- if (xfamily.interim_buffer.Get()) {
- // Must copy via a buffer due to reinterpret-copy known not to be available for these data types.
- D3D12_TEXTURE_COPY_LOCATION buf_loc = {};
- buf_loc.pResource = xfamily.interim_buffer.Get();
- buf_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
- buf_loc.PlacedFootprint.Offset = 0;
- buf_loc.PlacedFootprint.Footprint.Format = ffc.texture->main_texture->desc.Format;
- buf_loc.PlacedFootprint.Footprint.Width = MAX(1u, ffc.texture->main_texture->desc.Width >> ffc.mipmap);
- buf_loc.PlacedFootprint.Footprint.Height = MAX(1u, ffc.texture->main_texture->desc.Height >> ffc.mipmap);
- buf_loc.PlacedFootprint.Footprint.Depth = MAX(1u, (uint32_t)ffc.texture->main_texture->desc.Depth() >> ffc.mipmap);
- buf_loc.PlacedFootprint.Footprint.RowPitch = STEPIFY(buf_loc.PlacedFootprint.Footprint.Width * sizeof(uint16_t), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
-
- D3D12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(xfamily.interim_buffer.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COPY_DEST);
- cmd_buf_info->cmd_list->ResourceBarrier(1, &barrier);
-
- cmd_buf_info->cmd_list->CopyTextureRegion(&buf_loc, 0, 0, 0, &src_tex, nullptr);
-
- barrier = CD3DX12_RESOURCE_BARRIER::Transition(xfamily.interim_buffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
- cmd_buf_info->cmd_list->ResourceBarrier(1, &barrier);
-
- buf_loc.PlacedFootprint.Footprint.Format = ffc.texture->desc.Format;
- cmd_buf_info->cmd_list->CopyTextureRegion(&dst_tex, 0, 0, 0, &buf_loc, nullptr);
- } else {
- // Direct copy is possible.
- cmd_buf_info->cmd_list->CopyTextureRegion(&dst_tex, 0, 0, 0, &src_tex, nullptr);
- }
-
- // Set the specific SRV state we wanted from the beginning to the alternative version of the texture.
- _resource_transition_batch(ffc.texture, ffc.subresource, 1, ffc.dst_wanted_state);
- }
- cmd_buf_info->family_fallback_copy_count = 0;
-
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
}
}
@@ -4299,8 +4536,10 @@ void RenderingDeviceDriverD3D12::command_clear_buffer(CommandBufferID p_cmd_buff
}
}
- _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {};
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
@@ -4340,9 +4579,11 @@ void RenderingDeviceDriverD3D12::command_copy_buffer(CommandBufferID p_cmd_buffe
BufferInfo *src_buf_info = (BufferInfo *)p_src_buffer.id;
BufferInfo *dst_buf_info = (BufferInfo *)p_dst_buffer.id;
- _resource_transition_batch(src_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
- _resource_transition_batch(dst_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(src_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
+ _resource_transition_batch(dst_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
for (uint32_t i = 0; i < p_regions.size(); i++) {
cmd_buf_info->cmd_list->CopyBufferRegion(dst_buf_info->resource, p_regions[i].dst_offset, src_buf_info->resource, p_regions[i].src_offset, p_regions[i].size);
@@ -4354,43 +4595,37 @@ void RenderingDeviceDriverD3D12::command_copy_texture(CommandBufferID p_cmd_buff
TextureInfo *src_tex_info = (TextureInfo *)p_src_texture.id;
TextureInfo *dst_tex_info = (TextureInfo *)p_dst_texture.id;
- for (uint32_t i = 0; i < p_regions.size(); i++) {
- UINT src_subresource = D3D12CalcSubresource(
- p_regions[i].src_subresources.mipmap,
- p_regions[i].src_subresources.base_layer,
- _compute_plane_slice(src_tex_info->format, p_regions[i].src_subresources.aspect),
- src_tex_info->desc.MipLevels,
- src_tex_info->desc.ArraySize());
- _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
-
- UINT dst_subresource = D3D12CalcSubresource(
- p_regions[i].dst_subresources.mipmap,
- p_regions[i].dst_subresources.base_layer,
- _compute_plane_slice(dst_tex_info->format, p_regions[i].dst_subresources.aspect),
- dst_tex_info->desc.MipLevels,
- dst_tex_info->desc.ArraySize());
- _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ // Batch all barrier transitions for the textures before performing the copies.
+ for (uint32_t i = 0; i < p_regions.size(); i++) {
+ uint32_t layer_count = MIN(p_regions[i].src_subresources.layer_count, p_regions[i].dst_subresources.layer_count);
+ for (uint32_t j = 0; j < layer_count; j++) {
+ UINT src_subresource = _compute_subresource_from_layers(src_tex_info, p_regions[i].src_subresources, j);
+ UINT dst_subresource = _compute_subresource_from_layers(dst_tex_info, p_regions[i].dst_subresources, j);
+ _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
+ _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST);
+ }
+ }
_resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
- CD3DX12_TEXTURE_COPY_LOCATION src_location(src_tex_info->resource, src_subresource);
- CD3DX12_TEXTURE_COPY_LOCATION dst_location(dst_tex_info->resource, dst_subresource);
-
- CD3DX12_BOX src_box(
- p_regions[i].src_offset.x,
- p_regions[i].src_offset.y,
- p_regions[i].src_offset.z,
- p_regions[i].src_offset.x + p_regions[i].size.x,
- p_regions[i].src_offset.y + p_regions[i].size.y,
- p_regions[i].src_offset.z + p_regions[i].size.z);
-
- cmd_buf_info->cmd_list->CopyTextureRegion(
- &dst_location,
- p_regions[i].dst_offset.x,
- p_regions[i].dst_offset.y,
- p_regions[i].dst_offset.z,
- &src_location,
- &src_box);
+ CD3DX12_BOX src_box;
+ for (uint32_t i = 0; i < p_regions.size(); i++) {
+ uint32_t layer_count = MIN(p_regions[i].src_subresources.layer_count, p_regions[i].dst_subresources.layer_count);
+ for (uint32_t j = 0; j < layer_count; j++) {
+ UINT src_subresource = _compute_subresource_from_layers(src_tex_info, p_regions[i].src_subresources, j);
+ UINT dst_subresource = _compute_subresource_from_layers(dst_tex_info, p_regions[i].dst_subresources, j);
+ CD3DX12_TEXTURE_COPY_LOCATION src_location(src_tex_info->resource, src_subresource);
+ CD3DX12_TEXTURE_COPY_LOCATION dst_location(dst_tex_info->resource, dst_subresource);
+ src_box.left = p_regions[i].src_offset.x;
+ src_box.top = p_regions[i].src_offset.y;
+ src_box.front = p_regions[i].src_offset.z;
+ src_box.right = p_regions[i].src_offset.x + p_regions[i].size.x;
+ src_box.bottom = p_regions[i].src_offset.y + p_regions[i].size.y;
+ src_box.back = p_regions[i].src_offset.z + p_regions[i].size.z;
+ cmd_buf_info->cmd_list->CopyTextureRegion(&dst_location, p_regions[i].dst_offset.x, p_regions[i].dst_offset.y, p_regions[i].dst_offset.z, &src_location, &src_box);
+ }
}
}
@@ -4400,12 +4635,12 @@ void RenderingDeviceDriverD3D12::command_resolve_texture(CommandBufferID p_cmd_b
TextureInfo *dst_tex_info = (TextureInfo *)p_dst_texture.id;
UINT src_subresource = D3D12CalcSubresource(p_src_mipmap, p_src_layer, 0, src_tex_info->desc.MipLevels, src_tex_info->desc.ArraySize());
- _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
-
UINT dst_subresource = D3D12CalcSubresource(p_dst_mipmap, p_dst_layer, 0, dst_tex_info->desc.MipLevels, dst_tex_info->desc.ArraySize());
- _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST);
-
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
+ _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
cmd_buf_info->cmd_list->ResolveSubresource(dst_tex_info->resource, dst_subresource, src_tex_info->resource, src_subresource, RD_TO_D3D12_FORMAT[src_tex_info->format].general_format);
}
@@ -4446,7 +4681,9 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c
}
}
- _transition_subresources(D3D12_RESOURCE_STATE_RENDER_TARGET);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _transition_subresources(D3D12_RESOURCE_STATE_RENDER_TARGET);
+ }
for (uint32_t i = 0; i < p_subresources.mipmap_count; i++) {
D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = _make_rtv_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, p_subresources.layer_count, false);
@@ -4464,7 +4701,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c
frames[frame_idx].desc_heap_walkers.rtv.advance();
}
- } else {
+ } else if (tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) {
// Clear via UAV.
_command_check_descriptor_sets(p_cmd_buffer);
@@ -4489,7 +4726,9 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c
}
}
- _transition_subresources(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _transition_subresources(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+ }
for (uint32_t i = 0; i < p_subresources.mipmap_count; i++) {
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = _make_ranged_uav_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, p_subresources.layer_count, false);
@@ -4510,6 +4749,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c
(UINT)p_color.get_b8(),
(UINT)p_color.get_a8(),
};
+
cmd_buf_info->cmd_list->ClearUnorderedAccessViewUint(
frames[frame_idx].desc_heap_walkers.resources.get_curr_gpu_handle(),
frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle(),
@@ -4521,6 +4761,8 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c
frames[frame_idx].desc_heap_walkers.resources.advance();
frames[frame_idx].desc_heap_walkers.aux.advance();
}
+ } else {
+ ERR_FAIL_MSG("Cannot clear texture because its format does not support UAV writes. You'll need to update its contents through another method.");
}
}
@@ -4528,8 +4770,7 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID
const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;
BufferInfo *buf_info = (BufferInfo *)p_src_buffer.id;
TextureInfo *tex_info = (TextureInfo *)p_dst_texture.id;
-
- if (buf_info->flags.is_for_upload) {
+ if (!barrier_capabilities.enhanced_barriers_supported) {
_resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
}
@@ -4557,19 +4798,21 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID
STEPIFY(p_regions[i].texture_region_size.y, block_h),
p_regions[i].texture_region_size.z);
- for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {
- UINT dst_subresource = D3D12CalcSubresource(
- p_regions[i].texture_subresources.mipmap,
- p_regions[i].texture_subresources.base_layer + j,
- _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect),
- tex_info->desc.MipLevels,
- tex_info->desc.ArraySize());
- CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {
+ UINT dst_subresource = D3D12CalcSubresource(
+ p_regions[i].texture_subresources.mipmap,
+ p_regions[i].texture_subresources.base_layer + j,
+ _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect),
+ tex_info->desc.MipLevels,
+ tex_info->desc.ArraySize());
+ CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource);
- _resource_transition_batch(tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST);
- }
+ _resource_transition_batch(tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST);
+ }
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {
UINT dst_subresource = D3D12CalcSubresource(
@@ -4596,24 +4839,28 @@ void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID
TextureInfo *tex_info = (TextureInfo *)p_src_texture.id;
BufferInfo *buf_info = (BufferInfo *)p_dst_buffer.id;
- _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST);
+ }
uint32_t block_w = 0, block_h = 0;
get_compressed_image_format_block_dimensions(tex_info->format, block_w, block_h);
for (uint32_t i = 0; i < p_regions.size(); i++) {
- for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {
- UINT src_subresource = D3D12CalcSubresource(
- p_regions[i].texture_subresources.mipmap,
- p_regions[i].texture_subresources.base_layer + j,
- _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect),
- tex_info->desc.MipLevels,
- tex_info->desc.ArraySize());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {
+ UINT src_subresource = D3D12CalcSubresource(
+ p_regions[i].texture_subresources.mipmap,
+ p_regions[i].texture_subresources.base_layer + j,
+ _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect),
+ tex_info->desc.MipLevels,
+ tex_info->desc.ArraySize());
- _resource_transition_batch(tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
- }
+ _resource_transition_batch(tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
+ }
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {
UINT src_subresource = D3D12CalcSubresource(
@@ -4763,22 +5010,25 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd
}
};
- for (uint32_t i = 0; i < fb_info->attachments.size(); i++) {
- TextureInfo *tex_info = (TextureInfo *)fb_info->attachments[i].id;
- if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {
- _transition_subresources(tex_info, D3D12_RESOURCE_STATE_RENDER_TARGET);
- } else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {
- _transition_subresources(tex_info, D3D12_RESOURCE_STATE_DEPTH_WRITE);
- } else {
- DEV_ASSERT(false);
+ if (fb_info->is_screen || !barrier_capabilities.enhanced_barriers_supported) {
+ // Screen framebuffers must perform this transition even if enhanced barriers are supported.
+ for (uint32_t i = 0; i < fb_info->attachments.size(); i++) {
+ TextureInfo *tex_info = (TextureInfo *)fb_info->attachments[i].id;
+ if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {
+ _transition_subresources(tex_info, D3D12_RESOURCE_STATE_RENDER_TARGET);
+ } else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {
+ _transition_subresources(tex_info, D3D12_RESOURCE_STATE_DEPTH_WRITE);
+ } else {
+ DEV_ASSERT(false);
+ }
+ }
+ if (fb_info->vrs_attachment) {
+ TextureInfo *tex_info = (TextureInfo *)fb_info->vrs_attachment.id;
+ _transition_subresources(tex_info, D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE);
}
- }
- if (fb_info->vrs_attachment) {
- TextureInfo *tex_info = (TextureInfo *)fb_info->vrs_attachment.id;
- _transition_subresources(tex_info, D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE);
- }
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
cmd_buf_info->render_pass_state.region_rect = CD3DX12_RECT(
p_rect.position.x,
@@ -5140,8 +5390,11 @@ void RenderingDeviceDriverD3D12::command_render_draw_indexed_indirect(CommandBuf
CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
_bind_vertex_buffers(cmd_buf_info);
BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
- _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
+
cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw_indexed.Get(), p_draw_count, indirect_buf_info->resource, p_offset, nullptr, 0);
}
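The indirect buffer consumed by `ExecuteIndirect` here must contain, starting at `p_offset`, `p_draw_count` consecutive records matching the command signature; for `indirect_cmd_signatures.draw_indexed` that is the standard indexed-draw argument struct, restated below with fixed-width types:

```cpp
#include <cstdint>

// Layout of one indexed-draw record in the indirect buffer, matching
// D3D12_DRAW_INDEXED_ARGUMENTS from d3d12.h.
struct DrawIndexedArgs {
	uint32_t index_count_per_instance;
	uint32_t instance_count;
	uint32_t start_index_location;
	int32_t base_vertex_location;
	uint32_t start_instance_location;
};
static_assert(sizeof(DrawIndexedArgs) == 20, "Records are 20 bytes, laid out back to back.");
```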
@@ -5150,9 +5403,12 @@ void RenderingDeviceDriverD3D12::command_render_draw_indexed_indirect_count(Comm
_bind_vertex_buffers(cmd_buf_info);
BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id;
- _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
+
cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw_indexed.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset);
}
@@ -5160,8 +5416,11 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect(CommandBufferID p_
CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
_bind_vertex_buffers(cmd_buf_info);
BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
- _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
+
cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_draw_count, indirect_buf_info->resource, p_offset, nullptr, 0);
}
@@ -5170,9 +5429,12 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect_count(CommandBuffe
_bind_vertex_buffers(cmd_buf_info);
BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id;
- _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
+
cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset);
}
@@ -5191,10 +5453,15 @@ void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBuffe
cmd_buf_info->render_pass_state.vertex_buffer_views[i] = {};
cmd_buf_info->render_pass_state.vertex_buffer_views[i].BufferLocation = buffer_info->resource->GetGPUVirtualAddress() + p_offsets[i];
cmd_buf_info->render_pass_state.vertex_buffer_views[i].SizeInBytes = buffer_info->size - p_offsets[i];
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
+ }
+ }
- _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
}
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+
cmd_buf_info->render_pass_state.vertex_buffer_count = p_binding_count;
}
@@ -5207,8 +5474,10 @@ void RenderingDeviceDriverD3D12::command_render_bind_index_buffer(CommandBufferI
d3d12_ib_view.SizeInBytes = buffer_info->size - p_offset;
d3d12_ib_view.Format = p_format == INDEX_BUFFER_FORMAT_UINT16 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;
- _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_INDEX_BUFFER);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_INDEX_BUFFER);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
cmd_buf_info->cmd_list->IASetIndexBuffer(&d3d12_ib_view);
}
@@ -5604,15 +5873,21 @@ void RenderingDeviceDriverD3D12::command_bind_compute_uniform_set(CommandBufferI
void RenderingDeviceDriverD3D12::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
+
cmd_buf_info->cmd_list->Dispatch(p_x_groups, p_y_groups, p_z_groups);
}
void RenderingDeviceDriverD3D12::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) {
const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;
BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
- _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
+
cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.dispatch.Get(), 1, indirect_buf_info->resource, p_offset, nullptr, 0);
}
@@ -5927,11 +6202,7 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) {
uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) {
switch (p_trait) {
case API_TRAIT_HONORS_PIPELINE_BARRIERS:
- // TODO:
- // 1. Map fine/Vulkan/enhanced barriers to legacy barriers as closely as possible
- // so there's still some advantage even without enhanced barriers available.
- // 2. Implement enhanced barriers and return true where available.
- return 0;
+ return barrier_capabilities.enhanced_barriers_supported;
case API_TRAIT_SHADER_CHANGE_INVALIDATION:
return (uint64_t)SHADER_CHANGE_INVALIDATION_ALL_OR_NONE_ACCORDING_TO_LAYOUT_HASH;
case API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT:
@@ -5940,6 +6211,8 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) {
return D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
case API_TRAIT_SECONDARY_VIEWPORT_SCISSOR:
return false;
+ case API_TRAIT_CLEARS_WITH_COPY_ENGINE:
+ return false;
default:
return RenderingDeviceDriver::api_trait_get(p_trait);
}
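// ---- Editor's sketch (illustrative, not part of the patch) ----
// Returning the capability instead of a hard-coded 0 lets the common layer
// decide per-device whether its computed barriers are authoritative. A
// hypothetical caller on the RenderingDevice side:
//
//   bool honors_barriers = driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS);
//   if (honors_barriers) {
//       // Emit the fine-grained barriers computed by the render graph.
//   } else {
//       // Let the driver's internal legacy state tracking handle hazards.
//   }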
@@ -6082,6 +6355,8 @@ Error RenderingDeviceDriverD3D12::_initialize_device() {
// These happen due to how D3D12MA manages buffers; seems benign.
D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_HAS_NO_RESOURCE,
D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_INTERSECTS_MULTIPLE_BUFFERS,
+ // Seemingly a false positive.
+ D3D12_MESSAGE_ID_DATA_STATIC_WHILE_SET_AT_EXECUTE_DESCRIPTOR_INVALID_DATA_CHANGE,
};
D3D12_INFO_QUEUE_FILTER filter = {};
@@ -6231,6 +6506,7 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() {
res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12));
if (SUCCEEDED(res)) {
format_capabilities.relaxed_casting_supported = options12.RelaxedFormatCastingSupported;
+ barrier_capabilities.enhanced_barriers_supported = options12.EnhancedBarriersSupported;
}
if (vrs_capabilities.draw_call_supported || vrs_capabilities.primitive_supported || vrs_capabilities.ss_image_supported) {
@@ -6263,7 +6539,7 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() {
#if 0
print_verbose("- Relaxed casting supported");
#else
- // Certain configurations (Windows 11 with an updated Nvida driver) crash when using relaxed casting.
+ // Certain configurations (Windows 11 with an updated NVIDIA driver) crash when using relaxed casting.
// Therefore, we disable it temporarily until we can assure that it's reliable.
// There are fallbacks in place that work in every case, if less efficient.
format_capabilities.relaxed_casting_supported = false;
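// ---- Editor's sketch (illustrative, not part of the patch) ----
// What EnhancedBarriersSupported unlocks: instead of legacy per-subresource
// state transitions, the driver can record sync/access-scoped barriers via
// ID3D12GraphicsCommandList7::Barrier. A minimal buffer barrier (copy write
// before indirect-argument read), assuming `cmd_list7` was obtained via
// QueryInterface on the command list:
//
//   D3D12_BUFFER_BARRIER bb = {};
//   bb.SyncBefore = D3D12_BARRIER_SYNC_COPY;
//   bb.SyncAfter = D3D12_BARRIER_SYNC_EXECUTE_INDIRECT;
//   bb.AccessBefore = D3D12_BARRIER_ACCESS_COPY_DEST;
//   bb.AccessAfter = D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT;
//   bb.pResource = buffer_info->resource;
//   bb.Size = UINT64_MAX;
//   D3D12_BARRIER_GROUP group = {};
//   group.Type = D3D12_BARRIER_TYPE_BUFFER;
//   group.NumBarriers = 1;
//   group.pBufferBarriers = &bb;
//   cmd_list7->Barrier(1, &group);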
@@ -6363,10 +6639,6 @@ Error RenderingDeviceDriverD3D12::_initialize_frames(uint32_t p_frame_count) {
frames[i].desc_heap_walkers.samplers = frames[i].desc_heaps.samplers.make_walker();
frames[i].desc_heap_walkers.aux = frames[i].desc_heaps.aux.make_walker();
frames[i].desc_heap_walkers.rtv = frames[i].desc_heaps.rtv.make_walker();
-
- ID3D12Resource *resource = nullptr;
- HRESULT res = allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COMMON, nullptr, &frames[frame_idx].aux_resource, IID_PPV_ARGS(&resource));
- ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "D3D12MA::CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");
}
return OK;
diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h
index 8e1223bdaa..e2cb5d08ba 100644
--- a/drivers/d3d12/rendering_device_driver_d3d12.h
+++ b/drivers/d3d12/rendering_device_driver_d3d12.h
@@ -122,6 +122,10 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
bool relaxed_casting_supported = false;
};
+ struct BarrierCapabilities {
+ bool enhanced_barriers_supported = false;
+ };
+
RenderingContextDriverD3D12 *context_driver = nullptr;
RenderingContextDriver::Device context_device;
ComPtr<IDXGIAdapter> adapter;
@@ -136,6 +140,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
ShaderCapabilities shader_capabilities;
StorageBufferCapabilities storage_buffer_capabilities;
FormatCapabilities format_capabilities;
+ BarrierCapabilities barrier_capabilities;
String pipeline_cache_id;
class DescriptorsHeap {
@@ -218,11 +223,6 @@ private:
// As many subresources as mipmaps * layers; planes (for depth-stencil) are tracked together.
TightLocalVector<D3D12_RESOURCE_STATES> subresource_states; // Used only if not a view.
uint32_t last_batch_with_uav_barrier = 0;
- struct CrossFamillyFallback {
- TightLocalVector<uint64_t> subresources_dirty;
- ComPtr<ID3D12Resource> interim_buffer;
- ComPtr<D3D12MA::Allocation> interim_buffer_alloc;
- } xfamily_fallback; // [[CROSS_FAMILY_FALLBACK]].
};
ID3D12Resource *resource = nullptr; // Non-null even if not owned.
@@ -275,7 +275,6 @@ private:
uint64_t size = 0;
struct {
bool usable_as_uav : 1;
- bool is_for_upload : 1;
} flags = {};
};
@@ -317,10 +316,14 @@ private:
UINT _compute_component_mapping(const TextureView &p_view);
UINT _compute_plane_slice(DataFormat p_format, BitField<TextureAspectBits> p_aspect_bits);
UINT _compute_plane_slice(DataFormat p_format, TextureAspect p_aspect);
+ UINT _compute_subresource_from_layers(TextureInfo *p_texture, const TextureSubresourceLayers &p_layers, uint32_t p_layer_offset);
struct CommandBufferInfo;
void _discard_texture_subresources(const TextureInfo *p_tex_info, const CommandBufferInfo *p_cmd_buf_info);
+protected:
+ virtual bool _unordered_access_supported_by_format(DataFormat p_format);
+
public:
virtual TextureID texture_create(const TextureFormat &p_format, const TextureView &p_view) override final;
virtual TextureID texture_create_from_extension(uint64_t p_native_texture, TextureType p_type, DataFormat p_format, uint32_t p_array_layers, bool p_depth_stencil) override final;
@@ -332,6 +335,7 @@ public:
virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) override final;
virtual void texture_unmap(TextureID p_texture) override final;
virtual BitField<TextureUsageBits> texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final;
+ virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) override final;
private:
TextureID _texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps);
@@ -367,8 +371,8 @@ public:
virtual void command_pipeline_barrier(
CommandBufferID p_cmd_buffer,
- BitField<RDD::PipelineStageBits> p_src_stages,
- BitField<RDD::PipelineStageBits> p_dst_stages,
+ BitField<PipelineStageBits> p_src_stages,
+ BitField<PipelineStageBits> p_dst_stages,
VectorView<RDD::MemoryBarrier> p_memory_barriers,
VectorView<RDD::BufferBarrier> p_buffer_barriers,
VectorView<RDD::TextureBarrier> p_texture_barriers) override final;
@@ -465,16 +469,6 @@ private:
RenderPassState render_pass_state;
bool descriptor_heaps_set = false;
-
- // [[CROSS_FAMILY_FALLBACK]].
- struct FamilyFallbackCopy {
- TextureInfo *texture = nullptr;
- uint32_t subresource = 0;
- uint32_t mipmap = 0;
- D3D12_RESOURCE_STATES dst_wanted_state = {};
- };
- LocalVector<FamilyFallbackCopy> family_fallback_copies;
- uint32_t family_fallback_copy_count = 0;
};
public:
@@ -961,7 +955,6 @@ private:
bool rtv = false;
} desc_heaps_exhausted_reported;
CD3DX12_CPU_DESCRIPTOR_HANDLE null_rtv_handle = {}; // For [[MANUAL_SUBPASSES]].
- ComPtr<D3D12MA::Allocation> aux_resource;
uint32_t segment_serial = 0;
#ifdef DEV_ENABLED
diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp
index 896fc6ff91..6e5b969451 100644
--- a/drivers/vulkan/rendering_device_driver_vulkan.cpp
+++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp
@@ -264,6 +264,63 @@ static const VkFormat RD_TO_VK_FORMAT[RDD::DATA_FORMAT_MAX] = {
VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM,
};
+static VkImageLayout RD_TO_VK_LAYOUT[RDD::TEXTURE_LAYOUT_MAX] = {
+ VK_IMAGE_LAYOUT_UNDEFINED, // TEXTURE_LAYOUT_UNDEFINED
+ VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_STORAGE_OPTIMAL
+ VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
+ VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
+ VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, // TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, // TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // TEXTURE_LAYOUT_COPY_SRC_OPTIMAL
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // TEXTURE_LAYOUT_COPY_DST_OPTIMAL
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL
+ VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR, // TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL
+};
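// ---- Editor's note (not part of the patch) ----
// RDD::TextureLayout no longer maps 1:1 onto VkImageLayout: the new COPY_*
// and RESOLVE_* layouts both collapse onto Vulkan's TRANSFER_SRC/DST, and
// PREINITIALIZED was dropped. An explicit lookup table therefore replaces
// the raw enum cast (and the static_asserts removed further down).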
+
+static VkPipelineStageFlags _rd_to_vk_pipeline_stages(BitField<RDD::PipelineStageBits> p_stages) {
+ VkPipelineStageFlags vk_flags = 0;
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_COPY_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT)) {
+ // Transfer has been split into copy and resolve bits. Clear them and merge them into one bit.
+ vk_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+ p_stages.clear_flag(RDD::PIPELINE_STAGE_COPY_BIT);
+ p_stages.clear_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT);
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT)) {
+ // Vulkan should never use this as API_TRAIT_CLEAR_RESOURCES_WITH_VIEWS is not specified.
+ // Therefore, storage is never cleared with an explicit command.
+ p_stages.clear_flag(RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT);
+ }
+
+ // The rest of the flags have compatible numeric values with Vulkan.
+ return VkPipelineStageFlags(p_stages) | vk_flags;
+}
+
+static VkAccessFlags _rd_to_vk_access_flags(BitField<RDD::BarrierAccessBits> p_access) {
+ VkAccessFlags vk_flags = 0;
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_READ_BIT) || p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_READ_BIT)) {
+ vk_flags |= VK_ACCESS_TRANSFER_READ_BIT;
+ p_access.clear_flag(RDD::BARRIER_ACCESS_COPY_READ_BIT);
+ p_access.clear_flag(RDD::BARRIER_ACCESS_RESOLVE_READ_BIT);
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_WRITE_BIT) || p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT)) {
+ vk_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
+ p_access.clear_flag(RDD::BARRIER_ACCESS_COPY_WRITE_BIT);
+ p_access.clear_flag(RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT);
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_STORAGE_CLEAR_BIT)) {
+ // Vulkan should never use this as API_TRAIT_CLEAR_RESOURCES_WITH_VIEWS is not specified.
+ // Therefore, storage is never cleared with an explicit command.
+ p_access.clear_flag(RDD::BARRIER_ACCESS_STORAGE_CLEAR_BIT);
+ }
+
+ // The rest of the flags have compatible numeric values with Vulkan.
+ return VkAccessFlags(p_access) | vk_flags;
+}
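// ---- Editor's note (worked example, not part of the patch) ----
// The RDD copy/resolve access bits have no VkAccessFlags equivalent, so they
// are folded into the single VK_ACCESS_TRANSFER_* bits before the raw cast:
//
//   _rd_to_vk_access_flags(BARRIER_ACCESS_COPY_WRITE_BIT | BARRIER_ACCESS_SHADER_READ_BIT)
//     -> VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT
//
// Every bit that survives the folding is numerically identical to its Vulkan
// counterpart, which the remaining static_asserts continue to verify.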
+
// RDD::CompareOperator == VkCompareOp.
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NEVER, VK_COMPARE_OP_NEVER));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS, VK_COMPARE_OP_LESS));
@@ -1334,18 +1391,6 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_G, VK_COMPONENT_SWIZZLE_G)
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_B, VK_COMPONENT_SWIZZLE_B));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_A, VK_COMPONENT_SWIZZLE_A));
-// RDD::TextureLayout == VkImageLayout.
-static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_UNDEFINED));
-static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL));
-static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL));
-static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL));
-static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL));
-static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL));
-static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL));
-static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL));
-static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_PREINITIALIZED, VK_IMAGE_LAYOUT_PREINITIALIZED));
-static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR));
-
// RDD::TextureAspectBits == VkImageAspectFlagBits.
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_ASPECT_DEPTH_BIT, VK_IMAGE_ASPECT_DEPTH_BIT));
@@ -1774,6 +1819,11 @@ BitField<RDD::TextureUsageBits> RenderingDeviceDriverVulkan::texture_get_usages_
return supported;
}
+bool RenderingDeviceDriverVulkan::texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) {
+ r_raw_reinterpretation = false;
+ return true;
+}
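// ---- Editor's note (not part of the patch) ----
// Vulkan image views can reinterpret between the allowed shareable formats
// directly (the images are created with mutable format compatibility), so the
// shared-texture fallback and its raw-reinterpretation copies are never
// needed on this driver.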
+
/*****************/
/**** SAMPLER ****/
/*****************/
@@ -1893,7 +1943,6 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, V
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT));
-static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT));
@@ -1910,8 +1959,6 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT,
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT));
-static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_TRANSFER_READ_BIT));
-static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_HOST_READ_BIT, VK_ACCESS_HOST_READ_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_HOST_WRITE_BIT, VK_ACCESS_HOST_WRITE_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_READ_BIT, VK_ACCESS_MEMORY_READ_BIT));
@@ -1929,8 +1976,8 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier(
for (uint32_t i = 0; i < p_memory_barriers.size(); i++) {
vk_memory_barriers[i] = {};
vk_memory_barriers[i].sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
- vk_memory_barriers[i].srcAccessMask = (VkPipelineStageFlags)p_memory_barriers[i].src_access;
- vk_memory_barriers[i].dstAccessMask = (VkAccessFlags)p_memory_barriers[i].dst_access;
+ vk_memory_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].src_access);
+ vk_memory_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].dst_access);
}
VkBufferMemoryBarrier *vk_buffer_barriers = ALLOCA_ARRAY(VkBufferMemoryBarrier, p_buffer_barriers.size());
@@ -1939,8 +1986,8 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier(
vk_buffer_barriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
vk_buffer_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
vk_buffer_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- vk_buffer_barriers[i].srcAccessMask = (VkAccessFlags)p_buffer_barriers[i].src_access;
- vk_buffer_barriers[i].dstAccessMask = (VkAccessFlags)p_buffer_barriers[i].dst_access;
+ vk_buffer_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].src_access);
+ vk_buffer_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].dst_access);
vk_buffer_barriers[i].buffer = ((const BufferInfo *)p_buffer_barriers[i].buffer.id)->vk_buffer;
vk_buffer_barriers[i].offset = p_buffer_barriers[i].offset;
vk_buffer_barriers[i].size = p_buffer_barriers[i].size;
@@ -1951,10 +1998,10 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier(
const TextureInfo *tex_info = (const TextureInfo *)p_texture_barriers[i].texture.id;
vk_image_barriers[i] = {};
vk_image_barriers[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
- vk_image_barriers[i].srcAccessMask = (VkAccessFlags)p_texture_barriers[i].src_access;
- vk_image_barriers[i].dstAccessMask = (VkAccessFlags)p_texture_barriers[i].dst_access;
- vk_image_barriers[i].oldLayout = (VkImageLayout)p_texture_barriers[i].prev_layout;
- vk_image_barriers[i].newLayout = (VkImageLayout)p_texture_barriers[i].next_layout;
+ vk_image_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_texture_barriers[i].src_access);
+ vk_image_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_texture_barriers[i].dst_access);
+ vk_image_barriers[i].oldLayout = RD_TO_VK_LAYOUT[p_texture_barriers[i].prev_layout];
+ vk_image_barriers[i].newLayout = RD_TO_VK_LAYOUT[p_texture_barriers[i].next_layout];
vk_image_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
vk_image_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
vk_image_barriers[i].image = tex_info->vk_view_create_info.image;
@@ -1984,8 +2031,8 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier(
vkCmdPipelineBarrier(
(VkCommandBuffer)p_cmd_buffer.id,
- (VkPipelineStageFlags)p_src_stages,
- (VkPipelineStageFlags)p_dst_stages,
+ _rd_to_vk_pipeline_stages(p_src_stages),
+ _rd_to_vk_pipeline_stages(p_dst_stages),
0,
p_memory_barriers.size(), vk_memory_barriers,
p_buffer_barriers.size(), vk_buffer_barriers,
@@ -3726,7 +3773,7 @@ void RenderingDeviceDriverVulkan::command_copy_texture(CommandBufferID p_cmd_buf
const TextureInfo *src_tex_info = (const TextureInfo *)p_src_texture.id;
const TextureInfo *dst_tex_info = (const TextureInfo *)p_dst_texture.id;
- vkCmdCopyImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, (VkImageLayout)p_src_texture_layout, dst_tex_info->vk_view_create_info.image, (VkImageLayout)p_dst_texture_layout, p_regions.size(), vk_copy_regions);
+ vkCmdCopyImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions);
}
void RenderingDeviceDriverVulkan::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) {
@@ -3746,7 +3793,7 @@ void RenderingDeviceDriverVulkan::command_resolve_texture(CommandBufferID p_cmd_
vk_resolve.extent.height = MAX(1u, src_tex_info->vk_create_info.extent.height >> p_src_mipmap);
vk_resolve.extent.depth = MAX(1u, src_tex_info->vk_create_info.extent.depth >> p_src_mipmap);
- vkCmdResolveImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, (VkImageLayout)p_src_texture_layout, dst_tex_info->vk_view_create_info.image, (VkImageLayout)p_dst_texture_layout, 1, &vk_resolve);
+ vkCmdResolveImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], 1, &vk_resolve);
}
void RenderingDeviceDriverVulkan::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) {
@@ -3757,7 +3804,7 @@ void RenderingDeviceDriverVulkan::command_clear_color_texture(CommandBufferID p_
_texture_subresource_range_to_vk(p_subresources, &vk_subresources);
const TextureInfo *tex_info = (const TextureInfo *)p_texture.id;
- vkCmdClearColorImage((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, (VkImageLayout)p_texture_layout, &vk_color, 1, &vk_subresources);
+ vkCmdClearColorImage((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_texture_layout], &vk_color, 1, &vk_subresources);
}
void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<BufferTextureCopyRegion> p_regions) {
@@ -3768,7 +3815,7 @@ void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID
const BufferInfo *buf_info = (const BufferInfo *)p_src_buffer.id;
const TextureInfo *tex_info = (const TextureInfo *)p_dst_texture.id;
- vkCmdCopyBufferToImage((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, tex_info->vk_view_create_info.image, (VkImageLayout)p_dst_texture_layout, p_regions.size(), vk_copy_regions);
+ vkCmdCopyBufferToImage((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions);
}
void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView<BufferTextureCopyRegion> p_regions) {
@@ -3779,7 +3826,7 @@ void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID
const TextureInfo *tex_info = (const TextureInfo *)p_src_texture.id;
const BufferInfo *buf_info = (const BufferInfo *)p_dst_buffer.id;
- vkCmdCopyImageToBuffer((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, (VkImageLayout)p_src_texture_layout, buf_info->vk_buffer, p_regions.size(), vk_copy_regions);
+ vkCmdCopyImageToBuffer((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], buf_info->vk_buffer, p_regions.size(), vk_copy_regions);
}
/******************/
@@ -3925,7 +3972,7 @@ static void _attachment_reference_to_vk(const RDD::AttachmentReference &p_attach
*r_vk_attachment_reference = {};
r_vk_attachment_reference->sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
r_vk_attachment_reference->attachment = p_attachment_reference.attachment;
- r_vk_attachment_reference->layout = (VkImageLayout)p_attachment_reference.layout;
+ r_vk_attachment_reference->layout = RD_TO_VK_LAYOUT[p_attachment_reference.layout];
r_vk_attachment_reference->aspectMask = (VkImageAspectFlags)p_attachment_reference.aspect;
}
@@ -3944,8 +3991,8 @@ RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView<Att
vk_attachments[i].storeOp = (VkAttachmentStoreOp)p_attachments[i].store_op;
vk_attachments[i].stencilLoadOp = (VkAttachmentLoadOp)p_attachments[i].stencil_load_op;
vk_attachments[i].stencilStoreOp = (VkAttachmentStoreOp)p_attachments[i].stencil_store_op;
- vk_attachments[i].initialLayout = (VkImageLayout)p_attachments[i].initial_layout;
- vk_attachments[i].finalLayout = (VkImageLayout)p_attachments[i].final_layout;
+ vk_attachments[i].initialLayout = RD_TO_VK_LAYOUT[p_attachments[i].initial_layout];
+ vk_attachments[i].finalLayout = RD_TO_VK_LAYOUT[p_attachments[i].final_layout];
}
VkSubpassDescription2KHR *vk_subpasses = ALLOCA_ARRAY(VkSubpassDescription2KHR, p_subpasses.size());
@@ -4009,10 +4056,10 @@ RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView<Att
vk_subpass_dependencies[i].sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2;
vk_subpass_dependencies[i].srcSubpass = p_subpass_dependencies[i].src_subpass;
vk_subpass_dependencies[i].dstSubpass = p_subpass_dependencies[i].dst_subpass;
- vk_subpass_dependencies[i].srcStageMask = (VkPipelineStageFlags)p_subpass_dependencies[i].src_stages;
- vk_subpass_dependencies[i].dstStageMask = (VkPipelineStageFlags)p_subpass_dependencies[i].dst_stages;
- vk_subpass_dependencies[i].srcAccessMask = (VkAccessFlags)p_subpass_dependencies[i].src_access;
- vk_subpass_dependencies[i].dstAccessMask = (VkAccessFlags)p_subpass_dependencies[i].dst_access;
+ vk_subpass_dependencies[i].srcStageMask = _rd_to_vk_pipeline_stages(p_subpass_dependencies[i].src_stages);
+ vk_subpass_dependencies[i].dstStageMask = _rd_to_vk_pipeline_stages(p_subpass_dependencies[i].dst_stages);
+ vk_subpass_dependencies[i].srcAccessMask = _rd_to_vk_access_flags(p_subpass_dependencies[i].src_access);
+ vk_subpass_dependencies[i].dstAccessMask = _rd_to_vk_access_flags(p_subpass_dependencies[i].dst_access);
}
VkRenderPassCreateInfo2KHR create_info = {};
diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h
index e70019962a..2e7d818b13 100644
--- a/drivers/vulkan/rendering_device_driver_vulkan.h
+++ b/drivers/vulkan/rendering_device_driver_vulkan.h
@@ -210,6 +210,7 @@ public:
virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) override final;
virtual void texture_unmap(TextureID p_texture) override final;
virtual BitField<TextureUsageBits> texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final;
+ virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) override final;
/*****************/
/**** SAMPLER ****/
diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp
index 15e1731823..474fdb387f 100644
--- a/servers/rendering/rendering_device.cpp
+++ b/servers/rendering/rendering_device.cpp
@@ -824,8 +824,8 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture
}
if (texture.draw_tracker != nullptr) {
- // Draw tracker can assume the texture will be in transfer destination.
- texture.draw_tracker->usage = RDG::RESOURCE_USAGE_TRANSFER_TO;
+ // Draw tracker can assume the texture will be in copy destination.
+ texture.draw_tracker->usage = RDG::RESOURCE_USAGE_COPY_TO;
}
}
@@ -847,8 +847,11 @@ RID RenderingDevice::texture_create_shared(const TextureView &p_view, RID p_with
// Create view.
Texture texture = *src_texture;
+ texture.shared_fallback = nullptr;
RDD::TextureView tv;
+ bool create_shared = true;
+	bool raw_reinterpretation = false;
if (p_view.format_override == DATA_FORMAT_MAX || p_view.format_override == texture.format) {
tv.format = texture.format;
} else {
@@ -857,13 +860,47 @@ RID RenderingDevice::texture_create_shared(const TextureView &p_view, RID p_with
ERR_FAIL_COND_V_MSG(!texture.allowed_shared_formats.has(p_view.format_override), RID(),
"Format override is not in the list of allowed shareable formats for original texture.");
tv.format = p_view.format_override;
+		create_shared = driver->texture_can_make_shared_with_format(texture.driver_id, p_view.format_override, raw_reinterpretation);
}
tv.swizzle_r = p_view.swizzle_r;
tv.swizzle_g = p_view.swizzle_g;
tv.swizzle_b = p_view.swizzle_b;
tv.swizzle_a = p_view.swizzle_a;
- texture.driver_id = driver->texture_create_shared(texture.driver_id, tv);
+ if (create_shared) {
+ texture.driver_id = driver->texture_create_shared(texture.driver_id, tv);
+ } else {
+ // The regular view will use the same format as the main texture.
+ RDD::TextureView regular_view = tv;
+ regular_view.format = src_texture->format;
+ texture.driver_id = driver->texture_create_shared(texture.driver_id, regular_view);
+
+ // Create the independent texture for the alias.
+ RDD::TextureFormat alias_format = texture.texture_format();
+ alias_format.format = tv.format;
+ alias_format.usage_bits = TEXTURE_USAGE_SAMPLING_BIT | TEXTURE_USAGE_CAN_COPY_TO_BIT;
+
+ _texture_check_shared_fallback(src_texture);
+ _texture_check_shared_fallback(&texture);
+
+ texture.shared_fallback->texture = driver->texture_create(alias_format, tv);
+		texture.shared_fallback->raw_reinterpretation = raw_reinterpretation;
+ texture_memory += driver->texture_get_allocation_size(texture.shared_fallback->texture);
+
+ RDG::ResourceTracker *tracker = RDG::resource_tracker_create();
+ tracker->texture_driver_id = texture.shared_fallback->texture;
+ tracker->texture_subresources = texture.barrier_range();
+ tracker->texture_usage = alias_format.usage_bits;
+ tracker->reference_count = 1;
+ texture.shared_fallback->texture_tracker = tracker;
+ texture.shared_fallback->revision = 0;
+
+		if (raw_reinterpretation && src_texture->shared_fallback->buffer.id == 0) {
+			// For shared textures of the same size, we create the buffer on the main texture if it doesn't already have one.
+ _texture_create_reinterpret_buffer(src_texture);
+ }
+ }
+
ERR_FAIL_COND_V(!texture.driver_id, RID());
texture.slice_trackers.clear();
@@ -965,6 +1002,7 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view,
}
Texture texture = *src_texture;
+ texture.shared_fallback = nullptr;
get_image_format_required_size(texture.format, texture.width, texture.height, texture.depth, p_mipmap + 1, &texture.width, &texture.height);
texture.mipmaps = p_mipmaps;
@@ -979,6 +1017,8 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view,
}
RDD::TextureView tv;
+ bool create_shared = true;
+	bool raw_reinterpretation = false;
if (p_view.format_override == DATA_FORMAT_MAX || p_view.format_override == texture.format) {
tv.format = texture.format;
} else {
@@ -987,7 +1027,9 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view,
ERR_FAIL_COND_V_MSG(!texture.allowed_shared_formats.has(p_view.format_override), RID(),
"Format override is not in the list of allowed shareable formats for original texture.");
tv.format = p_view.format_override;
+		create_shared = driver->texture_can_make_shared_with_format(texture.driver_id, p_view.format_override, raw_reinterpretation);
}
+
tv.swizzle_r = p_view.swizzle_r;
tv.swizzle_g = p_view.swizzle_g;
tv.swizzle_b = p_view.swizzle_b;
@@ -1000,7 +1042,47 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view,
"Specified layer must be a multiple of 6.");
}
- texture.driver_id = driver->texture_create_shared_from_slice(src_texture->driver_id, tv, p_slice_type, p_layer, slice_layers, p_mipmap, p_mipmaps);
+ if (create_shared) {
+ texture.driver_id = driver->texture_create_shared_from_slice(src_texture->driver_id, tv, p_slice_type, p_layer, slice_layers, p_mipmap, p_mipmaps);
+ } else {
+ // The regular view will use the same format as the main texture.
+ RDD::TextureView regular_view = tv;
+ regular_view.format = src_texture->format;
+ texture.driver_id = driver->texture_create_shared_from_slice(src_texture->driver_id, regular_view, p_slice_type, p_layer, slice_layers, p_mipmap, p_mipmaps);
+
+ // Create the independent texture for the slice.
+ RDD::TextureSubresourceRange slice_range = texture.barrier_range();
+ slice_range.base_mipmap = 0;
+ slice_range.base_layer = 0;
+
+ RDD::TextureFormat slice_format = texture.texture_format();
+ slice_format.width = MAX(texture.width >> p_mipmap, 1U);
+ slice_format.height = MAX(texture.height >> p_mipmap, 1U);
+ slice_format.depth = MAX(texture.depth >> p_mipmap, 1U);
+ slice_format.format = tv.format;
+ slice_format.usage_bits = TEXTURE_USAGE_SAMPLING_BIT | TEXTURE_USAGE_CAN_COPY_TO_BIT;
+
+ _texture_check_shared_fallback(src_texture);
+ _texture_check_shared_fallback(&texture);
+
+ texture.shared_fallback->texture = driver->texture_create(slice_format, tv);
+		texture.shared_fallback->raw_reinterpretation = raw_reinterpretation;
+ texture_memory += driver->texture_get_allocation_size(texture.shared_fallback->texture);
+
+ RDG::ResourceTracker *tracker = RDG::resource_tracker_create();
+ tracker->texture_driver_id = texture.shared_fallback->texture;
+ tracker->texture_subresources = slice_range;
+ tracker->texture_usage = slice_format.usage_bits;
+ tracker->reference_count = 1;
+ texture.shared_fallback->texture_tracker = tracker;
+ texture.shared_fallback->revision = 0;
+
+		if (raw_reinterpretation && src_texture->shared_fallback->buffer.id == 0) {
+ // For shared texture slices, we create the buffer on the slice if the source texture has no reinterpretation buffer.
+ _texture_create_reinterpret_buffer(&texture);
+ }
+ }
+
ERR_FAIL_COND_V(!texture.driver_id, RID());
const Rect2i slice_rect(p_mipmap, p_layer, p_mipmaps, slice_layers);
@@ -1093,15 +1175,18 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve
// When using the setup queue directly, we transition the texture to the optimal layout.
RDD::TextureBarrier tb;
tb.texture = texture->driver_id;
- tb.dst_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT;
+ tb.dst_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED;
- tb.next_layout = RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL;
+ tb.next_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
tb.subresources.aspect = texture->barrier_aspect_flags;
tb.subresources.mipmap_count = texture->mipmaps;
tb.subresources.base_layer = p_layer;
tb.subresources.layer_count = 1;
- driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, RDD::PIPELINE_STAGE_TRANSFER_BIT, {}, {}, tb);
+ driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, RDD::PIPELINE_STAGE_COPY_BIT, {}, {}, tb);
+ } else if (!p_use_setup_queue) {
+ // Indicate the texture will get modified for the shared texture fallback.
+ _texture_update_shared_fallback(p_texture, texture, true);
}
uint32_t mipmap_offset = 0;
@@ -1199,7 +1284,7 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve
copy_region.texture_region_size = Vector3i(region_logic_w, region_logic_h, 1);
if (p_use_setup_queue) {
- driver->command_copy_buffer_to_texture(frames[frame].setup_command_buffer, staging_buffer_blocks[staging_buffer_current].driver_id, texture->driver_id, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, copy_region);
+ driver->command_copy_buffer_to_texture(frames[frame].setup_command_buffer, staging_buffer_blocks[staging_buffer_current].driver_id, texture->driver_id, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, copy_region);
} else {
RDG::RecordedBufferToTextureCopy buffer_to_texture_copy;
buffer_to_texture_copy.from_buffer = staging_buffer_blocks[staging_buffer_current].driver_id;
@@ -1221,14 +1306,14 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve
// If the texture does not have a tracker, it means it must be transitioned to the sampling state.
RDD::TextureBarrier tb;
tb.texture = texture->driver_id;
- tb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT;
- tb.prev_layout = RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL;
+ tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
+ tb.prev_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
tb.subresources.aspect = texture->barrier_aspect_flags;
tb.subresources.mipmap_count = texture->mipmaps;
tb.subresources.base_layer = p_layer;
tb.subresources.layer_count = 1;
- driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb);
+ driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb);
} else if (!p_use_setup_queue && !command_buffer_to_texture_copies_vector.is_empty()) {
if (_texture_make_mutable(texture, p_texture)) {
// The texture must be mutable to be used as a copy destination.
@@ -1241,6 +1326,186 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve
return OK;
}
+void RenderingDevice::_texture_check_shared_fallback(Texture *p_texture) {
+ if (p_texture->shared_fallback == nullptr) {
+ p_texture->shared_fallback = memnew(Texture::SharedFallback);
+ }
+}
+
+void RenderingDevice::_texture_update_shared_fallback(RID p_texture_rid, Texture *p_texture, bool p_for_writing) {
+ if (p_texture->shared_fallback == nullptr) {
+ // This texture does not use any of the shared texture fallbacks.
+ return;
+ }
+
+ if (p_texture->owner.is_valid()) {
+ Texture *owner_texture = texture_owner.get_or_null(p_texture->owner);
+ ERR_FAIL_NULL(owner_texture);
+ if (p_for_writing) {
+ // Only the main texture is used for writing when using the shared fallback.
+ owner_texture->shared_fallback->revision++;
+ } else if (p_texture->shared_fallback->revision != owner_texture->shared_fallback->revision) {
+ // Copy the contents of the main texture into the shared texture fallback slice. Update the revision.
+ _texture_copy_shared(p_texture->owner, owner_texture, p_texture_rid, p_texture);
+ p_texture->shared_fallback->revision = owner_texture->shared_fallback->revision;
+ }
+ } else if (p_for_writing) {
+		// Increment the revision of the texture so that dependent shared texture fallback slices know they must be updated.
+ p_texture->shared_fallback->revision++;
+ }
+}
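// ---- Editor's note (worked example, not part of the patch) ----
// The revision counter makes slice synchronization lazy. With a main texture M
// and a fallback slice view S of it:
//   1. write to M            -> M.revision++ (no copy happens yet).
//   2. S is bound for reads  -> S.revision != M.revision, so _texture_copy_shared()
//                               runs once and S.revision catches up.
//   3. S is bound again      -> revisions match; no copy until M changes again.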
+
+void RenderingDevice::_texture_free_shared_fallback(Texture *p_texture) {
+ if (p_texture->shared_fallback != nullptr) {
+ if (p_texture->shared_fallback->texture_tracker != nullptr) {
+ RDG::resource_tracker_free(p_texture->shared_fallback->texture_tracker);
+ }
+
+ if (p_texture->shared_fallback->buffer_tracker != nullptr) {
+ RDG::resource_tracker_free(p_texture->shared_fallback->buffer_tracker);
+ }
+
+ if (p_texture->shared_fallback->texture.id != 0) {
+ texture_memory -= driver->texture_get_allocation_size(p_texture->shared_fallback->texture);
+ driver->texture_free(p_texture->shared_fallback->texture);
+ }
+
+ if (p_texture->shared_fallback->buffer.id != 0) {
+ buffer_memory -= driver->buffer_get_allocation_size(p_texture->shared_fallback->buffer);
+ driver->buffer_free(p_texture->shared_fallback->buffer);
+ }
+
+ memdelete(p_texture->shared_fallback);
+ p_texture->shared_fallback = nullptr;
+ }
+}
+
+void RenderingDevice::_texture_copy_shared(RID p_src_texture_rid, Texture *p_src_texture, RID p_dst_texture_rid, Texture *p_dst_texture) {
+ // The only type of copying allowed is from the main texture to the slice texture, as slice textures are not allowed to be used for writing when using this fallback.
+ DEV_ASSERT(p_src_texture != nullptr);
+ DEV_ASSERT(p_dst_texture != nullptr);
+ DEV_ASSERT(p_src_texture->owner.is_null());
+ DEV_ASSERT(p_dst_texture->owner == p_src_texture_rid);
+
+ bool src_made_mutable = _texture_make_mutable(p_src_texture, p_src_texture_rid);
+ bool dst_made_mutable = _texture_make_mutable(p_dst_texture, p_dst_texture_rid);
+ if (src_made_mutable || dst_made_mutable) {
+ draw_graph.add_synchronization();
+ }
+
+ if (p_dst_texture->shared_fallback->raw_reinterpretation) {
+		// If one of the textures is a main texture that owns a reinterpret buffer, we prefer using it, as it's guaranteed to be big
+		// enough to hold anything and it's how shared textures that don't use slices are created.
+ bool src_has_buffer = p_src_texture->shared_fallback->buffer.id != 0;
+ bool dst_has_buffer = p_dst_texture->shared_fallback->buffer.id != 0;
+ bool from_src = p_src_texture->owner.is_null() && src_has_buffer;
+ bool from_dst = p_dst_texture->owner.is_null() && dst_has_buffer;
+ if (!from_src && !from_dst) {
+ // If neither texture passed the condition, we just pick whichever texture has a reinterpretation buffer.
+ from_src = src_has_buffer;
+ from_dst = dst_has_buffer;
+ }
+
+ // Pick the buffer and tracker to use from the right texture.
+ RDD::BufferID shared_buffer;
+ RDG::ResourceTracker *shared_buffer_tracker = nullptr;
+ if (from_src) {
+ shared_buffer = p_src_texture->shared_fallback->buffer;
+ shared_buffer_tracker = p_src_texture->shared_fallback->buffer_tracker;
+ } else if (from_dst) {
+ shared_buffer = p_dst_texture->shared_fallback->buffer;
+ shared_buffer_tracker = p_dst_texture->shared_fallback->buffer_tracker;
+ } else {
+ DEV_ASSERT(false && "This path should not be reachable.");
+ }
+
+ // FIXME: When using reinterpretation buffers, the only texture aspect supported is color. Depth or stencil contents won't get copied.
+ RDD::BufferTextureCopyRegion get_data_region;
+ RDG::RecordedBufferToTextureCopy update_copy;
+ RDD::TextureCopyableLayout first_copyable_layout;
+ RDD::TextureCopyableLayout copyable_layout;
+ RDD::TextureSubresource texture_subresource;
+ texture_subresource.aspect = RDD::TEXTURE_ASPECT_COLOR;
+ texture_subresource.layer = 0;
+ texture_subresource.mipmap = 0;
+ driver->texture_get_copyable_layout(p_dst_texture->shared_fallback->texture, texture_subresource, &first_copyable_layout);
+
+		// Copy each mipmap from the main texture to a buffer, then from the buffer to the slice texture.
+ thread_local LocalVector<RDD::BufferTextureCopyRegion> get_data_vector;
+ thread_local LocalVector<RDG::RecordedBufferToTextureCopy> update_vector;
+ get_data_vector.clear();
+ update_vector.clear();
+ for (uint32_t i = 0; i < p_dst_texture->mipmaps; i++) {
+ driver->texture_get_copyable_layout(p_dst_texture->shared_fallback->texture, texture_subresource, &copyable_layout);
+
+ uint32_t mipmap = p_dst_texture->base_mipmap + i;
+ get_data_region.buffer_offset = copyable_layout.offset - first_copyable_layout.offset;
+ get_data_region.texture_subresources.aspect = RDD::TEXTURE_ASPECT_COLOR_BIT;
+ get_data_region.texture_subresources.base_layer = p_dst_texture->base_layer;
+ get_data_region.texture_subresources.mipmap = mipmap;
+ get_data_region.texture_subresources.layer_count = p_dst_texture->layers;
+ get_data_region.texture_region_size.x = MAX(1U, p_src_texture->width >> mipmap);
+ get_data_region.texture_region_size.y = MAX(1U, p_src_texture->height >> mipmap);
+ get_data_region.texture_region_size.z = MAX(1U, p_src_texture->depth >> mipmap);
+ get_data_vector.push_back(get_data_region);
+
+ update_copy.from_buffer = shared_buffer;
+ update_copy.region.buffer_offset = get_data_region.buffer_offset;
+ update_copy.region.texture_subresources.aspect = RDD::TEXTURE_ASPECT_COLOR_BIT;
+ update_copy.region.texture_subresources.base_layer = texture_subresource.layer;
+ update_copy.region.texture_subresources.mipmap = texture_subresource.mipmap;
+ update_copy.region.texture_subresources.layer_count = get_data_region.texture_subresources.layer_count;
+ update_copy.region.texture_region_size.x = get_data_region.texture_region_size.x;
+ update_copy.region.texture_region_size.y = get_data_region.texture_region_size.y;
+ update_copy.region.texture_region_size.z = get_data_region.texture_region_size.z;
+ update_vector.push_back(update_copy);
+
+ texture_subresource.mipmap++;
+ }
+
+ draw_graph.add_texture_get_data(p_src_texture->driver_id, p_src_texture->draw_tracker, shared_buffer, get_data_vector, shared_buffer_tracker);
+ draw_graph.add_texture_update(p_dst_texture->shared_fallback->texture, p_dst_texture->shared_fallback->texture_tracker, update_vector, shared_buffer_tracker);
+ } else {
+ // Raw reinterpretation is not required. Use a regular texture copy.
+ RDD::TextureCopyRegion copy_region;
+ copy_region.src_subresources.aspect = p_src_texture->read_aspect_flags;
+ copy_region.src_subresources.base_layer = p_dst_texture->base_layer;
+ copy_region.src_subresources.layer_count = p_dst_texture->layers;
+ copy_region.dst_subresources.aspect = p_dst_texture->read_aspect_flags;
+ copy_region.dst_subresources.base_layer = 0;
+ copy_region.dst_subresources.layer_count = copy_region.src_subresources.layer_count;
+
+		// Copy each mipmap from the main texture to the slice texture.
+ thread_local LocalVector<RDD::TextureCopyRegion> region_vector;
+ region_vector.clear();
+ for (uint32_t i = 0; i < p_dst_texture->mipmaps; i++) {
+ uint32_t mipmap = p_dst_texture->base_mipmap + i;
+ copy_region.src_subresources.mipmap = mipmap;
+ copy_region.dst_subresources.mipmap = i;
+ copy_region.size.x = MAX(1U, p_src_texture->width >> mipmap);
+ copy_region.size.y = MAX(1U, p_src_texture->height >> mipmap);
+ copy_region.size.z = MAX(1U, p_src_texture->depth >> mipmap);
+ region_vector.push_back(copy_region);
+ }
+
+ draw_graph.add_texture_copy(p_src_texture->driver_id, p_src_texture->draw_tracker, p_dst_texture->shared_fallback->texture, p_dst_texture->shared_fallback->texture_tracker, region_vector);
+ }
+}
+
+void RenderingDevice::_texture_create_reinterpret_buffer(Texture *p_texture) {
+ uint64_t row_pitch_step = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP);
+ uint64_t transfer_alignment = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT);
+ uint32_t pixel_bytes = get_image_format_pixel_size(p_texture->format);
+ uint32_t row_pitch = STEPIFY(p_texture->width * pixel_bytes, row_pitch_step);
+ uint64_t buffer_size = STEPIFY(pixel_bytes * row_pitch * p_texture->height * p_texture->depth, transfer_alignment);
+ p_texture->shared_fallback->buffer = driver->buffer_create(buffer_size, RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_TRANSFER_TO_BIT, RDD::MEMORY_ALLOCATION_TYPE_GPU);
+ buffer_memory += driver->buffer_get_allocation_size(p_texture->shared_fallback->buffer);
+
+ RDG::ResourceTracker *tracker = RDG::resource_tracker_create();
+ tracker->buffer_driver_id = p_texture->shared_fallback->buffer;
+ p_texture->shared_fallback->buffer_tracker = tracker;
+}
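// ---- Editor's note (worked arithmetic, not part of the patch) ----
// Example for a 1024x512 RGBA8 2D texture (pixel_bytes = 4), assuming the
// typical D3D12 values row_pitch_step = 256 and transfer_alignment = 512:
//   row_pitch   = STEPIFY(1024 * 4, 256)           = 4096
//   buffer_size = STEPIFY(4 * 4096 * 512 * 1, 512) = 8388608 (8 MiB)
// The extra pixel_bytes factor sizes the buffer well past a tightly packed
// mip 0, which also covers the per-mip regions staged through it by
// _texture_copy_shared().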
+
Vector<uint8_t> RenderingDevice::_texture_get_data(Texture *tex, uint32_t p_layer, bool p_2d) {
uint32_t width, height, depth;
uint32_t tight_mip_size = get_image_format_required_size(tex->format, tex->width, tex->height, p_2d ? 1 : tex->depth, tex->mipmaps, &width, &height, &depth);
@@ -1535,6 +1800,9 @@ Error RenderingDevice::texture_copy(RID p_from_texture, RID p_to_texture, const
copy_region.size = p_size;
+ // Indicate the texture will get modified for the shared texture fallback.
+ _texture_update_shared_fallback(p_to_texture, dst_tex, true);
+
// The textures must be mutable to be used in the copy operation.
bool src_made_mutable = _texture_make_mutable(src_tex, p_from_texture);
bool dst_made_mutable = _texture_make_mutable(dst_tex, p_to_texture);
@@ -1578,6 +1846,9 @@ Error RenderingDevice::texture_resolve_multisample(RID p_from_texture, RID p_to_
ERR_FAIL_COND_V_MSG(src_tex->read_aspect_flags != dst_tex->read_aspect_flags, ERR_INVALID_PARAMETER,
"Source and destination texture must be of the same type (color or depth).");
+ // Indicate the texture will get modified for the shared texture fallback.
+ _texture_update_shared_fallback(p_to_texture, dst_tex, true);
+
// The textures must be mutable to be used in the resolve operation.
bool src_made_mutable = _texture_make_mutable(src_tex, p_from_texture);
bool dst_made_mutable = _texture_make_mutable(dst_tex, p_to_texture);
@@ -1620,6 +1891,9 @@ Error RenderingDevice::texture_clear(RID p_texture, const Color &p_color, uint32
range.base_layer = src_tex->base_layer + p_base_layer;
range.layer_count = p_layers;
+ // Indicate the texture will get modified for the shared texture fallback.
+ _texture_update_shared_fallback(p_texture, src_tex, true);
+
if (_texture_make_mutable(src_tex, p_texture)) {
// The texture must be mutable to be used as a clear destination.
draw_graph.add_synchronization();
@@ -2526,6 +2800,14 @@ RID RenderingDevice::uniform_buffer_create(uint32_t p_size_bytes, const Vector<u
return id;
}
+void RenderingDevice::_uniform_set_update_shared(UniformSet *p_uniform_set) {
+ for (UniformSet::SharedTexture shared : p_uniform_set->shared_textures_to_update) {
+ Texture *texture = texture_owner.get_or_null(shared.texture);
+ ERR_CONTINUE(texture == nullptr);
+ _texture_update_shared_fallback(shared.texture, texture, shared.writing);
+ }
+}
+
RID RenderingDevice::uniform_set_create(const Vector<Uniform> &p_uniforms, RID p_shader, uint32_t p_shader_set) {
_THREAD_SAFE_METHOD_
@@ -2554,6 +2836,7 @@ RID RenderingDevice::uniform_set_create(const Vector<Uniform> &p_uniforms, RID p
Vector<RDG::ResourceTracker *> draw_trackers;
Vector<RDG::ResourceUsage> draw_trackers_usage;
HashMap<RID, RDG::ResourceUsage> untracked_usage;
+ Vector<UniformSet::SharedTexture> shared_textures_to_update;
for (uint32_t i = 0; i < set_uniform_count; i++) {
const ShaderUniform &set_uniform = set_uniforms[i];
@@ -2619,8 +2902,16 @@ RID RenderingDevice::uniform_set_create(const Vector<Uniform> &p_uniforms, RID p
attachable_textures.push_back(attachable_texture);
}
- if (texture->draw_tracker != nullptr) {
- draw_trackers.push_back(texture->draw_tracker);
+ RDD::TextureID driver_id = texture->driver_id;
+ RDG::ResourceTracker *tracker = texture->draw_tracker;
+ if (texture->shared_fallback != nullptr && texture->shared_fallback->texture.id != 0) {
+ driver_id = texture->shared_fallback->texture;
+ tracker = texture->shared_fallback->texture_tracker;
+ shared_textures_to_update.push_back({ false, texture_id });
+ }
+
+ if (tracker != nullptr) {
+ draw_trackers.push_back(tracker);
draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_TEXTURE_SAMPLE);
} else {
untracked_usage[texture_id] = RDG::RESOURCE_USAGE_TEXTURE_SAMPLE;
@@ -2629,7 +2920,7 @@ RID RenderingDevice::uniform_set_create(const Vector<Uniform> &p_uniforms, RID p
DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner));
driver_uniform.ids.push_back(*sampler_driver_id);
- driver_uniform.ids.push_back(texture->driver_id);
+ driver_uniform.ids.push_back(driver_id);
}
} break;
case UNIFORM_TYPE_TEXTURE: {
@@ -2656,8 +2947,16 @@ RID RenderingDevice::uniform_set_create(const Vector<Uniform> &p_uniforms, RID p
attachable_textures.push_back(attachable_texture);
}
- if (texture->draw_tracker != nullptr) {
- draw_trackers.push_back(texture->draw_tracker);
+ RDD::TextureID driver_id = texture->driver_id;
+ RDG::ResourceTracker *tracker = texture->draw_tracker;
+ if (texture->shared_fallback != nullptr && texture->shared_fallback->texture.id != 0) {
+ driver_id = texture->shared_fallback->texture;
+ tracker = texture->shared_fallback->texture_tracker;
+ shared_textures_to_update.push_back({ false, texture_id });
+ }
+
+ if (tracker != nullptr) {
+ draw_trackers.push_back(tracker);
draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_TEXTURE_SAMPLE);
} else {
untracked_usage[texture_id] = RDG::RESOURCE_USAGE_TEXTURE_SAMPLE;
@@ -2665,7 +2964,7 @@ RID RenderingDevice::uniform_set_create(const Vector<Uniform> &p_uniforms, RID p
DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner));
- driver_uniform.ids.push_back(texture->driver_id);
+ driver_uniform.ids.push_back(driver_id);
}
} break;
case UNIFORM_TYPE_IMAGE: {
@@ -2687,6 +2986,10 @@ RID RenderingDevice::uniform_set_create(const Vector<Uniform> &p_uniforms, RID p
ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT), RID(),
"Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_STORAGE_BIT usage flag set in order to be used as uniform.");
+ if (texture->owner.is_null() && texture->shared_fallback != nullptr) {
+ shared_textures_to_update.push_back({ true, texture_id });
+ }
+
if (_texture_make_mutable(texture, texture_id)) {
// The texture must be mutable as a layout transition will be required.
draw_graph.add_synchronization();
@@ -2872,6 +3175,7 @@ RID RenderingDevice::uniform_set_create(const Vector<Uniform> &p_uniforms, RID p
uniform_set.draw_trackers = draw_trackers;
uniform_set.draw_trackers_usage = draw_trackers_usage;
uniform_set.untracked_usage = untracked_usage;
+ uniform_set.shared_textures_to_update = shared_textures_to_update;
uniform_set.shader_set = p_shader_set;
uniform_set.shader_id = p_shader;
@@ -3344,12 +3648,16 @@ Error RenderingDevice::_draw_list_render_pass_begin(Framebuffer *p_framebuffer,
for (int i = 0; i < p_framebuffer->texture_ids.size(); i++) {
RDD::RenderPassClearValue clear_value;
- Texture *texture = texture_owner.get_or_null(p_framebuffer->texture_ids[i]);
+ RID texture_rid = p_framebuffer->texture_ids[i];
+ Texture *texture = texture_owner.get_or_null(texture_rid);
if (!texture) {
color_index++;
continue;
}
+ // Indicate the texture will be modified so the shared texture fallback stays up to date.
+ _texture_update_shared_fallback(texture_rid, texture, true);
+
if (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
if (color_index < p_clear_colors.size()) {
ERR_FAIL_INDEX_V(color_index, p_clear_colors.size(), ERR_BUG); // A bug.
@@ -3813,6 +4121,8 @@ void RenderingDevice::draw_list_draw(DrawListID p_list, bool p_use_indices, uint
draw_graph.add_draw_list_bind_uniform_set(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i);
UniformSet *uniform_set = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set);
+ _uniform_set_update_shared(uniform_set);
+
draw_graph.add_draw_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage);
dl->state.sets[i].bound = true;
@@ -4219,6 +4529,8 @@ void RenderingDevice::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_g
draw_graph.add_compute_list_bind_uniform_set(cl->state.pipeline_shader_driver_id, cl->state.sets[i].uniform_set_driver_id, i);
UniformSet *uniform_set = uniform_set_owner.get_or_null(cl->state.sets[i].uniform_set);
+ _uniform_set_update_shared(uniform_set);
+
draw_graph.add_compute_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage);
cl->state.sets[i].bound = true;
@@ -4326,6 +4638,8 @@ void RenderingDevice::compute_list_dispatch_indirect(ComputeListID p_list, RID p
draw_graph.add_compute_list_bind_uniform_set(cl->state.pipeline_shader_driver_id, cl->state.sets[i].uniform_set_driver_id, i);
UniformSet *uniform_set = uniform_set_owner.get_or_null(cl->state.sets[i].uniform_set);
+ _uniform_set_update_shared(uniform_set);
+
draw_graph.add_compute_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage);
cl->state.sets[i].bound = true;
@@ -4417,6 +4731,7 @@ bool RenderingDevice::_texture_make_mutable(Texture *p_texture, RID p_texture_id
draw_tracker->parent = owner_texture->draw_tracker;
draw_tracker->texture_driver_id = p_texture->driver_id;
draw_tracker->texture_subresources = p_texture->barrier_range();
+ draw_tracker->texture_usage = p_texture->usage_flags;
draw_tracker->texture_slice_or_dirty_rect = p_texture->slice_rect;
owner_texture->slice_trackers[p_texture->slice_rect] = draw_tracker;
}
@@ -4438,6 +4753,7 @@ bool RenderingDevice::_texture_make_mutable(Texture *p_texture, RID p_texture_id
p_texture->draw_tracker = RDG::resource_tracker_create();
p_texture->draw_tracker->texture_driver_id = p_texture->driver_id;
p_texture->draw_tracker->texture_subresources = p_texture->barrier_range();
+ p_texture->draw_tracker->texture_usage = p_texture->usage_flags;
p_texture->draw_tracker->reference_count = 1;
if (p_texture_id.is_valid()) {
@@ -4830,6 +5146,8 @@ void RenderingDevice::_free_pending_resources(int p_frame) {
WARN_PRINT("Deleted a texture while it was bound.");
}
+ _texture_free_shared_fallback(texture);
+
texture_memory -= driver->texture_get_allocation_size(texture->driver_id);
driver->texture_free(texture->driver_id);
diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h
index 42773fc347..25ba066ceb 100644
--- a/servers/rendering/rendering_device.h
+++ b/servers/rendering/rendering_device.h
@@ -207,6 +207,15 @@ public:
// for a framebuffer to render into it.
struct Texture {
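+ // Fallback copy used when a texture shared with a different format can't alias this one directly (see texture_can_make_shared_with_format()).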
+ struct SharedFallback {
+ uint32_t revision = 1;
+ RDD::TextureID texture;
+ RDG::ResourceTracker *texture_tracker = nullptr;
+ RDD::BufferID buffer;
+ RDG::ResourceTracker *buffer_tracker = nullptr;
+ bool raw_reinterpretation = false;
+ };
+
RDD::TextureID driver_id;
TextureType type = TEXTURE_TYPE_MAX;
@@ -235,6 +244,7 @@ public:
RDG::ResourceTracker *draw_tracker = nullptr;
HashMap<Rect2i, RDG::ResourceTracker *> slice_trackers;
+ SharedFallback *shared_fallback = nullptr;
RDD::TextureSubresourceRange barrier_range() const {
RDD::TextureSubresourceRange r;
@@ -245,6 +255,22 @@ public:
r.layer_count = layers;
return r;
}
+
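+ // Rebuilds the TextureFormat this texture was created with, e.g. to create a compatible copy for the shared fallback.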
+ TextureFormat texture_format() const {
+ TextureFormat tf;
+ tf.format = format;
+ tf.width = width;
+ tf.height = height;
+ tf.depth = depth;
+ tf.array_layers = layers;
+ tf.mipmaps = mipmaps;
+ tf.texture_type = type;
+ tf.samples = samples;
+ tf.usage_bits = usage_flags;
+ tf.shareable_formats = allowed_shared_formats;
+ tf.is_resolve_buffer = is_resolve_buffer;
+ return tf;
+ }
};
RID_Owner<Texture> texture_owner;
@@ -252,6 +278,11 @@ public:
Vector<uint8_t> _texture_get_data(Texture *tex, uint32_t p_layer, bool p_2d = false);
Error _texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, bool p_use_setup_queue, bool p_validate_can_update);
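+ // Helpers that create, synchronize and release the shared fallback copy of a texture.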
+ void _texture_check_shared_fallback(Texture *p_texture);
+ void _texture_update_shared_fallback(RID p_texture_rid, Texture *p_texture, bool p_for_writing);
+ void _texture_free_shared_fallback(Texture *p_texture);
+ void _texture_copy_shared(RID p_src_texture_rid, Texture *p_src_texture, RID p_dst_texture_rid, Texture *p_dst_texture);
+ void _texture_create_reinterpret_buffer(Texture *p_texture);
public:
struct TextureView {
@@ -916,16 +947,24 @@ private:
RID texture;
};
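+ // Texture whose shared fallback must be refreshed before the uniform set is used; 'writing' marks write access.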
+ struct SharedTexture {
+ uint32_t writing = 0;
+ RID texture;
+ };
+
LocalVector<AttachableTexture> attachable_textures; // Used for validation.
Vector<RDG::ResourceTracker *> draw_trackers;
Vector<RDG::ResourceUsage> draw_trackers_usage;
HashMap<RID, RDG::ResourceUsage> untracked_usage;
+ LocalVector<SharedTexture> shared_textures_to_update;
InvalidationCallback invalidated_callback = nullptr;
void *invalidated_callback_userdata = nullptr;
};
RID_Owner<UniformSet> uniform_set_owner;
+ void _uniform_set_update_shared(UniformSet *p_uniform_set);
+
public:
RID uniform_set_create(const Vector<Uniform> &p_uniforms, RID p_shader, uint32_t p_shader_set);
bool uniform_set_is_valid(RID p_uniform_set);
diff --git a/servers/rendering/rendering_device_driver.cpp b/servers/rendering/rendering_device_driver.cpp
index be74467340..3b8e3efeb8 100644
--- a/servers/rendering/rendering_device_driver.cpp
+++ b/servers/rendering/rendering_device_driver.cpp
@@ -372,6 +372,8 @@ uint64_t RenderingDeviceDriver::api_trait_get(ApiTrait p_trait) {
return 1;
case API_TRAIT_SECONDARY_VIEWPORT_SCISSOR:
return 1;
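+ // Most drivers can clear textures on the copy engine; drivers that can't (e.g. D3D12) override this trait.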
+ case API_TRAIT_CLEARS_WITH_COPY_ENGINE:
+ return true;
default:
ERR_FAIL_V(0);
}
diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h
index e9464ba321..51cefb1888 100644
--- a/servers/rendering/rendering_device_driver.h
+++ b/servers/rendering/rendering_device_driver.h
@@ -220,15 +220,17 @@ public:
enum TextureLayout {
TEXTURE_LAYOUT_UNDEFINED,
- TEXTURE_LAYOUT_GENERAL,
+ TEXTURE_LAYOUT_STORAGE_OPTIMAL,
TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
- TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL,
- TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL,
- TEXTURE_LAYOUT_PREINITIALIZED,
- TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL = 1000164003,
+ TEXTURE_LAYOUT_COPY_SRC_OPTIMAL,
+ TEXTURE_LAYOUT_COPY_DST_OPTIMAL,
+ TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL,
+ TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL,
+ TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL,
+ TEXTURE_LAYOUT_MAX
};
enum TextureAspect {
@@ -284,6 +286,7 @@ public:
virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) = 0;
virtual void texture_unmap(TextureID p_texture) = 0;
virtual BitField<TextureUsageBits> texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) = 0;
+ virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) = 0;
/*****************/
/**** SAMPLER ****/
@@ -317,10 +320,12 @@ public:
PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT = (1 << 9),
PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT = (1 << 10),
PIPELINE_STAGE_COMPUTE_SHADER_BIT = (1 << 11),
- PIPELINE_STAGE_TRANSFER_BIT = (1 << 12),
+ PIPELINE_STAGE_COPY_BIT = (1 << 12),
PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT = (1 << 13),
+ PIPELINE_STAGE_RESOLVE_BIT = (1 << 14),
PIPELINE_STAGE_ALL_GRAPHICS_BIT = (1 << 15),
PIPELINE_STAGE_ALL_COMMANDS_BIT = (1 << 16),
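+ // Stage for clears performed as storage-image writes when the driver can't clear with the copy engine.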
+ PIPELINE_STAGE_CLEAR_STORAGE_BIT = (1 << 17),
};
enum BarrierAccessBits {
@@ -335,13 +340,16 @@ public:
BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT = (1 << 8),
BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT = (1 << 9),
BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT = (1 << 10),
- BARRIER_ACCESS_TRANSFER_READ_BIT = (1 << 11),
- BARRIER_ACCESS_TRANSFER_WRITE_BIT = (1 << 12),
+ BARRIER_ACCESS_COPY_READ_BIT = (1 << 11),
+ BARRIER_ACCESS_COPY_WRITE_BIT = (1 << 12),
BARRIER_ACCESS_HOST_READ_BIT = (1 << 13),
BARRIER_ACCESS_HOST_WRITE_BIT = (1 << 14),
BARRIER_ACCESS_MEMORY_READ_BIT = (1 << 15),
BARRIER_ACCESS_MEMORY_WRITE_BIT = (1 << 16),
BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT = (1 << 23),
+ BARRIER_ACCESS_RESOLVE_READ_BIT = (1 << 24),
+ BARRIER_ACCESS_RESOLVE_WRITE_BIT = (1 << 25),
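+ // Access bit for clears performed as storage-image writes (see PIPELINE_STAGE_CLEAR_STORAGE_BIT).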
+ BARRIER_ACCESS_STORAGE_CLEAR_BIT = (1 << 27),
};
struct MemoryBarrier {
@@ -735,7 +743,9 @@ public:
API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT,
API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP,
API_TRAIT_SECONDARY_VIEWPORT_SCISSOR,
+ API_TRAIT_CLEARS_WITH_COPY_ENGINE,
};
+
enum ShaderChangeInvalidation {
SHADER_CHANGE_INVALIDATION_ALL_BOUND_UNIFORM_SETS,
// What Vulkan does.
diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp
index c7de5c67cb..221ec72e4a 100644
--- a/servers/rendering/rendering_device_graph.cpp
+++ b/servers/rendering/rendering_device_graph.cpp
@@ -36,7 +36,8 @@
#define PRINT_COMMAND_RECORDING 0
RenderingDeviceGraph::RenderingDeviceGraph() {
- // Default initialization.
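+ // Bit-field members can't have default member initializers in C++17, so they are initialized here.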
+ driver_honors_barriers = false;
+ driver_clears_with_copy_engine = false;
}
RenderingDeviceGraph::~RenderingDeviceGraph() {
@@ -44,7 +45,8 @@ RenderingDeviceGraph::~RenderingDeviceGraph() {
bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) {
switch (p_usage) {
- case RESOURCE_USAGE_TRANSFER_FROM:
+ case RESOURCE_USAGE_COPY_FROM:
+ case RESOURCE_USAGE_RESOLVE_FROM:
case RESOURCE_USAGE_UNIFORM_BUFFER_READ:
case RESOURCE_USAGE_INDIRECT_BUFFER_READ:
case RESOURCE_USAGE_TEXTURE_BUFFER_READ:
@@ -54,7 +56,8 @@ bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) {
case RESOURCE_USAGE_TEXTURE_SAMPLE:
case RESOURCE_USAGE_STORAGE_IMAGE_READ:
return false;
- case RESOURCE_USAGE_TRANSFER_TO:
+ case RESOURCE_USAGE_COPY_TO:
+ case RESOURCE_USAGE_RESOLVE_TO:
case RESOURCE_USAGE_TEXTURE_BUFFER_READ_WRITE:
case RESOURCE_USAGE_STORAGE_BUFFER_READ_WRITE:
case RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE:
@@ -69,15 +72,19 @@ bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) {
RDD::TextureLayout RenderingDeviceGraph::_usage_to_image_layout(ResourceUsage p_usage) {
switch (p_usage) {
- case RESOURCE_USAGE_TRANSFER_FROM:
- return RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL;
- case RESOURCE_USAGE_TRANSFER_TO:
- return RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL;
+ case RESOURCE_USAGE_COPY_FROM:
+ return RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL;
+ case RESOURCE_USAGE_COPY_TO:
+ return RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
+ case RESOURCE_USAGE_RESOLVE_FROM:
+ return RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL;
+ case RESOURCE_USAGE_RESOLVE_TO:
+ return RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL;
case RESOURCE_USAGE_TEXTURE_SAMPLE:
return RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
case RESOURCE_USAGE_STORAGE_IMAGE_READ:
case RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE:
- return RDD::TEXTURE_LAYOUT_GENERAL;
+ return RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL;
case RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE:
return RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE:
@@ -97,10 +104,14 @@ RDD::BarrierAccessBits RenderingDeviceGraph::_usage_to_access_bits(ResourceUsage
switch (p_usage) {
case RESOURCE_USAGE_NONE:
return RDD::BarrierAccessBits(0);
- case RESOURCE_USAGE_TRANSFER_FROM:
- return RDD::BARRIER_ACCESS_TRANSFER_READ_BIT;
- case RESOURCE_USAGE_TRANSFER_TO:
- return RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT;
+ case RESOURCE_USAGE_COPY_FROM:
+ return RDD::BARRIER_ACCESS_COPY_READ_BIT;
+ case RESOURCE_USAGE_COPY_TO:
+ return RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
+ case RESOURCE_USAGE_RESOLVE_FROM:
+ return RDD::BARRIER_ACCESS_RESOLVE_READ_BIT;
+ case RESOURCE_USAGE_RESOLVE_TO:
+ return RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT;
case RESOURCE_USAGE_UNIFORM_BUFFER_READ:
return RDD::BARRIER_ACCESS_UNIFORM_READ_BIT;
case RESOURCE_USAGE_INDIRECT_BUFFER_READ:
@@ -818,26 +829,27 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC
} break;
case RecordedCommand::TYPE_TEXTURE_CLEAR: {
const RecordedTextureClearCommand *texture_clear_command = reinterpret_cast<const RecordedTextureClearCommand *>(command);
- driver->command_clear_color_texture(r_command_buffer, texture_clear_command->texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_clear_command->color, texture_clear_command->range);
+ driver->command_clear_color_texture(r_command_buffer, texture_clear_command->texture, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, texture_clear_command->color, texture_clear_command->range);
} break;
case RecordedCommand::TYPE_TEXTURE_COPY: {
const RecordedTextureCopyCommand *texture_copy_command = reinterpret_cast<const RecordedTextureCopyCommand *>(command);
- driver->command_copy_texture(r_command_buffer, texture_copy_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_copy_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_copy_command->region);
+ const VectorView<RDD::TextureCopyRegion> command_texture_copy_regions_view(texture_copy_command->texture_copy_regions(), texture_copy_command->texture_copy_regions_count);
+ driver->command_copy_texture(r_command_buffer, texture_copy_command->from_texture, RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL, texture_copy_command->to_texture, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, command_texture_copy_regions_view);
} break;
case RecordedCommand::TYPE_TEXTURE_GET_DATA: {
const RecordedTextureGetDataCommand *texture_get_data_command = reinterpret_cast<const RecordedTextureGetDataCommand *>(command);
const VectorView<RDD::BufferTextureCopyRegion> command_buffer_texture_copy_regions_view(texture_get_data_command->buffer_texture_copy_regions(), texture_get_data_command->buffer_texture_copy_regions_count);
- driver->command_copy_texture_to_buffer(r_command_buffer, texture_get_data_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_get_data_command->to_buffer, command_buffer_texture_copy_regions_view);
+ driver->command_copy_texture_to_buffer(r_command_buffer, texture_get_data_command->from_texture, RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL, texture_get_data_command->to_buffer, command_buffer_texture_copy_regions_view);
} break;
case RecordedCommand::TYPE_TEXTURE_RESOLVE: {
const RecordedTextureResolveCommand *texture_resolve_command = reinterpret_cast<const RecordedTextureResolveCommand *>(command);
- driver->command_resolve_texture(r_command_buffer, texture_resolve_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_resolve_command->src_layer, texture_resolve_command->src_mipmap, texture_resolve_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_resolve_command->dst_layer, texture_resolve_command->dst_mipmap);
+ driver->command_resolve_texture(r_command_buffer, texture_resolve_command->from_texture, RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL, texture_resolve_command->src_layer, texture_resolve_command->src_mipmap, texture_resolve_command->to_texture, RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL, texture_resolve_command->dst_layer, texture_resolve_command->dst_mipmap);
} break;
case RecordedCommand::TYPE_TEXTURE_UPDATE: {
const RecordedTextureUpdateCommand *texture_update_command = reinterpret_cast<const RecordedTextureUpdateCommand *>(command);
const RecordedBufferToTextureCopy *command_buffer_to_texture_copies = texture_update_command->buffer_to_texture_copies();
for (uint32_t j = 0; j < texture_update_command->buffer_to_texture_copies_count; j++) {
- driver->command_copy_buffer_to_texture(r_command_buffer, command_buffer_to_texture_copies[j].from_buffer, texture_update_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, command_buffer_to_texture_copies[j].region);
+ driver->command_copy_buffer_to_texture(r_command_buffer, command_buffer_to_texture_copies[j].from_buffer, texture_update_command->to_texture, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, command_buffer_to_texture_copies[j].region);
}
} break;
case RecordedCommand::TYPE_CAPTURE_TIMESTAMP: {
@@ -1271,6 +1283,7 @@ void RenderingDeviceGraph::initialize(RDD *p_driver, RenderingContextDriver::Dev
}
driver_honors_barriers = driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS);
+ driver_clears_with_copy_engine = driver->api_trait_get(RDD::API_TRAIT_CLEARS_WITH_COPY_ENGINE);
}
void RenderingDeviceGraph::finalize() {
@@ -1321,12 +1334,12 @@ void RenderingDeviceGraph::add_buffer_clear(RDD::BufferID p_dst, ResourceTracker
int32_t command_index;
RecordedBufferClearCommand *command = static_cast<RecordedBufferClearCommand *>(_allocate_command(sizeof(RecordedBufferClearCommand), command_index));
command->type = RecordedCommand::TYPE_BUFFER_CLEAR;
- command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+ command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT;
command->buffer = p_dst;
command->offset = p_offset;
command->size = p_size;
- ResourceUsage usage = RESOURCE_USAGE_TRANSFER_TO;
+ ResourceUsage usage = RESOURCE_USAGE_COPY_TO;
_add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command);
}
@@ -1337,13 +1350,13 @@ void RenderingDeviceGraph::add_buffer_copy(RDD::BufferID p_src, ResourceTracker
int32_t command_index;
RecordedBufferCopyCommand *command = static_cast<RecordedBufferCopyCommand *>(_allocate_command(sizeof(RecordedBufferCopyCommand), command_index));
command->type = RecordedCommand::TYPE_BUFFER_COPY;
- command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+ command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT;
command->source = p_src;
command->destination = p_dst;
command->region = p_region;
ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker };
- ResourceUsage usages[2] = { RESOURCE_USAGE_TRANSFER_TO, RESOURCE_USAGE_TRANSFER_FROM };
+ ResourceUsage usages[2] = { RESOURCE_USAGE_COPY_TO, RESOURCE_USAGE_COPY_FROM };
_add_command_to_graph(trackers, usages, p_src_tracker != nullptr ? 2 : 1, command_index, command);
}
@@ -1352,13 +1365,13 @@ void RenderingDeviceGraph::add_buffer_get_data(RDD::BufferID p_src, ResourceTrac
int32_t command_index;
RecordedBufferGetDataCommand *command = static_cast<RecordedBufferGetDataCommand *>(_allocate_command(sizeof(RecordedBufferGetDataCommand), command_index));
command->type = RecordedCommand::TYPE_BUFFER_GET_DATA;
- command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+ command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT;
command->source = p_src;
command->destination = p_dst;
command->region = p_region;
if (p_src_tracker != nullptr) {
- ResourceUsage usage = RESOURCE_USAGE_TRANSFER_FROM;
+ ResourceUsage usage = RESOURCE_USAGE_COPY_FROM;
_add_command_to_graph(&p_src_tracker, &usage, 1, command_index, command);
} else {
_add_command_to_graph(nullptr, nullptr, 0, command_index, command);
@@ -1373,7 +1386,7 @@ void RenderingDeviceGraph::add_buffer_update(RDD::BufferID p_dst, ResourceTracke
int32_t command_index;
RecordedBufferUpdateCommand *command = static_cast<RecordedBufferUpdateCommand *>(_allocate_command(command_size, command_index));
command->type = RecordedCommand::TYPE_BUFFER_UPDATE;
- command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+ command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT;
command->destination = p_dst;
command->buffer_copies_count = p_buffer_copies.size();
@@ -1382,7 +1395,7 @@ void RenderingDeviceGraph::add_buffer_update(RDD::BufferID p_dst, ResourceTracke
buffer_copies[i] = p_buffer_copies[i];
}
- ResourceUsage buffer_usage = RESOURCE_USAGE_TRANSFER_TO;
+ ResourceUsage buffer_usage = RESOURCE_USAGE_COPY_TO;
_add_command_to_graph(&p_dst_tracker, &buffer_usage, 1, command_index, command);
}
@@ -1710,40 +1723,60 @@ void RenderingDeviceGraph::add_texture_clear(RDD::TextureID p_dst, ResourceTrack
int32_t command_index;
RecordedTextureClearCommand *command = static_cast<RecordedTextureClearCommand *>(_allocate_command(sizeof(RecordedTextureClearCommand), command_index));
command->type = RecordedCommand::TYPE_TEXTURE_CLEAR;
- command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->texture = p_dst;
command->color = p_color;
command->range = p_range;
- ResourceUsage usage = RESOURCE_USAGE_TRANSFER_TO;
+ ResourceUsage usage;
+ if (driver_clears_with_copy_engine) {
+ command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT;
+ usage = RESOURCE_USAGE_COPY_TO;
+ } else {
+ // If the driver is incapable of using the copy engine to clear the image (e.g. D3D12), we must transition the
+ // resource to either a render target or a storage image, as those are the only two ways it can perform the operation.
+ if (p_dst_tracker->texture_usage & RDD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
+ command->self_stages = RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ usage = RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE;
+ } else {
+ command->self_stages = RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT;
+ usage = RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE;
+ }
+ }
+
_add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command);
}
-void RenderingDeviceGraph::add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, RDD::TextureCopyRegion p_region) {
+void RenderingDeviceGraph::add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView<RDD::TextureCopyRegion> p_texture_copy_regions) {
DEV_ASSERT(p_src_tracker != nullptr);
DEV_ASSERT(p_dst_tracker != nullptr);
int32_t command_index;
- RecordedTextureCopyCommand *command = static_cast<RecordedTextureCopyCommand *>(_allocate_command(sizeof(RecordedTextureCopyCommand), command_index));
+ uint64_t command_size = sizeof(RecordedTextureCopyCommand) + p_texture_copy_regions.size() * sizeof(RDD::TextureCopyRegion);
+ RecordedTextureCopyCommand *command = static_cast<RecordedTextureCopyCommand *>(_allocate_command(command_size, command_index));
command->type = RecordedCommand::TYPE_TEXTURE_COPY;
- command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+ command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT;
command->from_texture = p_src;
command->to_texture = p_dst;
- command->region = p_region;
+ command->texture_copy_regions_count = p_texture_copy_regions.size();
+
+ RDD::TextureCopyRegion *texture_copy_regions = command->texture_copy_regions();
+ for (uint32_t i = 0; i < command->texture_copy_regions_count; i++) {
+ texture_copy_regions[i] = p_texture_copy_regions[i];
+ }
ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker };
- ResourceUsage usages[2] = { RESOURCE_USAGE_TRANSFER_TO, RESOURCE_USAGE_TRANSFER_FROM };
+ ResourceUsage usages[2] = { RESOURCE_USAGE_COPY_TO, RESOURCE_USAGE_COPY_FROM };
_add_command_to_graph(trackers, usages, 2, command_index, command);
}
-void RenderingDeviceGraph::add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView<RDD::BufferTextureCopyRegion> p_buffer_texture_copy_regions) {
+void RenderingDeviceGraph::add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView<RDD::BufferTextureCopyRegion> p_buffer_texture_copy_regions, ResourceTracker *p_dst_tracker) {
DEV_ASSERT(p_src_tracker != nullptr);
int32_t command_index;
uint64_t command_size = sizeof(RecordedTextureGetDataCommand) + p_buffer_texture_copy_regions.size() * sizeof(RDD::BufferTextureCopyRegion);
RecordedTextureGetDataCommand *command = static_cast<RecordedTextureGetDataCommand *>(_allocate_command(command_size, command_index));
command->type = RecordedCommand::TYPE_TEXTURE_GET_DATA;
- command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+ command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT;
command->from_texture = p_src;
command->to_buffer = p_dst;
command->buffer_texture_copy_regions_count = p_buffer_texture_copy_regions.size();
@@ -1753,8 +1786,15 @@ void RenderingDeviceGraph::add_texture_get_data(RDD::TextureID p_src, ResourceTr
buffer_texture_copy_regions[i] = p_buffer_texture_copy_regions[i];
}
- ResourceUsage usage = RESOURCE_USAGE_TRANSFER_FROM;
- _add_command_to_graph(&p_src_tracker, &usage, 1, command_index, command);
+ if (p_dst_tracker != nullptr) {
+ // Add the optional destination tracker if it was provided.
+ ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker };
+ ResourceUsage usages[2] = { RESOURCE_USAGE_COPY_TO, RESOURCE_USAGE_COPY_FROM };
+ _add_command_to_graph(trackers, usages, 2, command_index, command);
+ } else {
+ ResourceUsage usage = RESOURCE_USAGE_COPY_FROM;
+ _add_command_to_graph(&p_src_tracker, &usage, 1, command_index, command);
+ }
}
void RenderingDeviceGraph::add_texture_resolve(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_src_layer, uint32_t p_src_mipmap, uint32_t p_dst_layer, uint32_t p_dst_mipmap) {
@@ -1764,7 +1804,7 @@ void RenderingDeviceGraph::add_texture_resolve(RDD::TextureID p_src, ResourceTra
int32_t command_index;
RecordedTextureResolveCommand *command = static_cast<RecordedTextureResolveCommand *>(_allocate_command(sizeof(RecordedTextureResolveCommand), command_index));
command->type = RecordedCommand::TYPE_TEXTURE_RESOLVE;
- command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+ command->self_stages = RDD::PIPELINE_STAGE_RESOLVE_BIT;
command->from_texture = p_src;
command->to_texture = p_dst;
command->src_layer = p_src_layer;
@@ -1773,18 +1813,18 @@ void RenderingDeviceGraph::add_texture_resolve(RDD::TextureID p_src, ResourceTra
command->dst_mipmap = p_dst_mipmap;
ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker };
- ResourceUsage usages[2] = { RESOURCE_USAGE_TRANSFER_TO, RESOURCE_USAGE_TRANSFER_FROM };
+ ResourceUsage usages[2] = { RESOURCE_USAGE_RESOLVE_TO, RESOURCE_USAGE_RESOLVE_FROM };
_add_command_to_graph(trackers, usages, 2, command_index, command);
}
-void RenderingDeviceGraph::add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView<RecordedBufferToTextureCopy> p_buffer_copies) {
+void RenderingDeviceGraph::add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView<RecordedBufferToTextureCopy> p_buffer_copies, VectorView<ResourceTracker *> p_buffer_trackers) {
DEV_ASSERT(p_dst_tracker != nullptr);
int32_t command_index;
uint64_t command_size = sizeof(RecordedTextureUpdateCommand) + p_buffer_copies.size() * sizeof(RecordedBufferToTextureCopy);
RecordedTextureUpdateCommand *command = static_cast<RecordedTextureUpdateCommand *>(_allocate_command(command_size, command_index));
command->type = RecordedCommand::TYPE_TEXTURE_UPDATE;
- command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
+ command->self_stages = RDD::PIPELINE_STAGE_COPY_BIT;
command->to_texture = p_dst;
command->buffer_to_texture_copies_count = p_buffer_copies.size();
@@ -1793,8 +1833,25 @@ void RenderingDeviceGraph::add_texture_update(RDD::TextureID p_dst, ResourceTrac
buffer_to_texture_copies[i] = p_buffer_copies[i];
}
- ResourceUsage usage = RESOURCE_USAGE_TRANSFER_TO;
- _add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command);
+ if (p_buffer_trackers.size() > 0) {
+ // Add the optional buffer trackers if they were provided.
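+ // Thread-local scratch vectors avoid reallocating on every call.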
+ thread_local LocalVector<ResourceTracker *> trackers;
+ thread_local LocalVector<ResourceUsage> usages;
+ trackers.clear();
+ usages.clear();
+ for (uint32_t i = 0; i < p_buffer_trackers.size(); i++) {
+ trackers.push_back(p_buffer_trackers[i]);
+ usages.push_back(RESOURCE_USAGE_COPY_FROM);
+ }
+
+ trackers.push_back(p_dst_tracker);
+ usages.push_back(RESOURCE_USAGE_COPY_TO);
+
+ _add_command_to_graph(trackers.ptr(), usages.ptr(), trackers.size(), command_index, command);
+ } else {
+ ResourceUsage usage = RESOURCE_USAGE_COPY_TO;
+ _add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command);
+ }
}
void RenderingDeviceGraph::add_capture_timestamp(RDD::QueryPoolID p_query_pool, uint32_t p_index) {
diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h
index a96382e0cc..baa15f63f6 100644
--- a/servers/rendering/rendering_device_graph.h
+++ b/servers/rendering/rendering_device_graph.h
@@ -129,8 +129,10 @@ public:
enum ResourceUsage {
RESOURCE_USAGE_NONE,
- RESOURCE_USAGE_TRANSFER_FROM,
- RESOURCE_USAGE_TRANSFER_TO,
+ RESOURCE_USAGE_COPY_FROM,
+ RESOURCE_USAGE_COPY_TO,
+ RESOURCE_USAGE_RESOLVE_FROM,
+ RESOURCE_USAGE_RESOLVE_TO,
RESOURCE_USAGE_UNIFORM_BUFFER_READ,
RESOURCE_USAGE_INDIRECT_BUFFER_READ,
RESOURCE_USAGE_TEXTURE_BUFFER_READ,
@@ -161,6 +163,7 @@ public:
RDD::BufferID buffer_driver_id;
RDD::TextureID texture_driver_id;
RDD::TextureSubresourceRange texture_subresources;
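+ // Usage flags of the tracked texture, used to pick how to clear it when the copy engine can't be used.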
+ uint32_t texture_usage = 0;
int32_t texture_slice_command_index = -1;
ResourceTracker *parent = nullptr;
ResourceTracker *dirty_shared_list = nullptr;
@@ -337,7 +340,15 @@ private:
struct RecordedTextureCopyCommand : RecordedCommand {
RDD::TextureID from_texture;
RDD::TextureID to_texture;
- RDD::TextureCopyRegion region;
+ uint32_t texture_copy_regions_count = 0;
+
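+ // The copy regions are stored inline, immediately after this struct in the command allocation.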
+ _FORCE_INLINE_ RDD::TextureCopyRegion *texture_copy_regions() {
+ return reinterpret_cast<RDD::TextureCopyRegion *>(&this[1]);
+ }
+
+ _FORCE_INLINE_ const RDD::TextureCopyRegion *texture_copy_regions() const {
+ return reinterpret_cast<const RDD::TextureCopyRegion *>(&this[1]);
+ }
};
struct RecordedTextureGetDataCommand : RecordedCommand {
@@ -596,7 +607,8 @@ private:
int32_t command_synchronization_index = -1;
bool command_synchronization_pending = false;
BarrierGroup barrier_group;
- bool driver_honors_barriers = false;
+ bool driver_honors_barriers : 1;
+ bool driver_clears_with_copy_engine : 1;
WorkaroundsState workarounds_state;
TightLocalVector<Frame> frames;
uint32_t frame = 0;
@@ -672,10 +684,10 @@ public:
void add_draw_list_usages(VectorView<ResourceTracker *> p_trackers, VectorView<ResourceUsage> p_usages);
void add_draw_list_end();
void add_texture_clear(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, const Color &p_color, const RDD::TextureSubresourceRange &p_range);
- void add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, RDD::TextureCopyRegion p_region);
- void add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView<RDD::BufferTextureCopyRegion> p_buffer_texture_copy_regions);
+ void add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView<RDD::TextureCopyRegion> p_texture_copy_regions);
+ void add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView<RDD::BufferTextureCopyRegion> p_buffer_texture_copy_regions, ResourceTracker *p_dst_tracker = nullptr);
void add_texture_resolve(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_src_layer, uint32_t p_src_mipmap, uint32_t p_dst_layer, uint32_t p_dst_mipmap);
- void add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView<RecordedBufferToTextureCopy> p_buffer_copies);
+ void add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView<RecordedBufferToTextureCopy> p_buffer_copies, VectorView<ResourceTracker *> p_buffer_trackers = VectorView<ResourceTracker *>());
void add_capture_timestamp(RDD::QueryPoolID p_query_pool, uint32_t p_index);
void add_synchronization();
void begin_label(const String &p_label_name, const Color &p_color);