path: root/drivers/d3d12/rendering_device_driver_d3d12.cpp
Diffstat (limited to 'drivers/d3d12/rendering_device_driver_d3d12.cpp')
-rw-r--r--  drivers/d3d12/rendering_device_driver_d3d12.cpp | 922
1 file changed, 597 insertions(+), 325 deletions(-)
diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp
index 9407826ebf..08ee12991a 100644
--- a/drivers/d3d12/rendering_device_driver_d3d12.cpp
+++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp
@@ -538,15 +538,6 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso
#endif
ResourceInfo::States *res_states = p_resource->states_ptr;
-
- if (p_new_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
- if (unlikely(!res_states->xfamily_fallback.subresources_dirty.is_empty())) {
- uint32_t subres_qword = p_subresource / 64;
- uint64_t subres_mask = (uint64_t(1) << (p_subresource % 64));
- res_states->xfamily_fallback.subresources_dirty[subres_qword] |= subres_mask;
- }
- }
-
D3D12_RESOURCE_STATES *curr_state = &res_states->subresource_states[p_subresource];
// Transitions can be considered redundant if the current state has all the bits of the new state.
@@ -869,7 +860,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
// but also if you give a rounded size at that point because it will extend beyond the
// memory of the resource. Therefore, it seems the only way is to create it with a
// rounded size.
- CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(STEPIFY(p_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT));
+ CD3DX12_RESOURCE_DESC1 resource_desc = CD3DX12_RESOURCE_DESC1::Buffer(STEPIFY(p_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT));
if (p_usage.has_flag(RDD::BUFFER_USAGE_STORAGE_BIT)) {
resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
} else {
@@ -878,7 +869,6 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
D3D12MA::ALLOCATION_DESC allocation_desc = {};
allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
- D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
switch (p_allocation_type) {
case MEMORY_ALLOCATION_TYPE_CPU: {
bool is_src = p_usage.has_flag(BUFFER_USAGE_TRANSFER_FROM_BIT);
@@ -886,7 +876,6 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
if (is_src && !is_dst) {
// Looks like a staging buffer: CPU maps, writes sequentially, then GPU copies to VRAM.
allocation_desc.HeapType = D3D12_HEAP_TYPE_UPLOAD;
- initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;
}
if (is_dst && !is_src) {
// Looks like a readback buffer: GPU copies from VRAM, then CPU maps and reads.
@@ -904,13 +893,27 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
ComPtr<ID3D12Resource> buffer;
ComPtr<D3D12MA::Allocation> allocation;
- HRESULT res = allocator->CreateResource(
- &allocation_desc,
- &resource_desc,
- initial_state,
- nullptr,
- allocation.GetAddressOf(),
- IID_PPV_ARGS(buffer.GetAddressOf()));
+ HRESULT res;
+ if (barrier_capabilities.enhanced_barriers_supported) {
+ res = allocator->CreateResource3(
+ &allocation_desc,
+ &resource_desc,
+ D3D12_BARRIER_LAYOUT_UNDEFINED,
+ nullptr,
+ 0,
+ nullptr,
+ allocation.GetAddressOf(),
+ IID_PPV_ARGS(buffer.GetAddressOf()));
+ } else {
+ res = allocator->CreateResource(
+ &allocation_desc,
+ reinterpret_cast<const D3D12_RESOURCE_DESC *>(&resource_desc),
+ D3D12_RESOURCE_STATE_COMMON,
+ nullptr,
+ allocation.GetAddressOf(),
+ IID_PPV_ARGS(buffer.GetAddressOf()));
+ }
+
ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), BufferID(), "Can't create buffer of size: " + itos(p_size) + ", error " + vformat("0x%08ux", (uint64_t)res) + ".");
// Bookkeep.
@@ -919,11 +922,10 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
buf_info->resource = buffer.Get();
buf_info->owner_info.resource = buffer;
buf_info->owner_info.allocation = allocation;
- buf_info->owner_info.states.subresource_states.push_back(initial_state);
+ buf_info->owner_info.states.subresource_states.push_back(D3D12_RESOURCE_STATE_COMMON);
buf_info->states_ptr = &buf_info->owner_info.states;
buf_info->size = p_size;
buf_info->flags.usable_as_uav = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
- buf_info->flags.is_for_upload = allocation_desc.HeapType == D3D12_HEAP_TYPE_UPLOAD;
return BufferID(buf_info);
}
@@ -1052,8 +1054,7 @@ UINT RenderingDeviceDriverD3D12::_compute_plane_slice(DataFormat p_format, BitFi
if (p_aspect_bits.has_flag(TEXTURE_ASPECT_DEPTH_BIT)) {
DEV_ASSERT(aspect == TEXTURE_ASPECT_MAX);
aspect = TEXTURE_ASPECT_DEPTH;
- }
- if (p_aspect_bits.has_flag(TEXTURE_ASPECT_STENCIL_BIT)) {
+ } else if (p_aspect_bits.has_flag(TEXTURE_ASPECT_STENCIL_BIT)) {
DEV_ASSERT(aspect == TEXTURE_ASPECT_MAX);
aspect = TEXTURE_ASPECT_STENCIL;
}
@@ -1080,6 +1081,10 @@ UINT RenderingDeviceDriverD3D12::_compute_plane_slice(DataFormat p_format, Textu
}
}
+UINT RenderingDeviceDriverD3D12::_compute_subresource_from_layers(TextureInfo *p_texture, const TextureSubresourceLayers &p_layers, uint32_t p_layer_offset) {
+ return D3D12CalcSubresource(p_layers.mipmap, p_layers.base_layer + p_layer_offset, _compute_plane_slice(p_texture->format, p_layers.aspect), p_texture->desc.MipLevels, p_texture->desc.ArraySize());
+}
+
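For reference, a minimal restatement of the D3D12CalcSubresource index math the helper above relies on (calc_subresource is a hypothetical name used only for this sketch):

	// Subresources are ordered mip-first within each array layer, and layers within each plane.
	inline UINT calc_subresource(UINT mip, UINT layer, UINT plane, UINT mip_levels, UINT array_size) {
		return mip + layer * mip_levels + plane * mip_levels * array_size;
	}
	// Example: 4 mips, 6 layers, plane 0; mip 2 of layer 3 -> 2 + 3 * 4 + 0 = 14.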
void RenderingDeviceDriverD3D12::_discard_texture_subresources(const TextureInfo *p_tex_info, const CommandBufferInfo *p_cmd_buf_info) {
uint32_t planes = 1;
if ((p_tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {
@@ -1117,6 +1122,64 @@ void RenderingDeviceDriverD3D12::_discard_texture_subresources(const TextureInfo
}
}
+bool RenderingDeviceDriverD3D12::_unordered_access_supported_by_format(DataFormat p_format) {
+ switch (p_format) {
+ case DATA_FORMAT_R4G4_UNORM_PACK8:
+ case DATA_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case DATA_FORMAT_B4G4R4A4_UNORM_PACK16:
+ case DATA_FORMAT_R5G6B5_UNORM_PACK16:
+ case DATA_FORMAT_B5G6R5_UNORM_PACK16:
+ case DATA_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case DATA_FORMAT_B5G5R5A1_UNORM_PACK16:
+ case DATA_FORMAT_A1R5G5B5_UNORM_PACK16:
+ case DATA_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case DATA_FORMAT_A8B8G8R8_SNORM_PACK32:
+ case DATA_FORMAT_A8B8G8R8_USCALED_PACK32:
+ case DATA_FORMAT_A8B8G8R8_SSCALED_PACK32:
+ case DATA_FORMAT_A8B8G8R8_UINT_PACK32:
+ case DATA_FORMAT_A8B8G8R8_SINT_PACK32:
+ case DATA_FORMAT_A8B8G8R8_SRGB_PACK32:
+ case DATA_FORMAT_A2R10G10B10_UNORM_PACK32:
+ case DATA_FORMAT_A2R10G10B10_SNORM_PACK32:
+ case DATA_FORMAT_A2R10G10B10_USCALED_PACK32:
+ case DATA_FORMAT_A2R10G10B10_SSCALED_PACK32:
+ case DATA_FORMAT_A2R10G10B10_UINT_PACK32:
+ case DATA_FORMAT_A2R10G10B10_SINT_PACK32:
+ case DATA_FORMAT_A2B10G10R10_UNORM_PACK32:
+ case DATA_FORMAT_A2B10G10R10_SNORM_PACK32:
+ case DATA_FORMAT_A2B10G10R10_USCALED_PACK32:
+ case DATA_FORMAT_A2B10G10R10_SSCALED_PACK32:
+ case DATA_FORMAT_A2B10G10R10_UINT_PACK32:
+ case DATA_FORMAT_A2B10G10R10_SINT_PACK32:
+ case DATA_FORMAT_B10G11R11_UFLOAT_PACK32:
+ case DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32:
+ case DATA_FORMAT_X8_D24_UNORM_PACK32:
+ case DATA_FORMAT_R10X6_UNORM_PACK16:
+ case DATA_FORMAT_R10X6G10X6_UNORM_2PACK16:
+ case DATA_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
+ case DATA_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
+ case DATA_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
+ case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
+ case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
+ case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
+ case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
+ case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
+ case DATA_FORMAT_R12X4_UNORM_PACK16:
+ case DATA_FORMAT_R12X4G12X4_UNORM_2PACK16:
+ case DATA_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
+ case DATA_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
+ case DATA_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
+ case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
+ case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
+ case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
+ case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
+ case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
+ return false;
+ default:
+ return true;
+ }
+}
+
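For context, the formats rejected above are packed, shared-exponent, depth-packed, or multi-planar formats the backend does not treat as UAV-capable. A runtime equivalent using the standard format-support query could look like the following sketch (format_supports_typed_uav is a hypothetical helper; a valid ID3D12Device is assumed):

	// Sketch: query typed UAV load/store support for a DXGI format instead of using a static table.
	bool format_supports_typed_uav(ID3D12Device *p_device, DXGI_FORMAT p_format) {
		D3D12_FEATURE_DATA_FORMAT_SUPPORT support = { p_format, D3D12_FORMAT_SUPPORT1_NONE, D3D12_FORMAT_SUPPORT2_NONE };
		if (FAILED(p_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &support, sizeof(support)))) {
			return false;
		}
		return (support.Support2 & (D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD | D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE)) != 0;
	}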
RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p_format, const TextureView &p_view) {
// Using D3D12_RESOURCE_DESC1. Thanks to the layout, it's sliceable down to D3D12_RESOURCE_DESC if needed.
CD3DX12_RESOURCE_DESC1 resource_desc = {};
@@ -1137,12 +1200,10 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p
resource_desc.Format = RD_TO_D3D12_FORMAT[p_format.format].family;
// If views of different families are wanted, special setup is needed for proper sharing among them.
- // Two options here:
- // 1. If the driver reports relaxed casting is, leverage its new extended resource creation API (via D3D12MA).
- // 2. Otherwise, fall back to an approach based on having multiple versions of the resource and copying as needed. [[CROSS_FAMILY_FALLBACK]]
+ // If the driver reports relaxed casting is supported, leverage its new extended resource creation API (via D3D12MA).
if (p_format.shareable_formats.size() && format_capabilities.relaxed_casting_supported) {
relaxed_casting_available = true;
- relaxed_casting_formats = ALLOCA_ARRAY(DXGI_FORMAT, p_format.shareable_formats.size());
+ relaxed_casting_formats = ALLOCA_ARRAY(DXGI_FORMAT, p_format.shareable_formats.size() + 1);
relaxed_casting_formats[0] = RD_TO_D3D12_FORMAT[p_format.format].general_format;
relaxed_casting_format_count++;
}
@@ -1156,9 +1217,9 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p
if (RD_TO_D3D12_FORMAT[curr_format].family != RD_TO_D3D12_FORMAT[p_format.format].family) {
cross_family_sharing = true;
- if (!relaxed_casting_available) {
- break;
- }
+ }
+
+ if (relaxed_casting_available) {
relaxed_casting_formats[relaxed_casting_format_count] = RD_TO_D3D12_FORMAT[curr_format].general_format;
relaxed_casting_format_count++;
}
@@ -1185,7 +1246,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p
if ((p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) {
resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
} else {
- if ((p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_TO_BIT)) {
+ if ((p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_TO_BIT) && _unordered_access_supported_by_format(p_format.format)) {
resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; // For clearing via UAV.
}
}
@@ -1242,17 +1303,19 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p
D3D12_CLEAR_VALUE *clear_value_ptr = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? &clear_value : nullptr;
{
HRESULT res = E_FAIL;
- if (cross_family_sharing && relaxed_casting_available) {
+ if (barrier_capabilities.enhanced_barriers_supported || (cross_family_sharing && relaxed_casting_available)) {
+ // Create with undefined layout if enhanced barriers are supported. Leave as common otherwise for interop with legacy barriers.
+ D3D12_BARRIER_LAYOUT initial_layout = barrier_capabilities.enhanced_barriers_supported ? D3D12_BARRIER_LAYOUT_UNDEFINED : D3D12_BARRIER_LAYOUT_COMMON;
res = allocator->CreateResource3(
&allocation_desc,
&resource_desc,
- D3D12_BARRIER_LAYOUT_COMMON, // Needed for barrier interop.
+ initial_layout,
clear_value_ptr,
relaxed_casting_format_count,
relaxed_casting_formats,
allocation.GetAddressOf(),
IID_PPV_ARGS(main_texture.GetAddressOf()));
- initial_state = D3D12_RESOURCE_STATE_COMMON; // Needed for barrier interop.
+ initial_state = D3D12_RESOURCE_STATE_COMMON;
} else {
res = allocator->CreateResource(
&allocation_desc,
@@ -1353,7 +1416,10 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p
tex_info->mipmaps = resource_desc.MipLevels;
tex_info->view_descs.srv = srv_desc;
tex_info->view_descs.uav = uav_desc;
- if ((p_format.usage_bits & (TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_COLOR_ATTACHMENT_BIT))) {
+
+ if (!barrier_capabilities.enhanced_barriers_supported && (p_format.usage_bits & (TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_COLOR_ATTACHMENT_BIT))) {
+ // Fall back to clearing resources when they're first used in a uniform set. Not necessary if enhanced barriers
+ // are supported, as the discard flag will be used instead when transitioning from an undefined layout.
textures_pending_clear.add(&tex_info->pending_clear);
}
@@ -1380,45 +1446,8 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex
ComPtr<ID3D12Resource> new_texture;
ComPtr<D3D12MA::Allocation> new_allocation;
- ID3D12Resource *resource = nullptr;
+ ID3D12Resource *resource = owner_tex_info->resource;
CD3DX12_RESOURCE_DESC new_tex_resource_desc = owner_tex_info->desc;
- bool cross_family = RD_TO_D3D12_FORMAT[p_view.format].family != RD_TO_D3D12_FORMAT[owner_tex_info->format].family;
- if (cross_family && !format_capabilities.relaxed_casting_supported) {
- // [[CROSS_FAMILY_FALLBACK]].
- // We have to create a new texture of the alternative format.
-
- D3D12MA::ALLOCATION_DESC allocation_desc = {};
- allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
- allocation_desc.ExtraHeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;
-
- if (p_slice_type != -1) {
-#ifdef DEV_ENABLED
- // Actual slicing is not contemplated. If ever needed, let's at least realize.
- if (p_slice_type != -1) {
- uint32_t new_texture_subresorce_count = owner_tex_info->mipmaps * owner_tex_info->layers;
- uint32_t slice_subresorce_count = p_mipmaps * p_layers;
- DEV_ASSERT(new_texture_subresorce_count == slice_subresorce_count);
- }
-#endif
- new_tex_resource_desc.DepthOrArraySize = p_layers;
- new_tex_resource_desc.MipLevels = p_mipmaps;
- }
- new_tex_resource_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].family;
- new_tex_resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE; // Alternative formats can only be used as SRVs.
-
- HRESULT res = allocator->CreateResource(
- &allocation_desc,
- &new_tex_resource_desc,
- D3D12_RESOURCE_STATE_COPY_DEST,
- nullptr,
- new_allocation.GetAddressOf(),
- IID_PPV_ARGS(new_texture.GetAddressOf()));
- ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), TextureID(), vformat("D3D12MA::CreateResource failed with error 0x%08ux.", (uint64_t)res));
-
- resource = new_texture.Get();
- } else {
- resource = owner_tex_info->resource;
- }
// Describe views.
@@ -1528,58 +1557,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex
TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator);
tex_info->resource = resource;
- if (new_texture.Get()) {
- // [[CROSS_FAMILY_FALLBACK]].
-
- DEV_ASSERT(cross_family && !format_capabilities.relaxed_casting_supported);
-
- uint32_t new_texture_subresorce_count = owner_tex_info->mipmaps * owner_tex_info->layers;
-#ifdef DEV_ENABLED
- // Actual slicing is not contemplated. If ever needed, let's at least realize.
- if (p_slice_type != -1) {
- uint32_t slice_subresorce_count = p_mipmaps * p_layers;
- DEV_ASSERT(new_texture_subresorce_count == slice_subresorce_count);
- }
-#endif
-
- tex_info->owner_info.resource = new_texture;
- tex_info->owner_info.allocation = new_allocation;
- tex_info->owner_info.states.subresource_states.resize(new_texture_subresorce_count);
- for (uint32_t i = 0; i < tex_info->owner_info.states.subresource_states.size(); i++) {
- tex_info->owner_info.states.subresource_states[i] = D3D12_RESOURCE_STATE_COPY_DEST;
- }
- tex_info->states_ptr = &tex_info->owner_info.states;
-
- ResourceInfo::States::CrossFamillyFallback &xfamily = owner_tex_info->owner_info.states.xfamily_fallback;
- if (xfamily.subresources_dirty.is_empty()) {
- uint32_t items_required = STEPIFY(new_texture_subresorce_count, sizeof(uint64_t)) / sizeof(uint64_t);
- xfamily.subresources_dirty.resize(items_required);
- memset(xfamily.subresources_dirty.ptr(), 255, sizeof(uint64_t) * xfamily.subresources_dirty.size());
-
- // Create buffer for non-direct copy if it's a format not supporting reinterpret-copy.
- DEV_ASSERT(!xfamily.interim_buffer.Get());
- if (owner_tex_info->format == DATA_FORMAT_R16_UINT && p_view.format == DATA_FORMAT_R4G4B4A4_UNORM_PACK16) {
- uint32_t row_pitch = STEPIFY(owner_tex_info->desc.Width * sizeof(uint16_t), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
- uint32_t buffer_size = sizeof(uint16_t) * row_pitch * owner_tex_info->desc.Height * owner_tex_info->desc.Depth();
- CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(STEPIFY(buffer_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT));
- resource_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
-
- D3D12MA::ALLOCATION_DESC allocation_desc = {};
- allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
-
- HRESULT res = allocator->CreateResource(
- &allocation_desc,
- &resource_desc,
- D3D12_RESOURCE_STATE_COPY_SOURCE, // Makes the code that makes the copy easier.
- nullptr,
- xfamily.interim_buffer_alloc.GetAddressOf(),
- IID_PPV_ARGS(xfamily.interim_buffer.GetAddressOf()));
- ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), TextureID(), "D3D12MA::CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");
- }
- }
- } else {
- tex_info->states_ptr = owner_tex_info->states_ptr;
- }
+ tex_info->states_ptr = owner_tex_info->states_ptr;
tex_info->format = p_view.format;
tex_info->desc = new_tex_resource_desc;
if (p_slice_type == -1) {
@@ -1710,6 +1688,28 @@ BitField<RDD::TextureUsageBits> RenderingDeviceDriverD3D12::texture_get_usages_s
return supported;
}
+bool RenderingDeviceDriverD3D12::texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) {
+ r_raw_reinterpretation = false;
+
+ if (format_capabilities.relaxed_casting_supported) {
+ // Relaxed casting is supported; there should be no need to check for format family compatibility.
+ return true;
+ } else {
+ TextureInfo *tex_info = (TextureInfo *)p_texture.id;
+ if (tex_info->format == DATA_FORMAT_R16_UINT && p_format == DATA_FORMAT_R4G4B4A4_UNORM_PACK16) {
+ // Specific cases that require buffer reinterpretation.
+ r_raw_reinterpretation = true;
+ return false;
+ } else if (RD_TO_D3D12_FORMAT[tex_info->format].family != RD_TO_D3D12_FORMAT[p_format].family) {
+ // Format family is different but copying resources directly is possible.
+ return false;
+ } else {
+ // Format family is the same and the view can just cast the format.
+ return true;
+ }
+ }
+}
+
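A hypothetical caller of the new query, sketched from the semantics above (driver, texture and the chosen format are assumptions, not part of this change):

	bool raw_reinterpret = false;
	if (driver->texture_can_make_shared_with_format(texture, DATA_FORMAT_R8G8B8A8_SRGB, raw_reinterpret)) {
		// Same format family (or relaxed casting available): a view can alias the resource directly.
	} else if (raw_reinterpret) {
		// Data must be reinterpreted through a buffer copy (e.g. R16_UINT viewed as R4G4B4A4).
	} else {
		// Different family but directly copyable: a second resource plus a texture copy is needed.
	}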
/*****************/
/**** SAMPLER ****/
/*****************/
@@ -1842,20 +1842,328 @@ void RenderingDeviceDriverD3D12::vertex_format_free(VertexFormatID p_vertex_form
/**** BARRIERS ****/
/******************/
-void RenderingDeviceDriverD3D12::command_pipeline_barrier(
- CommandBufferID p_cmd_buffer,
- BitField<RDD::PipelineStageBits> p_src_stages,
- BitField<RDD::PipelineStageBits> p_dst_stages,
+static D3D12_BARRIER_ACCESS _rd_texture_layout_access_mask(RDD::TextureLayout p_texture_layout) {
+ switch (p_texture_layout) {
+ case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
+ case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_RENDER_TARGET;
+ case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
+ case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
+ case RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
+ case RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_COPY_SOURCE;
+ case RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_COPY_DEST;
+ case RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
+ case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_RESOLVE_DEST;
+ case RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL:
+ return D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
+ default:
+ return D3D12_BARRIER_ACCESS_NO_ACCESS;
+ }
+}
+
+static void _rd_access_to_d3d12_and_mask(BitField<RDD::BarrierAccessBits> p_access, RDD::TextureLayout p_texture_layout, D3D12_BARRIER_ACCESS &r_access, D3D12_BARRIER_SYNC &r_sync_mask) {
+ r_access = D3D12_BARRIER_ACCESS_COMMON;
+ r_sync_mask = D3D12_BARRIER_SYNC_NONE;
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT;
+ r_sync_mask |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_INDEX_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_INDEX_BUFFER;
+ r_sync_mask |= D3D12_BARRIER_SYNC_INDEX_INPUT | D3D12_BARRIER_SYNC_DRAW;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_VERTEX_BUFFER;
+ r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_UNIFORM_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER;
+ r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING |
+ D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_INPUT_ATTACHMENT_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_RENDER_TARGET;
+ r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_RENDER_TARGET;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_COPY_SOURCE;
+ r_sync_mask |= D3D12_BARRIER_SYNC_COPY;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_WRITE_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_COPY_DEST;
+ r_sync_mask |= D3D12_BARRIER_SYNC_COPY;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;
+ r_sync_mask |= D3D12_BARRIER_SYNC_RESOLVE;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_RESOLVE_DEST;
+ r_sync_mask |= D3D12_BARRIER_SYNC_RESOLVE;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
+ r_sync_mask |= D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_ALL_SHADING;
+ }
+
+ const D3D12_BARRIER_SYNC unordered_access_mask = D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING |
+ D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING | D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW;
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_STORAGE_CLEAR_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
+ r_sync_mask |= unordered_access_mask;
+ }
+
+ // These access bits are only compatible with certain layouts, unlike in Vulkan, where they imply specific operations in the same layout.
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
+ r_sync_mask |= unordered_access_mask;
+ } else if (p_access.has_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT)) {
+ if (p_texture_layout == RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL) {
+ // Unordered access must be enforced if the texture is using the storage layout.
+ r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
+ r_sync_mask |= unordered_access_mask;
+ } else {
+ r_access |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
+ r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING | D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING;
+ }
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT) || p_access.has_flag(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_RENDER_TARGET;
+ r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_RENDER_TARGET;
+ }
+
+ if (p_access.has_flag(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
+ r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_DEPTH_STENCIL;
+ } else if (p_access.has_flag(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT)) {
+ r_access |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
+ r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_DEPTH_STENCIL;
+ }
+}
+
+static void _rd_stages_to_d3d12(BitField<RDD::PipelineStageBits> p_stages, D3D12_BARRIER_SYNC &r_sync) {
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
+ r_sync = D3D12_BARRIER_SYNC_ALL;
+ } else {
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_INDEX_INPUT;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) {
+ // There's no granularity for tessellation or geometry stages. The specification defines it as part of vertex shading.
+ r_sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT)) {
+ // Covers both read and write operations for depth stencil.
+ r_sync |= D3D12_BARRIER_SYNC_DEPTH_STENCIL;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_RENDER_TARGET;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_COPY_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_COPY;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_RESOLVE;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW;
+ }
+
+ if (p_stages.has_flag(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT)) {
+ r_sync |= D3D12_BARRIER_SYNC_DRAW;
+ }
+ }
+}
+
+static void _rd_stages_and_access_to_d3d12(BitField<RDD::PipelineStageBits> p_stages, RDD::TextureLayout p_texture_layout, BitField<RDD::BarrierAccessBits> p_access, D3D12_BARRIER_SYNC &r_sync, D3D12_BARRIER_ACCESS &r_access) {
+ D3D12_BARRIER_SYNC sync_mask;
+ r_sync = D3D12_BARRIER_SYNC_NONE;
+
+ if (p_texture_layout == RDD::TEXTURE_LAYOUT_UNDEFINED) {
+ // Undefined texture layouts are a special case where no access bits or synchronization scopes are allowed.
+ r_access = D3D12_BARRIER_ACCESS_NO_ACCESS;
+ return;
+ }
+
+ // Convert access bits to the D3D12 barrier access bits.
+ _rd_access_to_d3d12_and_mask(p_access, p_texture_layout, r_access, sync_mask);
+
+ if (p_texture_layout != RDD::TEXTURE_LAYOUT_MAX) {
+ // Only allow the access bits compatible with the texture layout.
+ r_access &= _rd_texture_layout_access_mask(p_texture_layout);
+ }
+
+ // Convert stage bits to the D3D12 synchronization scope bits.
+ _rd_stages_to_d3d12(p_stages, r_sync);
+
+ // Only enable synchronization stages compatible with the access bits that were used.
+ r_sync &= sync_mask;
+
+ if (r_sync == D3D12_BARRIER_SYNC_NONE) {
+ if (p_access.is_empty()) {
+ // No valid synchronization scope was defined and no access in particular is required.
+ r_access = D3D12_BARRIER_ACCESS_NO_ACCESS;
+ } else {
+ // Access is required but the synchronization scope wasn't compatible. We fall back to the global synchronization scope and access.
+ r_sync = D3D12_BARRIER_SYNC_ALL;
+ r_access = D3D12_BARRIER_ACCESS_COMMON;
+ }
+ }
+}
+
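A worked example of the conversion above for a texture transitioning into a copy destination (a sketch; with these inputs the COPY_DST layout mask strips every access bit except copy-dest):

	D3D12_BARRIER_SYNC sync_after = D3D12_BARRIER_SYNC_NONE;
	D3D12_BARRIER_ACCESS access_after = D3D12_BARRIER_ACCESS_COMMON;
	_rd_stages_and_access_to_d3d12(RDD::PIPELINE_STAGE_COPY_BIT, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, RDD::BARRIER_ACCESS_COPY_WRITE_BIT, sync_after, access_after);
	// Result: sync_after == D3D12_BARRIER_SYNC_COPY, access_after == D3D12_BARRIER_ACCESS_COPY_DEST.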
+static D3D12_BARRIER_LAYOUT _rd_texture_layout_to_d3d12_barrier_layout(RDD::TextureLayout p_texture_layout) {
+ switch (p_texture_layout) {
+ case RDD::TEXTURE_LAYOUT_UNDEFINED:
+ return D3D12_BARRIER_LAYOUT_UNDEFINED;
+ case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS;
+ case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_RENDER_TARGET;
+ case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE;
+ case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ;
+ case RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_SHADER_RESOURCE;
+ case RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_COPY_SOURCE;
+ case RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_COPY_DEST;
+ case RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE;
+ case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_RESOLVE_DEST;
+ case RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL:
+ return D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE;
+ default:
+ DEV_ASSERT(false && "Unknown texture layout.");
+ return D3D12_BARRIER_LAYOUT_UNDEFINED;
+ }
+}
+
+void RenderingDeviceDriverD3D12::command_pipeline_barrier(CommandBufferID p_cmd_buffer,
+ BitField<PipelineStageBits> p_src_stages,
+ BitField<PipelineStageBits> p_dst_stages,
VectorView<RDD::MemoryBarrier> p_memory_barriers,
VectorView<RDD::BufferBarrier> p_buffer_barriers,
VectorView<RDD::TextureBarrier> p_texture_barriers) {
- if (p_src_stages.has_flag(PIPELINE_STAGE_ALL_COMMANDS_BIT) && p_dst_stages.has_flag(PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
- // Looks like the intent is a full barrier.
- // In the resource barriers world, we can force a full barrier by discarding some resource, as per
- // https://microsoft.github.io/DirectX-Specs/d3d/D3D12EnhancedBarriers.html#synchronous-copy-discard-and-resolve.
- const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;
- cmd_buf_info->cmd_list->DiscardResource(frames[frame_idx].aux_resource->GetResource(), nullptr);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ // Enhanced barriers are a requirement for this function.
+ return;
+ }
+
+ if (p_memory_barriers.size() == 0 && p_buffer_barriers.size() == 0 && p_texture_barriers.size() == 0) {
+ // At least one barrier must be present in the arguments.
+ return;
}
+
+ // The command list must support the required interface.
+ const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)(p_cmd_buffer.id);
+ ID3D12GraphicsCommandList7 *cmd_list_7 = nullptr;
+ HRESULT res = cmd_buf_info->cmd_list->QueryInterface(IID_PPV_ARGS(&cmd_list_7));
+ ERR_FAIL_COND(FAILED(res));
+
+ // Convert the RDD barriers to D3D12 enhanced barriers.
+ thread_local LocalVector<D3D12_GLOBAL_BARRIER> global_barriers;
+ thread_local LocalVector<D3D12_BUFFER_BARRIER> buffer_barriers;
+ thread_local LocalVector<D3D12_TEXTURE_BARRIER> texture_barriers;
+ global_barriers.clear();
+ buffer_barriers.clear();
+ texture_barriers.clear();
+
+ D3D12_GLOBAL_BARRIER global_barrier = {};
+ for (uint32_t i = 0; i < p_memory_barriers.size(); i++) {
+ const MemoryBarrier &memory_barrier = p_memory_barriers[i];
+ _rd_stages_and_access_to_d3d12(p_src_stages, RDD::TEXTURE_LAYOUT_MAX, memory_barrier.src_access, global_barrier.SyncBefore, global_barrier.AccessBefore);
+ _rd_stages_and_access_to_d3d12(p_dst_stages, RDD::TEXTURE_LAYOUT_MAX, memory_barrier.dst_access, global_barrier.SyncAfter, global_barrier.AccessAfter);
+ global_barriers.push_back(global_barrier);
+ }
+
+ D3D12_BUFFER_BARRIER buffer_barrier_d3d12 = {};
+ buffer_barrier_d3d12.Offset = 0;
+ buffer_barrier_d3d12.Size = UINT64_MAX; // The specification requires this to cover the whole buffer; UINT64_MAX spans the entire resource.
+ for (uint32_t i = 0; i < p_buffer_barriers.size(); i++) {
+ const BufferBarrier &buffer_barrier_rd = p_buffer_barriers[i];
+ const BufferInfo *buffer_info = (const BufferInfo *)(buffer_barrier_rd.buffer.id);
+ _rd_stages_and_access_to_d3d12(p_src_stages, RDD::TEXTURE_LAYOUT_MAX, buffer_barrier_rd.src_access, buffer_barrier_d3d12.SyncBefore, buffer_barrier_d3d12.AccessBefore);
+ _rd_stages_and_access_to_d3d12(p_dst_stages, RDD::TEXTURE_LAYOUT_MAX, buffer_barrier_rd.dst_access, buffer_barrier_d3d12.SyncAfter, buffer_barrier_d3d12.AccessAfter);
+ buffer_barrier_d3d12.pResource = buffer_info->resource;
+ buffer_barriers.push_back(buffer_barrier_d3d12);
+ }
+
+ D3D12_TEXTURE_BARRIER texture_barrier_d3d12 = {};
+ for (uint32_t i = 0; i < p_texture_barriers.size(); i++) {
+ const TextureBarrier &texture_barrier_rd = p_texture_barriers[i];
+ const TextureInfo *texture_info = (const TextureInfo *)(texture_barrier_rd.texture.id);
+ _rd_stages_and_access_to_d3d12(p_src_stages, texture_barrier_rd.prev_layout, texture_barrier_rd.src_access, texture_barrier_d3d12.SyncBefore, texture_barrier_d3d12.AccessBefore);
+ _rd_stages_and_access_to_d3d12(p_dst_stages, texture_barrier_rd.next_layout, texture_barrier_rd.dst_access, texture_barrier_d3d12.SyncAfter, texture_barrier_d3d12.AccessAfter);
+ texture_barrier_d3d12.LayoutBefore = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.prev_layout);
+ texture_barrier_d3d12.LayoutAfter = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.next_layout);
+ texture_barrier_d3d12.pResource = texture_info->resource;
+ texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = texture_barrier_rd.subresources.base_mipmap;
+ texture_barrier_d3d12.Subresources.NumMipLevels = texture_barrier_rd.subresources.mipmap_count;
+ texture_barrier_d3d12.Subresources.FirstArraySlice = texture_barrier_rd.subresources.base_layer;
+ texture_barrier_d3d12.Subresources.NumArraySlices = texture_barrier_rd.subresources.layer_count;
+ texture_barrier_d3d12.Subresources.FirstPlane = _compute_plane_slice(texture_info->format, texture_barrier_rd.subresources.aspect);
+ texture_barrier_d3d12.Subresources.NumPlanes = format_get_plane_count(texture_info->format);
+ texture_barrier_d3d12.Flags = (texture_barrier_rd.prev_layout == RDD::TEXTURE_LAYOUT_UNDEFINED) ? D3D12_TEXTURE_BARRIER_FLAG_DISCARD : D3D12_TEXTURE_BARRIER_FLAG_NONE;
+ texture_barriers.push_back(texture_barrier_d3d12);
+ }
+
+ // Define the barrier groups and execute.
+ D3D12_BARRIER_GROUP barrier_groups[3] = {};
+ barrier_groups[0].Type = D3D12_BARRIER_TYPE_GLOBAL;
+ barrier_groups[1].Type = D3D12_BARRIER_TYPE_BUFFER;
+ barrier_groups[2].Type = D3D12_BARRIER_TYPE_TEXTURE;
+ barrier_groups[0].NumBarriers = global_barriers.size();
+ barrier_groups[1].NumBarriers = buffer_barriers.size();
+ barrier_groups[2].NumBarriers = texture_barriers.size();
+ barrier_groups[0].pGlobalBarriers = global_barriers.ptr();
+ barrier_groups[1].pBufferBarriers = buffer_barriers.ptr();
+ barrier_groups[2].pTextureBarriers = texture_barriers.ptr();
+ cmd_list_7->Barrier(ARRAY_SIZE(barrier_groups), barrier_groups);
}
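A hypothetical call site for the enhanced-barrier path, sketched with the struct and enum names used above (driver, cmd_buffer, texture and the color aspect bit are assumptions):

	RDD::TextureBarrier tb;
	tb.texture = texture;
	tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
	tb.dst_access = RDD::BARRIER_ACCESS_SHADER_READ_BIT;
	tb.prev_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
	tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
	tb.subresources.base_mipmap = 0;
	tb.subresources.mipmap_count = 1;
	tb.subresources.base_layer = 0;
	tb.subresources.layer_count = 1;
	tb.subresources.aspect = RDD::TEXTURE_ASPECT_COLOR_BIT;
	driver->command_pipeline_barrier(cmd_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT, {}, {}, tb);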
/****************/
@@ -3464,6 +3772,8 @@ RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_bytecode(const Vect
zstd_size = STEPIFY(zstd_size, 4);
read_offset += zstd_size;
ERR_FAIL_COND_V(read_offset > binsize, ShaderID());
+
+ r_shader_desc.stages.push_back(ShaderStage(stage));
}
const uint8_t *root_sig_data_ptr = binptr + read_offset;
@@ -3795,6 +4105,10 @@ void RenderingDeviceDriverD3D12::uniform_set_free(UniformSetID p_uniform_set) {
// ----- COMMANDS -----
void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
+ if (barrier_capabilities.enhanced_barriers_supported) {
+ return;
+ }
+
// Perform pending blackouts.
{
SelfList<TextureInfo> *E = textures_pending_clear.first();
@@ -3802,7 +4116,7 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff
TextureSubresourceRange subresources;
subresources.layer_count = E->self()->layers;
subresources.mipmap_count = E->self()->mipmaps;
- command_clear_color_texture(p_cmd_buffer, TextureID(E->self()), TEXTURE_LAYOUT_GENERAL, Color(), subresources);
+ command_clear_color_texture(p_cmd_buffer, TextureID(E->self()), TEXTURE_LAYOUT_UNDEFINED, Color(), subresources);
SelfList<TextureInfo> *next = E->next();
E->remove_from_list();
@@ -3935,34 +4249,6 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff
for (uint32_t i = 0; i < tex_info->layers; i++) {
for (uint32_t j = 0; j < tex_info->mipmaps; j++) {
uint32_t subresource = D3D12CalcSubresource(tex_info->base_mip + j, tex_info->base_layer + i, 0, tex_info->desc.MipLevels, tex_info->desc.ArraySize());
-
- if ((wanted_state & D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE)) {
- // [[CROSS_FAMILY_FALLBACK]].
- if (tex_info->owner_info.resource && tex_info->main_texture && tex_info->main_texture != tex_info) {
- uint32_t subres_qword = subresource / 64;
- uint64_t subres_mask = (uint64_t(1) << (subresource % 64));
- if ((tex_info->main_texture->states_ptr->xfamily_fallback.subresources_dirty[subres_qword] & subres_mask)) {
- // Prepare for copying the write-to texture to this one, if out-of-date.
- _resource_transition_batch(tex_info->main_texture, subresource, planes, D3D12_RESOURCE_STATE_COPY_SOURCE);
- _resource_transition_batch(tex_info, subresource, planes, D3D12_RESOURCE_STATE_COPY_DEST);
-
- CommandBufferInfo::FamilyFallbackCopy ffc;
- ffc.texture = tex_info;
- ffc.subresource = subresource;
- ffc.mipmap = j;
- ffc.dst_wanted_state = wanted_state;
-
- CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
- cmd_buf_info->family_fallback_copies.resize(cmd_buf_info->family_fallback_copies.size() + 1);
- cmd_buf_info->family_fallback_copies[cmd_buf_info->family_fallback_copy_count] = ffc;
- cmd_buf_info->family_fallback_copy_count++;
-
- tex_info->main_texture->states_ptr->xfamily_fallback.subresources_dirty[subres_qword] &= ~subres_mask;
- }
- continue;
- }
- }
-
_resource_transition_batch(tex_info, subresource, planes, wanted_state);
}
}
@@ -3974,55 +4260,6 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff
if (p_set_index == shader_info_in->sets.size() - 1) {
CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
_resource_transitions_flush(cmd_buf_info->cmd_list.Get());
-
- // [[CROSS_FAMILY_FALLBACK]].
- for (uint32_t i = 0; i < cmd_buf_info->family_fallback_copy_count; i++) {
- const CommandBufferInfo::FamilyFallbackCopy &ffc = cmd_buf_info->family_fallback_copies[i];
-
- D3D12_TEXTURE_COPY_LOCATION dst_tex = {};
- dst_tex.pResource = ffc.texture->resource;
- dst_tex.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
- dst_tex.SubresourceIndex = ffc.subresource;
-
- D3D12_TEXTURE_COPY_LOCATION src_tex = {};
- src_tex.pResource = ffc.texture->main_texture->resource;
- src_tex.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
- src_tex.SubresourceIndex = ffc.subresource;
-
- const ResourceInfo::States::CrossFamillyFallback &xfamily = ffc.texture->main_texture->owner_info.states.xfamily_fallback;
- if (xfamily.interim_buffer.Get()) {
- // Must copy via a buffer due to reinterpret-copy known not to be available for these data types.
- D3D12_TEXTURE_COPY_LOCATION buf_loc = {};
- buf_loc.pResource = xfamily.interim_buffer.Get();
- buf_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
- buf_loc.PlacedFootprint.Offset = 0;
- buf_loc.PlacedFootprint.Footprint.Format = ffc.texture->main_texture->desc.Format;
- buf_loc.PlacedFootprint.Footprint.Width = MAX(1u, ffc.texture->main_texture->desc.Width >> ffc.mipmap);
- buf_loc.PlacedFootprint.Footprint.Height = MAX(1u, ffc.texture->main_texture->desc.Height >> ffc.mipmap);
- buf_loc.PlacedFootprint.Footprint.Depth = MAX(1u, (uint32_t)ffc.texture->main_texture->desc.Depth() >> ffc.mipmap);
- buf_loc.PlacedFootprint.Footprint.RowPitch = STEPIFY(buf_loc.PlacedFootprint.Footprint.Width * sizeof(uint16_t), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
-
- D3D12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(xfamily.interim_buffer.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COPY_DEST);
- cmd_buf_info->cmd_list->ResourceBarrier(1, &barrier);
-
- cmd_buf_info->cmd_list->CopyTextureRegion(&buf_loc, 0, 0, 0, &src_tex, nullptr);
-
- barrier = CD3DX12_RESOURCE_BARRIER::Transition(xfamily.interim_buffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
- cmd_buf_info->cmd_list->ResourceBarrier(1, &barrier);
-
- buf_loc.PlacedFootprint.Footprint.Format = ffc.texture->desc.Format;
- cmd_buf_info->cmd_list->CopyTextureRegion(&dst_tex, 0, 0, 0, &buf_loc, nullptr);
- } else {
- // Direct copy is possible.
- cmd_buf_info->cmd_list->CopyTextureRegion(&dst_tex, 0, 0, 0, &src_tex, nullptr);
- }
-
- // Set the specific SRV state we wanted from the beginning to the alternative version of the texture.
- _resource_transition_batch(ffc.texture, ffc.subresource, 1, ffc.dst_wanted_state);
- }
- cmd_buf_info->family_fallback_copy_count = 0;
-
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
}
}
@@ -4299,8 +4536,10 @@ void RenderingDeviceDriverD3D12::command_clear_buffer(CommandBufferID p_cmd_buff
}
}
- _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {};
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
@@ -4340,9 +4579,11 @@ void RenderingDeviceDriverD3D12::command_copy_buffer(CommandBufferID p_cmd_buffe
BufferInfo *src_buf_info = (BufferInfo *)p_src_buffer.id;
BufferInfo *dst_buf_info = (BufferInfo *)p_dst_buffer.id;
- _resource_transition_batch(src_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
- _resource_transition_batch(dst_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(src_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
+ _resource_transition_batch(dst_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
for (uint32_t i = 0; i < p_regions.size(); i++) {
cmd_buf_info->cmd_list->CopyBufferRegion(dst_buf_info->resource, p_regions[i].dst_offset, src_buf_info->resource, p_regions[i].src_offset, p_regions[i].size);
@@ -4354,43 +4595,37 @@ void RenderingDeviceDriverD3D12::command_copy_texture(CommandBufferID p_cmd_buff
TextureInfo *src_tex_info = (TextureInfo *)p_src_texture.id;
TextureInfo *dst_tex_info = (TextureInfo *)p_dst_texture.id;
- for (uint32_t i = 0; i < p_regions.size(); i++) {
- UINT src_subresource = D3D12CalcSubresource(
- p_regions[i].src_subresources.mipmap,
- p_regions[i].src_subresources.base_layer,
- _compute_plane_slice(src_tex_info->format, p_regions[i].src_subresources.aspect),
- src_tex_info->desc.MipLevels,
- src_tex_info->desc.ArraySize());
- _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
-
- UINT dst_subresource = D3D12CalcSubresource(
- p_regions[i].dst_subresources.mipmap,
- p_regions[i].dst_subresources.base_layer,
- _compute_plane_slice(dst_tex_info->format, p_regions[i].dst_subresources.aspect),
- dst_tex_info->desc.MipLevels,
- dst_tex_info->desc.ArraySize());
- _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ // Batch all barrier transitions for the textures before performing the copies.
+ for (uint32_t i = 0; i < p_regions.size(); i++) {
+ uint32_t layer_count = MIN(p_regions[i].src_subresources.layer_count, p_regions[i].dst_subresources.layer_count);
+ for (uint32_t j = 0; j < layer_count; j++) {
+ UINT src_subresource = _compute_subresource_from_layers(src_tex_info, p_regions[i].src_subresources, j);
+ UINT dst_subresource = _compute_subresource_from_layers(dst_tex_info, p_regions[i].dst_subresources, j);
+ _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
+ _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST);
+ }
+ }
_resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
- CD3DX12_TEXTURE_COPY_LOCATION src_location(src_tex_info->resource, src_subresource);
- CD3DX12_TEXTURE_COPY_LOCATION dst_location(dst_tex_info->resource, dst_subresource);
-
- CD3DX12_BOX src_box(
- p_regions[i].src_offset.x,
- p_regions[i].src_offset.y,
- p_regions[i].src_offset.z,
- p_regions[i].src_offset.x + p_regions[i].size.x,
- p_regions[i].src_offset.y + p_regions[i].size.y,
- p_regions[i].src_offset.z + p_regions[i].size.z);
-
- cmd_buf_info->cmd_list->CopyTextureRegion(
- &dst_location,
- p_regions[i].dst_offset.x,
- p_regions[i].dst_offset.y,
- p_regions[i].dst_offset.z,
- &src_location,
- &src_box);
+ CD3DX12_BOX src_box;
+ for (uint32_t i = 0; i < p_regions.size(); i++) {
+ uint32_t layer_count = MIN(p_regions[i].src_subresources.layer_count, p_regions[i].dst_subresources.layer_count);
+ for (uint32_t j = 0; j < layer_count; j++) {
+ UINT src_subresource = _compute_subresource_from_layers(src_tex_info, p_regions[i].src_subresources, j);
+ UINT dst_subresource = _compute_subresource_from_layers(dst_tex_info, p_regions[i].dst_subresources, j);
+ CD3DX12_TEXTURE_COPY_LOCATION src_location(src_tex_info->resource, src_subresource);
+ CD3DX12_TEXTURE_COPY_LOCATION dst_location(dst_tex_info->resource, dst_subresource);
+ src_box.left = p_regions[i].src_offset.x;
+ src_box.top = p_regions[i].src_offset.y;
+ src_box.front = p_regions[i].src_offset.z;
+ src_box.right = p_regions[i].src_offset.x + p_regions[i].size.x;
+ src_box.bottom = p_regions[i].src_offset.y + p_regions[i].size.y;
+ src_box.back = p_regions[i].src_offset.z + p_regions[i].size.z;
+ cmd_buf_info->cmd_list->CopyTextureRegion(&dst_location, p_regions[i].dst_offset.x, p_regions[i].dst_offset.y, p_regions[i].dst_offset.z, &src_location, &src_box);
+ }
}
}
@@ -4400,12 +4635,12 @@ void RenderingDeviceDriverD3D12::command_resolve_texture(CommandBufferID p_cmd_b
TextureInfo *dst_tex_info = (TextureInfo *)p_dst_texture.id;
UINT src_subresource = D3D12CalcSubresource(p_src_mipmap, p_src_layer, 0, src_tex_info->desc.MipLevels, src_tex_info->desc.ArraySize());
- _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
-
UINT dst_subresource = D3D12CalcSubresource(p_dst_mipmap, p_dst_layer, 0, dst_tex_info->desc.MipLevels, dst_tex_info->desc.ArraySize());
- _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST);
-
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
+ _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
cmd_buf_info->cmd_list->ResolveSubresource(dst_tex_info->resource, dst_subresource, src_tex_info->resource, src_subresource, RD_TO_D3D12_FORMAT[src_tex_info->format].general_format);
}
@@ -4446,7 +4681,9 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c
}
}
- _transition_subresources(D3D12_RESOURCE_STATE_RENDER_TARGET);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _transition_subresources(D3D12_RESOURCE_STATE_RENDER_TARGET);
+ }
for (uint32_t i = 0; i < p_subresources.mipmap_count; i++) {
D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = _make_rtv_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, p_subresources.layer_count, false);
@@ -4464,7 +4701,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c
frames[frame_idx].desc_heap_walkers.rtv.advance();
}
- } else {
+ } else if (tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) {
// Clear via UAV.
_command_check_descriptor_sets(p_cmd_buffer);
@@ -4489,7 +4726,9 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c
}
}
- _transition_subresources(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _transition_subresources(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+ }
for (uint32_t i = 0; i < p_subresources.mipmap_count; i++) {
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = _make_ranged_uav_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, p_subresources.layer_count, false);
@@ -4510,6 +4749,7 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c
(UINT)p_color.get_b8(),
(UINT)p_color.get_a8(),
};
+
cmd_buf_info->cmd_list->ClearUnorderedAccessViewUint(
frames[frame_idx].desc_heap_walkers.resources.get_curr_gpu_handle(),
frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle(),
@@ -4521,6 +4761,8 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c
frames[frame_idx].desc_heap_walkers.resources.advance();
frames[frame_idx].desc_heap_walkers.aux.advance();
}
+ } else {
+ ERR_FAIL_MSG("Cannot clear texture because its format does not support UAV writes. You'll need to update its contents through another method.");
}
}
@@ -4528,8 +4770,7 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID
const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;
BufferInfo *buf_info = (BufferInfo *)p_src_buffer.id;
TextureInfo *tex_info = (TextureInfo *)p_dst_texture.id;
-
- if (buf_info->flags.is_for_upload) {
+ if (!barrier_capabilities.enhanced_barriers_supported) {
_resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
}
@@ -4557,19 +4798,21 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID
STEPIFY(p_regions[i].texture_region_size.y, block_h),
p_regions[i].texture_region_size.z);
- for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {
- UINT dst_subresource = D3D12CalcSubresource(
- p_regions[i].texture_subresources.mipmap,
- p_regions[i].texture_subresources.base_layer + j,
- _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect),
- tex_info->desc.MipLevels,
- tex_info->desc.ArraySize());
- CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {
+ UINT dst_subresource = D3D12CalcSubresource(
+ p_regions[i].texture_subresources.mipmap,
+ p_regions[i].texture_subresources.base_layer + j,
+ _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect),
+ tex_info->desc.MipLevels,
+ tex_info->desc.ArraySize());
+ CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource);
- _resource_transition_batch(tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST);
- }
+ _resource_transition_batch(tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST);
+ }
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {
UINT dst_subresource = D3D12CalcSubresource(
@@ -4596,24 +4839,28 @@ void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID
TextureInfo *tex_info = (TextureInfo *)p_src_texture.id;
BufferInfo *buf_info = (BufferInfo *)p_dst_buffer.id;
- _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST);
+ }
uint32_t block_w = 0, block_h = 0;
get_compressed_image_format_block_dimensions(tex_info->format, block_w, block_h);
for (uint32_t i = 0; i < p_regions.size(); i++) {
- for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {
- UINT src_subresource = D3D12CalcSubresource(
- p_regions[i].texture_subresources.mipmap,
- p_regions[i].texture_subresources.base_layer + j,
- _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect),
- tex_info->desc.MipLevels,
- tex_info->desc.ArraySize());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {
+ UINT src_subresource = D3D12CalcSubresource(
+ p_regions[i].texture_subresources.mipmap,
+ p_regions[i].texture_subresources.base_layer + j,
+ _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect),
+ tex_info->desc.MipLevels,
+ tex_info->desc.ArraySize());
- _resource_transition_batch(tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
- }
+ _resource_transition_batch(tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);
+ }
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {
UINT src_subresource = D3D12CalcSubresource(
@@ -4763,22 +5010,25 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd
}
};
- for (uint32_t i = 0; i < fb_info->attachments.size(); i++) {
- TextureInfo *tex_info = (TextureInfo *)fb_info->attachments[i].id;
- if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {
- _transition_subresources(tex_info, D3D12_RESOURCE_STATE_RENDER_TARGET);
- } else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {
- _transition_subresources(tex_info, D3D12_RESOURCE_STATE_DEPTH_WRITE);
- } else {
- DEV_ASSERT(false);
+ if (fb_info->is_screen || !barrier_capabilities.enhanced_barriers_supported) {
+ // Screen framebuffers must perform this transition even if enhanced barriers are supported.
+ for (uint32_t i = 0; i < fb_info->attachments.size(); i++) {
+ TextureInfo *tex_info = (TextureInfo *)fb_info->attachments[i].id;
+ if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {
+ _transition_subresources(tex_info, D3D12_RESOURCE_STATE_RENDER_TARGET);
+ } else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {
+ _transition_subresources(tex_info, D3D12_RESOURCE_STATE_DEPTH_WRITE);
+ } else {
+ DEV_ASSERT(false);
+ }
+ }
+ if (fb_info->vrs_attachment) {
+ TextureInfo *tex_info = (TextureInfo *)fb_info->vrs_attachment.id;
+ _transition_subresources(tex_info, D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE);
}
- }
- if (fb_info->vrs_attachment) {
- TextureInfo *tex_info = (TextureInfo *)fb_info->vrs_attachment.id;
- _transition_subresources(tex_info, D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE);
- }
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
cmd_buf_info->render_pass_state.region_rect = CD3DX12_RECT(
p_rect.position.x,
@@ -5140,8 +5390,11 @@ void RenderingDeviceDriverD3D12::command_render_draw_indexed_indirect(CommandBuf
CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
_bind_vertex_buffers(cmd_buf_info);
BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
- _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
+
cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw_indexed.Get(), p_draw_count, indirect_buf_info->resource, p_offset, nullptr, 0);
}
@@ -5150,9 +5403,12 @@ void RenderingDeviceDriverD3D12::command_render_draw_indexed_indirect_count(Comm
_bind_vertex_buffers(cmd_buf_info);
BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id;
- _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
+
cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw_indexed.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset);
}
@@ -5160,8 +5416,11 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect(CommandBufferID p_
CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
_bind_vertex_buffers(cmd_buf_info);
BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
- _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
+
cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_draw_count, indirect_buf_info->resource, p_offset, nullptr, 0);
}
@@ -5170,9 +5429,12 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect_count(CommandBuffe
_bind_vertex_buffers(cmd_buf_info);
BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id;
- _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
+
cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset);
}
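When enhanced barriers are supported, the legacy INDIRECT_ARGUMENT transitions above are skipped and the dependency is expected to be expressed through the enhanced-barrier API instead. A hedged sketch of such a buffer barrier (standard D3D12 structures; the producer sync/access values and the helper name are assumptions, and it requires ID3D12GraphicsCommandList7):

static void indirect_args_enhanced_barrier(ID3D12GraphicsCommandList7 *p_list, ID3D12Resource *p_indirect_buffer) {
	D3D12_BUFFER_BARRIER buffer_barrier = {};
	buffer_barrier.SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING; // Illustrative producer stage.
	buffer_barrier.SyncAfter = D3D12_BARRIER_SYNC_EXECUTE_INDIRECT;
	buffer_barrier.AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; // Illustrative producer access.
	buffer_barrier.AccessAfter = D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT;
	buffer_barrier.pResource = p_indirect_buffer;
	buffer_barrier.Offset = 0;
	buffer_barrier.Size = UINT64_MAX; // Whole buffer.

	D3D12_BARRIER_GROUP group = {};
	group.Type = D3D12_BARRIER_TYPE_BUFFER;
	group.NumBarriers = 1;
	group.pBufferBarriers = &buffer_barrier;
	p_list->Barrier(1, &group);
}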
@@ -5191,10 +5453,15 @@ void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBuffe
cmd_buf_info->render_pass_state.vertex_buffer_views[i] = {};
cmd_buf_info->render_pass_state.vertex_buffer_views[i].BufferLocation = buffer_info->resource->GetGPUVirtualAddress() + p_offsets[i];
cmd_buf_info->render_pass_state.vertex_buffer_views[i].SizeInBytes = buffer_info->size - p_offsets[i];
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
+ }
+ }
- _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
}
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+
cmd_buf_info->render_pass_state.vertex_buffer_count = p_binding_count;
}
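The views filled in above are submitted later by _bind_vertex_buffers(), once the stride is known. A hedged sketch of that deferred bind (the helper name and the per-binding stride parameter are assumptions):

static void bind_vertex_views(ID3D12GraphicsCommandList *p_list, D3D12_VERTEX_BUFFER_VIEW *p_views, uint32_t p_count, uint32_t p_stride) {
	for (uint32_t i = 0; i < p_count; i++) {
		p_views[i].StrideInBytes = p_stride; // Presumably taken from the bound pipeline's vertex format.
	}
	p_list->IASetVertexBuffers(0, p_count, p_views);
}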
@@ -5207,8 +5474,10 @@ void RenderingDeviceDriverD3D12::command_render_bind_index_buffer(CommandBufferI
d3d12_ib_view.SizeInBytes = buffer_info->size - p_offset;
d3d12_ib_view.Format = p_format == INDEX_BUFFER_FORMAT_UINT16 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;
- _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_INDEX_BUFFER);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_INDEX_BUFFER);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
cmd_buf_info->cmd_list->IASetIndexBuffer(&d3d12_ib_view);
}
@@ -5604,15 +5873,21 @@ void RenderingDeviceDriverD3D12::command_bind_compute_uniform_set(CommandBufferI
void RenderingDeviceDriverD3D12::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
+
cmd_buf_info->cmd_list->Dispatch(p_x_groups, p_y_groups, p_z_groups);
}
void RenderingDeviceDriverD3D12::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) {
const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;
BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
- _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
- _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ if (!barrier_capabilities.enhanced_barriers_supported) {
+ _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+ _resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+ }
+
cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.dispatch.Get(), 1, indirect_buf_info->resource, p_offset, nullptr, 0);
}
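As with the draw paths, the dispatch no longer flushes legacy transitions when enhanced barriers are available; the dependency would be recorded as an enhanced barrier instead. A minimal sketch of a compute-to-compute global barrier (sync/access values and the helper name are illustrative; requires ID3D12GraphicsCommandList7):

static void uav_to_srv_global_barrier(ID3D12GraphicsCommandList7 *p_list) {
	D3D12_GLOBAL_BARRIER global_barrier = {};
	global_barrier.SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING;
	global_barrier.SyncAfter = D3D12_BARRIER_SYNC_COMPUTE_SHADING;
	global_barrier.AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
	global_barrier.AccessAfter = D3D12_BARRIER_ACCESS_SHADER_RESOURCE;

	D3D12_BARRIER_GROUP group = {};
	group.Type = D3D12_BARRIER_TYPE_GLOBAL;
	group.NumBarriers = 1;
	group.pGlobalBarriers = &global_barrier;
	p_list->Barrier(1, &group);
}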
@@ -5927,11 +6202,7 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) {
uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) {
switch (p_trait) {
case API_TRAIT_HONORS_PIPELINE_BARRIERS:
- // TODO:
- // 1. Map fine/Vulkan/enhanced barriers to legacy barriers as closely as possible
- // so there's still some advantage even without enhanced barriers available.
- // 2. Implement enhanced barriers and return true where available.
- return 0;
+ return barrier_capabilities.enhanced_barriers_supported;
case API_TRAIT_SHADER_CHANGE_INVALIDATION:
return (uint64_t)SHADER_CHANGE_INVALIDATION_ALL_OR_NONE_ACCORDING_TO_LAYOUT_HASH;
case API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT:
@@ -5940,6 +6211,8 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) {
return D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
case API_TRAIT_SECONDARY_VIEWPORT_SCISSOR:
return false;
+ case API_TRAIT_CLEARS_WITH_COPY_ENGINE:
+ return false;
default:
return RenderingDeviceDriver::api_trait_get(p_trait);
}
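Returning the capability here lets upper layers rely on the pipeline barriers they already emit. Purely illustrative usage, assuming a RenderingDeviceDriver pointer named driver:

const bool driver_honors_barriers = driver->api_trait_get(RenderingDeviceDriver::API_TRAIT_HONORS_PIPELINE_BARRIERS) != 0;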
@@ -6082,6 +6355,8 @@ Error RenderingDeviceDriverD3D12::_initialize_device() {
// These happen due to how D3D12MA manages buffers; seems benign.
D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_HAS_NO_RESOURCE,
D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_INTERSECTS_MULTIPLE_BUFFERS,
+ // Seemingly a false positive.
+ D3D12_MESSAGE_ID_DATA_STATIC_WHILE_SET_AT_EXECUTE_DESCRIPTOR_INVALID_DATA_CHANGE,
};
D3D12_INFO_QUEUE_FILTER filter = {};
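For context, a hedged sketch of how a deny-list entry like the one added above ends up on the info queue (the helper name is an assumption; the interface query fails when the debug layer is disabled, so the result is checked):

static void push_deny_filter(ID3D12Device *p_device) {
	ComPtr<ID3D12InfoQueue> info_queue;
	if (SUCCEEDED(p_device->QueryInterface(IID_PPV_ARGS(info_queue.GetAddressOf())))) {
		D3D12_MESSAGE_ID denied_ids[] = {
			D3D12_MESSAGE_ID_DATA_STATIC_WHILE_SET_AT_EXECUTE_DESCRIPTOR_INVALID_DATA_CHANGE,
		};
		D3D12_INFO_QUEUE_FILTER filter = {};
		filter.DenyList.NumIDs = ARRAYSIZE(denied_ids);
		filter.DenyList.pIDList = denied_ids;
		info_queue->PushStorageFilter(&filter);
	}
}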
@@ -6231,6 +6506,7 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() {
res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12));
if (SUCCEEDED(res)) {
format_capabilities.relaxed_casting_supported = options12.RelaxedFormatCastingSupported;
+ barrier_capabilities.enhanced_barriers_supported = options12.EnhancedBarriersSupported;
}
if (vrs_capabilities.draw_call_supported || vrs_capabilities.primitive_supported || vrs_capabilities.ss_image_supported) {
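Enhanced barrier support is reported through D3D12_FEATURE_D3D12_OPTIONS12 alongside relaxed format casting, as queried above. A standalone sketch of the same check (the helper name is illustrative):

static bool query_enhanced_barriers(ID3D12Device *p_device) {
	D3D12_FEATURE_DATA_D3D12_OPTIONS12 options12 = {};
	HRESULT res = p_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12));
	return SUCCEEDED(res) && options12.EnhancedBarriersSupported;
}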
@@ -6263,7 +6539,7 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() {
#if 0
print_verbose("- Relaxed casting supported");
#else
- // Certain configurations (Windows 11 with an updated Nvida driver) crash when using relaxed casting.
+ // Certain configurations (Windows 11 with an updated NVIDIA driver) crash when using relaxed casting.
// Therefore, we disable it temporarily until we can assure that it's reliable.
// There are fallbacks in place that work in every case, if less efficient.
format_capabilities.relaxed_casting_supported = false;
@@ -6363,10 +6639,6 @@ Error RenderingDeviceDriverD3D12::_initialize_frames(uint32_t p_frame_count) {
frames[i].desc_heap_walkers.samplers = frames[i].desc_heaps.samplers.make_walker();
frames[i].desc_heap_walkers.aux = frames[i].desc_heaps.aux.make_walker();
frames[i].desc_heap_walkers.rtv = frames[i].desc_heaps.rtv.make_walker();
-
- ID3D12Resource *resource = nullptr;
- HRESULT res = allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COMMON, nullptr, &frames[frame_idx].aux_resource, IID_PPV_ARGS(&resource));
- ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "D3D12MA::CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");
}
return OK;