summary | refs | log | tree | commit | diff | stats
path: root/servers/rendering/rendering_device.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'servers/rendering/rendering_device.cpp')
-rw-r--r-- servers/rendering/rendering_device.cpp 221
1 files changed, 193 insertions, 28 deletions
diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp
index b287550986..cc67873b24 100644
--- a/servers/rendering/rendering_device.cpp
+++ b/servers/rendering/rendering_device.cpp
@@ -1243,6 +1243,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
TransferWorker *transfer_worker = nullptr;
const uint8_t *read_ptr = p_data.ptr();
uint8_t *write_ptr = nullptr;
+ const RDD::TextureLayout copy_dst_layout = driver->api_trait_get(RDD::API_TRAIT_USE_GENERAL_IN_COPY_QUEUES) ? RDD::TEXTURE_LAYOUT_GENERAL : RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
for (uint32_t pass = 0; pass < 2; pass++) {
const bool copy_pass = (pass == 1);
if (copy_pass) {
@@ -1259,15 +1260,13 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
write_ptr = driver->buffer_map(transfer_worker->staging_buffer);
ERR_FAIL_NULL_V(write_ptr, ERR_CANT_CREATE);
- write_ptr += staging_worker_offset;
-
if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) {
// Transition the texture to the optimal layout.
RDD::TextureBarrier tb;
tb.texture = texture->driver_id;
tb.dst_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED;
- tb.next_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
+ tb.next_layout = copy_dst_layout;
tb.subresources.aspect = texture->barrier_aspect_flags;
tb.subresources.mipmap_count = texture->mipmaps;
tb.subresources.base_layer = p_layer;
@@ -1302,18 +1301,19 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
if (copy_pass) {
const uint8_t *read_ptr_mipmap_layer = read_ptr_mipmap + (tight_mip_size / depth) * z;
- _copy_region_block_or_regular(read_ptr_mipmap_layer, write_ptr, 0, 0, width, width, height, block_w, block_h, pitch, pixel_size, block_size);
- write_ptr += to_allocate;
+ uint64_t staging_buffer_offset = staging_worker_offset + staging_local_offset;
+ uint8_t *write_ptr_mipmap_layer = write_ptr + staging_buffer_offset;
+ _copy_region_block_or_regular(read_ptr_mipmap_layer, write_ptr_mipmap_layer, 0, 0, width, width, height, block_w, block_h, pitch, pixel_size, block_size);
RDD::BufferTextureCopyRegion copy_region;
- copy_region.buffer_offset = staging_worker_offset + staging_local_offset;
+ copy_region.buffer_offset = staging_buffer_offset;
copy_region.texture_subresources.aspect = texture->read_aspect_flags;
copy_region.texture_subresources.mipmap = mm_i;
copy_region.texture_subresources.base_layer = p_layer;
copy_region.texture_subresources.layer_count = 1;
copy_region.texture_offset = Vector3i(0, 0, z);
copy_region.texture_region_size = Vector3i(logic_width, logic_height, 1);
- driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, copy_region);
+ driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, copy_dst_layout, copy_region);
}
staging_local_offset += to_allocate;
@@ -1332,14 +1332,13 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
RDD::TextureBarrier tb;
tb.texture = texture->driver_id;
tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
- tb.prev_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
+ tb.prev_layout = copy_dst_layout;
tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
tb.subresources.aspect = texture->barrier_aspect_flags;
tb.subresources.mipmap_count = texture->mipmaps;
tb.subresources.base_layer = p_layer;
tb.subresources.layer_count = 1;
-
- driver->command_pipeline_barrier(transfer_worker->command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb);
+ transfer_worker->texture_barriers.push_back(tb);
}
_release_transfer_worker(transfer_worker);
@@ -3758,6 +3757,15 @@ int RenderingDevice::screen_get_height(DisplayServer::WindowID p_screen) const {
return context->surface_get_height(surface);
}
+// Returns the pre-rotation, in degrees, that the driver reports for the swap chain of p_screen.
+// If no swap chain is registered for p_screen, an error is logged and 0 (no rotation) is returned.
+int RenderingDevice::screen_get_pre_rotation_degrees(DisplayServer::WindowID p_screen) const {
+	_THREAD_SAFE_METHOD_
+
+	HashMap<DisplayServer::WindowID, RDD::SwapChainID>::ConstIterator it = screen_swap_chains.find(p_screen);
+	// The return value is an angle, not an Error: failing with an Error constant here would be read by callers as a rotation of that many degrees.
+	ERR_FAIL_COND_V_MSG(it == screen_swap_chains.end(), 0, "A swap chain was not created for the screen.");
+
+	return driver->swap_chain_get_pre_rotation_degrees(it->value);
+}
+
RenderingDevice::FramebufferFormatID RenderingDevice::screen_get_framebuffer_format(DisplayServer::WindowID p_screen) const {
_THREAD_SAFE_METHOD_
@@ -4441,6 +4449,117 @@ void RenderingDevice::draw_list_draw(DrawListID p_list, bool p_use_indices, uint
dl->state.draw_count++;
}
+// Records an indirect draw into the draw list p_list, with the draw arguments read from p_buffer starting at byte p_offset.
+// p_use_indices selects the indexed variant of the command. p_draw_count and p_stride are forwarded to the draw graph unchanged.
+// Fails with an error, without recording the draw, if the draw list or buffer cannot be resolved, if the buffer was not created
+// with the indirect usage flag, or if the first argument structure (20 bytes when indexed, 16 otherwise) does not fit in the buffer.
+void RenderingDevice::draw_list_draw_indirect(DrawListID p_list, bool p_use_indices, RID p_buffer, uint32_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
+	ERR_RENDER_THREAD_GUARD();
+
+	DrawList *dl = _get_draw_list_ptr(p_list);
+	ERR_FAIL_NULL(dl);
+
+	Buffer *buffer = storage_buffer_owner.get_or_null(p_buffer);
+	ERR_FAIL_NULL(buffer);
+
+	ERR_FAIL_COND_MSG(!buffer->usage.has_flag(RDD::BUFFER_USAGE_INDIRECT_BIT), "Buffer provided was not created to do indirect dispatch.");
+
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
+#endif
+
+#ifdef DEBUG_ENABLED
+	// Validate the pipeline state: a pipeline must be bound, and its vertex format and push constants must have been supplied.
+	ERR_FAIL_COND_MSG(!dl->validation.pipeline_active,
+			"No render pipeline was set before attempting to draw.");
+	if (dl->validation.pipeline_vertex_format != INVALID_ID) {
+		// Pipeline uses vertices, validate format.
+		ERR_FAIL_COND_MSG(dl->validation.vertex_format == INVALID_ID,
+				"No vertex array was bound, and render pipeline expects vertices.");
+		// Make sure format is right.
+		ERR_FAIL_COND_MSG(dl->validation.pipeline_vertex_format != dl->validation.vertex_format,
+				"The vertex format used to create the pipeline does not match the vertex format bound.");
+	}
+
+	if (dl->validation.pipeline_push_constant_size > 0) {
+		// Using push constants, check that they were supplied.
+		ERR_FAIL_COND_MSG(!dl->validation.pipeline_push_constant_supplied,
+				"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
+	}
+#endif
+
+#ifdef DEBUG_ENABLED
+	// Validate that every set the pipeline expects was supplied with a matching uniform set format.
+	for (uint32_t i = 0; i < dl->state.set_count; i++) {
+		if (dl->state.sets[i].pipeline_expected_format == 0) {
+			// Nothing expected by this pipeline.
+			continue;
+		}
+
+		if (dl->state.sets[i].pipeline_expected_format != dl->state.sets[i].uniform_set_format) {
+			if (dl->state.sets[i].uniform_set_format == 0) {
+				ERR_FAIL_MSG(vformat("Uniforms were never supplied for set (%d) at the time of drawing, which are required by the pipeline.", i));
+			} else if (uniform_set_owner.owns(dl->state.sets[i].uniform_set)) {
+				UniformSet *us = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set);
+				ERR_FAIL_MSG(vformat("Uniforms supplied for set (%d):\n%s\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n%s", i, _shader_uniform_debug(us->shader_id, us->shader_set), _shader_uniform_debug(dl->state.pipeline_shader)));
+			} else {
+				ERR_FAIL_MSG(vformat("Uniforms supplied for set (%s, which was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n%s", i, _shader_uniform_debug(dl->state.pipeline_shader)));
+			}
+		}
+	}
+#endif
+
+	// Prepare descriptor sets if the API doesn't use pipeline barriers.
+	if (!driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) {
+		for (uint32_t i = 0; i < dl->state.set_count; i++) {
+			if (dl->state.sets[i].pipeline_expected_format == 0) {
+				// Nothing expected by this pipeline.
+				continue;
+			}
+
+			draw_graph.add_draw_list_uniform_set_prepare_for_use(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i);
+		}
+	}
+
+	// Bind descriptor sets.
+	for (uint32_t i = 0; i < dl->state.set_count; i++) {
+		if (dl->state.sets[i].pipeline_expected_format == 0) {
+			continue; // Nothing expected by this pipeline.
+		}
+		if (!dl->state.sets[i].bound) {
+			// All good, see if this requires re-binding.
+			draw_graph.add_draw_list_bind_uniform_set(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i);
+
+			UniformSet *uniform_set = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set);
+			_uniform_set_update_shared(uniform_set);
+
+			draw_graph.add_draw_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage);
+
+			dl->state.sets[i].bound = true;
+		}
+	}
+
+	// NOTE(review): only the first argument structure is range-checked below; p_draw_count and p_stride are not validated
+	// against the buffer size. Confirm whether multi-draw callers need a stricter check.
+	if (p_use_indices) {
+#ifdef DEBUG_ENABLED
+		ERR_FAIL_COND_MSG(!dl->validation.index_array_count,
+				"Draw command requested indices, but no index buffer was set.");
+
+		ERR_FAIL_COND_MSG(dl->validation.pipeline_uses_restart_indices != dl->validation.index_buffer_uses_restart_indices,
+				"The usage of restart indices in index buffer does not match the render primitive in the pipeline.");
+#endif
+
+		// The sum is done in 64 bits: in 32 bits an offset close to UINT32_MAX wraps around and slips past the check.
+		ERR_FAIL_COND_MSG(uint64_t(p_offset) + 20 > uint64_t(buffer->size), "Offset provided (+20) is past the end of buffer.");
+
+		draw_graph.add_draw_list_draw_indexed_indirect(buffer->driver_id, p_offset, p_draw_count, p_stride);
+	} else {
+		// Same 64-bit widening as the indexed path, to rule out 32-bit wrap-around.
+		ERR_FAIL_COND_MSG(uint64_t(p_offset) + 16 > uint64_t(buffer->size), "Offset provided (+16) is past the end of buffer.");
+
+		draw_graph.add_draw_list_draw_indirect(buffer->driver_id, p_offset, p_draw_count, p_stride);
+	}
+
+	dl->state.draw_count++;
+
+	// Register the read of the indirect arguments with the draw graph when the buffer has a tracker.
+	if (buffer->draw_tracker != nullptr) {
+		draw_graph.add_draw_list_usage(buffer->draw_tracker, RDG::RESOURCE_USAGE_INDIRECT_BUFFER_READ);
+	}
+
+	_check_transfer_worker_buffer(buffer);
+}
+
void RenderingDevice::draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect) {
ERR_RENDER_THREAD_GUARD();
@@ -5052,7 +5171,6 @@ RenderingDevice::TransferWorker *RenderingDevice::_acquire_transfer_worker(uint3
// No existing worker was picked, we create a new one.
transfer_worker = memnew(TransferWorker);
transfer_worker->command_fence = driver->fence_create();
- transfer_worker->command_semaphore = driver->semaphore_create();
transfer_worker->command_pool = driver->command_pool_create(transfer_queue_family, RDD::COMMAND_BUFFER_TYPE_PRIMARY);
transfer_worker->command_buffer = driver->command_buffer_create(transfer_worker->command_pool);
transfer_worker->index = transfer_worker_pool.size();
@@ -5075,7 +5193,7 @@ RenderingDevice::TransferWorker *RenderingDevice::_acquire_transfer_worker(uint3
// If there's not enough bytes to use on the staging buffer, we submit everything pending from the worker and wait for the work to be finished.
if (transfer_worker->recording) {
_end_transfer_worker(transfer_worker);
- _submit_transfer_worker(transfer_worker, false);
+ _submit_transfer_worker(transfer_worker);
}
if (transfer_worker->submitted) {
@@ -5128,12 +5246,12 @@ void RenderingDevice::_end_transfer_worker(TransferWorker *p_transfer_worker) {
p_transfer_worker->recording = false;
}
-void RenderingDevice::_submit_transfer_worker(TransferWorker *p_transfer_worker, bool p_signal_semaphore) {
- const VectorView<RDD::SemaphoreID> execute_semaphore = p_signal_semaphore ? p_transfer_worker->command_semaphore : VectorView<RDD::SemaphoreID>();
- driver->command_queue_execute_and_present(transfer_queue, {}, p_transfer_worker->command_buffer, execute_semaphore, p_transfer_worker->command_fence, {});
- if (p_signal_semaphore) {
+void RenderingDevice::_submit_transfer_worker(TransferWorker *p_transfer_worker, VectorView<RDD::SemaphoreID> p_signal_semaphores) {
+ driver->command_queue_execute_and_present(transfer_queue, {}, p_transfer_worker->command_buffer, p_signal_semaphores, p_transfer_worker->command_fence, {});
+
+ for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) {
// Indicate the frame should wait on these semaphores before executing the main command buffer.
- frames[frame].semaphores_to_wait_on.push_back(p_transfer_worker->command_semaphore);
+ frames[frame].semaphores_to_wait_on.push_back(p_signal_semaphores[i]);
}
p_transfer_worker->submitted = true;
@@ -5153,6 +5271,21 @@ void RenderingDevice::_wait_for_transfer_worker(TransferWorker *p_transfer_worke
MutexLock lock(p_transfer_worker->operations_mutex);
p_transfer_worker->operations_processed = p_transfer_worker->operations_submitted;
}
+
+ if (!p_transfer_worker->texture_barriers.is_empty()) {
+ MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
+ _flush_barriers_for_transfer_worker(p_transfer_worker);
+ }
+}
+
+// Appends every texture barrier queued on p_transfer_worker to transfer_worker_pool_texture_barriers,
+// then empties the worker's own queue. Does nothing when the worker has no queued barriers.
+// Both callers visible in this file hold transfer_worker_pool_mutex while calling this.
+void RenderingDevice::_flush_barriers_for_transfer_worker(TransferWorker *p_transfer_worker) {
+	if (p_transfer_worker->texture_barriers.is_empty()) {
+		return;
+	}
+
+	for (const RDD::TextureBarrier &pending_barrier : p_transfer_worker->texture_barriers) {
+		transfer_worker_pool_texture_barriers.push_back(pending_barrier);
+	}
+
+	p_transfer_worker->texture_barriers.clear();
+}
void RenderingDevice::_check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation) {
@@ -5194,10 +5327,11 @@ void RenderingDevice::_check_transfer_worker_index_array(IndexArray *p_index_arr
}
}
-void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) {
+void RenderingDevice::_submit_transfer_workers(RDD::CommandBufferID p_draw_command_buffer) {
MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
- for (TransferWorker *worker : transfer_worker_pool) {
- if (p_operations_used_by_draw) {
+ for (uint32_t i = 0; i < transfer_worker_pool.size(); i++) {
+ TransferWorker *worker = transfer_worker_pool[i];
+ if (p_draw_command_buffer) {
MutexLock lock(worker->operations_mutex);
if (worker->operations_processed >= transfer_worker_operation_used_by_draw[worker->index]) {
// The operation used by the draw has already been processed, we don't need to wait on the worker.
@@ -5208,11 +5342,21 @@ void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) {
{
MutexLock lock(worker->thread_mutex);
if (worker->recording) {
+ VectorView<RDD::SemaphoreID> semaphores = p_draw_command_buffer ? frames[frame].transfer_worker_semaphores[i] : VectorView<RDD::SemaphoreID>();
_end_transfer_worker(worker);
- _submit_transfer_worker(worker, true);
+ _submit_transfer_worker(worker, semaphores);
+ }
+
+ if (p_draw_command_buffer) {
+ _flush_barriers_for_transfer_worker(worker);
}
}
}
+
+ if (p_draw_command_buffer && !transfer_worker_pool_texture_barriers.is_empty()) {
+ driver->command_pipeline_barrier(p_draw_command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, {}, {}, transfer_worker_pool_texture_barriers);
+ transfer_worker_pool_texture_barriers.clear();
+ }
}
void RenderingDevice::_wait_for_transfer_workers() {
@@ -5228,7 +5372,6 @@ void RenderingDevice::_wait_for_transfer_workers() {
void RenderingDevice::_free_transfer_workers() {
MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
for (TransferWorker *worker : transfer_worker_pool) {
- driver->semaphore_free(worker->command_semaphore);
driver->fence_free(worker->command_fence);
driver->buffer_free(worker->staging_buffer);
driver->command_pool_free(worker->command_pool);
@@ -5807,10 +5950,10 @@ void RenderingDevice::_end_frame() {
ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work).");
}
- _submit_transfer_workers(true);
-
// The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use.
RDD::CommandBufferID command_buffer = frames[frame].command_buffer;
+ _submit_transfer_workers(command_buffer);
+
draw_graph.end(RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool);
driver->command_buffer_end(command_buffer);
driver->end_segment();
@@ -6014,6 +6157,9 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
present_queue_family = main_queue_family;
}
+ // Use the processor count as the max amount of transfer workers that can be created.
+ transfer_worker_pool_max_size = OS::get_singleton()->get_processor_count();
+
// Create data for all the frames.
for (uint32_t i = 0; i < frames.size(); i++) {
frames[i].index = 0;
@@ -6041,6 +6187,13 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
// Assign the main queue family and command pool to the command buffer pool.
frames[i].command_buffer_pool.pool = frames[i].command_pool;
+
+ // Create the semaphores for the transfer workers.
+ frames[i].transfer_worker_semaphores.resize(transfer_worker_pool_max_size);
+ for (uint32_t j = 0; j < transfer_worker_pool_max_size; j++) {
+ frames[i].transfer_worker_semaphores[j] = driver->semaphore_create();
+ ERR_FAIL_COND_V(!frames[i].transfer_worker_semaphores[j], FAILED);
+ }
}
// Start from frame count, so everything else is immediately old.
@@ -6087,9 +6240,6 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
ERR_FAIL_COND_V(err, FAILED);
}
- // TODO: How should this max size be determined?
- transfer_worker_pool_max_size = OS::get_singleton()->get_processor_count();
-
draw_list = nullptr;
compute_list = nullptr;
@@ -6380,7 +6530,7 @@ void RenderingDevice::finalize() {
}
// Wait for transfer workers to finish.
- _submit_transfer_workers(false);
+ _submit_transfer_workers();
_wait_for_transfer_workers();
// Delete everything the graph has created.
@@ -6452,6 +6602,10 @@ void RenderingDevice::finalize() {
for (uint32_t j = 0; j < buffer_pool.buffers.size(); j++) {
driver->semaphore_free(buffer_pool.semaphores[j]);
}
+
+ for (uint32_t j = 0; j < frames[i].transfer_worker_semaphores.size(); j++) {
+ driver->semaphore_free(frames[i].transfer_worker_semaphores[j]);
+ }
}
if (pipeline_cache_enabled) {
@@ -6518,6 +6672,12 @@ void RenderingDevice::finalize() {
ERR_FAIL_COND(reverse_dependency_map.size());
}
+// Forwards p_max_fps to the driver for every swap chain stored in screen_swap_chains.
+void RenderingDevice::_set_max_fps(int p_max_fps) {
+	for (const KeyValue<DisplayServer::WindowID, RDD::SwapChainID> &screen_entry : screen_swap_chains) {
+		const RDD::SwapChainID swap_chain = screen_entry.value;
+		driver->swap_chain_set_max_fps(swap_chain, p_max_fps);
+	}
+}
+
RenderingDevice *RenderingDevice::create_local_device() {
RenderingDevice *rd = memnew(RenderingDevice);
rd->initialize(context);
@@ -6616,6 +6776,7 @@ void RenderingDevice::_bind_methods() {
ClassDB::bind_method(D_METHOD("draw_list_set_push_constant", "draw_list", "buffer", "size_bytes"), &RenderingDevice::_draw_list_set_push_constant);
ClassDB::bind_method(D_METHOD("draw_list_draw", "draw_list", "use_indices", "instances", "procedural_vertex_count"), &RenderingDevice::draw_list_draw, DEFVAL(0));
+ ClassDB::bind_method(D_METHOD("draw_list_draw_indirect", "draw_list", "use_indices", "buffer", "offset", "draw_count", "stride"), &RenderingDevice::draw_list_draw_indirect, DEFVAL(0), DEFVAL(1), DEFVAL(0));
ClassDB::bind_method(D_METHOD("draw_list_enable_scissor", "draw_list", "rect"), &RenderingDevice::draw_list_enable_scissor, DEFVAL(Rect2()));
ClassDB::bind_method(D_METHOD("draw_list_disable_scissor", "draw_list"), &RenderingDevice::draw_list_disable_scissor);
@@ -7225,6 +7386,10 @@ void RenderingDevice::_bind_methods() {
BIND_ENUM_CONSTANT(DEBUG_PASS);
}
+// Stores the id of the calling thread in render_thread_id.
+void RenderingDevice::make_current() {
+	this->render_thread_id = Thread::get_caller_id();
+}
+
RenderingDevice::~RenderingDevice() {
finalize();