summaryrefslogtreecommitdiffstats
path: root/servers
diff options
context:
space:
mode:
authorSpartan322 <Megacake1234@gmail.com>2024-10-18 17:26:56 -0400
committerSpartan322 <Megacake1234@gmail.com>2024-10-18 17:26:56 -0400
commitebbe3e8c51fcf89f6b19a74497cedb5a8c448979 (patch)
tree075435b3a1b3ac57a75f1ca90865f61697c053f4 /servers
parentedb8e2b1b2b09236bdcd76cb6c8b40b6fbb2abaf (diff)
parent80f0b33313dae52d072ba2771a88ebcc4f0b4d6d (diff)
downloadredot-engine-ebbe3e8c51fcf89f6b19a74497cedb5a8c448979.tar.gz
Merge commit godotengine/godot@80f0b33313dae52d072ba2771a88ebcc4f0b4d6d
Diffstat (limited to 'servers')
-rw-r--r--servers/rendering/renderer_canvas_cull.cpp6
-rw-r--r--servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl6
-rw-r--r--servers/rendering/rendering_device.cpp81
-rw-r--r--servers/rendering/rendering_device.h12
-rw-r--r--servers/rendering/rendering_device_driver.cpp2
-rw-r--r--servers/rendering/rendering_device_driver.h2
6 files changed, 75 insertions, 34 deletions
diff --git a/servers/rendering/renderer_canvas_cull.cpp b/servers/rendering/renderer_canvas_cull.cpp
index 9ec5d087ba..5fd4d12198 100644
--- a/servers/rendering/renderer_canvas_cull.cpp
+++ b/servers/rendering/renderer_canvas_cull.cpp
@@ -97,7 +97,7 @@ void RendererCanvasCull::_collect_ysort_children(RendererCanvasCull::Item *p_can
}
if (snapping_2d_transforms_to_pixel) {
- child_xform.columns[2] = child_xform.columns[2].round();
+ child_xform.columns[2] = (child_xform.columns[2] + Point2(0.5, 0.5)).floor();
}
r_items[r_index] = child_items[i];
@@ -305,8 +305,8 @@ void RendererCanvasCull::_cull_canvas_item(Item *p_canvas_item, const Transform2
Transform2D parent_xform = p_parent_xform;
if (snapping_2d_transforms_to_pixel) {
- self_xform.columns[2] = self_xform.columns[2].round();
- parent_xform.columns[2] = parent_xform.columns[2].round();
+ self_xform.columns[2] = (self_xform.columns[2] + Point2(0.5, 0.5)).floor();
+ parent_xform.columns[2] = (parent_xform.columns[2] + Point2(0.5, 0.5)).floor();
}
final_xform = parent_xform * self_xform;
diff --git a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl
index 9d47711599..2f0e4e0bea 100644
--- a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl
+++ b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl
@@ -105,10 +105,6 @@ layout(location = 6) mediump out vec3 binormal_interp;
layout(location = 7) highp out vec4 diffuse_light_interp;
layout(location = 8) highp out vec4 specular_light_interp;
-layout(constant_id = 9) const bool sc_disable_omni_lights = false;
-layout(constant_id = 10) const bool sc_disable_spot_lights = false;
-layout(constant_id = 12) const bool sc_disable_directional_lights = false;
-
#include "../scene_forward_vertex_lights_inc.glsl"
#endif // !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) && defined(USE_VERTEX_LIGHTING)
#ifdef MATERIAL_UNIFORMS_USED
@@ -1606,7 +1602,6 @@ void main() {
#endif
#undef BIAS_FUNC
}
-#endif
if (i < 4) {
shadow0 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << (i * 8);
@@ -1614,6 +1609,7 @@ void main() {
shadow1 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << ((i - 4) * 8);
}
}
+#endif // SHADOWS_DISABLED
#ifndef USE_VERTEX_LIGHTING
for (uint i = 0; i < scene_data.directional_light_count; i++) {
diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp
index 74fe98f40a..1d01bb8b30 100644
--- a/servers/rendering/rendering_device.cpp
+++ b/servers/rendering/rendering_device.cpp
@@ -1245,6 +1245,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
TransferWorker *transfer_worker = nullptr;
const uint8_t *read_ptr = p_data.ptr();
uint8_t *write_ptr = nullptr;
+ const RDD::TextureLayout copy_dst_layout = driver->api_trait_get(RDD::API_TRAIT_USE_GENERAL_IN_COPY_QUEUES) ? RDD::TEXTURE_LAYOUT_GENERAL : RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
for (uint32_t pass = 0; pass < 2; pass++) {
const bool copy_pass = (pass == 1);
if (copy_pass) {
@@ -1269,7 +1270,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
tb.texture = texture->driver_id;
tb.dst_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED;
- tb.next_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
+ tb.next_layout = copy_dst_layout;
tb.subresources.aspect = texture->barrier_aspect_flags;
tb.subresources.mipmap_count = texture->mipmaps;
tb.subresources.base_layer = p_layer;
@@ -1315,7 +1316,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
copy_region.texture_subresources.layer_count = 1;
copy_region.texture_offset = Vector3i(0, 0, z);
copy_region.texture_region_size = Vector3i(logic_width, logic_height, 1);
- driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, copy_region);
+ driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, copy_dst_layout, copy_region);
}
staging_local_offset += to_allocate;
@@ -1334,14 +1335,13 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
RDD::TextureBarrier tb;
tb.texture = texture->driver_id;
tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
- tb.prev_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
+ tb.prev_layout = copy_dst_layout;
tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
tb.subresources.aspect = texture->barrier_aspect_flags;
tb.subresources.mipmap_count = texture->mipmaps;
tb.subresources.base_layer = p_layer;
tb.subresources.layer_count = 1;
-
- driver->command_pipeline_barrier(transfer_worker->command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb);
+ transfer_worker->texture_barriers.push_back(tb);
}
_release_transfer_worker(transfer_worker);
@@ -5054,7 +5054,6 @@ RenderingDevice::TransferWorker *RenderingDevice::_acquire_transfer_worker(uint3
// No existing worker was picked, we create a new one.
transfer_worker = memnew(TransferWorker);
transfer_worker->command_fence = driver->fence_create();
- transfer_worker->command_semaphore = driver->semaphore_create();
transfer_worker->command_pool = driver->command_pool_create(transfer_queue_family, RDD::COMMAND_BUFFER_TYPE_PRIMARY);
transfer_worker->command_buffer = driver->command_buffer_create(transfer_worker->command_pool);
transfer_worker->index = transfer_worker_pool.size();
@@ -5077,7 +5076,7 @@ RenderingDevice::TransferWorker *RenderingDevice::_acquire_transfer_worker(uint3
// If there's not enough bytes to use on the staging buffer, we submit everything pending from the worker and wait for the work to be finished.
if (transfer_worker->recording) {
_end_transfer_worker(transfer_worker);
- _submit_transfer_worker(transfer_worker, false);
+ _submit_transfer_worker(transfer_worker);
}
if (transfer_worker->submitted) {
@@ -5130,12 +5129,12 @@ void RenderingDevice::_end_transfer_worker(TransferWorker *p_transfer_worker) {
p_transfer_worker->recording = false;
}
-void RenderingDevice::_submit_transfer_worker(TransferWorker *p_transfer_worker, bool p_signal_semaphore) {
- const VectorView<RDD::SemaphoreID> execute_semaphore = p_signal_semaphore ? p_transfer_worker->command_semaphore : VectorView<RDD::SemaphoreID>();
- driver->command_queue_execute_and_present(transfer_queue, {}, p_transfer_worker->command_buffer, execute_semaphore, p_transfer_worker->command_fence, {});
- if (p_signal_semaphore) {
+void RenderingDevice::_submit_transfer_worker(TransferWorker *p_transfer_worker, VectorView<RDD::SemaphoreID> p_signal_semaphores) {
+ driver->command_queue_execute_and_present(transfer_queue, {}, p_transfer_worker->command_buffer, p_signal_semaphores, p_transfer_worker->command_fence, {});
+
+ for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) {
// Indicate the frame should wait on these semaphores before executing the main command buffer.
- frames[frame].semaphores_to_wait_on.push_back(p_transfer_worker->command_semaphore);
+ frames[frame].semaphores_to_wait_on.push_back(p_signal_semaphores[i]);
}
p_transfer_worker->submitted = true;
@@ -5155,6 +5154,21 @@ void RenderingDevice::_wait_for_transfer_worker(TransferWorker *p_transfer_worke
MutexLock lock(p_transfer_worker->operations_mutex);
p_transfer_worker->operations_processed = p_transfer_worker->operations_submitted;
}
+
+ if (!p_transfer_worker->texture_barriers.is_empty()) {
+ MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
+ _flush_barriers_for_transfer_worker(p_transfer_worker);
+ }
+}
+
+void RenderingDevice::_flush_barriers_for_transfer_worker(TransferWorker *p_transfer_worker) {
+ if (!p_transfer_worker->texture_barriers.is_empty()) {
+ for (uint32_t i = 0; i < p_transfer_worker->texture_barriers.size(); i++) {
+ transfer_worker_pool_texture_barriers.push_back(p_transfer_worker->texture_barriers[i]);
+ }
+
+ p_transfer_worker->texture_barriers.clear();
+ }
}
void RenderingDevice::_check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation) {
@@ -5196,10 +5210,11 @@ void RenderingDevice::_check_transfer_worker_index_array(IndexArray *p_index_arr
}
}
-void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) {
+void RenderingDevice::_submit_transfer_workers(RDD::CommandBufferID p_draw_command_buffer) {
MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
- for (TransferWorker *worker : transfer_worker_pool) {
- if (p_operations_used_by_draw) {
+ for (uint32_t i = 0; i < transfer_worker_pool.size(); i++) {
+ TransferWorker *worker = transfer_worker_pool[i];
+ if (p_draw_command_buffer) {
MutexLock lock(worker->operations_mutex);
if (worker->operations_processed >= transfer_worker_operation_used_by_draw[worker->index]) {
// The operation used by the draw has already been processed, we don't need to wait on the worker.
@@ -5210,11 +5225,21 @@ void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) {
{
MutexLock lock(worker->thread_mutex);
if (worker->recording) {
+ VectorView<RDD::SemaphoreID> semaphores = p_draw_command_buffer ? frames[frame].transfer_worker_semaphores[i] : VectorView<RDD::SemaphoreID>();
_end_transfer_worker(worker);
- _submit_transfer_worker(worker, true);
+ _submit_transfer_worker(worker, semaphores);
+ }
+
+ if (p_draw_command_buffer) {
+ _flush_barriers_for_transfer_worker(worker);
}
}
}
+
+ if (p_draw_command_buffer && !transfer_worker_pool_texture_barriers.is_empty()) {
+ driver->command_pipeline_barrier(p_draw_command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, {}, {}, transfer_worker_pool_texture_barriers);
+ transfer_worker_pool_texture_barriers.clear();
+ }
}
void RenderingDevice::_wait_for_transfer_workers() {
@@ -5230,7 +5255,6 @@ void RenderingDevice::_wait_for_transfer_workers() {
void RenderingDevice::_free_transfer_workers() {
MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
for (TransferWorker *worker : transfer_worker_pool) {
- driver->semaphore_free(worker->command_semaphore);
driver->fence_free(worker->command_fence);
driver->buffer_free(worker->staging_buffer);
driver->command_pool_free(worker->command_pool);
@@ -5809,10 +5833,10 @@ void RenderingDevice::_end_frame() {
ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work).");
}
- _submit_transfer_workers(true);
-
// The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use.
RDD::CommandBufferID command_buffer = frames[frame].command_buffer;
+ _submit_transfer_workers(command_buffer);
+
draw_graph.end(RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool);
driver->command_buffer_end(command_buffer);
driver->end_segment();
@@ -6016,6 +6040,9 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
present_queue_family = main_queue_family;
}
+ // Use the processor count as the max amount of transfer workers that can be created.
+ transfer_worker_pool_max_size = OS::get_singleton()->get_processor_count();
+
// Create data for all the frames.
for (uint32_t i = 0; i < frames.size(); i++) {
frames[i].index = 0;
@@ -6043,6 +6070,13 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
// Assign the main queue family and command pool to the command buffer pool.
frames[i].command_buffer_pool.pool = frames[i].command_pool;
+
+ // Create the semaphores for the transfer workers.
+ frames[i].transfer_worker_semaphores.resize(transfer_worker_pool_max_size);
+ for (uint32_t j = 0; j < transfer_worker_pool_max_size; j++) {
+ frames[i].transfer_worker_semaphores[j] = driver->semaphore_create();
+ ERR_FAIL_COND_V(!frames[i].transfer_worker_semaphores[j], FAILED);
+ }
}
// Start from frame count, so everything else is immediately old.
@@ -6089,9 +6123,6 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
ERR_FAIL_COND_V(err, FAILED);
}
- // TODO: How should this max size be determined?
- transfer_worker_pool_max_size = OS::get_singleton()->get_processor_count();
-
draw_list = nullptr;
compute_list = nullptr;
@@ -6382,7 +6413,7 @@ void RenderingDevice::finalize() {
}
// Wait for transfer workers to finish.
- _submit_transfer_workers(false);
+ _submit_transfer_workers();
_wait_for_transfer_workers();
// Delete everything the graph has created.
@@ -6454,6 +6485,10 @@ void RenderingDevice::finalize() {
for (uint32_t j = 0; j < buffer_pool.buffers.size(); j++) {
driver->semaphore_free(buffer_pool.semaphores[j]);
}
+
+ for (uint32_t j = 0; j < frames[i].transfer_worker_semaphores.size(); j++) {
+ driver->semaphore_free(frames[i].transfer_worker_semaphores[j]);
+ }
}
if (pipeline_cache_enabled) {
diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h
index 60a8ef1a9c..3d6a8fc4a8 100644
--- a/servers/rendering/rendering_device.h
+++ b/servers/rendering/rendering_device.h
@@ -1269,7 +1269,7 @@ private:
RDD::CommandBufferID command_buffer;
RDD::CommandPoolID command_pool;
RDD::FenceID command_fence;
- RDD::SemaphoreID command_semaphore;
+ LocalVector<RDD::TextureBarrier> texture_barriers;
bool recording = false;
bool submitted = false;
BinaryMutex thread_mutex;
@@ -1283,20 +1283,22 @@ private:
uint32_t transfer_worker_pool_max_size = 1;
LocalVector<uint64_t> transfer_worker_operation_used_by_draw;
LocalVector<uint32_t> transfer_worker_pool_available_list;
+ LocalVector<RDD::TextureBarrier> transfer_worker_pool_texture_barriers;
BinaryMutex transfer_worker_pool_mutex;
ConditionVariable transfer_worker_pool_condition;
TransferWorker *_acquire_transfer_worker(uint32_t p_transfer_size, uint32_t p_required_align, uint32_t &r_staging_offset);
void _release_transfer_worker(TransferWorker *p_transfer_worker);
void _end_transfer_worker(TransferWorker *p_transfer_worker);
- void _submit_transfer_worker(TransferWorker *p_transfer_worker, bool p_signal_semaphore);
+ void _submit_transfer_worker(TransferWorker *p_transfer_worker, VectorView<RDD::SemaphoreID> p_signal_semaphores = VectorView<RDD::SemaphoreID>());
void _wait_for_transfer_worker(TransferWorker *p_transfer_worker);
+ void _flush_barriers_for_transfer_worker(TransferWorker *p_transfer_worker);
void _check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation);
void _check_transfer_worker_buffer(Buffer *p_buffer);
void _check_transfer_worker_texture(Texture *p_texture);
void _check_transfer_worker_vertex_array(VertexArray *p_vertex_array);
void _check_transfer_worker_index_array(IndexArray *p_index_array);
- void _submit_transfer_workers(bool p_operations_used_by_draw);
+ void _submit_transfer_workers(RDD::CommandBufferID p_draw_command_buffer = RDD::CommandBufferID());
void _wait_for_transfer_workers();
void _free_transfer_workers();
@@ -1374,6 +1376,10 @@ private:
// Swap chains prepared for drawing during the frame that must be presented.
LocalVector<RDD::SwapChainID> swap_chains_to_present;
+ // Semaphores the transfer workers can use to wait before rendering the frame.
+ // This must have the same size of the transfer worker pool.
+ TightLocalVector<RDD::SemaphoreID> transfer_worker_semaphores;
+
// Extra command buffer pool used for driver workarounds.
RDG::CommandBufferPool command_buffer_pool;
diff --git a/servers/rendering/rendering_device_driver.cpp b/servers/rendering/rendering_device_driver.cpp
index 96876b4945..2ba75adef1 100644
--- a/servers/rendering/rendering_device_driver.cpp
+++ b/servers/rendering/rendering_device_driver.cpp
@@ -376,6 +376,8 @@ uint64_t RenderingDeviceDriver::api_trait_get(ApiTrait p_trait) {
return 1;
case API_TRAIT_CLEARS_WITH_COPY_ENGINE:
return true;
+ case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES:
+ return false;
default:
ERR_FAIL_V(0);
}
diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h
index 0e83c2c42b..8730540509 100644
--- a/servers/rendering/rendering_device_driver.h
+++ b/servers/rendering/rendering_device_driver.h
@@ -222,6 +222,7 @@ public:
enum TextureLayout {
TEXTURE_LAYOUT_UNDEFINED,
+ TEXTURE_LAYOUT_GENERAL,
TEXTURE_LAYOUT_STORAGE_OPTIMAL,
TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
@@ -752,6 +753,7 @@ public:
API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP,
API_TRAIT_SECONDARY_VIEWPORT_SCISSOR,
API_TRAIT_CLEARS_WITH_COPY_ENGINE,
+ API_TRAIT_USE_GENERAL_IN_COPY_QUEUES,
};
enum ShaderChangeInvalidation {