diff options
Diffstat (limited to 'servers')
-rw-r--r-- | servers/rendering/dummy/rasterizer_dummy.h | 1 | ||||
-rw-r--r-- | servers/rendering/renderer_compositor.h | 1 | ||||
-rw-r--r-- | servers/rendering/renderer_rd/api_context_rd.h | 82 | ||||
-rw-r--r-- | servers/rendering/renderer_rd/renderer_compositor_rd.cpp | 18 | ||||
-rw-r--r-- | servers/rendering/renderer_rd/renderer_compositor_rd.h | 1 | ||||
-rw-r--r-- | servers/rendering/renderer_rd/shader_rd.cpp | 3 | ||||
-rw-r--r-- | servers/rendering/renderer_viewport.cpp | 3 | ||||
-rw-r--r-- | servers/rendering/rendering_context_driver.cpp (renamed from servers/rendering/renderer_rd/api_context_rd.cpp) | 58 | ||||
-rw-r--r-- | servers/rendering/rendering_context_driver.h | 101 | ||||
-rw-r--r-- | servers/rendering/rendering_device.compat.inc | 5 | ||||
-rw-r--r-- | servers/rendering/rendering_device.cpp | 729 | ||||
-rw-r--r-- | servers/rendering/rendering_device.h | 126 | ||||
-rw-r--r-- | servers/rendering/rendering_device_commons.h | 11 | ||||
-rw-r--r-- | servers/rendering/rendering_device_driver.h | 130 | ||||
-rw-r--r-- | servers/rendering/rendering_device_graph.cpp | 29 | ||||
-rw-r--r-- | servers/rendering/rendering_device_graph.h | 3 |
16 files changed, 849 insertions, 452 deletions
diff --git a/servers/rendering/dummy/rasterizer_dummy.h b/servers/rendering/dummy/rasterizer_dummy.h index 929c661009..c61656bc77 100644 --- a/servers/rendering/dummy/rasterizer_dummy.h +++ b/servers/rendering/dummy/rasterizer_dummy.h @@ -86,7 +86,6 @@ public: time += frame_step; } - void prepare_for_blitting_render_targets() override {} void blit_render_targets_to_screen(int p_screen, const BlitToScreen *p_render_targets, int p_amount) override {} void end_viewport(bool p_swap_buffers) override {} diff --git a/servers/rendering/renderer_compositor.h b/servers/rendering/renderer_compositor.h index 13767a3875..3c49e31516 100644 --- a/servers/rendering/renderer_compositor.h +++ b/servers/rendering/renderer_compositor.h @@ -96,7 +96,6 @@ public: virtual void initialize() = 0; virtual void begin_frame(double frame_step) = 0; - virtual void prepare_for_blitting_render_targets() = 0; virtual void blit_render_targets_to_screen(DisplayServer::WindowID p_screen, const BlitToScreen *p_render_targets, int p_amount) = 0; virtual void end_viewport(bool p_swap_buffers) = 0; diff --git a/servers/rendering/renderer_rd/api_context_rd.h b/servers/rendering/renderer_rd/api_context_rd.h deleted file mode 100644 index fd3be80605..0000000000 --- a/servers/rendering/renderer_rd/api_context_rd.h +++ /dev/null @@ -1,82 +0,0 @@ -/**************************************************************************/ -/* api_context_rd.h */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/**************************************************************************/ - -#ifndef API_CONTEXT_RD_H -#define API_CONTEXT_RD_H - -#include "servers/rendering/rendering_device.h" -#include "servers/rendering/rendering_device_driver.h" - -class ApiContextRD { -public: - virtual const char *get_api_name() const = 0; - virtual RenderingDevice::Capabilities get_device_capabilities() const = 0; - virtual const RDD::MultiviewCapabilities &get_multiview_capabilities() const = 0; - - virtual int get_swapchain_image_count() const = 0; - - virtual Error window_create(DisplayServer::WindowID p_window_id, DisplayServer::VSyncMode p_vsync_mode, int p_width, int p_height, const void *p_platform_data) = 0; - virtual void window_resize(DisplayServer::WindowID p_window_id, int p_width, int p_height) = 0; - virtual int window_get_width(DisplayServer::WindowID p_window = 0) = 0; - virtual int window_get_height(DisplayServer::WindowID p_window = 0) = 0; - virtual bool window_is_valid_swapchain(DisplayServer::WindowID p_window = 0) = 0; - virtual void window_destroy(DisplayServer::WindowID p_window_id) = 0; - virtual RDD::RenderPassID window_get_render_pass(DisplayServer::WindowID p_window = 0) = 0; - virtual RDD::FramebufferID window_get_framebuffer(DisplayServer::WindowID p_window = 0) = 0; - - virtual RID local_device_create() = 0; - virtual void local_device_push_command_buffers(RID p_local_device, const RDD::CommandBufferID *p_buffers, int p_count) = 0; - virtual void local_device_sync(RID p_local_device) = 0; - virtual void local_device_free(RID p_local_device) = 0; - - virtual void set_setup_buffer(RDD::CommandBufferID p_command_buffer) = 0; - virtual void append_command_buffer(RDD::CommandBufferID p_command_buffer) = 0; - virtual void flush(bool p_flush_setup = false, bool p_flush_pending = false, bool p_sync = true) = 0; - virtual Error prepare_buffers(RDD::CommandBufferID p_command_buffer) = 0; - virtual void postpare_buffers(RDD::CommandBufferID p_command_buffer) = 0; - virtual Error swap_buffers() = 0; - virtual Error initialize() = 0; - - virtual String get_device_vendor_name() const = 0; - virtual String get_device_name() const = 0; - virtual RDD::DeviceType get_device_type() const = 0; - virtual String get_device_api_version() const = 0; - virtual String get_device_pipeline_cache_uuid() const = 0; - - virtual void set_vsync_mode(DisplayServer::WindowID p_window, DisplayServer::VSyncMode p_mode) = 0; - virtual DisplayServer::VSyncMode get_vsync_mode(DisplayServer::WindowID p_window = 0) const = 0; - - virtual RenderingDeviceDriver *get_driver(RID p_local_device = RID()) = 0; - virtual bool is_debug_utils_enabled() const = 0; - - virtual ~ApiContextRD(); -}; - -#endif // API_CONTEXT_RD_H diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp index a69877e680..012451a5cd 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp @@ -33,16 +33,16 @@ #include "core/config/project_settings.h" #include "core/io/dir_access.h" -void RendererCompositorRD::prepare_for_blitting_render_targets() { - RD::get_singleton()->prepare_screen_for_drawing(); -} - void RendererCompositorRD::blit_render_targets_to_screen(DisplayServer::WindowID p_screen, const BlitToScreen *p_render_targets, int p_amount) { - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin_for_screen(p_screen); - if (draw_list == RD::INVALID_ID) { - return; // Window is minimized and does not have valid swapchain, skip drawing without printing errors. + Error err = RD::get_singleton()->screen_prepare_for_drawing(p_screen); + if (err != OK) { + // Window is minimized and does not have valid swapchain, skip drawing without printing errors. + return; } + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin_for_screen(p_screen); + ERR_FAIL_COND(draw_list == RD::INVALID_ID); + for (int i = 0; i < p_amount; i++) { RID rd_texture = texture_storage->render_target_get_rd_texture(p_render_targets[i].render_target); ERR_CONTINUE(rd_texture.is_null()); @@ -122,7 +122,7 @@ void RendererCompositorRD::initialize() { blit.shader_version = blit.shader.version_create(); for (int i = 0; i < BLIT_MODE_MAX; i++) { - blit.pipelines[i] = RD::get_singleton()->render_pipeline_create(blit.shader.version_get_shader(blit.shader_version, i), RD::get_singleton()->screen_get_framebuffer_format(), RD::INVALID_ID, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), i == BLIT_MODE_NORMAL_ALPHA ? RenderingDevice::PipelineColorBlendState::create_blend() : RenderingDevice::PipelineColorBlendState::create_disabled(), 0); + blit.pipelines[i] = RD::get_singleton()->render_pipeline_create(blit.shader.version_get_shader(blit.shader_version, i), RD::get_singleton()->screen_get_framebuffer_format(DisplayServer::MAIN_WINDOW_ID), RD::INVALID_ID, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), i == BLIT_MODE_NORMAL_ALPHA ? RenderingDevice::PipelineColorBlendState::create_blend() : RenderingDevice::PipelineColorBlendState::create_disabled(), 0); } //create index array for copy shader @@ -169,7 +169,7 @@ void RendererCompositorRD::set_boot_image(const Ref<Image> &p_image, const Color return; } - RD::get_singleton()->prepare_screen_for_drawing(); + RD::get_singleton()->screen_prepare_for_drawing(DisplayServer::MAIN_WINDOW_ID); RID texture = texture_storage->texture_allocate(); texture_storage->texture_2d_initialize(texture, p_image); diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.h b/servers/rendering/renderer_rd/renderer_compositor_rd.h index 9b073821c2..95c2b812d9 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.h +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.h @@ -120,7 +120,6 @@ public: void initialize(); void begin_frame(double frame_step); - void prepare_for_blitting_render_targets(); void blit_render_targets_to_screen(DisplayServer::WindowID p_screen, const BlitToScreen *p_render_targets, int p_amount); void end_viewport(bool p_swap_buffers) {} diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp index db06fc4d21..789c6c2845 100644 --- a/servers/rendering/renderer_rd/shader_rd.cpp +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -36,7 +36,6 @@ #include "core/object/worker_thread_pool.h" #include "core/version.h" #include "renderer_compositor_rd.h" -#include "servers/rendering/renderer_rd/api_context_rd.h" #include "servers/rendering/rendering_device.h" #include "thirdparty/misc/smolv.h" @@ -398,7 +397,7 @@ static const uint32_t cache_file_version = 3; String ShaderRD::_get_cache_file_path(Version *p_version, int p_group) { const String &sha1 = _version_get_sha1(p_version); - const String &api_safe_name = String(RD::get_singleton()->get_context()->get_api_name()).validate_filename().to_lower(); + const String &api_safe_name = String(RD::get_singleton()->get_device_api_name()).validate_filename().to_lower(); const String &path = shader_cache_dir.path_join(name).path_join(group_sha256[p_group]).path_join(sha1) + "." + api_safe_name + ".cache"; return path; } diff --git a/servers/rendering/renderer_viewport.cpp b/servers/rendering/renderer_viewport.cpp index 7a2415442d..5bc5f333df 100644 --- a/servers/rendering/renderer_viewport.cpp +++ b/servers/rendering/renderer_viewport.cpp @@ -824,9 +824,6 @@ void RendererViewport::draw_viewports(bool p_swap_buffers) { RENDER_TIMESTAMP("< Render Viewports"); if (p_swap_buffers && !blit_to_screen_list.is_empty()) { - // This needs to be called to make screen swapping more efficient. - RSG::rasterizer->prepare_for_blitting_render_targets(); - for (const KeyValue<int, Vector<BlitToScreen>> &E : blit_to_screen_list) { RSG::rasterizer->blit_render_targets_to_screen(E.key, E.value.ptr(), E.value.size()); } diff --git a/servers/rendering/renderer_rd/api_context_rd.cpp b/servers/rendering/rendering_context_driver.cpp index b5b3cdd88c..19c0b0838c 100644 --- a/servers/rendering/renderer_rd/api_context_rd.cpp +++ b/servers/rendering/rendering_context_driver.cpp @@ -1,5 +1,5 @@ /**************************************************************************/ -/* api_context_rd.cpp */ +/* rendering_context_driver.cpp */ /**************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -28,6 +28,58 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /**************************************************************************/ -#include "api_context_rd.h" +#include "rendering_context_driver.h" -ApiContextRD::~ApiContextRD() {} +RenderingContextDriver::~RenderingContextDriver() { +} + +RenderingContextDriver::SurfaceID RenderingContextDriver::surface_get_from_window(DisplayServer::WindowID p_window) const { + HashMap<DisplayServer::WindowID, SurfaceID>::ConstIterator it = window_surface_map.find(p_window); + if (it != window_surface_map.end()) { + return it->value; + } else { + return SurfaceID(); + } +} + +Error RenderingContextDriver::window_create(DisplayServer::WindowID p_window, const void *p_platform_data) { + SurfaceID surface = surface_create(p_platform_data); + if (surface != 0) { + window_surface_map[p_window] = surface; + return OK; + } else { + return ERR_CANT_CREATE; + } +} + +void RenderingContextDriver::window_set_size(DisplayServer::WindowID p_window, uint32_t p_width, uint32_t p_height) { + SurfaceID surface = surface_get_from_window(p_window); + if (surface) { + surface_set_size(surface, p_width, p_height); + } +} + +void RenderingContextDriver::window_set_vsync_mode(DisplayServer::WindowID p_window, DisplayServer::VSyncMode p_vsync_mode) { + SurfaceID surface = surface_get_from_window(p_window); + if (surface) { + surface_set_vsync_mode(surface, p_vsync_mode); + } +} + +DisplayServer::VSyncMode RenderingContextDriver::window_get_vsync_mode(DisplayServer::WindowID p_window) const { + SurfaceID surface = surface_get_from_window(p_window); + if (surface) { + return surface_get_vsync_mode(surface); + } else { + return DisplayServer::VSYNC_DISABLED; + } +} + +void RenderingContextDriver::window_destroy(DisplayServer::WindowID p_window) { + SurfaceID surface = surface_get_from_window(p_window); + if (surface) { + surface_destroy(surface); + } + + window_surface_map.erase(p_window); +} diff --git a/servers/rendering/rendering_context_driver.h b/servers/rendering/rendering_context_driver.h new file mode 100644 index 0000000000..df1424da95 --- /dev/null +++ b/servers/rendering/rendering_context_driver.h @@ -0,0 +1,101 @@ +/**************************************************************************/ +/* rendering_context_driver.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef RENDERING_CONTEXT_DRIVER_H +#define RENDERING_CONTEXT_DRIVER_H + +#include "core/object/object.h" +#include "servers/display_server.h" + +class RenderingDeviceDriver; + +class RenderingContextDriver { +public: + typedef uint64_t SurfaceID; + +private: + HashMap<DisplayServer::WindowID, SurfaceID> window_surface_map; + +public: + SurfaceID surface_get_from_window(DisplayServer::WindowID p_window) const; + Error window_create(DisplayServer::WindowID p_window, const void *p_platform_data); + void window_set_size(DisplayServer::WindowID p_window, uint32_t p_width, uint32_t p_height); + void window_set_vsync_mode(DisplayServer::WindowID p_window, DisplayServer::VSyncMode p_vsync_mode); + DisplayServer::VSyncMode window_get_vsync_mode(DisplayServer::WindowID p_window) const; + void window_destroy(DisplayServer::WindowID p_window); + +public: + enum Vendor { + VENDOR_UNKNOWN = 0x0, + VENDOR_AMD = 0x1002, + VENDOR_IMGTEC = 0x1010, + VENDOR_APPLE = 0x106B, + VENDOR_NVIDIA = 0x10DE, + VENDOR_ARM = 0x13B5, + VENDOR_MICROSOFT = 0x1414, + VENDOR_QUALCOMM = 0x5143, + VENDOR_INTEL = 0x8086 + }; + + enum DeviceType { + DEVICE_TYPE_OTHER = 0x0, + DEVICE_TYPE_INTEGRATED_GPU = 0x1, + DEVICE_TYPE_DISCRETE_GPU = 0x2, + DEVICE_TYPE_VIRTUAL_GPU = 0x3, + DEVICE_TYPE_CPU = 0x4, + DEVICE_TYPE_MAX = 0x5 + }; + + struct Device { + String name = "Unknown"; + Vendor vendor = VENDOR_UNKNOWN; + DeviceType type = DEVICE_TYPE_OTHER; + }; + + virtual ~RenderingContextDriver(); + virtual Error initialize() = 0; + virtual const Device &device_get(uint32_t p_device_index) const = 0; + virtual uint32_t device_get_count() const = 0; + virtual bool device_supports_present(uint32_t p_device_index, SurfaceID p_surface) const = 0; + virtual RenderingDeviceDriver *driver_create() = 0; + virtual void driver_free(RenderingDeviceDriver *p_driver) = 0; + virtual SurfaceID surface_create(const void *p_platform_data) = 0; + virtual void surface_set_size(SurfaceID p_surface, uint32_t p_width, uint32_t p_height) = 0; + virtual void surface_set_vsync_mode(SurfaceID p_surface, DisplayServer::VSyncMode p_vsync_mode) = 0; + virtual DisplayServer::VSyncMode surface_get_vsync_mode(SurfaceID p_surface) const = 0; + virtual uint32_t surface_get_width(SurfaceID p_surface) const = 0; + virtual uint32_t surface_get_height(SurfaceID p_surface) const = 0; + virtual void surface_set_needs_resize(SurfaceID p_surface, bool p_needs_resize) = 0; + virtual bool surface_get_needs_resize(SurfaceID p_surface) const = 0; + virtual void surface_destroy(SurfaceID p_surface) = 0; + virtual bool is_debug_utils_enabled() const = 0; +}; + +#endif // RENDERING_CONTEXT_DRIVER_H diff --git a/servers/rendering/rendering_device.compat.inc b/servers/rendering/rendering_device.compat.inc index edc08e972d..ee9481280a 100644 --- a/servers/rendering/rendering_device.compat.inc +++ b/servers/rendering/rendering_device.compat.inc @@ -117,6 +117,10 @@ Error RenderingDevice::_texture_resolve_multisample_bind_compat_84976(RID p_from return texture_resolve_multisample(p_from_texture, p_to_texture); } +RenderingDevice::FramebufferFormatID RenderingDevice::_screen_get_framebuffer_format_bind_compat_87340() const { + return screen_get_framebuffer_format(DisplayServer::MAIN_WINDOW_ID); +} + void RenderingDevice::_bind_compatibility_methods() { ClassDB::bind_compatibility_method(D_METHOD("shader_create_from_bytecode", "binary_data"), &RenderingDevice::_shader_create_from_bytecode_bind_compat_79606); ClassDB::bind_compatibility_method(D_METHOD("draw_list_end", "post_barrier"), &RenderingDevice::_draw_list_end_bind_compat_81356, DEFVAL(7)); @@ -132,6 +136,7 @@ void RenderingDevice::_bind_compatibility_methods() { ClassDB::bind_compatibility_method(D_METHOD("texture_copy", "from_texture", "to_texture", "from_pos", "to_pos", "size", "src_mipmap", "dst_mipmap", "src_layer", "dst_layer", "post_barrier"), &RenderingDevice::_texture_copy_bind_compat_84976, DEFVAL(0x7FFF)); ClassDB::bind_compatibility_method(D_METHOD("texture_clear", "texture", "color", "base_mipmap", "mipmap_count", "base_layer", "layer_count", "post_barrier"), &RenderingDevice::_texture_clear_bind_compat_84976, DEFVAL(0x7FFF)); ClassDB::bind_compatibility_method(D_METHOD("texture_resolve_multisample", "from_texture", "to_texture", "post_barrier"), &RenderingDevice::_texture_resolve_multisample_bind_compat_84976, DEFVAL(0x7FFF)); + ClassDB::bind_compatibility_method(D_METHOD("screen_get_framebuffer_format"), &RenderingDevice::_screen_get_framebuffer_format_bind_compat_87340); } #endif diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 5d65118159..3290738b40 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -35,7 +35,71 @@ #include "core/config/project_settings.h" #include "core/io/dir_access.h" -#include "servers/rendering/renderer_rd/api_context_rd.h" + +#define FORCE_SEPARATE_PRESENT_QUEUE 0 + +/**************************/ +/**** HELPER FUNCTIONS ****/ +/**************************/ + +static String _get_device_vendor_name(const RenderingContextDriver::Device &p_device) { + switch (p_device.vendor) { + case RenderingContextDriver::VENDOR_AMD: + return "AMD"; + case RenderingContextDriver::VENDOR_IMGTEC: + return "ImgTec"; + case RenderingContextDriver::VENDOR_APPLE: + return "Apple"; + case RenderingContextDriver::VENDOR_NVIDIA: + return "NVIDIA"; + case RenderingContextDriver::VENDOR_ARM: + return "ARM"; + case RenderingContextDriver::VENDOR_MICROSOFT: + return "Microsoft"; + case RenderingContextDriver::VENDOR_QUALCOMM: + return "Qualcomm"; + case RenderingContextDriver::VENDOR_INTEL: + return "Intel"; + default: + return "Unknown"; + } +} + +static String _get_device_type_name(const RenderingContextDriver::Device &p_device) { + switch (p_device.type) { + case RenderingContextDriver::DEVICE_TYPE_INTEGRATED_GPU: + return "Integrated"; + case RenderingContextDriver::DEVICE_TYPE_DISCRETE_GPU: + return "Discrete"; + case RenderingContextDriver::DEVICE_TYPE_VIRTUAL_GPU: + return "Virtual"; + case RenderingContextDriver::DEVICE_TYPE_CPU: + return "CPU"; + case RenderingContextDriver::DEVICE_TYPE_OTHER: + default: + return "Other"; + } +} + +static uint32_t _get_device_type_score(const RenderingContextDriver::Device &p_device) { + switch (p_device.type) { + case RenderingContextDriver::DEVICE_TYPE_INTEGRATED_GPU: + return 4; + case RenderingContextDriver::DEVICE_TYPE_DISCRETE_GPU: + return 5; + case RenderingContextDriver::DEVICE_TYPE_VIRTUAL_GPU: + return 3; + case RenderingContextDriver::DEVICE_TYPE_CPU: + return 2; + case RenderingContextDriver::DEVICE_TYPE_OTHER: + default: + return 1; + } +} + +/**************************/ +/**** RENDERING DEVICE ****/ +/**************************/ // When true, the command graph will attempt to reorder the rendering commands submitted by the user based on the dependencies detected from // the commands automatically. This should improve rendering performance in most scenarios at the cost of some extra CPU overhead. @@ -240,7 +304,7 @@ Error RenderingDevice::_staging_buffer_allocate(uint32_t p_amount, uint32_t p_re // and this frame is not even done. // If this is the main thread, it means the user is likely loading a lot of resources at once,. // Otherwise, the thread should just be blocked until the next frame (currently unimplemented). - r_required_action = STAGING_REQUIRED_ACTION_FLUSH_CURRENT; + r_required_action = STAGING_REQUIRED_ACTION_FLUSH_AND_STALL_ALL; } } else { @@ -249,7 +313,7 @@ Error RenderingDevice::_staging_buffer_allocate(uint32_t p_amount, uint32_t p_re } } - } else if (staging_buffer_blocks[staging_buffer_current].frame_used <= frames_drawn - frame_count) { + } else if (staging_buffer_blocks[staging_buffer_current].frame_used <= frames_drawn - frames.size()) { // This is an old block, which was already processed, let's reuse. staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn; staging_buffer_blocks.write[staging_buffer_current].fill_amount = 0; @@ -268,7 +332,7 @@ Error RenderingDevice::_staging_buffer_allocate(uint32_t p_amount, uint32_t p_re // Let's flush older frames. // The logic here is that if a game is loading a lot of data from the main thread, it will need to be stalled anyway. // If loading from a separate thread, we can block that thread until next frame when more room is made (not currently implemented, though). - r_required_action = STAGING_REQUIRED_ACTION_FLUSH_OLDER; + r_required_action = STAGING_REQUIRED_ACTION_STALL_PREVIOUS; } } @@ -286,9 +350,8 @@ void RenderingDevice::_staging_buffer_execute_required_action(StagingRequiredAct case STAGING_REQUIRED_ACTION_NONE: { // Do nothing. } break; - case STAGING_REQUIRED_ACTION_FLUSH_CURRENT: { - // Flush EVERYTHING including setup commands. IF not immediate, also need to flush the draw commands. - _flush(true); + case STAGING_REQUIRED_ACTION_FLUSH_AND_STALL_ALL: { + _flush_and_stall_for_all_frames(); // Clear the whole staging buffer. for (int i = 0; i < staging_buffer_blocks.size(); i++) { @@ -299,8 +362,8 @@ void RenderingDevice::_staging_buffer_execute_required_action(StagingRequiredAct // Claim for current frame. staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn; } break; - case STAGING_REQUIRED_ACTION_FLUSH_OLDER: { - _flush(false); + case STAGING_REQUIRED_ACTION_STALL_PREVIOUS: { + _stall_for_previous_frames(); for (int i = 0; i < staging_buffer_blocks.size(); i++) { // Clear all blocks but the ones from this frame. @@ -340,7 +403,7 @@ Error RenderingDevice::_buffer_update(Buffer *p_buffer, RID p_buffer_id, size_t return err; } - if (p_use_draw_queue && !command_buffer_copies_vector.is_empty() && required_action == STAGING_REQUIRED_ACTION_FLUSH_CURRENT) { + if (p_use_draw_queue && !command_buffer_copies_vector.is_empty() && required_action == STAGING_REQUIRED_ACTION_FLUSH_AND_STALL_ALL) { if (_buffer_make_mutable(p_buffer, p_buffer_id)) { // The buffer must be mutable to be used as a copy destination. draw_graph.add_synchronization(); @@ -507,7 +570,7 @@ Vector<uint8_t> RenderingDevice::buffer_get_data(RID p_buffer, uint32_t p_offset draw_graph.add_buffer_get_data(buffer->driver_id, buffer->draw_tracker, tmp_buffer, region); // Flush everything so memory can be safely mapped. - _flush(true); + _flush_and_stall_for_all_frames(); uint8_t *buffer_mem = driver->buffer_map(tmp_buffer); ERR_FAIL_NULL_V(buffer_mem, Vector<uint8_t>()); @@ -1078,7 +1141,7 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, required_action, false); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - if (!p_use_setup_queue && !command_buffer_to_texture_copies_vector.is_empty() && required_action == STAGING_REQUIRED_ACTION_FLUSH_CURRENT) { + if (!p_use_setup_queue && !command_buffer_to_texture_copies_vector.is_empty() && required_action == STAGING_REQUIRED_ACTION_FLUSH_AND_STALL_ALL) { if (_texture_make_mutable(texture, p_texture)) { // The texture must be mutable to be used as a copy destination. draw_graph.add_synchronization(); @@ -1314,7 +1377,8 @@ Vector<uint8_t> RenderingDevice::texture_get_data(RID p_texture, uint32_t p_laye draw_graph.add_texture_get_data(tex->driver_id, tex->draw_tracker, tmp_buffer, command_buffer_texture_copy_regions_vector); - _flush(true); + // Flush everything so memory can be safely mapped. + _flush_and_stall_for_all_frames(); const uint8_t *read_ptr = driver->buffer_map(tmp_buffer); ERR_FAIL_NULL_V(read_ptr, Vector<uint8_t>()); @@ -2976,7 +3040,7 @@ RID RenderingDevice::render_pipeline_create(RID p_shader, FramebufferFormatID p_ p_specialization_constants); ERR_FAIL_COND_V(!pipeline.driver_id, RID()); - if (pipelines_cache_enabled) { + if (pipeline_cache_enabled) { _update_pipeline_cache(); } @@ -3053,7 +3117,7 @@ RID RenderingDevice::compute_pipeline_create(RID p_shader, const Vector<Pipeline pipeline.driver_id = driver->compute_pipeline_create(shader->driver_id, p_specialization_constants); ERR_FAIL_COND_V(!pipeline.driver_id, RID()); - if (pipelines_cache_enabled) { + if (pipeline_cache_enabled) { _update_pipeline_cache(); } @@ -3084,23 +3148,95 @@ bool RenderingDevice::compute_pipeline_is_valid(RID p_pipeline) { /**** SCREEN ****/ /****************/ +uint32_t RenderingDevice::_get_swap_chain_desired_count() const { + return MAX(2U, uint32_t(GLOBAL_GET("rendering/rendering_device/vsync/swapchain_image_count"))); +} + +Error RenderingDevice::screen_create(DisplayServer::WindowID p_screen) { + _THREAD_SAFE_METHOD_ + + RenderingContextDriver::SurfaceID surface = context->surface_get_from_window(p_screen); + ERR_FAIL_COND_V_MSG(surface == 0, ERR_CANT_CREATE, "A surface was not created for the screen."); + + HashMap<DisplayServer::WindowID, RDD::SwapChainID>::ConstIterator it = screen_swap_chains.find(p_screen); + ERR_FAIL_COND_V_MSG(it != screen_swap_chains.end(), ERR_CANT_CREATE, "A swap chain was already created for the screen."); + + RDD::SwapChainID swap_chain = driver->swap_chain_create(surface); + ERR_FAIL_COND_V_MSG(swap_chain.id == 0, ERR_CANT_CREATE, "Unable to create swap chain."); + + Error err = driver->swap_chain_resize(main_queue, swap_chain, _get_swap_chain_desired_count()); + ERR_FAIL_COND_V_MSG(err != OK, ERR_CANT_CREATE, "Unable to resize the new swap chain."); + + screen_swap_chains[p_screen] = swap_chain; + + return OK; +} + +Error RenderingDevice::screen_prepare_for_drawing(DisplayServer::WindowID p_screen) { + _THREAD_SAFE_METHOD_ + + HashMap<DisplayServer::WindowID, RDD::SwapChainID>::ConstIterator it = screen_swap_chains.find(p_screen); + ERR_FAIL_COND_V_MSG(it == screen_swap_chains.end(), ERR_CANT_CREATE, "A swap chain was not created for the screen."); + + // Erase the framebuffer corresponding to this screen from the map in case any of the operations fail. + screen_framebuffers.erase(p_screen); + + // If this frame has already queued this swap chain for presentation, we present it and remove it from the pending list. + uint32_t to_present_index = 0; + while (to_present_index < frames[frame].swap_chains_to_present.size()) { + if (frames[frame].swap_chains_to_present[to_present_index] == it->value) { + driver->command_queue_present(present_queue, it->value, {}); + frames[frame].swap_chains_to_present.remove_at(to_present_index); + } else { + to_present_index++; + } + } + + bool resize_required = false; + RDD::FramebufferID framebuffer = driver->swap_chain_acquire_framebuffer(main_queue, it->value, resize_required); + if (resize_required) { + // Flush everything so nothing can be using the swap chain before resizing it. + _flush_and_stall_for_all_frames(); + + Error err = driver->swap_chain_resize(main_queue, it->value, _get_swap_chain_desired_count()); + if (err != OK) { + // Resize is allowed to fail silently because the window can be minimized. + return err; + } + + framebuffer = driver->swap_chain_acquire_framebuffer(main_queue, it->value, resize_required); + } + + ERR_FAIL_COND_V_MSG(framebuffer.id == 0, FAILED, "Unable to acquire framebuffer."); + + // Store the framebuffer that will be used next to draw to this screen. + screen_framebuffers[p_screen] = framebuffer; + frames[frame].swap_chains_to_present.push_back(it->value); + + return OK; +} + int RenderingDevice::screen_get_width(DisplayServer::WindowID p_screen) const { _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V_MSG(local_device.is_valid(), -1, "Local devices have no screen"); - return context->window_get_width(p_screen); + RenderingContextDriver::SurfaceID surface = context->surface_get_from_window(p_screen); + ERR_FAIL_COND_V_MSG(surface == 0, 0, "A surface was not created for the screen."); + return context->surface_get_width(surface); } int RenderingDevice::screen_get_height(DisplayServer::WindowID p_screen) const { _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V_MSG(local_device.is_valid(), -1, "Local devices have no screen"); - return context->window_get_height(p_screen); + RenderingContextDriver::SurfaceID surface = context->surface_get_from_window(p_screen); + ERR_FAIL_COND_V_MSG(surface == 0, 0, "A surface was not created for the screen."); + return context->surface_get_height(surface); } -RenderingDevice::FramebufferFormatID RenderingDevice::screen_get_framebuffer_format() const { +RenderingDevice::FramebufferFormatID RenderingDevice::screen_get_framebuffer_format(DisplayServer::WindowID p_screen) const { _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V_MSG(local_device.is_valid(), INVALID_ID, "Local devices have no screen"); - DataFormat format = driver->screen_get_format(); + HashMap<DisplayServer::WindowID, RDD::SwapChainID>::ConstIterator it = screen_swap_chains.find(p_screen); + ERR_FAIL_COND_V_MSG(it == screen_swap_chains.end(), FAILED, "Screen was never prepared."); + + DataFormat format = driver->swap_chain_get_format(it->value); ERR_FAIL_COND_V(format == DATA_FORMAT_MAX, INVALID_ID); AttachmentFormat attachment; @@ -3112,33 +3248,54 @@ RenderingDevice::FramebufferFormatID RenderingDevice::screen_get_framebuffer_for return const_cast<RenderingDevice *>(this)->framebuffer_format_create(screen_attachment); } +Error RenderingDevice::screen_free(DisplayServer::WindowID p_screen) { + _THREAD_SAFE_METHOD_ + + HashMap<DisplayServer::WindowID, RDD::SwapChainID>::ConstIterator it = screen_swap_chains.find(p_screen); + ERR_FAIL_COND_V_MSG(it == screen_swap_chains.end(), FAILED, "Screen was never created."); + + // Flush everything so nothing can be using the swap chain before erasing it. + _flush_and_stall_for_all_frames(); + + const DisplayServer::WindowID screen = it->key; + const RDD::SwapChainID swap_chain = it->value; + driver->swap_chain_free(swap_chain); + screen_framebuffers.erase(screen); + screen_swap_chains.erase(screen); + + return OK; +} + /*******************/ /**** DRAW LIST ****/ /*******************/ RenderingDevice::DrawListID RenderingDevice::draw_list_begin_for_screen(DisplayServer::WindowID p_screen, const Color &p_clear_color) { _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V_MSG(local_device.is_valid(), INVALID_ID, "Local devices have no screen"); ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time."); - if (!context->window_is_valid_swapchain(p_screen)) { - return INVALID_ID; - } + RenderingContextDriver::SurfaceID surface = context->surface_get_from_window(p_screen); + HashMap<DisplayServer::WindowID, RDD::SwapChainID>::ConstIterator sc_it = screen_swap_chains.find(p_screen); + HashMap<DisplayServer::WindowID, RDD::FramebufferID>::ConstIterator fb_it = screen_framebuffers.find(p_screen); + ERR_FAIL_COND_V_MSG(surface == 0, 0, "A surface was not created for the screen."); + ERR_FAIL_COND_V_MSG(sc_it == screen_swap_chains.end(), INVALID_ID, "Screen was never prepared."); + ERR_FAIL_COND_V_MSG(fb_it == screen_framebuffers.end(), INVALID_ID, "Framebuffer was never prepared."); - Rect2i viewport = Rect2i(0, 0, context->window_get_width(p_screen), context->window_get_height(p_screen)); + Rect2i viewport = Rect2i(0, 0, context->surface_get_width(surface), context->surface_get_height(surface)); _draw_list_allocate(viewport, 0); #ifdef DEBUG_ENABLED - draw_list_framebuffer_format = screen_get_framebuffer_format(); + draw_list_framebuffer_format = screen_get_framebuffer_format(p_screen); #endif draw_list_subpass_count = 1; RDD::RenderPassClearValue clear_value; clear_value.color = p_clear_color; - draw_graph.add_draw_list_begin(context->window_get_render_pass(p_screen), context->window_get_framebuffer(p_screen), viewport, clear_value, true, false); + RDD::RenderPassID render_pass = driver->swap_chain_get_render_pass(sc_it->value); + draw_graph.add_draw_list_begin(render_pass, fb_it->value, viewport, clear_value, true, false); _draw_list_set_viewport(viewport); _draw_list_set_scissor(viewport); @@ -3775,7 +3932,6 @@ Error RenderingDevice::_draw_list_allocate(const Rect2i &p_viewport, uint32_t p_ draw_list = memnew(DrawList); draw_list->viewport = p_viewport; - draw_list_count = 0; return OK; } @@ -4533,130 +4689,50 @@ void RenderingDevice::draw_command_end_label() { } String RenderingDevice::get_device_vendor_name() const { - return context->get_device_vendor_name(); + return _get_device_vendor_name(device); } String RenderingDevice::get_device_name() const { - return context->get_device_name(); + return device.name; } RenderingDevice::DeviceType RenderingDevice::get_device_type() const { - return context->get_device_type(); + return DeviceType(device.type); } -String RenderingDevice::get_device_api_version() const { - return context->get_device_api_version(); -} - -String RenderingDevice::get_device_pipeline_cache_uuid() const { - return context->get_device_pipeline_cache_uuid(); +String RenderingDevice::get_device_api_name() const { + return driver->get_api_name(); } -void RenderingDevice::_finalize_command_buffers(bool p_postpare) { - if (draw_list) { - ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work)."); - } - - if (compute_list) { - ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work)."); - } - - { // Complete the setup buffer (that needs to be processed before anything else). - draw_graph.end(frames[frame].draw_command_buffer, RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS); - - if (p_postpare) { - context->postpare_buffers(frames[frame].draw_command_buffer); - } - - driver->end_segment(); - driver->command_buffer_end(frames[frame].setup_command_buffer); - driver->command_buffer_end(frames[frame].draw_command_buffer); - } +String RenderingDevice::get_device_api_version() const { + return driver->get_api_version(); } -void RenderingDevice::_begin_frame() { - draw_graph.begin(); - - // Erase pending resources. - _free_pending_resources(frame); - - // Create setup command buffer and set as the setup buffer. - - { - bool ok = driver->command_buffer_begin(frames[frame].setup_command_buffer); - ERR_FAIL_COND(!ok); - ok = driver->command_buffer_begin(frames[frame].draw_command_buffer); - ERR_FAIL_COND(!ok); - - if (local_device.is_null()) { - context->append_command_buffer(frames[frame].draw_command_buffer); - context->set_setup_buffer(frames[frame].setup_command_buffer); // Append now so it's added before everything else. - } - - driver->begin_segment(frames[frame].draw_command_buffer, frame, frames_drawn); - } - - // Advance current frame. - frames_drawn++; - // Advance staging buffer if used. - if (staging_buffer_used) { - staging_buffer_current = (staging_buffer_current + 1) % staging_buffer_blocks.size(); - staging_buffer_used = false; - } - - if (frames[frame].timestamp_count) { - driver->timestamp_query_pool_get_results(frames[frame].timestamp_pool, frames[frame].timestamp_count, frames[frame].timestamp_result_values.ptr()); - driver->command_timestamp_query_pool_reset(frames[frame].setup_command_buffer, frames[frame].timestamp_pool, frames[frame].timestamp_count); - SWAP(frames[frame].timestamp_names, frames[frame].timestamp_result_names); - SWAP(frames[frame].timestamp_cpu_values, frames[frame].timestamp_cpu_result_values); - } - - frames[frame].timestamp_result_count = frames[frame].timestamp_count; - frames[frame].timestamp_count = 0; - frames[frame].index = Engine::get_singleton()->get_frames_drawn(); +String RenderingDevice::get_device_pipeline_cache_uuid() const { + return driver->get_pipeline_cache_uuid(); } void RenderingDevice::swap_buffers() { - ERR_FAIL_COND_MSG(local_device.is_valid(), "Local devices can't swap buffers."); _THREAD_SAFE_METHOD_ - _finalize_command_buffers(true); - - // Swap buffers. - if (!screen_prepared) { - context->flush(true, true, false); - } else { - screen_prepared = false; - context->swap_buffers(); - } - - frame = (frame + 1) % frame_count; + _end_frame(); + _execute_frame(true); + _present_frame(); + // Advance to the next frame and begin recording again. + frame = (frame + 1) % frames.size(); _begin_frame(); } void RenderingDevice::submit() { _THREAD_SAFE_METHOD_ - - ERR_FAIL_COND_MSG(local_device.is_null(), "Only local devices can submit and sync."); - ERR_FAIL_COND_MSG(local_device_processing, "device already submitted, call sync to wait until done."); - - _finalize_command_buffers(false); - - RDD::CommandBufferID command_buffers[2] = { frames[frame].setup_command_buffer, frames[frame].draw_command_buffer }; - context->local_device_push_command_buffers(local_device, command_buffers, 2); - local_device_processing = true; + _end_frame(); + _execute_frame(false); } void RenderingDevice::sync() { _THREAD_SAFE_METHOD_ - - ERR_FAIL_COND_MSG(local_device.is_null(), "Only local devices can submit and sync."); - ERR_FAIL_COND_MSG(!local_device_processing, "sync can only be called after a submit"); - - context->local_device_sync(local_device); _begin_frame(); - local_device_processing = false; } void RenderingDevice::_free_pending_resources(int p_frame) { @@ -4741,14 +4817,8 @@ void RenderingDevice::_free_pending_resources(int p_frame) { } } -void RenderingDevice::prepare_screen_for_drawing() { - _THREAD_SAFE_METHOD_ - context->prepare_buffers(frames[frame].draw_command_buffer); - screen_prepared = true; -} - uint32_t RenderingDevice::get_frame_delay() const { - return frame_count; + return frames.size(); } uint64_t RenderingDevice::get_memory_usage(MemoryType p_type) const { @@ -4769,113 +4839,239 @@ uint64_t RenderingDevice::get_memory_usage(MemoryType p_type) const { } } -void RenderingDevice::_flush(bool p_current_frame) { - if (local_device.is_valid() && !p_current_frame) { - return; // Flushing previous frames has no effect with local device. +void RenderingDevice::_begin_frame() { + // Before beginning this frame, wait on the fence if it was signaled to make sure its work is finished. + if (frames[frame].draw_fence_signaled) { + driver->fence_wait(frames[frame].draw_fence); + frames[frame].draw_fence_signaled = false; + } + + // Begin recording on the frame's command buffers. + driver->begin_segment(frame, frames_drawn++); + driver->command_buffer_begin(frames[frame].setup_command_buffer); + driver->command_buffer_begin(frames[frame].draw_command_buffer); + + // Reset the graph. + draw_graph.begin(); + + // Erase pending resources. + _free_pending_resources(frame); + + // Advance staging buffer if used. + if (staging_buffer_used) { + staging_buffer_current = (staging_buffer_current + 1) % staging_buffer_blocks.size(); + staging_buffer_used = false; + } + + if (frames[frame].timestamp_count) { + driver->timestamp_query_pool_get_results(frames[frame].timestamp_pool, frames[frame].timestamp_count, frames[frame].timestamp_result_values.ptr()); + driver->command_timestamp_query_pool_reset(frames[frame].setup_command_buffer, frames[frame].timestamp_pool, frames[frame].timestamp_count); + SWAP(frames[frame].timestamp_names, frames[frame].timestamp_result_names); + SWAP(frames[frame].timestamp_cpu_values, frames[frame].timestamp_cpu_result_values); + } + + frames[frame].timestamp_result_count = frames[frame].timestamp_count; + frames[frame].timestamp_count = 0; + frames[frame].index = Engine::get_singleton()->get_frames_drawn(); +} + +void RenderingDevice::_end_frame() { + if (draw_list) { + ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work)."); } - // Not doing this crashes RADV (undefined behavior). - if (p_current_frame) { - draw_graph.end(frames[frame].draw_command_buffer, RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS); - driver->end_segment(); - driver->command_buffer_end(frames[frame].setup_command_buffer); - driver->command_buffer_end(frames[frame].draw_command_buffer); - draw_graph.begin(); + if (compute_list) { + ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work)."); } - if (local_device.is_valid()) { - RDD::CommandBufferID command_buffers[2] = { frames[frame].setup_command_buffer, frames[frame].draw_command_buffer }; - context->local_device_push_command_buffers(local_device, command_buffers, 2); - context->local_device_sync(local_device); + draw_graph.end(frames[frame].draw_command_buffer, RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS); + driver->command_buffer_end(frames[frame].setup_command_buffer); + driver->command_buffer_end(frames[frame].draw_command_buffer); + driver->end_segment(); +} - bool ok = driver->command_buffer_begin(frames[frame].setup_command_buffer); - ERR_FAIL_COND(!ok); - ok = driver->command_buffer_begin(frames[frame].draw_command_buffer); - ERR_FAIL_COND(!ok); +void RenderingDevice::_execute_frame(bool p_signal_for_present) { + const bool frame_can_present = !frames[frame].swap_chains_to_present.is_empty(); + const VectorView<RDD::SemaphoreID> execute_draw_semaphore = p_signal_for_present && frame_can_present ? frames[frame].draw_semaphore : VectorView<RDD::SemaphoreID>(); + driver->command_queue_execute(main_queue, frames[frame].setup_command_buffer, {}, frames[frame].setup_semaphore, {}); + driver->command_queue_execute(main_queue, frames[frame].draw_command_buffer, frames[frame].setup_semaphore, execute_draw_semaphore, frames[frame].draw_fence); + frames[frame].draw_fence_signaled = true; +} - driver->begin_segment(frames[frame].draw_command_buffer, frame, frames_drawn); - } else { - context->flush(p_current_frame, p_current_frame); - // Re-create the setup command. - if (p_current_frame) { - bool ok = driver->command_buffer_begin(frames[frame].setup_command_buffer); - ERR_FAIL_COND(!ok); - - context->set_setup_buffer(frames[frame].setup_command_buffer); // Append now so it's added before everything else. - ok = driver->command_buffer_begin(frames[frame].draw_command_buffer); - ERR_FAIL_COND(!ok); - context->append_command_buffer(frames[frame].draw_command_buffer); - - driver->begin_segment(frames[frame].draw_command_buffer, frame, frames_drawn); +void RenderingDevice::_present_frame() { + if (!frames[frame].swap_chains_to_present.is_empty()) { + driver->command_queue_present(present_queue, frames[frame].swap_chains_to_present, frames[frame].draw_semaphore); + frames[frame].swap_chains_to_present.clear(); + } +} + +void RenderingDevice::_stall_for_previous_frames() { + for (uint32_t i = 0; i < frames.size(); i++) { + if (frames[i].draw_fence_signaled) { + driver->fence_wait(frames[i].draw_fence); + frames[i].draw_fence_signaled = false; } } } -void RenderingDevice::initialize(ApiContextRD *p_context, bool p_local_device) { +void RenderingDevice::_flush_and_stall_for_all_frames() { + _stall_for_previous_frames(); + _end_frame(); + _execute_frame(false); + _begin_frame(); +} + +Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServer::WindowID p_main_window) { + Error err; + + RenderingContextDriver::SurfaceID main_surface = 0; + const bool main_instance = (singleton == this) && (p_main_window != DisplayServer::INVALID_WINDOW_ID); + if (p_main_window != DisplayServer::INVALID_WINDOW_ID) { + // Retrieve the surface from the main window if it was specified. + main_surface = p_context->surface_get_from_window(p_main_window); + ERR_FAIL_COND_V(main_surface == 0, FAILED); + } + context = p_context; + driver = context->driver_create(); + + print_verbose("Devices:"); + int32_t device_index = Engine::get_singleton()->get_gpu_index(); + const uint32_t device_count = context->device_get_count(); + const bool detect_device = (device_index < 0) || (device_index >= int32_t(device_count)); + uint32_t device_type_score = 0; + for (uint32_t i = 0; i < device_count; i++) { + RenderingContextDriver::Device device_option = context->device_get(i); + String name = device_option.name; + String vendor = _get_device_vendor_name(device_option); + String type = _get_device_type_name(device_option); + bool present_supported = main_surface != 0 ? context->device_supports_present(i, main_surface) : false; + print_verbose(" #" + itos(i) + ": " + vendor + " " + name + " - " + (present_supported ? "Supported" : "Unsupported") + ", " + type); + if (detect_device && (present_supported || main_surface == 0)) { + // If a window was specified, present must be supported by the device to be available as an option. + // Assign a score for each type of device and prefer the device with the higher score. + uint32_t option_score = _get_device_type_score(device_option); + if (option_score > device_type_score) { + device_index = i; + device_type_score = option_score; + } + } + } - device_capabilities = p_context->get_device_capabilities(); + ERR_FAIL_COND_V_MSG((device_index < 0) || (device_index >= int32_t(device_count)), ERR_CANT_CREATE, "None of the devices supports both graphics and present queues."); - if (p_local_device) { - frame_count = 1; - local_device = context->local_device_create(); - } else { - frame_count = context->get_swapchain_image_count() + 1; // Always need one extra to ensure it's unused at any time, without having to use a fence for this. + uint32_t frame_count = 1; + if (main_surface != 0) { + frame_count = MAX(2U, uint32_t(GLOBAL_GET("rendering/rendering_device/vsync/frame_queue_size"))); } - driver = context->get_driver(local_device); - max_timestamp_query_elements = 256; - frames.resize(frame_count); frame = 0; - // Create setup and frame buffers. - for (int i = 0; i < frame_count; i++) { - frames[i].index = 0; - - // Create command pool, one per frame is recommended. - frames[i].command_pool = driver->command_pool_create(RDD::COMMAND_BUFFER_TYPE_PRIMARY); - ERR_FAIL_COND(!frames[i].command_pool); + frames.resize(frame_count); + max_timestamp_query_elements = 256; - // Create command buffers. - frames[i].setup_command_buffer = driver->command_buffer_create(RDD::COMMAND_BUFFER_TYPE_PRIMARY, frames[i].command_pool); - ERR_CONTINUE(!frames[i].setup_command_buffer); - frames[i].draw_command_buffer = driver->command_buffer_create(RDD::COMMAND_BUFFER_TYPE_PRIMARY, frames[i].command_pool); - ERR_CONTINUE(!frames[i].draw_command_buffer); + device = context->device_get(device_index); + err = driver->initialize(device_index, frame_count); + ERR_FAIL_COND_V_MSG(err != OK, FAILED, "Failed to initialize driver for device."); - { - // Create query pool. - frames[i].timestamp_pool = driver->timestamp_query_pool_create(max_timestamp_query_elements); - frames[i].timestamp_names.resize(max_timestamp_query_elements); - frames[i].timestamp_cpu_values.resize(max_timestamp_query_elements); - frames[i].timestamp_count = 0; - frames[i].timestamp_result_names.resize(max_timestamp_query_elements); - frames[i].timestamp_cpu_result_values.resize(max_timestamp_query_elements); - frames[i].timestamp_result_values.resize(max_timestamp_query_elements); - frames[i].timestamp_result_count = 0; + if (main_instance) { + // Only the singleton instance with a display should print this information. + String rendering_method; + if (OS::get_singleton()->get_current_rendering_method() == "mobile") { + rendering_method = "Forward Mobile"; + } else { + rendering_method = "Forward+"; } + + // Output our device version. + print_line(vformat("%s %s - %s - Using Device #%d: %s - %s", get_device_api_name(), get_device_api_version(), rendering_method, device_index, _get_device_vendor_name(device), device.name)); } + // Pick the main queue family. It is worth noting we explicitly do not request the transfer bit, as apparently the specification defines + // that the existence of either the graphics or compute bit implies that the queue can also do transfer operations, but it is optional + // to indicate whether it supports them or not with the dedicated transfer bit if either is set. + BitField<RDD::CommandQueueFamilyBits> main_queue_bits; + main_queue_bits.set_flag(RDD::COMMAND_QUEUE_FAMILY_GRAPHICS_BIT); + main_queue_bits.set_flag(RDD::COMMAND_QUEUE_FAMILY_COMPUTE_BIT); + +#if !FORCE_SEPARATE_PRESENT_QUEUE + // Needing to use a separate queue for presentation is an edge case that remains to be seen what hardware triggers it at all. + main_queue_family = driver->command_queue_family_get(main_queue_bits, main_surface); + if (!main_queue_family && (main_surface != 0)) +#endif { - // Begin the first command buffer for the first frame, so - // setting up things can be done in the meantime until swap_buffers(), which is called before advance. - bool ok = driver->command_buffer_begin(frames[0].setup_command_buffer); - ERR_FAIL_COND(!ok); - - ok = driver->command_buffer_begin(frames[0].draw_command_buffer); - ERR_FAIL_COND(!ok); - if (local_device.is_null()) { - context->set_setup_buffer(frames[0].setup_command_buffer); // Append now so it's added before everything else. - context->append_command_buffer(frames[0].draw_command_buffer); - } + // If it was not possible to find a main queue that supports the surface, we attempt to get two different queues instead. + main_queue_family = driver->command_queue_family_get(main_queue_bits); + present_queue_family = driver->command_queue_family_get(BitField<RDD::CommandQueueFamilyBits>(), main_surface); + ERR_FAIL_COND_V(!present_queue_family, FAILED); } - for (int i = 0; i < frame_count; i++) { - // Reset all queries in a query pool before doing any operations with them. + ERR_FAIL_COND_V(!main_queue_family, FAILED); + + // Create the main queue. + main_queue = driver->command_queue_create(main_queue_family, true); + ERR_FAIL_COND_V(!main_queue, FAILED); + + if (present_queue_family) { + // Create the presentation queue. + present_queue = driver->command_queue_create(present_queue_family); + ERR_FAIL_COND_V(!present_queue, FAILED); + } else { + present_queue = main_queue; + } + + // Create data for all the frames. + for (uint32_t i = 0; i < frames.size(); i++) { + frames[i].index = 0; + + // Create command pool, command buffers, semaphores and fences. + frames[i].command_pool = driver->command_pool_create(main_queue_family, RDD::COMMAND_BUFFER_TYPE_PRIMARY); + ERR_FAIL_COND_V(!frames[i].command_pool, FAILED); + frames[i].setup_command_buffer = driver->command_buffer_create(frames[i].command_pool); + ERR_FAIL_COND_V(!frames[i].setup_command_buffer, FAILED); + frames[i].draw_command_buffer = driver->command_buffer_create(frames[i].command_pool); + ERR_FAIL_COND_V(!frames[i].draw_command_buffer, FAILED); + frames[i].setup_semaphore = driver->semaphore_create(); + ERR_FAIL_COND_V(!frames[i].setup_semaphore, FAILED); + frames[i].draw_semaphore = driver->semaphore_create(); + ERR_FAIL_COND_V(!frames[i].draw_semaphore, FAILED); + frames[i].draw_fence = driver->fence_create(); + ERR_FAIL_COND_V(!frames[i].draw_fence, FAILED); + frames[i].draw_fence_signaled = false; + + // Create query pool. + frames[i].timestamp_pool = driver->timestamp_query_pool_create(max_timestamp_query_elements); + frames[i].timestamp_names.resize(max_timestamp_query_elements); + frames[i].timestamp_cpu_values.resize(max_timestamp_query_elements); + frames[i].timestamp_count = 0; + frames[i].timestamp_result_names.resize(max_timestamp_query_elements); + frames[i].timestamp_cpu_result_values.resize(max_timestamp_query_elements); + frames[i].timestamp_result_values.resize(max_timestamp_query_elements); + frames[i].timestamp_result_count = 0; + } + + // Start from frame count, so everything else is immediately old. + frames_drawn = frames.size(); + + // Initialize recording on the first frame. + driver->begin_segment(frame, frames_drawn++); + driver->command_buffer_begin(frames[0].setup_command_buffer); + driver->command_buffer_begin(frames[0].draw_command_buffer); + + // Create draw graph and start it initialized as well. + draw_graph.initialize(driver, frames.size(), main_queue_family, SECONDARY_COMMAND_BUFFERS_PER_FRAME); + draw_graph.begin(); + + for (uint32_t i = 0; i < frames.size(); i++) { + // Reset all queries in a query pool before doing any operations with them.. driver->command_timestamp_query_pool_reset(frames[0].setup_command_buffer, frames[i].timestamp_pool, max_timestamp_query_elements); } + // Convert block size from KB. staging_buffer_block_size = GLOBAL_GET("rendering/rendering_device/staging_buffer/block_size_kb"); staging_buffer_block_size = MAX(4u, staging_buffer_block_size); - staging_buffer_block_size *= 1024; // Kb -> bytes. + staging_buffer_block_size *= 1024; + + // Convert staging buffer size from MB. staging_buffer_max_size = GLOBAL_GET("rendering/rendering_device/staging_buffer/max_size_mb"); staging_buffer_max_size = MAX(1u, staging_buffer_max_size); staging_buffer_max_size *= 1024 * 1024; @@ -4884,49 +5080,50 @@ void RenderingDevice::initialize(ApiContextRD *p_context, bool p_local_device) { // Validate enough blocks. staging_buffer_max_size = staging_buffer_block_size * 4; } + texture_upload_region_size_px = GLOBAL_GET("rendering/rendering_device/staging_buffer/texture_upload_region_size_px"); texture_upload_region_size_px = nearest_power_of_2_templated(texture_upload_region_size_px); - frames_drawn = frame_count; // Start from frame count, so everything else is immediately old. - // Ensure current staging block is valid and at least one per frame exists. staging_buffer_current = 0; staging_buffer_used = false; - for (int i = 0; i < frame_count; i++) { + for (uint32_t i = 0; i < frames.size(); i++) { // Staging was never used, create a block. - Error err = _insert_staging_block(); + err = _insert_staging_block(); ERR_CONTINUE(err != OK); } draw_list = nullptr; - draw_list_count = 0; - compute_list = nullptr; - pipelines_cache_file_path = "user://vulkan/pipelines"; - pipelines_cache_file_path += "." + context->get_device_name().validate_filename().replace(" ", "_").to_lower(); - if (Engine::get_singleton()->is_editor_hint()) { - pipelines_cache_file_path += ".editor"; - } - pipelines_cache_file_path += ".cache"; + if (main_instance) { + // Only the instance that is not a local device and is also the singleton is allowed to manage a pipeline cache. + pipeline_cache_file_path = "user://vulkan/pipelines"; + pipeline_cache_file_path += "." + device.name.validate_filename().replace(" ", "_").to_lower(); + if (Engine::get_singleton()->is_editor_hint()) { + pipeline_cache_file_path += ".editor"; + } + + pipeline_cache_file_path += ".cache"; - Vector<uint8_t> cache_data = _load_pipeline_cache(); - pipelines_cache_enabled = driver->pipeline_cache_create(cache_data); - if (pipelines_cache_enabled) { - pipelines_cache_size = driver->pipeline_cache_query_size(); - print_verbose(vformat("Startup PSO cache (%.1f MiB)", pipelines_cache_size / (1024.0f * 1024.0f))); + Vector<uint8_t> cache_data = _load_pipeline_cache(); + pipeline_cache_enabled = driver->pipeline_cache_create(cache_data); + if (pipeline_cache_enabled) { + pipeline_cache_size = driver->pipeline_cache_query_size(); + print_verbose(vformat("Startup PSO cache (%.1f MiB)", pipeline_cache_size / (1024.0f * 1024.0f))); + } } - draw_graph.initialize(driver, frame_count, SECONDARY_COMMAND_BUFFERS_PER_FRAME); + return OK; } Vector<uint8_t> RenderingDevice::_load_pipeline_cache() { - DirAccess::make_dir_recursive_absolute(pipelines_cache_file_path.get_base_dir()); + DirAccess::make_dir_recursive_absolute(pipeline_cache_file_path.get_base_dir()); - if (FileAccess::exists(pipelines_cache_file_path)) { + if (FileAccess::exists(pipeline_cache_file_path)) { Error file_error; - Vector<uint8_t> file_data = FileAccess::get_file_as_bytes(pipelines_cache_file_path, &file_error); + Vector<uint8_t> file_data = FileAccess::get_file_as_bytes(pipeline_cache_file_path, &file_error); return file_data; } else { return Vector<uint8_t>(); @@ -4935,11 +5132,11 @@ Vector<uint8_t> RenderingDevice::_load_pipeline_cache() { void RenderingDevice::_update_pipeline_cache(bool p_closing) { { - bool still_saving = pipelines_cache_save_task != WorkerThreadPool::INVALID_TASK_ID && !WorkerThreadPool::get_singleton()->is_task_completed(pipelines_cache_save_task); + bool still_saving = pipeline_cache_save_task != WorkerThreadPool::INVALID_TASK_ID && !WorkerThreadPool::get_singleton()->is_task_completed(pipeline_cache_save_task); if (still_saving) { if (p_closing) { - WorkerThreadPool::get_singleton()->wait_for_task_completion(pipelines_cache_save_task); - pipelines_cache_save_task = WorkerThreadPool::INVALID_TASK_ID; + WorkerThreadPool::get_singleton()->wait_for_task_completion(pipeline_cache_save_task); + pipeline_cache_save_task = WorkerThreadPool::INVALID_TASK_ID; } else { // We can't save until the currently running save is done. We'll retry next time; worst case, we'll save when exiting. return; @@ -4950,7 +5147,7 @@ void RenderingDevice::_update_pipeline_cache(bool p_closing) { { size_t new_pipelines_cache_size = driver->pipeline_cache_query_size(); ERR_FAIL_COND(!new_pipelines_cache_size); - size_t difference = new_pipelines_cache_size - pipelines_cache_size; + size_t difference = new_pipelines_cache_size - pipeline_cache_size; bool must_save = false; @@ -4962,7 +5159,7 @@ void RenderingDevice::_update_pipeline_cache(bool p_closing) { } if (must_save) { - pipelines_cache_size = new_pipelines_cache_size; + pipeline_cache_size = new_pipelines_cache_size; } else { return; } @@ -4971,7 +5168,7 @@ void RenderingDevice::_update_pipeline_cache(bool p_closing) { if (p_closing) { _save_pipeline_cache(this); } else { - pipelines_cache_save_task = WorkerThreadPool::get_singleton()->add_native_task(&_save_pipeline_cache, this, false, "PipelineCacheSave"); + pipeline_cache_save_task = WorkerThreadPool::get_singleton()->add_native_task(&_save_pipeline_cache, this, false, "PipelineCacheSave"); } } @@ -4987,7 +5184,7 @@ void RenderingDevice::_save_pipeline_cache(void *p_data) { } print_verbose(vformat("Updated PSO cache (%.1f MiB)", cache_blob.size() / (1024.0f * 1024.0f))); - Ref<FileAccess> f = FileAccess::open(self->pipelines_cache_file_path, FileAccess::WRITE, nullptr); + Ref<FileAccess> f = FileAccess::open(self->pipeline_cache_file_path, FileAccess::WRITE, nullptr); if (f.is_valid()) { f->store_buffer(cache_blob); } @@ -5122,10 +5319,15 @@ uint64_t RenderingDevice::limit_get(Limit p_limit) const { } void RenderingDevice::finalize() { - // Free all resources. + if (!frames.is_empty()) { + // Wait for all frames to have finished rendering. + _flush_and_stall_for_all_frames(); + } - _flush(false); + // Delete everything the graph has created. + draw_graph.finalize(); + // Free all resources. _free_rids(render_pipeline_owner, "Pipeline"); _free_rids(compute_pipeline_owner, "Compute"); _free_rids(uniform_set_owner, "UniformSet"); @@ -5181,9 +5383,12 @@ void RenderingDevice::finalize() { _free_pending_resources(f); driver->command_pool_free(frames[i].command_pool); driver->timestamp_query_pool_free(frames[i].timestamp_pool); + driver->semaphore_free(frames[i].setup_semaphore); + driver->semaphore_free(frames[i].draw_semaphore); + driver->fence_free(frames[i].draw_fence); } - if (pipelines_cache_enabled) { + if (pipeline_cache_enabled) { _update_pipeline_cache(true); driver->pipeline_cache_free(); } @@ -5205,6 +5410,34 @@ void RenderingDevice::finalize() { } framebuffer_formats.clear(); + // Delete the swap chains created for the screens. + for (const KeyValue<DisplayServer::WindowID, RDD::SwapChainID> &it : screen_swap_chains) { + driver->swap_chain_free(it.value); + } + + screen_swap_chains.clear(); + + // Delete the command queues. + if (present_queue) { + if (main_queue != present_queue) { + // Only delete the present queue if it's unique. + driver->command_queue_free(present_queue); + } + + present_queue = RDD::CommandQueueID(); + } + + if (main_queue) { + driver->command_queue_free(main_queue); + main_queue = RDD::CommandQueueID(); + } + + // Delete the driver once everything else has been deleted. + if (driver != nullptr) { + context->driver_free(driver); + driver = nullptr; + } + // All these should be clear at this point. ERR_FAIL_COND(dependency_map.size()); ERR_FAIL_COND(reverse_dependency_map.size()); @@ -5212,7 +5445,7 @@ void RenderingDevice::finalize() { RenderingDevice *RenderingDevice::create_local_device() { RenderingDevice *rd = memnew(RenderingDevice); - rd->initialize(context, true); + rd->initialize(context); return rd; } @@ -5291,7 +5524,7 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("screen_get_width", "screen"), &RenderingDevice::screen_get_width, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); ClassDB::bind_method(D_METHOD("screen_get_height", "screen"), &RenderingDevice::screen_get_height, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); - ClassDB::bind_method(D_METHOD("screen_get_framebuffer_format"), &RenderingDevice::screen_get_framebuffer_format); + ClassDB::bind_method(D_METHOD("screen_get_framebuffer_format", "screen"), &RenderingDevice::screen_get_framebuffer_format, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); ClassDB::bind_method(D_METHOD("draw_list_begin_for_screen", "screen", "clear_color"), &RenderingDevice::draw_list_begin_for_screen, DEFVAL(DisplayServer::MAIN_WINDOW_ID), DEFVAL(Color())); @@ -5889,17 +6122,15 @@ void RenderingDevice::_bind_methods() { } RenderingDevice::~RenderingDevice() { - if (local_device.is_valid()) { - finalize(); - context->local_device_free(local_device); - } + finalize(); + if (singleton == this) { singleton = nullptr; } } RenderingDevice::RenderingDevice() { - if (singleton == nullptr) { // there may be more rendering devices later + if (singleton == nullptr) { singleton = this; } } diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 2ccef66308..f4b7683d6e 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -64,36 +64,11 @@ class RenderingDevice : public RenderingDeviceCommons { _THREAD_SAFE_CLASS_ public: - enum DeviceFamily { - DEVICE_UNKNOWN, - DEVICE_OPENGL, - DEVICE_VULKAN, - DEVICE_DIRECTX, - }; - enum ShaderLanguage { SHADER_LANGUAGE_GLSL, SHADER_LANGUAGE_HLSL }; - enum SubgroupOperations { - SUBGROUP_BASIC_BIT = 1, - SUBGROUP_VOTE_BIT = 2, - SUBGROUP_ARITHMETIC_BIT = 4, - SUBGROUP_BALLOT_BIT = 8, - SUBGROUP_SHUFFLE_BIT = 16, - SUBGROUP_SHUFFLE_RELATIVE_BIT = 32, - SUBGROUP_CLUSTERED_BIT = 64, - SUBGROUP_QUAD_BIT = 128, - }; - - struct Capabilities { - // main device info - DeviceFamily device_family = DEVICE_UNKNOWN; - uint32_t version_major = 1; - uint32_t version_minor = 0; - }; - typedef int64_t DrawListID; typedef int64_t ComputeListID; @@ -110,9 +85,9 @@ private: static RenderingDevice *singleton; - Capabilities device_capabilities; - - RenderingDeviceDriver *driver = nullptr; // Owned by the context. + RenderingContextDriver *context = nullptr; + RenderingDeviceDriver *driver = nullptr; + RenderingContextDriver::Device device; protected: static void _bind_methods(); @@ -188,8 +163,8 @@ private: enum StagingRequiredAction { STAGING_REQUIRED_ACTION_NONE, - STAGING_REQUIRED_ACTION_FLUSH_CURRENT, - STAGING_REQUIRED_ACTION_FLUSH_OLDER + STAGING_REQUIRED_ACTION_FLUSH_AND_STALL_ALL, + STAGING_REQUIRED_ACTION_STALL_PREVIOUS }; Error _staging_buffer_allocate(uint32_t p_amount, uint32_t p_required_align, uint32_t &r_alloc_offset, uint32_t &r_alloc_size, StagingRequiredAction &r_required_action, bool p_can_segment = true); @@ -819,12 +794,11 @@ private: Error _texture_copy_bind_compat_84976(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, BitField<BarrierMask> p_post_barrier); Error _texture_clear_bind_compat_84976(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, BitField<BarrierMask> p_post_barrier); Error _texture_resolve_multisample_bind_compat_84976(RID p_from_texture, RID p_to_texture, BitField<BarrierMask> p_post_barrier); + FramebufferFormatID _screen_get_framebuffer_format_bind_compat_87340() const; #endif public: - ApiContextRD *get_context() const { return context; } - - const Capabilities *get_device_capabilities() const { return &device_capabilities; }; + const RDD::Capabilities &get_device_capabilities() const { return driver->get_capabilities(); } bool has_feature(const Features p_feature) const; @@ -996,10 +970,10 @@ private: RID_Owner<RenderPipeline> render_pipeline_owner; - bool pipelines_cache_enabled = false; - size_t pipelines_cache_size = 0; - String pipelines_cache_file_path; - WorkerThreadPool::TaskID pipelines_cache_save_task = WorkerThreadPool::INVALID_TASK_ID; + bool pipeline_cache_enabled = false; + size_t pipeline_cache_size = 0; + String pipeline_cache_file_path; + WorkerThreadPool::TaskID pipeline_cache_save_task = WorkerThreadPool::INVALID_TASK_ID; Vector<uint8_t> _load_pipeline_cache(); void _update_pipeline_cache(bool p_closing = false); @@ -1024,13 +998,22 @@ public: RID compute_pipeline_create(RID p_shader, const Vector<PipelineSpecializationConstant> &p_specialization_constants = Vector<PipelineSpecializationConstant>()); bool compute_pipeline_is_valid(RID p_pipeline); +private: /****************/ /**** SCREEN ****/ /****************/ + HashMap<DisplayServer::WindowID, RDD::SwapChainID> screen_swap_chains; + HashMap<DisplayServer::WindowID, RDD::FramebufferID> screen_framebuffers; - int screen_get_width(DisplayServer::WindowID p_screen = 0) const; - int screen_get_height(DisplayServer::WindowID p_screen = 0) const; - FramebufferFormatID screen_get_framebuffer_format() const; + uint32_t _get_swap_chain_desired_count() const; + +public: + Error screen_create(DisplayServer::WindowID p_screen = DisplayServer::MAIN_WINDOW_ID); + Error screen_prepare_for_drawing(DisplayServer::WindowID p_screen = DisplayServer::MAIN_WINDOW_ID); + int screen_get_width(DisplayServer::WindowID p_screen = DisplayServer::MAIN_WINDOW_ID) const; + int screen_get_height(DisplayServer::WindowID p_screen = DisplayServer::MAIN_WINDOW_ID) const; + FramebufferFormatID screen_get_framebuffer_format(DisplayServer::WindowID p_screen = DisplayServer::MAIN_WINDOW_ID) const; + Error screen_free(DisplayServer::WindowID p_screen = DisplayServer::MAIN_WINDOW_ID); /*************************/ /**** DRAW LISTS (II) ****/ @@ -1101,7 +1084,6 @@ private: DrawList *draw_list = nullptr; uint32_t draw_list_subpass_count = 0; - uint32_t draw_list_count = 0; RDD::RenderPassID draw_list_render_pass; RDD::FramebufferID draw_list_vkframebuffer; #ifdef DEBUG_ENABLED @@ -1215,6 +1197,15 @@ private: RenderingDeviceGraph draw_graph; /**************************/ + /**** QUEUE MANAGEMENT ****/ + /**************************/ + + RDD::CommandQueueFamilyID main_queue_family; + RDD::CommandQueueFamilyID present_queue_family; + RDD::CommandQueueID main_queue; + RDD::CommandQueueID present_queue; + + /**************************/ /**** FRAME MANAGEMENT ****/ /**************************/ @@ -1232,7 +1223,6 @@ private: // nature of the GPU. They will get deleted // when the frame is cycled. -private: struct Frame { // List in usage order, from last to free to first to free. List<Buffer> buffers_to_dispose_of; @@ -1245,13 +1235,30 @@ private: List<ComputePipeline> compute_pipelines_to_dispose_of; RDD::CommandPoolID command_pool; + + // Used at the beginning of every frame for set-up. // Used for filling up newly created buffers with data provided on creation. // Primarily intended to be accessed by worker threads. - // Ideally this cmd buffer should use an async transfer queue. - RDD::CommandBufferID setup_command_buffer; // Used at the beginning of every frame for set-up. - // The main cmd buffer for drawing and compute. + // Ideally this command buffer should use an async transfer queue. + RDD::CommandBufferID setup_command_buffer; + + // The main command buffer for drawing and compute. // Primarily intended to be used by the main thread to do most stuff. - RDD::CommandBufferID draw_command_buffer; // Used at the beginning of every frame for set-up. + RDD::CommandBufferID draw_command_buffer; + + // Signaled by the setup submission. Draw must wait on this semaphore. + RDD::SemaphoreID setup_semaphore; + + // Signaled by the draw submission. Present must wait on this semaphore. + RDD::SemaphoreID draw_semaphore; + + // Signaled by the draw submission. Must wait on this fence before beginning + // command recording for the frame. + RDD::FenceID draw_fence; + bool draw_fence_signaled = false; + + // Swap chains prepared for drawing during the frame that must be presented. + LocalVector<RDD::SwapChainID> swap_chains_to_present; struct Timestamp { String description; @@ -1272,37 +1279,32 @@ private: uint32_t max_timestamp_query_elements = 0; - TightLocalVector<Frame> frames; // Frames available, for main device they are cycled (usually 3), for local devices only 1. - int frame = 0; // Current frame. - int frame_count = 0; // Total amount of frames. + int frame = 0; + TightLocalVector<Frame> frames; uint64_t frames_drawn = 0; - RID local_device; - bool local_device_processing = false; void _free_pending_resources(int p_frame); - ApiContextRD *context = nullptr; - uint64_t texture_memory = 0; uint64_t buffer_memory = 0; void _free_internal(RID p_id); - void _flush(bool p_current_frame); - - bool screen_prepared = false; + void _begin_frame(); + void _end_frame(); + void _execute_frame(bool p_signal_for_present); + void _present_frame(); + void _stall_for_previous_frames(); + void _flush_and_stall_for_all_frames(); template <class T> void _free_rids(T &p_owner, const char *p_type); - void _finalize_command_buffers(bool p_postpare); - void _begin_frame(); - #ifdef DEV_ENABLED HashMap<RID, String> resource_names; #endif public: - void initialize(ApiContextRD *p_context, bool p_local_device = false); + Error initialize(RenderingContextDriver *p_context, DisplayServer::WindowID p_main_window = DisplayServer::INVALID_WINDOW_ID); void finalize(); void free(RID p_id); @@ -1324,9 +1326,6 @@ public: uint64_t limit_get(Limit p_limit) const; - //methods below not exposed, used by RenderingDeviceRD - void prepare_screen_for_drawing(); - void swap_buffers(); uint32_t get_frame_delay() const; @@ -1352,6 +1351,7 @@ public: String get_device_vendor_name() const; String get_device_name() const; DeviceType get_device_type() const; + String get_device_api_name() const; String get_device_api_version() const; String get_device_pipeline_cache_uuid() const; diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index a8936f8cca..688a6441a7 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -820,6 +820,17 @@ public: SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS, }; + enum SubgroupOperations { + SUBGROUP_BASIC_BIT = 1, + SUBGROUP_VOTE_BIT = 2, + SUBGROUP_ARITHMETIC_BIT = 4, + SUBGROUP_BALLOT_BIT = 8, + SUBGROUP_SHUFFLE_BIT = 16, + SUBGROUP_SHUFFLE_RELATIVE_BIT = 32, + SUBGROUP_CLUSTERED_BIT = 64, + SUBGROUP_QUAD_BIT = 128, + }; + //////////////////////////////////////////// // PROTECTED STUFF // Not exposed by RenderingDevice, but shared diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index 663222e69d..753b3668bc 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -49,12 +49,11 @@ #include "core/object/object.h" #include "core/variant/type_info.h" #include "servers/display_server.h" +#include "servers/rendering/rendering_context_driver.h" #include "servers/rendering/rendering_device_commons.h" #include <algorithm> -class ApiContextRD; - // This may one day be used in Godot for interoperability between C arrays, Vector and LocalVector. // (See https://github.com/godotengine/godot-proposals/issues/5144.) template <class T> @@ -127,20 +126,22 @@ public: id(p_id) {} }; -#define DEFINE_ID(m_name) \ - struct m_name##ID : public ID { \ - _ALWAYS_INLINE_ operator bool() const { return id != 0; } \ - _ALWAYS_INLINE_ m_name##ID &operator=(m_name##ID p_other) { \ - id = p_other.id; \ - return *this; \ - } \ - _ALWAYS_INLINE_ bool operator<(const m_name##ID &p_other) const { return id < p_other.id; } \ - _ALWAYS_INLINE_ m_name##ID(const m_name##ID &p_other) : ID(p_other.id) {} \ - _ALWAYS_INLINE_ explicit m_name##ID(uint64_t p_int) : ID(p_int) {} \ - _ALWAYS_INLINE_ explicit m_name##ID(void *p_ptr) : ID((size_t)p_ptr) {} \ - _ALWAYS_INLINE_ m_name##ID() = default; \ - }; \ - /* Ensure type-punnable to pointer. Makes some things easier.*/ \ +#define DEFINE_ID(m_name) \ + struct m_name##ID : public ID { \ + _ALWAYS_INLINE_ operator bool() const { return id != 0; } \ + _ALWAYS_INLINE_ m_name##ID &operator=(m_name##ID p_other) { \ + id = p_other.id; \ + return *this; \ + } \ + _ALWAYS_INLINE_ bool operator<(const m_name##ID &p_other) const { return id < p_other.id; } \ + _ALWAYS_INLINE_ bool operator==(const m_name##ID &p_other) const { return id == p_other.id; } \ + _ALWAYS_INLINE_ bool operator!=(const m_name##ID &p_other) const { return id != p_other.id; } \ + _ALWAYS_INLINE_ m_name##ID(const m_name##ID &p_other) : ID(p_other.id) {} \ + _ALWAYS_INLINE_ explicit m_name##ID(uint64_t p_int) : ID(p_int) {} \ + _ALWAYS_INLINE_ explicit m_name##ID(void *p_ptr) : ID((size_t)p_ptr) {} \ + _ALWAYS_INLINE_ m_name##ID() = default; \ + }; \ + /* Ensure type-punnable to pointer. Makes some things easier.*/ \ static_assert(sizeof(m_name##ID) == sizeof(void *)); // Id types declared before anything else to prevent cyclic dependencies between the different concerns. @@ -148,14 +149,26 @@ public: DEFINE_ID(Texture); DEFINE_ID(Sampler); DEFINE_ID(VertexFormat); + DEFINE_ID(CommandQueue); + DEFINE_ID(CommandQueueFamily); DEFINE_ID(CommandPool); DEFINE_ID(CommandBuffer); + DEFINE_ID(SwapChain); DEFINE_ID(Framebuffer); DEFINE_ID(Shader); DEFINE_ID(UniformSet); DEFINE_ID(Pipeline); DEFINE_ID(RenderPass); DEFINE_ID(QueryPool); + DEFINE_ID(Fence); + DEFINE_ID(Semaphore); + +public: + /*****************/ + /**** GENERIC ****/ + /*****************/ + + virtual Error initialize(uint32_t p_device_index, uint32_t p_frame_count) = 0; /****************/ /**** MEMORY ****/ @@ -361,10 +374,44 @@ public: VectorView<BufferBarrier> p_buffer_barriers, VectorView<TextureBarrier> p_texture_barriers) = 0; + /****************/ + /**** FENCES ****/ + /****************/ + + virtual FenceID fence_create() = 0; + virtual Error fence_wait(FenceID p_fence) = 0; + virtual void fence_free(FenceID p_fence) = 0; + + /********************/ + /**** SEMAPHORES ****/ + /********************/ + + virtual SemaphoreID semaphore_create() = 0; + virtual void semaphore_free(SemaphoreID p_semaphore) = 0; + /*************************/ /**** COMMAND BUFFERS ****/ /*************************/ + // ----- QUEUE FAMILY ----- + + enum CommandQueueFamilyBits { + COMMAND_QUEUE_FAMILY_GRAPHICS_BIT = 0x1, + COMMAND_QUEUE_FAMILY_COMPUTE_BIT = 0x2, + COMMAND_QUEUE_FAMILY_TRANSFER_BIT = 0x4 + }; + + // The requested command queue family must support all specified bits or it'll fail to return a valid family otherwise. If a valid surface is specified, the queue must support presenting to it. + // It is valid to specify no bits and a valid surface: in this case, the dedicated presentation queue family will be the preferred option. + virtual CommandQueueFamilyID command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface = 0) = 0; + + // ----- QUEUE ----- + + virtual CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) = 0; + virtual Error command_queue_execute(CommandQueueID p_cmd_queue, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_wait_semaphores, VectorView<SemaphoreID> p_signal_semaphores, FenceID p_signal_fence) = 0; + virtual Error command_queue_present(CommandQueueID p_cmd_queue, VectorView<SwapChainID> p_swap_chains, VectorView<SemaphoreID> p_wait_semaphores) = 0; + virtual void command_queue_free(CommandQueueID p_cmd_queue) = 0; + // ----- POOL ----- enum CommandBufferType { @@ -372,17 +419,39 @@ public: COMMAND_BUFFER_TYPE_SECONDARY, }; - virtual CommandPoolID command_pool_create(CommandBufferType p_cmd_buffer_type) = 0; + virtual CommandPoolID command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) = 0; virtual void command_pool_free(CommandPoolID p_cmd_pool) = 0; // ----- BUFFER ----- - virtual CommandBufferID command_buffer_create(CommandBufferType p_cmd_buffer_type, CommandPoolID p_cmd_pool) = 0; + virtual CommandBufferID command_buffer_create(CommandPoolID p_cmd_pool) = 0; virtual bool command_buffer_begin(CommandBufferID p_cmd_buffer) = 0; virtual bool command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) = 0; virtual void command_buffer_end(CommandBufferID p_cmd_buffer) = 0; virtual void command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) = 0; + /********************/ + /**** SWAP CHAIN ****/ + /********************/ + + // The swap chain won't be valid for use until it is resized at least once. + virtual SwapChainID swap_chain_create(RenderingContextDriver::SurfaceID p_surface) = 0; + + // The swap chain must not be in use when a resize is requested. Wait until all rendering associated to the swap chain is finished before resizing it. + virtual Error swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) = 0; + + // Acquire the framebuffer that can be used for drawing. This must be called only once every time a new frame will be rendered. + virtual FramebufferID swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) = 0; + + // Retrieve the render pass that can be used to draw on the swap chain's framebuffers. + virtual RenderPassID swap_chain_get_render_pass(SwapChainID p_swap_chain) = 0; + + // Retrieve the format used by the swap chain's framebuffers. + virtual DataFormat swap_chain_get_format(SwapChainID p_swap_chain) = 0; + + // Wait until all rendering associated to the swap chain is finished before deleting it. + virtual void swap_chain_free(SwapChainID p_swap_chain) = 0; + /*********************/ /**** FRAMEBUFFER ****/ /*********************/ @@ -633,17 +702,11 @@ public: virtual void command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) = 0; virtual void command_end_label(CommandBufferID p_cmd_buffer) = 0; - /****************/ - /**** SCREEN ****/ - /****************/ - - virtual DataFormat screen_get_format() = 0; - /********************/ /**** SUBMISSION ****/ /********************/ - virtual void begin_segment(CommandBufferID p_cmd_buffer, uint32_t p_frame_index, uint32_t p_frames_drawn) = 0; + virtual void begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) = 0; virtual void end_segment() = 0; /**************/ @@ -682,6 +745,19 @@ public: SHADER_CHANGE_INVALIDATION_ALL_OR_NONE_ACCORDING_TO_LAYOUT_HASH, }; + enum DeviceFamily { + DEVICE_UNKNOWN, + DEVICE_OPENGL, + DEVICE_VULKAN, + DEVICE_DIRECTX, + }; + + struct Capabilities { + DeviceFamily device_family = DEVICE_UNKNOWN; + uint32_t version_major = 1; + uint32_t version_minor = 0; + }; + virtual void set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) = 0; virtual uint64_t get_resource_native_handle(DriverResource p_type, ID p_driver_id) = 0; virtual uint64_t get_total_memory_used() = 0; @@ -689,6 +765,10 @@ public: virtual uint64_t api_trait_get(ApiTrait p_trait); virtual bool has_feature(Features p_feature) = 0; virtual const MultiviewCapabilities &get_multiview_capabilities() = 0; + virtual String get_api_name() const = 0; + virtual String get_api_version() const = 0; + virtual String get_pipeline_cache_uuid() const = 0; + virtual const Capabilities &get_capabilities() const = 0; /******************/ diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index 904b439e65..83fb2d1918 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -40,15 +40,6 @@ RenderingDeviceGraph::RenderingDeviceGraph() { } RenderingDeviceGraph::~RenderingDeviceGraph() { - _wait_for_secondary_command_buffer_tasks(); - - for (Frame &f : frames) { - for (SecondaryCommandBuffer &secondary : f.secondary_command_buffers) { - if (secondary.command_pool.id != 0) { - driver->command_pool_free(secondary.command_pool); - } - } - } } bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { @@ -1246,7 +1237,7 @@ void RenderingDeviceGraph::_print_compute_list(const uint8_t *p_instruction_data } } -void RenderingDeviceGraph::initialize(RDD *p_driver, uint32_t p_frame_count, uint32_t p_secondary_command_buffers_per_frame) { +void RenderingDeviceGraph::initialize(RDD *p_driver, uint32_t p_frame_count, RDD::CommandQueueFamilyID p_secondary_command_queue_family, uint32_t p_secondary_command_buffers_per_frame) { driver = p_driver; frames.resize(p_frame_count); @@ -1255,8 +1246,8 @@ void RenderingDeviceGraph::initialize(RDD *p_driver, uint32_t p_frame_count, uin for (uint32_t j = 0; j < p_secondary_command_buffers_per_frame; j++) { SecondaryCommandBuffer &secondary = frames[i].secondary_command_buffers[j]; - secondary.command_pool = driver->command_pool_create(RDD::COMMAND_BUFFER_TYPE_SECONDARY); - secondary.command_buffer = driver->command_buffer_create(RDD::COMMAND_BUFFER_TYPE_SECONDARY, secondary.command_pool); + secondary.command_pool = driver->command_pool_create(p_secondary_command_queue_family, RDD::COMMAND_BUFFER_TYPE_SECONDARY); + secondary.command_buffer = driver->command_buffer_create(secondary.command_pool); secondary.task = WorkerThreadPool::INVALID_TASK_ID; } } @@ -1264,6 +1255,20 @@ void RenderingDeviceGraph::initialize(RDD *p_driver, uint32_t p_frame_count, uin driver_honors_barriers = driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS); } +void RenderingDeviceGraph::finalize() { + _wait_for_secondary_command_buffer_tasks(); + + for (Frame &f : frames) { + for (SecondaryCommandBuffer &secondary : f.secondary_command_buffers) { + if (secondary.command_pool.id != 0) { + driver->command_pool_free(secondary.command_pool); + } + } + } + + frames.clear(); +} + void RenderingDeviceGraph::begin() { command_data.clear(); command_data_offsets.clear(); diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index 0d48f0491b..995fdb27d1 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -619,7 +619,8 @@ private: public: RenderingDeviceGraph(); ~RenderingDeviceGraph(); - void initialize(RDD *p_driver, uint32_t p_frame_count, uint32_t p_secondary_command_buffers_per_frame); + void initialize(RDD *p_driver, uint32_t p_frame_count, RDD::CommandQueueFamilyID p_secondary_command_queue_family, uint32_t p_secondary_command_buffers_per_frame); + void finalize(); void begin(); void add_buffer_clear(RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_offset, uint32_t p_size); void add_buffer_copy(RDD::BufferID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, RDD::BufferCopyRegion p_region); |