diff options
Diffstat (limited to 'drivers')
68 files changed, 11588 insertions, 1075 deletions
diff --git a/drivers/SCsub b/drivers/SCsub index e77b96cc87..44d29fb7c1 100644 --- a/drivers/SCsub +++ b/drivers/SCsub @@ -3,6 +3,7 @@ Import("env") env.drivers_sources = [] +supported = env.get("supported", []) # OS drivers SConscript("unix/SCsub") @@ -17,6 +18,9 @@ if env["platform"] == "windows": if not env.msvc: SConscript("backtrace/SCsub") if env["xaudio2"]: + if "xaudio2" not in supported: + print("Target platform '{}' does not support the XAudio2 audio driver. Aborting.".format(env["platform"])) + Exit(255) SConscript("xaudio2/SCsub") # Midi drivers @@ -28,11 +32,19 @@ SConscript("winmidi/SCsub") if env["vulkan"]: SConscript("vulkan/SCsub") if env["d3d12"]: + if "d3d12" not in supported: + print("Target platform '{}' does not support the D3D12 rendering driver. Aborting.".format(env["platform"])) + Exit(255) SConscript("d3d12/SCsub") if env["opengl3"]: SConscript("gl_context/SCsub") SConscript("gles3/SCsub") SConscript("egl/SCsub") +if env["metal"]: + if "metal" not in supported: + print("Target platform '{}' does not support the Metal rendering driver. Aborting.".format(env["platform"])) + Exit(255) + SConscript("metal/SCsub") # Core dependencies SConscript("png/SCsub") diff --git a/drivers/alsamidi/midi_driver_alsamidi.cpp b/drivers/alsamidi/midi_driver_alsamidi.cpp index b87be69cc5..445fc4a993 100644 --- a/drivers/alsamidi/midi_driver_alsamidi.cpp +++ b/drivers/alsamidi/midi_driver_alsamidi.cpp @@ -37,137 +37,36 @@ #include <errno.h> -MIDIDriverALSAMidi::MessageCategory MIDIDriverALSAMidi::msg_category(uint8_t msg_part) { - if (msg_part >= 0xf8) { - return MessageCategory::RealTime; - } else if (msg_part >= 0xf0) { - // System Exclusive begin/end are specified as System Common Category messages, - // but we separate them here and give them their own categories as their - // behavior is significantly different. 
- if (msg_part == 0xf0) { - return MessageCategory::SysExBegin; - } else if (msg_part == 0xf7) { - return MessageCategory::SysExEnd; - } - return MessageCategory::SystemCommon; - } else if (msg_part >= 0x80) { - return MessageCategory::Voice; - } - return MessageCategory::Data; -} - -size_t MIDIDriverALSAMidi::msg_expected_data(uint8_t status_byte) { - if (msg_category(status_byte) == MessageCategory::Voice) { - // Voice messages have a channel number in the status byte, mask it out. - status_byte &= 0xf0; - } - - switch (status_byte) { - case 0x80: // Note Off - case 0x90: // Note On - case 0xA0: // Polyphonic Key Pressure (Aftertouch) - case 0xB0: // Control Change (CC) - case 0xE0: // Pitch Bend Change - case 0xF2: // Song Position Pointer - return 2; - - case 0xC0: // Program Change - case 0xD0: // Channel Pressure (Aftertouch) - case 0xF1: // MIDI Time Code Quarter Frame - case 0xF3: // Song Select - return 1; - } +MIDIDriverALSAMidi::InputConnection::InputConnection(int p_device_index, + snd_rawmidi_t *p_rawmidi) : + parser(p_device_index), rawmidi_ptr(p_rawmidi) {} - return 0; -} - -void MIDIDriverALSAMidi::InputConnection::parse_byte(uint8_t byte, MIDIDriverALSAMidi &driver, - uint64_t timestamp, int device_index) { - switch (msg_category(byte)) { - case MessageCategory::RealTime: - // Real-Time messages are single byte messages that can - // occur at any point. - // We pass them straight through. - driver.receive_input_packet(device_index, timestamp, &byte, 1); - break; - - case MessageCategory::Data: - // We don't currently forward System Exclusive messages so skip their data. - // Collect any expected data for other message types. - if (!skipping_sys_ex && expected_data > received_data) { - buffer[received_data + 1] = byte; - received_data++; - - // Forward a complete message and reset relevant state. 
- if (received_data == expected_data) { - driver.receive_input_packet(device_index, timestamp, buffer, received_data + 1); - received_data = 0; - - if (msg_category(buffer[0]) != MessageCategory::Voice) { - // Voice Category messages can be sent with "running status". - // This means they don't resend the status byte until it changes. - // For other categories, we reset expected data, to require a new status byte. - expected_data = 0; - } - } - } - break; - - case MessageCategory::SysExBegin: - buffer[0] = byte; - skipping_sys_ex = true; - break; - - case MessageCategory::SysExEnd: - expected_data = 0; - skipping_sys_ex = false; - break; - - case MessageCategory::Voice: - case MessageCategory::SystemCommon: - buffer[0] = byte; - received_data = 0; - expected_data = msg_expected_data(byte); - skipping_sys_ex = false; - if (expected_data == 0) { - driver.receive_input_packet(device_index, timestamp, &byte, 1); - } - break; - } -} - -int MIDIDriverALSAMidi::InputConnection::read_in(MIDIDriverALSAMidi &driver, uint64_t timestamp, int device_index) { - int ret; +void MIDIDriverALSAMidi::InputConnection::read() { + int read_count; do { - uint8_t byte = 0; - ret = snd_rawmidi_read(rawmidi_ptr, &byte, 1); + uint8_t buffer[32]; + read_count = snd_rawmidi_read(rawmidi_ptr, buffer, sizeof(buffer)); - if (ret < 0) { - if (ret != -EAGAIN) { - ERR_PRINT("snd_rawmidi_read error: " + String(snd_strerror(ret))); + if (read_count < 0) { + if (read_count != -EAGAIN) { + ERR_PRINT("snd_rawmidi_read error: " + String(snd_strerror(read_count))); } } else { - parse_byte(byte, driver, timestamp, device_index); + for (int i = 0; i < read_count; i++) { + parser.parse_fragment(buffer[i]); + } } - } while (ret > 0); - - return ret; + } while (read_count > 0); } void MIDIDriverALSAMidi::thread_func(void *p_udata) { MIDIDriverALSAMidi *md = static_cast<MIDIDriverALSAMidi *>(p_udata); - uint64_t timestamp = 0; while (!md->exit_thread.is_set()) { md->lock(); - - InputConnection *connections = 
md->connected_inputs.ptrw(); - size_t connection_count = md->connected_inputs.size(); - - for (size_t i = 0; i < connection_count; i++) { - connections[i].read_in(*md, timestamp, (int)i); + for (InputConnection &conn : md->connected_inputs) { + conn.read(); } - md->unlock(); OS::get_singleton()->delay_usec(1000); @@ -181,15 +80,25 @@ Error MIDIDriverALSAMidi::open() { return ERR_CANT_OPEN; } - int i = 0; - for (void **n = hints; *n != nullptr; n++) { - char *name = snd_device_name_get_hint(*n, "NAME"); + lock(); + int device_index = 0; + for (void **h = hints; *h != nullptr; h++) { + char *name = snd_device_name_get_hint(*h, "NAME"); if (name != nullptr) { snd_rawmidi_t *midi_in; int ret = snd_rawmidi_open(&midi_in, nullptr, name, SND_RAWMIDI_NONBLOCK); if (ret >= 0) { - connected_inputs.insert(i++, InputConnection(midi_in)); + // Get display name. + snd_rawmidi_info_t *info; + snd_rawmidi_info_malloc(&info); + snd_rawmidi_info(midi_in, info); + connected_input_names.push_back(snd_rawmidi_info_get_name(info)); + snd_rawmidi_info_free(info); + + connected_inputs.push_back(InputConnection(device_index, midi_in)); + // Only increment device_index for successfully connected devices. 
+ device_index++; } } @@ -198,6 +107,7 @@ Error MIDIDriverALSAMidi::open() { } } snd_device_name_free_hint(hints); + unlock(); exit_thread.clear(); thread.start(MIDIDriverALSAMidi::thread_func, this); @@ -211,11 +121,12 @@ void MIDIDriverALSAMidi::close() { thread.wait_to_finish(); } - for (int i = 0; i < connected_inputs.size(); i++) { - snd_rawmidi_t *midi_in = connected_inputs[i].rawmidi_ptr; - snd_rawmidi_close(midi_in); + for (const InputConnection &conn : connected_inputs) { + snd_rawmidi_close(conn.rawmidi_ptr); } + connected_inputs.clear(); + connected_input_names.clear(); } void MIDIDriverALSAMidi::lock() const { @@ -226,24 +137,6 @@ void MIDIDriverALSAMidi::unlock() const { mutex.unlock(); } -PackedStringArray MIDIDriverALSAMidi::get_connected_inputs() { - PackedStringArray list; - - lock(); - for (int i = 0; i < connected_inputs.size(); i++) { - snd_rawmidi_t *midi_in = connected_inputs[i].rawmidi_ptr; - snd_rawmidi_info_t *info; - - snd_rawmidi_info_malloc(&info); - snd_rawmidi_info(midi_in, info); - list.push_back(snd_rawmidi_info_get_name(info)); - snd_rawmidi_info_free(info); - } - unlock(); - - return list; -} - MIDIDriverALSAMidi::MIDIDriverALSAMidi() { exit_thread.clear(); } diff --git a/drivers/alsamidi/midi_driver_alsamidi.h b/drivers/alsamidi/midi_driver_alsamidi.h index 95ded3b1c9..45811bec47 100644 --- a/drivers/alsamidi/midi_driver_alsamidi.h +++ b/drivers/alsamidi/midi_driver_alsamidi.h @@ -51,24 +51,15 @@ class MIDIDriverALSAMidi : public MIDIDriver { Thread thread; Mutex mutex; - class InputConnection { - public: + struct InputConnection { InputConnection() = default; - InputConnection(snd_rawmidi_t *midi_in) : - rawmidi_ptr{ midi_in } {} - - // Read in and parse available data, forwarding any complete messages through the driver. 
- int read_in(MIDIDriverALSAMidi &driver, uint64_t timestamp, int device_index); + InputConnection(int p_device_index, snd_rawmidi_t *p_rawmidi); + Parser parser; snd_rawmidi_t *rawmidi_ptr = nullptr; - private: - static const size_t MSG_BUFFER_SIZE = 3; - uint8_t buffer[MSG_BUFFER_SIZE] = { 0 }; - size_t expected_data = 0; - size_t received_data = 0; - bool skipping_sys_ex = false; - void parse_byte(uint8_t byte, MIDIDriverALSAMidi &driver, uint64_t timestamp, int device_index); + // Read in and parse available data, forwarding complete events to Input. + void read(); }; Vector<InputConnection> connected_inputs; @@ -77,30 +68,12 @@ class MIDIDriverALSAMidi : public MIDIDriver { static void thread_func(void *p_udata); - enum class MessageCategory { - Data, - Voice, - SysExBegin, - SystemCommon, // excluding System Exclusive Begin/End - SysExEnd, - RealTime, - }; - - // If the passed byte is a status byte, return the associated message category, - // else return MessageCategory::Data. - static MessageCategory msg_category(uint8_t msg_part); - - // Return the number of data bytes expected for the provided status byte. - static size_t msg_expected_data(uint8_t status_byte); - void lock() const; void unlock() const; public: - virtual Error open(); - virtual void close(); - - virtual PackedStringArray get_connected_inputs(); + virtual Error open() override; + virtual void close() override; MIDIDriverALSAMidi(); virtual ~MIDIDriverALSAMidi(); diff --git a/drivers/coreaudio/audio_driver_coreaudio.cpp b/drivers/coreaudio/audio_driver_coreaudio.cpp index 98a8d4b2ef..fd0adb1fd1 100644 --- a/drivers/coreaudio/audio_driver_coreaudio.cpp +++ b/drivers/coreaudio/audio_driver_coreaudio.cpp @@ -66,6 +66,11 @@ OSStatus AudioDriverCoreAudio::output_device_address_cb(AudioObjectID inObjectID return noErr; } + +// Switch to kAudioObjectPropertyElementMain everywhere to remove deprecated warnings. 
+#if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 120000) || (TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED < 150000) +#define kAudioObjectPropertyElementMain kAudioObjectPropertyElementMaster +#endif #endif Error AudioDriverCoreAudio::init() { @@ -89,7 +94,7 @@ Error AudioDriverCoreAudio::init() { AudioObjectPropertyAddress prop; prop.mSelector = kAudioHardwarePropertyDefaultOutputDevice; prop.mScope = kAudioObjectPropertyScopeGlobal; - prop.mElement = kAudioObjectPropertyElementMaster; + prop.mElement = kAudioObjectPropertyElementMain; result = AudioObjectAddPropertyListener(kAudioObjectSystemObject, &prop, &output_device_address_cb, this); ERR_FAIL_COND_V(result != noErr, FAILED); @@ -319,7 +324,7 @@ void AudioDriverCoreAudio::finish() { AudioObjectPropertyAddress prop; prop.mSelector = kAudioHardwarePropertyDefaultOutputDevice; prop.mScope = kAudioObjectPropertyScopeGlobal; - prop.mElement = kAudioObjectPropertyElementMaster; + prop.mElement = kAudioObjectPropertyElementMain; result = AudioObjectRemovePropertyListener(kAudioObjectSystemObject, &prop, &output_device_address_cb, this); if (result != noErr) { @@ -358,7 +363,7 @@ Error AudioDriverCoreAudio::init_input_device() { AudioObjectPropertyAddress prop; prop.mSelector = kAudioHardwarePropertyDefaultInputDevice; prop.mScope = kAudioObjectPropertyScopeGlobal; - prop.mElement = kAudioObjectPropertyElementMaster; + prop.mElement = kAudioObjectPropertyElementMain; result = AudioObjectAddPropertyListener(kAudioObjectSystemObject, &prop, &input_device_address_cb, this); ERR_FAIL_COND_V(result != noErr, FAILED); @@ -375,7 +380,7 @@ Error AudioDriverCoreAudio::init_input_device() { #ifdef MACOS_ENABLED AudioDeviceID deviceId; size = sizeof(AudioDeviceID); - AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultInputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster }; + AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultInputDevice, 
kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMain }; result = AudioObjectGetPropertyData(kAudioObjectSystemObject, &property, 0, nullptr, &size, &deviceId); ERR_FAIL_COND_V(result != noErr, FAILED); @@ -453,7 +458,7 @@ void AudioDriverCoreAudio::finish_input_device() { AudioObjectPropertyAddress prop; prop.mSelector = kAudioHardwarePropertyDefaultInputDevice; prop.mScope = kAudioObjectPropertyScopeGlobal; - prop.mElement = kAudioObjectPropertyElementMaster; + prop.mElement = kAudioObjectPropertyElementMain; result = AudioObjectRemovePropertyListener(kAudioObjectSystemObject, &prop, &input_device_address_cb, this); if (result != noErr) { @@ -504,7 +509,7 @@ PackedStringArray AudioDriverCoreAudio::_get_device_list(bool input) { prop.mSelector = kAudioHardwarePropertyDevices; prop.mScope = kAudioObjectPropertyScopeGlobal; - prop.mElement = kAudioObjectPropertyElementMaster; + prop.mElement = kAudioObjectPropertyElementMain; UInt32 size = 0; AudioObjectGetPropertyDataSize(kAudioObjectSystemObject, &prop, 0, nullptr, &size); @@ -563,7 +568,7 @@ void AudioDriverCoreAudio::_set_device(const String &output_device, bool input) prop.mSelector = kAudioHardwarePropertyDevices; prop.mScope = kAudioObjectPropertyScopeGlobal; - prop.mElement = kAudioObjectPropertyElementMaster; + prop.mElement = kAudioObjectPropertyElementMain; UInt32 size = 0; AudioObjectGetPropertyDataSize(kAudioObjectSystemObject, &prop, 0, nullptr, &size); @@ -619,7 +624,7 @@ void AudioDriverCoreAudio::_set_device(const String &output_device, bool input) // If we haven't found the desired device get the system default one UInt32 size = sizeof(AudioDeviceID); UInt32 elem = input ? 
kAudioHardwarePropertyDefaultInputDevice : kAudioHardwarePropertyDefaultOutputDevice; - AudioObjectPropertyAddress property = { elem, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster }; + AudioObjectPropertyAddress property = { elem, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMain }; OSStatus result = AudioObjectGetPropertyData(kAudioObjectSystemObject, &property, 0, nullptr, &size, &deviceId); ERR_FAIL_COND(result != noErr); diff --git a/drivers/coremidi/midi_driver_coremidi.cpp b/drivers/coremidi/midi_driver_coremidi.cpp index 87fc7612f7..f6cc59471e 100644 --- a/drivers/coremidi/midi_driver_coremidi.cpp +++ b/drivers/coremidi/midi_driver_coremidi.cpp @@ -37,16 +37,30 @@ #import <CoreAudio/HostTime.h> #import <CoreServices/CoreServices.h> +Mutex MIDIDriverCoreMidi::mutex; +bool MIDIDriverCoreMidi::core_midi_closed = false; + +MIDIDriverCoreMidi::InputConnection::InputConnection(int p_device_index, MIDIEndpointRef p_source) : + parser(p_device_index), source(p_source) {} + void MIDIDriverCoreMidi::read(const MIDIPacketList *packet_list, void *read_proc_ref_con, void *src_conn_ref_con) { - MIDIPacket *packet = const_cast<MIDIPacket *>(packet_list->packet); - int *device_index = static_cast<int *>(src_conn_ref_con); - for (UInt32 i = 0; i < packet_list->numPackets; i++) { - receive_input_packet(*device_index, packet->timeStamp, packet->data, packet->length); - packet = MIDIPacketNext(packet); + MutexLock lock(mutex); + if (!core_midi_closed) { + InputConnection *source = static_cast<InputConnection *>(src_conn_ref_con); + const MIDIPacket *packet = packet_list->packet; + for (UInt32 packet_index = 0; packet_index < packet_list->numPackets; packet_index++) { + for (UInt16 data_index = 0; data_index < packet->length; data_index++) { + source->parser.parse_fragment(packet->data[data_index]); + } + packet = MIDIPacketNext(packet); + } } } Error MIDIDriverCoreMidi::open() { + ERR_FAIL_COND_V_MSG(client || core_midi_closed, FAILED, + 
"MIDIDriverCoreMidi cannot be reopened."); + CFStringRef name = CFStringCreateWithCString(nullptr, "Godot", kCFStringEncodingASCII); OSStatus result = MIDIClientCreate(name, nullptr, nullptr, &client); CFRelease(name); @@ -61,12 +75,27 @@ Error MIDIDriverCoreMidi::open() { return ERR_CANT_OPEN; } - int sources = MIDIGetNumberOfSources(); - for (int i = 0; i < sources; i++) { + int source_count = MIDIGetNumberOfSources(); + int connection_index = 0; + for (int i = 0; i < source_count; i++) { MIDIEndpointRef source = MIDIGetSource(i); if (source) { - MIDIPortConnectSource(port_in, source, static_cast<void *>(&i)); - connected_sources.insert(i, source); + InputConnection *conn = memnew(InputConnection(connection_index, source)); + const OSStatus res = MIDIPortConnectSource(port_in, source, static_cast<void *>(conn)); + if (res != noErr) { + memdelete(conn); + } else { + connected_sources.push_back(conn); + + CFStringRef nameRef = nullptr; + char name[256]; + MIDIObjectGetStringProperty(source, kMIDIPropertyDisplayName, &nameRef); + CFStringGetCString(nameRef, name, sizeof(name), kCFStringEncodingUTF8); + CFRelease(nameRef); + connected_input_names.push_back(name); + + connection_index++; // Contiguous index for successfully connected inputs. 
+ } } } @@ -74,11 +103,17 @@ Error MIDIDriverCoreMidi::open() { } void MIDIDriverCoreMidi::close() { - for (int i = 0; i < connected_sources.size(); i++) { - MIDIEndpointRef source = connected_sources[i]; - MIDIPortDisconnectSource(port_in, source); + mutex.lock(); + core_midi_closed = true; + mutex.unlock(); + + for (InputConnection *conn : connected_sources) { + MIDIPortDisconnectSource(port_in, conn->source); + memdelete(conn); } + connected_sources.clear(); + connected_input_names.clear(); if (port_in != 0) { MIDIPortDispose(port_in); @@ -91,26 +126,6 @@ void MIDIDriverCoreMidi::close() { } } -PackedStringArray MIDIDriverCoreMidi::get_connected_inputs() { - PackedStringArray list; - - for (int i = 0; i < connected_sources.size(); i++) { - MIDIEndpointRef source = connected_sources[i]; - CFStringRef ref = nullptr; - char name[256]; - - MIDIObjectGetStringProperty(source, kMIDIPropertyDisplayName, &ref); - CFStringGetCString(ref, name, sizeof(name), kCFStringEncodingUTF8); - CFRelease(ref); - - list.push_back(name); - } - - return list; -} - -MIDIDriverCoreMidi::MIDIDriverCoreMidi() {} - MIDIDriverCoreMidi::~MIDIDriverCoreMidi() { close(); } diff --git a/drivers/coremidi/midi_driver_coremidi.h b/drivers/coremidi/midi_driver_coremidi.h index 38fb515664..02cbc6234c 100644 --- a/drivers/coremidi/midi_driver_coremidi.h +++ b/drivers/coremidi/midi_driver_coremidi.h @@ -34,6 +34,7 @@ #ifdef COREMIDI_ENABLED #include "core/os/midi_driver.h" +#include "core/os/mutex.h" #include "core/templates/vector.h" #import <CoreMIDI/CoreMIDI.h> @@ -43,17 +44,25 @@ class MIDIDriverCoreMidi : public MIDIDriver { MIDIClientRef client = 0; MIDIPortRef port_in; - Vector<MIDIEndpointRef> connected_sources; + struct InputConnection { + InputConnection() = default; + InputConnection(int p_device_index, MIDIEndpointRef p_source); + Parser parser; + MIDIEndpointRef source; + }; + + Vector<InputConnection *> connected_sources; + + static Mutex mutex; + static bool core_midi_closed; static void 
read(const MIDIPacketList *packet_list, void *read_proc_ref_con, void *src_conn_ref_con); public: - virtual Error open(); - virtual void close(); - - PackedStringArray get_connected_inputs(); + virtual Error open() override; + virtual void close() override; - MIDIDriverCoreMidi(); + MIDIDriverCoreMidi() = default; virtual ~MIDIDriverCoreMidi(); }; diff --git a/drivers/d3d12/SCsub b/drivers/d3d12/SCsub index 35227ebe08..482a549189 100644 --- a/drivers/d3d12/SCsub +++ b/drivers/d3d12/SCsub @@ -136,7 +136,6 @@ if env.msvc: ] else: extra_defines += [ - ("__REQUIRED_RPCNDR_H_VERSION__", 475), "HAVE_STRUCT_TIMESPEC", ] diff --git a/drivers/d3d12/d3d12ma.cpp b/drivers/d3d12/d3d12ma.cpp index 51171141de..b7c9eb7ec0 100644 --- a/drivers/d3d12/d3d12ma.cpp +++ b/drivers/d3d12/d3d12ma.cpp @@ -43,6 +43,18 @@ #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wnonnull-compare" #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#elif defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#pragma clang diagnostic ignored "-Wstring-plus-int" +#pragma clang diagnostic ignored "-Wswitch" +#pragma clang diagnostic ignored "-Wmissing-field-initializers" +#pragma clang diagnostic ignored "-Wtautological-undefined-compare" +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-but-set-variable" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wunused-private-field" +#pragma clang diagnostic ignored "-Wimplicit-fallthrough" #endif #if defined(_MSC_VER) diff --git a/drivers/d3d12/dxil_hash.cpp b/drivers/d3d12/dxil_hash.cpp new file mode 100644 index 0000000000..f94a4a30df --- /dev/null +++ b/drivers/d3d12/dxil_hash.cpp @@ -0,0 +1,209 @@ +/**************************************************************************/ +/* dxil_hash.cpp */ +/**************************************************************************/ +/* This 
file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +// Based on the patched public domain implementation released by Microsoft here: +// https://github.com/microsoft/hlsl-specs/blob/main/proposals/infra/INF-0004-validator-hashing.md + +#include "dxil_hash.h" + +#include <memory.h> + +#define S11 7 +#define S12 12 +#define S13 17 +#define S14 22 +#define S21 5 +#define S22 9 +#define S23 14 +#define S24 20 +#define S31 4 +#define S32 11 +#define S33 16 +#define S34 23 +#define S41 6 +#define S42 10 +#define S43 15 +#define S44 21 + +static const BYTE padding[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static void FF(UINT &a, UINT b, UINT c, UINT d, UINT x, UINT8 s, UINT ac) { + a += ((b & c) | (~b & d)) + x + ac; + a = ((a << s) | (a >> (32 - s))) + b; +} + +static void GG(UINT &a, UINT b, UINT c, UINT d, UINT x, UINT8 s, UINT ac) { + a += ((b & d) | (c & ~d)) + x + ac; + a = ((a << s) | (a >> (32 - s))) + b; +} + +static void HH(UINT &a, UINT b, UINT c, UINT d, UINT x, UINT8 s, UINT ac) { + a += (b ^ c ^ d) + x + ac; + a = ((a << s) | (a >> (32 - s))) + b; +} + +static void II(UINT &a, UINT b, UINT c, UINT d, UINT x, UINT8 s, UINT ac) { + a += (c ^ (b | ~d)) + x + ac; + a = ((a << s) | (a >> (32 - s))) + b; +} + +void compute_dxil_hash(const BYTE *pData, UINT byteCount, BYTE *pOutHash) { + UINT leftOver = byteCount & 0x3f; + UINT padAmount; + bool bTwoRowsPadding = false; + if (leftOver < 56) { + padAmount = 56 - leftOver; + } else { + padAmount = 120 - leftOver; + bTwoRowsPadding = true; + } + UINT padAmountPlusSize = padAmount + 8; + UINT state[4] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 }; + UINT N = (byteCount + padAmountPlusSize) >> 6; + UINT offset = 0; + UINT NextEndState = bTwoRowsPadding ? 
N - 2 : N - 1; + const BYTE *pCurrData = pData; + for (UINT i = 0; i < N; i++, offset += 64, pCurrData += 64) { + UINT x[16]; + const UINT *pX; + if (i == NextEndState) { + if (!bTwoRowsPadding && i == N - 1) { + UINT remainder = byteCount - offset; + x[0] = byteCount << 3; + memcpy((BYTE *)x + 4, pCurrData, remainder); + memcpy((BYTE *)x + 4 + remainder, padding, padAmount); + x[15] = 1 | (byteCount << 1); + } else if (bTwoRowsPadding) { + if (i == N - 2) { + UINT remainder = byteCount - offset; + memcpy(x, pCurrData, remainder); + memcpy((BYTE *)x + remainder, padding, padAmount - 56); + NextEndState = N - 1; + } else if (i == N - 1) { + x[0] = byteCount << 3; + memcpy((BYTE *)x + 4, padding + padAmount - 56, 56); + x[15] = 1 | (byteCount << 1); + } + } + pX = x; + } else { + pX = (const UINT *)pCurrData; + } + + UINT a = state[0]; + UINT b = state[1]; + UINT c = state[2]; + UINT d = state[3]; + + /* Round 1 */ + FF(a, b, c, d, pX[0], S11, 0xd76aa478); /* 1 */ + FF(d, a, b, c, pX[1], S12, 0xe8c7b756); /* 2 */ + FF(c, d, a, b, pX[2], S13, 0x242070db); /* 3 */ + FF(b, c, d, a, pX[3], S14, 0xc1bdceee); /* 4 */ + FF(a, b, c, d, pX[4], S11, 0xf57c0faf); /* 5 */ + FF(d, a, b, c, pX[5], S12, 0x4787c62a); /* 6 */ + FF(c, d, a, b, pX[6], S13, 0xa8304613); /* 7 */ + FF(b, c, d, a, pX[7], S14, 0xfd469501); /* 8 */ + FF(a, b, c, d, pX[8], S11, 0x698098d8); /* 9 */ + FF(d, a, b, c, pX[9], S12, 0x8b44f7af); /* 10 */ + FF(c, d, a, b, pX[10], S13, 0xffff5bb1); /* 11 */ + FF(b, c, d, a, pX[11], S14, 0x895cd7be); /* 12 */ + FF(a, b, c, d, pX[12], S11, 0x6b901122); /* 13 */ + FF(d, a, b, c, pX[13], S12, 0xfd987193); /* 14 */ + FF(c, d, a, b, pX[14], S13, 0xa679438e); /* 15 */ + FF(b, c, d, a, pX[15], S14, 0x49b40821); /* 16 */ + + /* Round 2 */ + GG(a, b, c, d, pX[1], S21, 0xf61e2562); /* 17 */ + GG(d, a, b, c, pX[6], S22, 0xc040b340); /* 18 */ + GG(c, d, a, b, pX[11], S23, 0x265e5a51); /* 19 */ + GG(b, c, d, a, pX[0], S24, 0xe9b6c7aa); /* 20 */ + GG(a, b, c, d, pX[5], S21, 
0xd62f105d); /* 21 */ + GG(d, a, b, c, pX[10], S22, 0x2441453); /* 22 */ + GG(c, d, a, b, pX[15], S23, 0xd8a1e681); /* 23 */ + GG(b, c, d, a, pX[4], S24, 0xe7d3fbc8); /* 24 */ + GG(a, b, c, d, pX[9], S21, 0x21e1cde6); /* 25 */ + GG(d, a, b, c, pX[14], S22, 0xc33707d6); /* 26 */ + GG(c, d, a, b, pX[3], S23, 0xf4d50d87); /* 27 */ + GG(b, c, d, a, pX[8], S24, 0x455a14ed); /* 28 */ + GG(a, b, c, d, pX[13], S21, 0xa9e3e905); /* 29 */ + GG(d, a, b, c, pX[2], S22, 0xfcefa3f8); /* 30 */ + GG(c, d, a, b, pX[7], S23, 0x676f02d9); /* 31 */ + GG(b, c, d, a, pX[12], S24, 0x8d2a4c8a); /* 32 */ + + /* Round 3 */ + HH(a, b, c, d, pX[5], S31, 0xfffa3942); /* 33 */ + HH(d, a, b, c, pX[8], S32, 0x8771f681); /* 34 */ + HH(c, d, a, b, pX[11], S33, 0x6d9d6122); /* 35 */ + HH(b, c, d, a, pX[14], S34, 0xfde5380c); /* 36 */ + HH(a, b, c, d, pX[1], S31, 0xa4beea44); /* 37 */ + HH(d, a, b, c, pX[4], S32, 0x4bdecfa9); /* 38 */ + HH(c, d, a, b, pX[7], S33, 0xf6bb4b60); /* 39 */ + HH(b, c, d, a, pX[10], S34, 0xbebfbc70); /* 40 */ + HH(a, b, c, d, pX[13], S31, 0x289b7ec6); /* 41 */ + HH(d, a, b, c, pX[0], S32, 0xeaa127fa); /* 42 */ + HH(c, d, a, b, pX[3], S33, 0xd4ef3085); /* 43 */ + HH(b, c, d, a, pX[6], S34, 0x4881d05); /* 44 */ + HH(a, b, c, d, pX[9], S31, 0xd9d4d039); /* 45 */ + HH(d, a, b, c, pX[12], S32, 0xe6db99e5); /* 46 */ + HH(c, d, a, b, pX[15], S33, 0x1fa27cf8); /* 47 */ + HH(b, c, d, a, pX[2], S34, 0xc4ac5665); /* 48 */ + + /* Round 4 */ + II(a, b, c, d, pX[0], S41, 0xf4292244); /* 49 */ + II(d, a, b, c, pX[7], S42, 0x432aff97); /* 50 */ + II(c, d, a, b, pX[14], S43, 0xab9423a7); /* 51 */ + II(b, c, d, a, pX[5], S44, 0xfc93a039); /* 52 */ + II(a, b, c, d, pX[12], S41, 0x655b59c3); /* 53 */ + II(d, a, b, c, pX[3], S42, 0x8f0ccc92); /* 54 */ + II(c, d, a, b, pX[10], S43, 0xffeff47d); /* 55 */ + II(b, c, d, a, pX[1], S44, 0x85845dd1); /* 56 */ + II(a, b, c, d, pX[8], S41, 0x6fa87e4f); /* 57 */ + II(d, a, b, c, pX[15], S42, 0xfe2ce6e0); /* 58 */ + II(c, d, a, b, pX[6], S43, 0xa3014314); 
/* 59 */ + II(b, c, d, a, pX[13], S44, 0x4e0811a1); /* 60 */ + II(a, b, c, d, pX[4], S41, 0xf7537e82); /* 61 */ + II(d, a, b, c, pX[11], S42, 0xbd3af235); /* 62 */ + II(c, d, a, b, pX[2], S43, 0x2ad7d2bb); /* 63 */ + II(b, c, d, a, pX[9], S44, 0xeb86d391); /* 64 */ + + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + } + + memcpy(pOutHash, state, 16); +} diff --git a/drivers/d3d12/dxil_hash.h b/drivers/d3d12/dxil_hash.h new file mode 100644 index 0000000000..db8ee85a0d --- /dev/null +++ b/drivers/d3d12/dxil_hash.h @@ -0,0 +1,39 @@ +/**************************************************************************/ +/* dxil_hash.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef DXIL_HASH_H +#define DXIL_HASH_H + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +void compute_dxil_hash(const BYTE *pData, UINT byteCount, BYTE *pOutHash); + +#endif // DXIL_HASH_H diff --git a/drivers/d3d12/rendering_context_driver_d3d12.cpp b/drivers/d3d12/rendering_context_driver_d3d12.cpp index 128b8bcd03..8fa495f5c4 100644 --- a/drivers/d3d12/rendering_context_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_context_driver_d3d12.cpp @@ -43,12 +43,20 @@ #pragma GCC diagnostic ignored "-Wshadow" #pragma GCC diagnostic ignored "-Wswitch" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#elif defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#pragma clang diagnostic ignored "-Wstring-plus-int" +#pragma clang diagnostic ignored "-Wswitch" +#pragma clang diagnostic ignored "-Wmissing-field-initializers" #endif #include "dxcapi.h" #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop +#elif defined(__clang__) +#pragma clang diagnostic pop #endif #if !defined(_MSC_VER) @@ -63,10 +71,6 @@ const GUID CLSID_D3D12DeviceFactoryGodot = { 0x114863bf, 0xc386, 0x4aee, { 0xb3, const GUID CLSID_D3D12DebugGodot = { 0xf2352aeb, 0xdd84, 0x49fe, { 0xb9, 0x7b, 0xa9, 0xdc, 0xfd, 0xcc, 0x1b, 0x4f } }; const GUID CLSID_D3D12SDKConfigurationGodot = { 0x7cda6aca, 0xa03e, 0x49c8, { 0x94, 0x58, 0x03, 0x34, 0xd2, 0x0e, 0x07, 0xce } }; -extern "C" { -char godot_nir_arch_name[32]; -} - #ifdef PIX_ENABLED #if defined(__GNUC__) #define _MSC_VER 1800 @@ -78,12 +82,14 @@ char godot_nir_arch_name[32]; #endif #endif 
-RenderingContextDriverD3D12::RenderingContextDriverD3D12() { - CharString cs = Engine::get_singleton()->get_architecture_name().ascii(); - memcpy(godot_nir_arch_name, (const char *)cs.get_data(), cs.size()); -} +RenderingContextDriverD3D12::RenderingContextDriverD3D12() {} RenderingContextDriverD3D12::~RenderingContextDriverD3D12() { + // Let's release manually everything that may still be holding + // onto the DLLs before freeing them. + device_factory.Reset(); + dxgi_factory.Reset(); + if (lib_d3d12) { FreeLibrary(lib_d3d12); } diff --git a/drivers/d3d12/rendering_context_driver_d3d12.h b/drivers/d3d12/rendering_context_driver_d3d12.h index 2e286b6927..a2d828ded1 100644 --- a/drivers/d3d12/rendering_context_driver_d3d12.h +++ b/drivers/d3d12/rendering_context_driver_d3d12.h @@ -46,6 +46,13 @@ #pragma GCC diagnostic ignored "-Wswitch" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" #pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#elif defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#pragma clang diagnostic ignored "-Wstring-plus-int" +#pragma clang diagnostic ignored "-Wswitch" +#pragma clang diagnostic ignored "-Wmissing-field-initializers" +#pragma clang diagnostic ignored "-Wimplicit-fallthrough" #endif #if defined(AS) @@ -59,6 +66,8 @@ #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop +#elif defined(__clang__) +#pragma clang diagnostic pop #endif using Microsoft::WRL::ComPtr; diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index fb278a4d56..a445006058 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -36,6 +36,7 @@ #include "thirdparty/zlib/zlib.h" #include "d3d12_godot_nir_bridge.h" +#include "dxil_hash.h" #include "rendering_context_driver_d3d12.h" // No point in fighting warnings in Mesa. 
@@ -51,9 +52,14 @@ #pragma GCC diagnostic ignored "-Wshadow" #pragma GCC diagnostic ignored "-Wswitch" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#elif defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#pragma clang diagnostic ignored "-Wstring-plus-int" +#pragma clang diagnostic ignored "-Wswitch" +#pragma clang diagnostic ignored "-Wmissing-field-initializers" #endif -#include "dxil_validator.h" #include "nir_spirv.h" #include "nir_to_dxil.h" #include "spirv_to_dxil.h" @@ -63,6 +69,8 @@ extern "C" { #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop +#elif defined(__clang__) +#pragma clang diagnostic pop #endif #if defined(_MSC_VER) @@ -96,11 +104,6 @@ static const D3D12_RANGE VOID_RANGE = {}; static const uint32_t ROOT_CONSTANT_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RDD::MAX_UNIFORM_SETS + 1); static const uint32_t RUNTIME_DATA_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RDD::MAX_UNIFORM_SETS + 2); -#ifdef DEV_ENABLED -//#define DEBUG_COUNT_BARRIERS -#define CUSTOM_INFO_QUEUE_ENABLED 0 -#endif - /*****************/ /**** GENERIC ****/ /*****************/ @@ -869,6 +872,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel D3D12MA::ALLOCATION_DESC allocation_desc = {}; allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; + D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COMMON; switch (p_allocation_type) { case MEMORY_ALLOCATION_TYPE_CPU: { bool is_src = p_usage.has_flag(BUFFER_USAGE_TRANSFER_FROM_BIT); @@ -876,10 +880,12 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel if (is_src && !is_dst) { // Looks like a staging buffer: CPU maps, writes sequentially, then GPU copies to VRAM. 
allocation_desc.HeapType = D3D12_HEAP_TYPE_UPLOAD; + initial_state = D3D12_RESOURCE_STATE_GENERIC_READ; } if (is_dst && !is_src) { // Looks like a readback buffer: GPU copies from VRAM, then CPU maps and reads. allocation_desc.HeapType = D3D12_HEAP_TYPE_READBACK; + initial_state = D3D12_RESOURCE_STATE_COPY_DEST; } } break; case MEMORY_ALLOCATION_TYPE_GPU: { @@ -908,7 +914,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel res = allocator->CreateResource( &allocation_desc, reinterpret_cast<const D3D12_RESOURCE_DESC *>(&resource_desc), - D3D12_RESOURCE_STATE_COMMON, + initial_state, nullptr, allocation.GetAddressOf(), IID_PPV_ARGS(buffer.GetAddressOf())); @@ -922,7 +928,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel buf_info->resource = buffer.Get(); buf_info->owner_info.resource = buffer; buf_info->owner_info.allocation = allocation; - buf_info->owner_info.states.subresource_states.push_back(D3D12_RESOURCE_STATE_COMMON); + buf_info->owner_info.states.subresource_states.push_back(initial_state); buf_info->states_ptr = &buf_info->owner_info.states; buf_info->size = p_size; buf_info->flags.usable_as_uav = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); @@ -1462,7 +1468,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex uav_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format; } - if (p_slice_type != -1) { + if (p_slice_type != (TextureSliceType)-1) { // Complete description with slicing. 
switch (p_slice_type) { @@ -1560,7 +1566,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex tex_info->states_ptr = owner_tex_info->states_ptr; tex_info->format = p_view.format; tex_info->desc = new_tex_resource_desc; - if (p_slice_type == -1) { + if (p_slice_type == (TextureSliceType)-1) { tex_info->base_layer = owner_tex_info->base_layer; tex_info->layers = owner_tex_info->layers; tex_info->base_mip = owner_tex_info->base_mip; @@ -1741,7 +1747,7 @@ RDD::SamplerID RenderingDeviceDriverD3D12::sampler_create(const SamplerState &p_ slot = 1; } else { for (uint32_t i = 1; i < samplers.size(); i++) { - if (samplers[i].Filter == INT_MAX) { + if ((int)samplers[i].Filter == INT_MAX) { slot = i; break; } @@ -2137,33 +2143,59 @@ void RenderingDeviceDriverD3D12::command_pipeline_barrier(CommandBufferID p_cmd_ for (uint32_t i = 0; i < p_texture_barriers.size(); i++) { const TextureBarrier &texture_barrier_rd = p_texture_barriers[i]; const TextureInfo *texture_info = (const TextureInfo *)(texture_barrier_rd.texture.id); + if (texture_info->main_texture) { + texture_info = texture_info->main_texture; + } _rd_stages_and_access_to_d3d12(p_src_stages, texture_barrier_rd.prev_layout, texture_barrier_rd.src_access, texture_barrier_d3d12.SyncBefore, texture_barrier_d3d12.AccessBefore); _rd_stages_and_access_to_d3d12(p_dst_stages, texture_barrier_rd.next_layout, texture_barrier_rd.dst_access, texture_barrier_d3d12.SyncAfter, texture_barrier_d3d12.AccessAfter); texture_barrier_d3d12.LayoutBefore = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.prev_layout); texture_barrier_d3d12.LayoutAfter = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.next_layout); texture_barrier_d3d12.pResource = texture_info->resource; - texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = texture_barrier_rd.subresources.base_mipmap; - texture_barrier_d3d12.Subresources.NumMipLevels = texture_barrier_rd.subresources.mipmap_count; - 
texture_barrier_d3d12.Subresources.FirstArraySlice = texture_barrier_rd.subresources.base_layer; - texture_barrier_d3d12.Subresources.NumArraySlices = texture_barrier_rd.subresources.layer_count; - texture_barrier_d3d12.Subresources.FirstPlane = _compute_plane_slice(texture_info->format, texture_barrier_rd.subresources.aspect); - texture_barrier_d3d12.Subresources.NumPlanes = format_get_plane_count(texture_info->format); + if (texture_barrier_rd.subresources.mipmap_count == texture_info->mipmaps && texture_barrier_rd.subresources.layer_count == texture_info->layers) { + // So, all resources. Then, let's be explicit about it so D3D12 doesn't think + // we are dealing with a subset of subresources. + texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = 0xffffffff; + texture_barrier_d3d12.Subresources.NumMipLevels = 0; + // Because NumMipLevels == 0, all the other fields are ignored by D3D12. + } else { + texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = texture_barrier_rd.subresources.base_mipmap; + texture_barrier_d3d12.Subresources.NumMipLevels = texture_barrier_rd.subresources.mipmap_count; + texture_barrier_d3d12.Subresources.FirstArraySlice = texture_barrier_rd.subresources.base_layer; + texture_barrier_d3d12.Subresources.NumArraySlices = texture_barrier_rd.subresources.layer_count; + texture_barrier_d3d12.Subresources.FirstPlane = _compute_plane_slice(texture_info->format, texture_barrier_rd.subresources.aspect); + texture_barrier_d3d12.Subresources.NumPlanes = format_get_plane_count(texture_info->format); + } texture_barrier_d3d12.Flags = (texture_barrier_rd.prev_layout == RDD::TEXTURE_LAYOUT_UNDEFINED) ? D3D12_TEXTURE_BARRIER_FLAG_DISCARD : D3D12_TEXTURE_BARRIER_FLAG_NONE; texture_barriers.push_back(texture_barrier_d3d12); } // Define the barrier groups and execute. 
+ D3D12_BARRIER_GROUP barrier_groups[3] = {}; - barrier_groups[0].Type = D3D12_BARRIER_TYPE_GLOBAL; - barrier_groups[1].Type = D3D12_BARRIER_TYPE_BUFFER; - barrier_groups[2].Type = D3D12_BARRIER_TYPE_TEXTURE; - barrier_groups[0].NumBarriers = global_barriers.size(); - barrier_groups[1].NumBarriers = buffer_barriers.size(); - barrier_groups[2].NumBarriers = texture_barriers.size(); - barrier_groups[0].pGlobalBarriers = global_barriers.ptr(); - barrier_groups[1].pBufferBarriers = buffer_barriers.ptr(); - barrier_groups[2].pTextureBarriers = texture_barriers.ptr(); - cmd_list_7->Barrier(ARRAY_SIZE(barrier_groups), barrier_groups); + uint32_t barrier_groups_count = 0; + + if (!global_barriers.is_empty()) { + D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_groups_count++]; + barrier_group.Type = D3D12_BARRIER_TYPE_GLOBAL; + barrier_group.NumBarriers = global_barriers.size(); + barrier_group.pGlobalBarriers = global_barriers.ptr(); + } + + if (!buffer_barriers.is_empty()) { + D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_groups_count++]; + barrier_group.Type = D3D12_BARRIER_TYPE_BUFFER; + barrier_group.NumBarriers = buffer_barriers.size(); + barrier_group.pBufferBarriers = buffer_barriers.ptr(); + } + + if (!texture_barriers.is_empty()) { + D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_groups_count++]; + barrier_group.Type = D3D12_BARRIER_TYPE_TEXTURE; + barrier_group.NumBarriers = texture_barriers.size(); + barrier_group.pTextureBarriers = texture_barriers.ptr(); + } + + cmd_list_7->Barrier(barrier_groups_count, barrier_groups); } /****************/ @@ -2703,6 +2735,8 @@ D3D12_UNORDERED_ACCESS_VIEW_DESC RenderingDeviceDriverD3D12::_make_ranged_uav_fo uav_desc.Texture3D.MipSlice = mip; uav_desc.Texture3D.WSize >>= p_mipmap_offset; } break; + default: + break; } return uav_desc; @@ -2859,23 +2893,6 @@ static uint32_t SHADER_STAGES_BIT_OFFSET_INDICES[RenderingDevice::SHADER_STAGE_M /* SHADER_STAGE_COMPUTE */ 2, }; -dxil_validator 
*RenderingDeviceDriverD3D12::_get_dxil_validator_for_current_thread() { - MutexLock lock(dxil_mutex); - - int thread_idx = WorkerThreadPool::get_singleton()->get_thread_index(); - if (dxil_validators.has(thread_idx)) { - return dxil_validators[thread_idx]; - } - -#ifdef DEV_ENABLED - print_verbose("Creating DXIL validator for worker thread index " + itos(thread_idx)); -#endif - - dxil_validator *dxil_validator = dxil_create_validator(nullptr); - dxil_validators.insert(thread_idx, dxil_validator); - return dxil_validator; -} - uint32_t RenderingDeviceDriverD3D12::_shader_patch_dxil_specialization_constant( PipelineSpecializationConstantType p_type, const void *p_value, @@ -2998,40 +3015,20 @@ bool RenderingDeviceDriverD3D12::_shader_apply_specialization_constants( ShaderStage stage = E.key; if ((stages_re_sign_mask & (1 << stage))) { Vector<uint8_t> &bytecode = E.value; - bool sign_ok = _shader_sign_dxil_bytecode(stage, bytecode); - ERR_FAIL_COND_V(!sign_ok, false); + _shader_sign_dxil_bytecode(stage, bytecode); } } return true; } -bool RenderingDeviceDriverD3D12::_shader_sign_dxil_bytecode(ShaderStage p_stage, Vector<uint8_t> &r_dxil_blob) { - dxil_validator *validator = _get_dxil_validator_for_current_thread(); - if (!validator) { - if (is_in_developer_mode()) { - return true; - } else { - OS::get_singleton()->alert("Shader validation failed: DXIL.dll was not found, and developer mode is disabled.\n\nClick OK to exit."); - CRASH_NOW(); - } - } - - char *err = nullptr; - bool res = dxil_validate_module(validator, r_dxil_blob.ptrw(), r_dxil_blob.size(), &err); - if (!res) { - if (err) { - ERR_FAIL_COND_V_MSG(!res, false, "Shader signing invocation at stage " + String(SHADER_STAGE_NAMES[p_stage]) + " failed:\n" + String(err)); - } else { - ERR_FAIL_COND_V_MSG(!res, false, "Shader signing invocation at stage " + String(SHADER_STAGE_NAMES[p_stage]) + " failed."); - } - } - - return true; +void RenderingDeviceDriverD3D12::_shader_sign_dxil_bytecode(ShaderStage p_stage, 
Vector<uint8_t> &r_dxil_blob) { + uint8_t *w = r_dxil_blob.ptrw(); + compute_dxil_hash(w + 20, r_dxil_blob.size() - 20, w + 4); } String RenderingDeviceDriverD3D12::shader_get_binary_cache_key() { - return "D3D12-SV" + uitos(ShaderBinary::VERSION) + "-" + itos(shader_capabilities.shader_model) + (is_in_developer_mode() ? "dev" : ""); + return "D3D12-SV" + uitos(ShaderBinary::VERSION) + "-" + itos(shader_capabilities.shader_model); } Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) { @@ -3299,10 +3296,7 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec nir_to_dxil_options nir_to_dxil_options = {}; nir_to_dxil_options.environment = DXIL_ENVIRONMENT_VULKAN; nir_to_dxil_options.shader_model_max = shader_model_d3d_to_dxil(shader_capabilities.shader_model); - dxil_validator *validator = _get_dxil_validator_for_current_thread(); - if (validator) { - nir_to_dxil_options.validator_version_max = dxil_get_validator_version(validator); - } + nir_to_dxil_options.validator_version_max = NO_DXIL_VALIDATION; nir_to_dxil_options.godot_nir_callbacks = &godot_nir_callbacks; dxil_logger logger = {}; @@ -3353,8 +3347,7 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec for (KeyValue<ShaderStage, Vector<uint8_t>> &E : dxil_blobs) { ShaderStage stage = E.key; Vector<uint8_t> &dxil_blob = E.value; - bool sign_ok = _shader_sign_dxil_bytecode(stage, dxil_blob); - ERR_FAIL_COND_V(!sign_ok, Vector<uint8_t>()); + _shader_sign_dxil_bytecode(stage, dxil_blob); } // Build the root signature. 
@@ -3822,6 +3815,11 @@ void RenderingDeviceDriverD3D12::shader_free(ShaderID p_shader) { VersatileResource::free(resources_allocator, shader_info_in); } +void RenderingDeviceDriverD3D12::shader_destroy_modules(ShaderID p_shader) { + ShaderInfo *shader_info_in = (ShaderInfo *)p_shader.id; + shader_info_in->stages_bytecode.clear(); +} + /*********************/ /**** UNIFORM SET ****/ /*********************/ @@ -4094,7 +4092,6 @@ RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<Boun { uniform_set_info->resource_states.reserve(resource_states.size()); - uint32_t i = 0; for (const KeyValue<ResourceInfo *, NeededState> &E : resource_states) { UniformSetInfo::StateRequirement sr; sr.resource = E.key; @@ -4102,7 +4099,6 @@ RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<Boun sr.states = E.value.states; sr.shader_uniform_idx_mask = E.value.shader_uniform_idx_mask; uniform_set_info->resource_states.push_back(sr); - i++; } } @@ -5092,6 +5088,7 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd if (pass_info->attachments[i].load_op == ATTACHMENT_LOAD_OP_CLEAR) { clear.aspect.set_flag(TEXTURE_ASPECT_COLOR_BIT); clear.color_attachment = i; + tex_info->pending_clear.remove_from_list(); } } else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) { if (pass_info->attachments[i].stencil_load_op == ATTACHMENT_LOAD_OP_CLEAR) { @@ -5370,10 +5367,12 @@ void RenderingDeviceDriverD3D12::command_bind_render_pipeline(CommandBufferID p_ cmd_buf_info->cmd_list->OMSetBlendFactor(pso_extra_info.dyn_params.blend_constant.components); cmd_buf_info->cmd_list->OMSetStencilRef(pso_extra_info.dyn_params.stencil_reference); - ComPtr<ID3D12GraphicsCommandList1> command_list_1; - cmd_buf_info->cmd_list->QueryInterface(command_list_1.GetAddressOf()); - if (command_list_1) { - command_list_1->OMSetDepthBounds(pso_extra_info.dyn_params.depth_bounds_min, pso_extra_info.dyn_params.depth_bounds_max); + 
if (misc_features_support.depth_bounds_supported) { + ComPtr<ID3D12GraphicsCommandList1> command_list_1; + cmd_buf_info->cmd_list->QueryInterface(command_list_1.GetAddressOf()); + if (command_list_1) { + command_list_1->OMSetDepthBounds(pso_extra_info.dyn_params.depth_bounds_min, pso_extra_info.dyn_params.depth_bounds_max); + } } cmd_buf_info->render_pass_state.vf_info = pso_extra_info.vf_info; @@ -5763,8 +5762,15 @@ RDD::PipelineID RenderingDeviceDriverD3D12::render_pipeline_create( (&pipeline_desc.DepthStencilState)->BackFace.StencilDepthFailOp = RD_TO_D3D12_STENCIL_OP[p_depth_stencil_state.back_op.depth_fail]; (&pipeline_desc.DepthStencilState)->BackFace.StencilFunc = RD_TO_D3D12_COMPARE_OP[p_depth_stencil_state.back_op.compare]; - pso_extra_info.dyn_params.depth_bounds_min = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_min : 0.0f; - pso_extra_info.dyn_params.depth_bounds_max = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_max : 1.0f; + if (misc_features_support.depth_bounds_supported) { + pso_extra_info.dyn_params.depth_bounds_min = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_min : 0.0f; + pso_extra_info.dyn_params.depth_bounds_max = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_max : 1.0f; + } else { + if (p_depth_stencil_state.enable_depth_range) { + WARN_PRINT_ONCE("Depth bounds test is not supported by the GPU driver."); + } + } + pso_extra_info.dyn_params.stencil_reference = p_depth_stencil_state.front_op.reference; } @@ -6035,6 +6041,10 @@ void RenderingDeviceDriverD3D12::command_end_label(CommandBufferID p_cmd_buffer) #endif } +void RenderingDeviceDriverD3D12::command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) { + // TODO: Implement via DRED. 
+} + /********************/ /**** SUBMISSION ****/ /********************/ @@ -6281,15 +6291,6 @@ RenderingDeviceDriverD3D12::RenderingDeviceDriverD3D12(RenderingContextDriverD3D } RenderingDeviceDriverD3D12::~RenderingDeviceDriverD3D12() { - { - MutexLock lock(dxil_mutex); - for (const KeyValue<int, dxil_validator *> &E : dxil_validators) { - if (E.value) { - dxil_destroy_validator(E.value); - } - } - } - glsl_type_singleton_decref(); } @@ -6485,6 +6486,12 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { subgroup_capabilities.wave_ops_supported = options1.WaveOps; } + D3D12_FEATURE_DATA_D3D12_OPTIONS2 options2 = {}; + res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2, &options2, sizeof(options2)); + if (SUCCEEDED(res)) { + misc_features_support.depth_bounds_supported = options2.DepthBoundsTestSupported; + } + D3D12_FEATURE_DATA_D3D12_OPTIONS3 options3 = {}; res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &options3, sizeof(options3)); if (SUCCEEDED(res)) { @@ -6570,6 +6577,12 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { print_verbose(String("- D3D12 16-bit ops supported: ") + (shader_capabilities.native_16bit_ops ? 
"yes" : "no")); + if (misc_features_support.depth_bounds_supported) { + print_verbose("- Depth bounds test supported"); + } else { + print_verbose("- Depth bounds test not supported"); + } + return OK; } @@ -6635,7 +6648,7 @@ Error RenderingDeviceDriverD3D12::_initialize_frames(uint32_t p_frame_count) { D3D12MA::ALLOCATION_DESC allocation_desc = {}; allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; - CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + //CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); uint32_t resource_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_resource_descriptors_per_frame"); uint32_t sampler_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame"); uint32_t misc_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_misc_descriptors_per_frame"); diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index 1782819238..d8381279ec 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -36,6 +36,11 @@ #include "core/templates/self_list.h" #include "servers/rendering/rendering_device_driver.h" +#ifndef _MSC_VER +// Match current version used by MinGW, MSVC and Direct3D 12 headers use 500. 
+#define __REQUIRED_RPCNDR_H_VERSION__ 475 +#endif + #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wnon-virtual-dtor" @@ -43,6 +48,13 @@ #pragma GCC diagnostic ignored "-Wswitch" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" #pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#elif defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#pragma clang diagnostic ignored "-Wstring-plus-int" +#pragma clang diagnostic ignored "-Wswitch" +#pragma clang diagnostic ignored "-Wmissing-field-initializers" +#pragma clang diagnostic ignored "-Wimplicit-fallthrough" #endif #include "d3dx12.h" @@ -59,13 +71,19 @@ #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop +#elif defined(__clang__) +#pragma clang diagnostic pop #endif using Microsoft::WRL::ComPtr; #define D3D12_BITCODE_OFFSETS_NUM_STAGES 3 -struct dxil_validator; +#ifdef DEV_ENABLED +//#define DEBUG_COUNT_BARRIERS +#define CUSTOM_INFO_QUEUE_ENABLED 0 +#endif + class RenderingContextDriverD3D12; // Design principles: @@ -126,6 +144,10 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { bool enhanced_barriers_supported = false; }; + struct MiscFeaturesSupport { + bool depth_bounds_supported = false; + }; + RenderingContextDriverD3D12 *context_driver = nullptr; RenderingContextDriver::Device context_device; ComPtr<IDXGIAdapter> adapter; @@ -141,6 +163,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { StorageBufferCapabilities storage_buffer_capabilities; FormatCapabilities format_capabilities; BarrierCapabilities barrier_capabilities; + MiscFeaturesSupport misc_features_support; String pipeline_cache_id; class DescriptorsHeap { @@ -257,7 +280,7 @@ private: LocalVector<D3D12_RESOURCE_BARRIER> res_barriers; uint32_t res_barriers_count = 0; uint32_t res_barriers_batch = 0; -#ifdef DEV_ENABLED +#ifdef DEBUG_COUNT_BARRIERS int 
frame_barriers_count = 0; int frame_barriers_batches_count = 0; uint64_t frame_barriers_cpu_time = 0; @@ -678,10 +701,6 @@ private: uint32_t root_signature_crc = 0; }; - Mutex dxil_mutex; - HashMap<int, dxil_validator *> dxil_validators; // One per WorkerThreadPool thread used for shader compilation, plus one (-1) for all the other. - - dxil_validator *_get_dxil_validator_for_current_thread(); uint32_t _shader_patch_dxil_specialization_constant( PipelineSpecializationConstantType p_type, const void *p_value, @@ -692,7 +711,7 @@ private: const ShaderInfo *p_shader_info, VectorView<PipelineSpecializationConstant> p_specialization_constants, HashMap<ShaderStage, Vector<uint8_t>> &r_final_stages_bytecode); - bool _shader_sign_dxil_bytecode(ShaderStage p_stage, Vector<uint8_t> &r_dxil_blob); + void _shader_sign_dxil_bytecode(ShaderStage p_stage, Vector<uint8_t> &r_dxil_blob); public: virtual String shader_get_binary_cache_key() override final; @@ -700,6 +719,7 @@ public: virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name) override final; virtual uint32_t shader_get_layout_hash(ShaderID p_shader) override final; virtual void shader_free(ShaderID p_shader) override final; + virtual void shader_destroy_modules(ShaderID p_shader) override final; /*********************/ /**** UNIFORM SET ****/ @@ -931,6 +951,11 @@ public: virtual void command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) override final; virtual void command_end_label(CommandBufferID p_cmd_buffer) override final; + /****************/ + /**** DEBUG *****/ + /****************/ + virtual void command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) override final; + /********************/ /**** SUBMISSION ****/ /********************/ diff --git a/drivers/egl/egl_manager.cpp b/drivers/egl/egl_manager.cpp index 9c1d08331d..4477ba7752 100644 --- a/drivers/egl/egl_manager.cpp 
+++ b/drivers/egl/egl_manager.cpp @@ -357,7 +357,7 @@ Error EGLManager::initialize(void *p_native_display) { // have to temporarily get a proper display and reload EGL once again to // initialize everything else. if (!gladLoaderLoadEGL(EGL_NO_DISPLAY)) { - ERR_FAIL_V_MSG(ERR_UNAVAILABLE, "Can't load EGL."); + ERR_FAIL_V_MSG(ERR_UNAVAILABLE, "Can't load EGL dynamic library."); } EGLDisplay tmp_display = EGL_NO_DISPLAY; @@ -387,7 +387,7 @@ Error EGLManager::initialize(void *p_native_display) { int version = gladLoaderLoadEGL(tmp_display); if (!version) { eglTerminate(tmp_display); - ERR_FAIL_V_MSG(ERR_UNAVAILABLE, "Can't load EGL."); + ERR_FAIL_V_MSG(ERR_UNAVAILABLE, "Can't load EGL dynamic library."); } int major = GLAD_VERSION_MAJOR(version); diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index 941b1a1b28..b9206f310e 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -647,18 +647,17 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou _record_item_commands(ci, p_to_render_target, p_canvas_transform_inverse, current_clip, blend_mode, p_lights, index, batch_broken, r_sdf_used, Point2()); } else { Point2 start_pos = ci->repeat_size * -(ci->repeat_times / 2); - Point2 end_pos = ci->repeat_size * ci->repeat_times + ci->repeat_size + start_pos; - Point2 pos = start_pos; - - do { - do { - _record_item_commands(ci, p_to_render_target, p_canvas_transform_inverse, current_clip, blend_mode, p_lights, index, batch_broken, r_sdf_used, pos); - pos.y += ci->repeat_size.y; - } while (pos.y < end_pos.y); - - pos.x += ci->repeat_size.x; - pos.y = start_pos.y; - } while (pos.x < end_pos.x); + Point2 offset; + + int repeat_times_x = ci->repeat_size.x ? ci->repeat_times : 0; + int repeat_times_y = ci->repeat_size.y ? 
ci->repeat_times : 0; + for (int ry = 0; ry <= repeat_times_y; ry++) { + offset.y = start_pos.y + ry * ci->repeat_size.y; + for (int rx = 0; rx <= repeat_times_x; rx++) { + offset.x = start_pos.x + rx * ci->repeat_size.x; + _record_item_commands(ci, p_to_render_target, p_canvas_transform_inverse, current_clip, blend_mode, p_lights, index, batch_broken, r_sdf_used, offset); + } + } } } @@ -809,7 +808,7 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou state.last_item_index += index; } -void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, const Point2 &p_offset) { +void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, const Point2 &p_repeat_offset) { RenderingServer::CanvasItemTextureFilter texture_filter = p_item->texture_filter == RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT ? state.default_filter : p_item->texture_filter; if (texture_filter != state.canvas_instance_batches[state.current_batch_index].filter) { @@ -826,11 +825,11 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, RID p_rend state.canvas_instance_batches[state.current_batch_index].repeat = texture_repeat; } - Transform2D base_transform = p_canvas_transform_inverse * p_item->final_transform; - - if (p_offset.x || p_offset.y) { - base_transform *= Transform2D(0, p_offset / p_item->xform_curr.get_scale()); // TODO: Interpolate or explain why not needed. 
+ Transform2D base_transform = p_item->final_transform; + if (p_item->repeat_source_item && (p_repeat_offset.x || p_repeat_offset.y)) { + base_transform.columns[2] += p_item->repeat_source_item->final_transform.basis_xform(p_repeat_offset); } + base_transform = p_canvas_transform_inverse * base_transform; Transform2D draw_transform; // Used by transform command @@ -1735,7 +1734,7 @@ void RasterizerCanvasGLES3::light_update_directional_shadow(RID p_rid, int p_sha Vector2 center = p_clip_rect.get_center(); - float to_edge_distance = ABS(light_dir.dot(p_clip_rect.get_support(light_dir)) - light_dir.dot(center)); + float to_edge_distance = ABS(light_dir.dot(p_clip_rect.get_support(-light_dir)) - light_dir.dot(center)); Vector2 from_pos = center - light_dir * (to_edge_distance + p_cull_distance); float distance = to_edge_distance * 2.0 + p_cull_distance; diff --git a/drivers/gles3/rasterizer_canvas_gles3.h b/drivers/gles3/rasterizer_canvas_gles3.h index 7fc9992c3d..027f717eb7 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.h +++ b/drivers/gles3/rasterizer_canvas_gles3.h @@ -63,7 +63,6 @@ class RasterizerCanvasGLES3 : public RendererCanvasRender { FLAGS_TRANSPOSE_RECT = (1 << 10), FLAGS_NINEPACH_DRAW_CENTER = (1 << 12), - FLAGS_USING_PARTICLES = (1 << 13), FLAGS_USE_SKELETON = (1 << 15), FLAGS_NINEPATCH_H_MODE_SHIFT = 16, @@ -363,7 +362,7 @@ public: void canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used, RenderingMethod::RenderInfo *r_render_info = nullptr) override; void _render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer = false, RenderingMethod::RenderInfo *r_render_info = nullptr); - void 
_record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_break_batch, bool &r_sdf_used, const Point2 &p_offset); + void _record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_break_batch, bool &r_sdf_used, const Point2 &p_repeat_offset); void _render_batch(Light *p_lights, uint32_t p_index, RenderingMethod::RenderInfo *r_render_info = nullptr); bool _bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant, uint64_t p_specialization); void _new_batch(bool &r_batch_broken); diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp index ae39c86d44..19ef3d416c 100644 --- a/drivers/gles3/rasterizer_gles3.cpp +++ b/drivers/gles3/rasterizer_gles3.cpp @@ -62,6 +62,10 @@ #define _EXT_DEBUG_SEVERITY_LOW_ARB 0x9148 #define _EXT_DEBUG_OUTPUT 0x92E0 +#ifndef GL_FRAMEBUFFER_SRGB +#define GL_FRAMEBUFFER_SRGB 0x8DB9 +#endif + #ifndef GLAPIENTRY #if defined(WINDOWS_ENABLED) #define GLAPIENTRY APIENTRY @@ -72,7 +76,7 @@ #if !defined(IOS_ENABLED) && !defined(WEB_ENABLED) // We include EGL below to get debug callback on GLES2 platforms, -// but EGL is not available on iOS. +// but EGL is not available on iOS or the web. 
#define CAN_DEBUG #endif @@ -107,7 +111,7 @@ void RasterizerGLES3::end_frame(bool p_swap_buffers) { utils->capture_timestamps_end(); } -void RasterizerGLES3::end_viewport(bool p_swap_buffers) { +void RasterizerGLES3::gl_end_frame(bool p_swap_buffers) { if (p_swap_buffers) { DisplayServer::get_singleton()->swap_buffers(); } else { @@ -345,6 +349,9 @@ RasterizerGLES3::RasterizerGLES3() { } } + // Disable OpenGL linear to sRGB conversion, because Godot will always do this conversion itself. + glDisable(GL_FRAMEBUFFER_SRGB); + // OpenGL needs to be initialized before initializing the Rasterizers config = memnew(GLES3::Config); utilities = memnew(GLES3::Utilities); @@ -491,7 +498,7 @@ void RasterizerGLES3::set_boot_image(const Ref<Image> &p_image, const Color &p_c copy_effects->copy_to_rect(screenrect); glBindTexture(GL_TEXTURE_2D, 0); - end_viewport(true); + gl_end_frame(true); texture_storage->texture_free(texture); } diff --git a/drivers/gles3/rasterizer_gles3.h b/drivers/gles3/rasterizer_gles3.h index 0d0c26016d..80a4a792bb 100644 --- a/drivers/gles3/rasterizer_gles3.h +++ b/drivers/gles3/rasterizer_gles3.h @@ -99,7 +99,7 @@ public: void blit_render_targets_to_screen(DisplayServer::WindowID p_screen, const BlitToScreen *p_render_targets, int p_amount); - void end_viewport(bool p_swap_buffers); + void gl_end_frame(bool p_swap_buffers); void end_frame(bool p_swap_buffers); void finalize(); diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index 9ea030bbd4..3ed8042f3f 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -777,7 +777,6 @@ void RasterizerSceneGLES3::_draw_sky(RID p_env, const Projection &p_projection, ERR_FAIL_COND(p_env.is_null()); Sky *sky = sky_owner.get_or_null(environment_get_sky(p_env)); - ERR_FAIL_NULL(sky); GLES3::SkyMaterialData *material_data = nullptr; RID sky_material; @@ -851,6 +850,15 @@ void RasterizerSceneGLES3::_draw_sky(RID p_env, const 
Projection &p_projection, material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::SKY_ENERGY_MULTIPLIER, p_sky_energy_multiplier, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::LUMINANCE_MULTIPLIER, p_luminance_multiplier, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); + Color fog_color = environment_get_fog_light_color(p_env).srgb_to_linear() * environment_get_fog_light_energy(p_env); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::FOG_ENABLED, environment_get_fog_enabled(p_env), shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::FOG_AERIAL_PERSPECTIVE, environment_get_fog_aerial_perspective(p_env), shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::FOG_LIGHT_COLOR, fog_color, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::FOG_SUN_SCATTER, environment_get_fog_sun_scatter(p_env), shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::FOG_DENSITY, environment_get_fog_density(p_env), shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::FOG_SKY_AFFECT, environment_get_fog_sky_affect(p_env), shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); + material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::DIRECTIONAL_LIGHT_COUNT, sky_globals.directional_light_count, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants); + if (p_use_multiview) { glBindBufferBase(GL_UNIFORM_BUFFER, 
SKY_MULTIVIEW_UNIFORM_LOCATION, scene_state.multiview_buffer); glBindBuffer(GL_UNIFORM_BUFFER, 0); @@ -1420,7 +1428,7 @@ void RasterizerSceneGLES3::_fill_render_list(RenderListType p_render_list, const #else bool force_alpha = false; #endif - if (!force_alpha && (surf->flags & GeometryInstanceSurface::FLAG_PASS_OPAQUE)) { + if (!force_alpha && (surf->flags & (GeometryInstanceSurface::FLAG_PASS_DEPTH | GeometryInstanceSurface::FLAG_PASS_OPAQUE))) { rl->add_element(surf); } if (force_alpha || (surf->flags & GeometryInstanceSurface::FLAG_PASS_ALPHA)) { @@ -2247,7 +2255,6 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ bool glow_enabled = false; if (p_environment.is_valid()) { glow_enabled = environment_get_glow_enabled(p_environment); - rb->ensure_internal_buffers(); // Ensure our intermediate buffer is available if glow is enabled if (glow_enabled) { // If glow is enabled, we apply tonemapping etc. in post, so disable it during rendering apply_color_adjustments_in_post = true; @@ -2339,7 +2346,6 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ if (render_data.environment.is_valid()) { bool use_bcs = environment_get_adjustments_enabled(render_data.environment); if (use_bcs) { - rb->ensure_internal_buffers(); apply_color_adjustments_in_post = true; } @@ -2473,6 +2479,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ if (is_reflection_probe) { fbo = GLES3::LightStorage::get_singleton()->reflection_probe_instance_get_framebuffer(render_data.reflection_probe, render_data.reflection_probe_pass); } else { + rb->set_apply_color_adjustments_in_post(apply_color_adjustments_in_post); fbo = rb->get_render_fbo(); } @@ -2500,7 +2507,9 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ glColorMask(0, 0, 0, 0); RasterizerGLES3::clear_depth(0.0); glClear(GL_DEPTH_BUFFER_BIT); - glDrawBuffers(0, nullptr); + // Some desktop GL implementations fall 
apart when using Multiview with GL_NONE. + GLuint db = p_camera_data->view_count > 1 ? GL_COLOR_ATTACHMENT0 : GL_NONE; + glDrawBuffers(1, &db); uint64_t spec_constant = SceneShaderGLES3::DISABLE_FOG | SceneShaderGLES3::DISABLE_LIGHT_DIRECTIONAL | SceneShaderGLES3::DISABLE_LIGHTMAP | SceneShaderGLES3::DISABLE_LIGHT_OMNI | @@ -2586,7 +2595,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_ scene_state.enable_gl_depth_draw(false); - if (draw_sky) { + if (draw_sky || draw_sky_fog_only) { RENDER_TIMESTAMP("Render Sky"); scene_state.enable_gl_depth_test(true); @@ -3201,6 +3210,10 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, if (lm->uses_spherical_harmonics) { spec_constants |= SceneShaderGLES3::USE_SH_LIGHTMAP; } + + if (lightmap_bicubic_upscale) { + spec_constants |= SceneShaderGLES3::LIGHTMAP_BICUBIC_FILTER; + } } else if (inst->lightmap_sh) { spec_constants |= SceneShaderGLES3::USE_LIGHTMAP_CAPTURE; } else { @@ -3343,6 +3356,11 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, Vector4 uv_scale(inst->lightmap_uv_scale.position.x, inst->lightmap_uv_scale.position.y, inst->lightmap_uv_scale.size.x, inst->lightmap_uv_scale.size.y); material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::LIGHTMAP_UV_SCALE, uv_scale, shader->version, instance_variant, spec_constants); + if (lightmap_bicubic_upscale) { + Vector2 light_texture_size(lm->light_texture_size.x, lm->light_texture_size.y); + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::LIGHTMAP_TEXTURE_SIZE, light_texture_size, shader->version, instance_variant, spec_constants); + } + float exposure_normalization = 1.0; if (p_render_data->camera_attributes.is_valid()) { float enf = RSG::camera_attributes->camera_attributes_get_exposure_normalization_factor(p_render_data->camera_attributes); @@ -4038,6 +4056,10 @@ void RasterizerSceneGLES3::decals_set_filter(RS::DecalFilter 
p_filter) { void RasterizerSceneGLES3::light_projectors_set_filter(RS::LightProjectorFilter p_filter) { } +void RasterizerSceneGLES3::lightmaps_set_bicubic_filter(bool p_enable) { + lightmap_bicubic_upscale = p_enable; +} + RasterizerSceneGLES3::RasterizerSceneGLES3() { singleton = this; @@ -4051,6 +4073,7 @@ RasterizerSceneGLES3::RasterizerSceneGLES3() { positional_soft_shadow_filter_set_quality((RS::ShadowQuality)(int)GLOBAL_GET("rendering/lights_and_shadows/positional_shadow/soft_shadow_filter_quality")); directional_soft_shadow_filter_set_quality((RS::ShadowQuality)(int)GLOBAL_GET("rendering/lights_and_shadows/directional_shadow/soft_shadow_filter_quality")); + lightmaps_set_bicubic_filter(GLOBAL_GET("rendering/lightmapping/lightmap_gi/use_bicubic_filter")); { // Setup Lights diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h index 4c70c43244..e4af8f99e9 100644 --- a/drivers/gles3/rasterizer_scene_gles3.h +++ b/drivers/gles3/rasterizer_scene_gles3.h @@ -680,6 +680,8 @@ protected: bool glow_bicubic_upscale = false; RS::EnvironmentSSRRoughnessQuality ssr_roughness_quality = RS::ENV_SSR_ROUGHNESS_QUALITY_LOW; + bool lightmap_bicubic_upscale = false; + /* Sky */ struct SkyGlobals { @@ -863,6 +865,7 @@ public: void decals_set_filter(RS::DecalFilter p_filter) override; void light_projectors_set_filter(RS::LightProjectorFilter p_filter) override; + virtual void lightmaps_set_bicubic_filter(bool p_enable) override; RasterizerSceneGLES3(); ~RasterizerSceneGLES3(); diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp index 4a15ed827a..5a0f394db0 100644 --- a/drivers/gles3/shader_gles3.cpp +++ b/drivers/gles3/shader_gles3.cpp @@ -698,7 +698,8 @@ void ShaderGLES3::_clear_version(Version *p_version) { void ShaderGLES3::_initialize_version(Version *p_version) { ERR_FAIL_COND(p_version->variants.size() > 0); - if (shader_cache_dir_valid && _load_from_cache(p_version)) { + bool use_cache = shader_cache_dir_valid 
&& !(feedback_count > 0 && GLES3::Config::get_singleton()->disable_transform_feedback_shader_cache); + if (use_cache && _load_from_cache(p_version)) { return; } p_version->variants.reserve(variant_count); @@ -709,7 +710,7 @@ void ShaderGLES3::_initialize_version(Version *p_version) { _compile_specialization(spec, i, p_version, specialization_default_mask); p_version->variants[i].insert(specialization_default_mask, spec); } - if (shader_cache_dir_valid) { + if (use_cache) { _save_to_cache(p_version); } } diff --git a/drivers/gles3/shaders/canvas.glsl b/drivers/gles3/shaders/canvas.glsl index 65332c06be..e358230747 100644 --- a/drivers/gles3/shaders/canvas.glsl +++ b/drivers/gles3/shaders/canvas.glsl @@ -239,13 +239,6 @@ void main() { model_matrix = model_matrix * transpose(mat4(instance_xform0, instance_xform1, vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))); #endif // USE_INSTANCING -#if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE) - if (bool(read_draw_data_flags & FLAGS_USING_PARTICLES)) { - //scale by texture size - vertex /= read_draw_data_color_texture_pixel_size; - } -#endif - vec2 color_texture_pixel_size = read_draw_data_color_texture_pixel_size; #ifdef USE_POINT_SIZE @@ -346,14 +339,16 @@ uniform sampler2D color_texture; //texunit:0 layout(location = 0) out vec4 frag_color; +/* clang-format off */ +// This needs to be outside clang-format so the ubo comment is in the right place #ifdef MATERIAL_UNIFORMS_USED -layout(std140) uniform MaterialUniforms{ -//ubo:4 +layout(std140) uniform MaterialUniforms{ //ubo:4 #MATERIAL_UNIFORMS }; #endif +/* clang-format on */ #GLOBALS diff --git a/drivers/gles3/shaders/canvas_uniforms_inc.glsl b/drivers/gles3/shaders/canvas_uniforms_inc.glsl index 21fd4d3d9d..f6ad2b730a 100644 --- a/drivers/gles3/shaders/canvas_uniforms_inc.glsl +++ b/drivers/gles3/shaders/canvas_uniforms_inc.glsl @@ -14,7 +14,6 @@ #define FLAGS_TRANSPOSE_RECT uint(1 << 10) // (1 << 11) is for FLAGS_CONVERT_ATTRIBUTES_TO_LINEAR in RD backends, 
unused here. #define FLAGS_NINEPACH_DRAW_CENTER uint(1 << 12) -#define FLAGS_USING_PARTICLES uint(1 << 13) #define FLAGS_NINEPATCH_H_MODE_SHIFT 16 #define FLAGS_NINEPATCH_V_MODE_SHIFT 18 diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index be7a6aba57..6143ce2167 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -36,6 +36,7 @@ ADDITIVE_OMNI = false ADDITIVE_SPOT = false RENDER_MATERIAL = false SECOND_REFLECTION_PROBE = false +LIGHTMAP_BICUBIC_FILTER = false #[vertex] @@ -43,6 +44,7 @@ SECOND_REFLECTION_PROBE = false #define M_PI 3.14159265359 #define SHADER_IS_SRGB true +#define SHADER_SPACE_FAR -1.0 #include "stdlib_inc.glsl" @@ -582,6 +584,9 @@ void main() { /* clang-format on */ #define SHADER_IS_SRGB true +#define SHADER_SPACE_FAR -1.0 + +#define FLAGS_NON_UNIFORM_SCALE (1 << 4) /* Varyings */ @@ -869,13 +874,15 @@ uniform lowp uint directional_shadow_index; #if !defined(ADDITIVE_OMNI) float sample_shadow(highp sampler2DShadow shadow, float shadow_pixel_size, vec4 pos) { - float avg = textureProj(shadow, pos); + // Use textureProjLod with LOD set to 0.0 over textureProj, as textureProj not working correctly on ANGLE with Metal backend. 
+ // https://github.com/godotengine/godot/issues/93537 + float avg = textureProjLod(shadow, pos, 0.0); #ifdef SHADOW_MODE_PCF_13 pos /= pos.w; - avg += textureProj(shadow, vec4(pos.xy + vec2(shadow_pixel_size * 2.0, 0.0), pos.zw)); - avg += textureProj(shadow, vec4(pos.xy + vec2(-shadow_pixel_size * 2.0, 0.0), pos.zw)); - avg += textureProj(shadow, vec4(pos.xy + vec2(0.0, shadow_pixel_size * 2.0), pos.zw)); - avg += textureProj(shadow, vec4(pos.xy + vec2(0.0, -shadow_pixel_size * 2.0), pos.zw)); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(shadow_pixel_size * 2.0, 0.0), pos.zw), 0.0); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(-shadow_pixel_size * 2.0, 0.0), pos.zw), 0.0); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(0.0, shadow_pixel_size * 2.0), pos.zw), 0.0); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(0.0, -shadow_pixel_size * 2.0), pos.zw), 0.0); // Early bail if distant samples are fully shaded (or none are shaded) to improve performance. if (avg <= 0.000001) { @@ -886,23 +893,23 @@ float sample_shadow(highp sampler2DShadow shadow, float shadow_pixel_size, vec4 return 1.0; } - avg += textureProj(shadow, vec4(pos.xy + vec2(shadow_pixel_size, 0.0), pos.zw)); - avg += textureProj(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, 0.0), pos.zw)); - avg += textureProj(shadow, vec4(pos.xy + vec2(0.0, shadow_pixel_size), pos.zw)); - avg += textureProj(shadow, vec4(pos.xy + vec2(0.0, -shadow_pixel_size), pos.zw)); - avg += textureProj(shadow, vec4(pos.xy + vec2(shadow_pixel_size, shadow_pixel_size), pos.zw)); - avg += textureProj(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, shadow_pixel_size), pos.zw)); - avg += textureProj(shadow, vec4(pos.xy + vec2(shadow_pixel_size, -shadow_pixel_size), pos.zw)); - avg += textureProj(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, -shadow_pixel_size), pos.zw)); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(shadow_pixel_size, 0.0), pos.zw), 0.0); + avg += textureProjLod(shadow, vec4(pos.xy + 
vec2(-shadow_pixel_size, 0.0), pos.zw), 0.0); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(0.0, shadow_pixel_size), pos.zw), 0.0); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(0.0, -shadow_pixel_size), pos.zw), 0.0); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(shadow_pixel_size, shadow_pixel_size), pos.zw), 0.0); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, shadow_pixel_size), pos.zw), 0.0); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(shadow_pixel_size, -shadow_pixel_size), pos.zw), 0.0); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, -shadow_pixel_size), pos.zw), 0.0); return avg * (1.0 / 13.0); #endif #ifdef SHADOW_MODE_PCF_5 pos /= pos.w; - avg += textureProj(shadow, vec4(pos.xy + vec2(shadow_pixel_size, 0.0), pos.zw)); - avg += textureProj(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, 0.0), pos.zw)); - avg += textureProj(shadow, vec4(pos.xy + vec2(0.0, shadow_pixel_size), pos.zw)); - avg += textureProj(shadow, vec4(pos.xy + vec2(0.0, -shadow_pixel_size), pos.zw)); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(shadow_pixel_size, 0.0), pos.zw), 0.0); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, 0.0), pos.zw), 0.0); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(0.0, shadow_pixel_size), pos.zw), 0.0); + avg += textureProjLod(shadow, vec4(pos.xy + vec2(0.0, -shadow_pixel_size), pos.zw), 0.0); return avg * (1.0 / 5.0); #endif @@ -921,6 +928,10 @@ uniform lowp uint lightmap_slice; uniform highp vec4 lightmap_uv_scale; uniform float lightmap_exposure_normalization; +#ifdef LIGHTMAP_BICUBIC_FILTER +uniform highp vec2 lightmap_texture_size; +#endif + #ifdef USE_SH_LIGHTMAP uniform mediump mat3 lightmap_normal_xform; #endif // USE_SH_LIGHTMAP @@ -953,6 +964,7 @@ ivec2 multiview_uv(ivec2 uv) { uniform highp mat4 world_transform; uniform mediump float opaque_prepass_threshold; +uniform highp uint model_flags; #if defined(RENDER_MATERIAL) layout(location = 0) out 
vec4 albedo_output_buffer; @@ -1412,6 +1424,67 @@ void reflection_process(samplerCube reflection_map, #endif // !MODE_RENDER_DEPTH +#ifdef LIGHTMAP_BICUBIC_FILTER +// w0, w1, w2, and w3 are the four cubic B-spline basis functions +float w0(float a) { + return (1.0 / 6.0) * (a * (a * (-a + 3.0) - 3.0) + 1.0); +} + +float w1(float a) { + return (1.0 / 6.0) * (a * a * (3.0 * a - 6.0) + 4.0); +} + +float w2(float a) { + return (1.0 / 6.0) * (a * (a * (-3.0 * a + 3.0) + 3.0) + 1.0); +} + +float w3(float a) { + return (1.0 / 6.0) * (a * a * a); +} + +// g0 and g1 are the two amplitude functions +float g0(float a) { + return w0(a) + w1(a); +} + +float g1(float a) { + return w2(a) + w3(a); +} + +// h0 and h1 are the two offset functions +float h0(float a) { + return -1.0 + w1(a) / (w0(a) + w1(a)); +} + +float h1(float a) { + return 1.0 + w3(a) / (w2(a) + w3(a)); +} + +vec4 textureArray_bicubic(sampler2DArray tex, vec3 uv, vec2 texture_size) { + vec2 texel_size = vec2(1.0) / texture_size; + + uv.xy = uv.xy * texture_size + vec2(0.5); + + vec2 iuv = floor(uv.xy); + vec2 fuv = fract(uv.xy); + + float g0x = g0(fuv.x); + float g1x = g1(fuv.x); + float h0x = h0(fuv.x); + float h1x = h1(fuv.x); + float h0y = h0(fuv.y); + float h1y = h1(fuv.y); + + vec2 p0 = (vec2(iuv.x + h0x, iuv.y + h0y) - vec2(0.5)) * texel_size; + vec2 p1 = (vec2(iuv.x + h1x, iuv.y + h0y) - vec2(0.5)) * texel_size; + vec2 p2 = (vec2(iuv.x + h0x, iuv.y + h1y) - vec2(0.5)) * texel_size; + vec2 p3 = (vec2(iuv.x + h1x, iuv.y + h1y) - vec2(0.5)) * texel_size; + + return (g0(fuv.y) * (g0x * texture(tex, vec3(p0, uv.z)) + g1x * texture(tex, vec3(p1, uv.z)))) + + (g1(fuv.y) * (g0x * texture(tex, vec3(p2, uv.z)) + g1x * texture(tex, vec3(p3, uv.z)))); +} +#endif //LIGHTMAP_BICUBIC_FILTER + void main() { //lay out everything, whatever is unused is optimized away anyway vec3 vertex = vertex_interp; @@ -1519,6 +1592,13 @@ void main() { vec3 light_vertex = vertex; #endif //LIGHT_VERTEX_USED + highp mat3 
model_normal_matrix; + if (bool(model_flags & uint(FLAGS_NON_UNIFORM_SCALE))) { + model_normal_matrix = transpose(inverse(mat3(model_matrix))); + } else { + model_normal_matrix = mat3(model_matrix); + } + { #CODE : FRAGMENT } @@ -1607,6 +1687,7 @@ void main() { #ifdef BASE_PASS /////////////////////// LIGHTING ////////////////////////////// +#ifndef AMBIENT_LIGHT_DISABLED // IBL precalculations float ndotv = clamp(dot(normal, view), 0.0, 1.0); vec3 F = f0 + (max(vec3(1.0 - roughness), f0) - f0) * pow(1.0 - ndotv, 5.0); @@ -1719,43 +1800,45 @@ void main() { #ifdef USE_SH_LIGHTMAP uvw.z *= 4.0; // SH textures use 4 times more data. + +#ifdef LIGHTMAP_BICUBIC_FILTER + vec3 lm_light_l0 = textureArray_bicubic(lightmap_textures, uvw + vec3(0.0, 0.0, 0.0), lightmap_texture_size).rgb; + vec3 lm_light_l1n1 = textureArray_bicubic(lightmap_textures, uvw + vec3(0.0, 0.0, 1.0), lightmap_texture_size).rgb; + vec3 lm_light_l1_0 = textureArray_bicubic(lightmap_textures, uvw + vec3(0.0, 0.0, 2.0), lightmap_texture_size).rgb; + vec3 lm_light_l1p1 = textureArray_bicubic(lightmap_textures, uvw + vec3(0.0, 0.0, 3.0), lightmap_texture_size).rgb; +#else vec3 lm_light_l0 = textureLod(lightmap_textures, uvw + vec3(0.0, 0.0, 0.0), 0.0).rgb; vec3 lm_light_l1n1 = textureLod(lightmap_textures, uvw + vec3(0.0, 0.0, 1.0), 0.0).rgb; vec3 lm_light_l1_0 = textureLod(lightmap_textures, uvw + vec3(0.0, 0.0, 2.0), 0.0).rgb; vec3 lm_light_l1p1 = textureLod(lightmap_textures, uvw + vec3(0.0, 0.0, 3.0), 0.0).rgb; +#endif vec3 n = normalize(lightmap_normal_xform * normal); - ambient_light += lm_light_l0 * 0.282095f; - ambient_light += lm_light_l1n1 * 0.32573 * n.y * lightmap_exposure_normalization; - ambient_light += lm_light_l1_0 * 0.32573 * n.z * lightmap_exposure_normalization; - ambient_light += lm_light_l1p1 * 0.32573 * n.x * lightmap_exposure_normalization; - if (metallic > 0.01) { // Since the more direct bounced light is lost, we can kind of fake it with this trick. 
- vec3 r = reflect(normalize(-vertex), normal); - specular_light += lm_light_l1n1 * 0.32573 * r.y * lightmap_exposure_normalization; - specular_light += lm_light_l1_0 * 0.32573 * r.z * lightmap_exposure_normalization; - specular_light += lm_light_l1p1 * 0.32573 * r.x * lightmap_exposure_normalization; - } + ambient_light += lm_light_l0 * lightmap_exposure_normalization; + ambient_light += lm_light_l1n1 * n.y * lightmap_exposure_normalization; + ambient_light += lm_light_l1_0 * n.z * lightmap_exposure_normalization; + ambient_light += lm_light_l1p1 * n.x * lightmap_exposure_normalization; +#else +#ifdef LIGHTMAP_BICUBIC_FILTER + ambient_light += textureArray_bicubic(lightmap_textures, uvw, lightmap_texture_size).rgb * lightmap_exposure_normalization; #else ambient_light += textureLod(lightmap_textures, uvw, 0.0).rgb * lightmap_exposure_normalization; #endif +#endif } #endif // USE_LIGHTMAP #endif // USE_LIGHTMAP_CAPTURE #endif // !DISABLE_LIGHTMAP - { -#if defined(AMBIENT_LIGHT_DISABLED) - ambient_light = vec3(0.0, 0.0, 0.0); -#else - ambient_light *= albedo.rgb; - ambient_light *= ao; -#endif // AMBIENT_LIGHT_DISABLED - } + ambient_light *= albedo.rgb; + ambient_light *= ao; + +#endif // !AMBIENT_LIGHT_DISABLED // convert ao to direct light ao ao = mix(1.0, ao, ao_light_affect); - +#ifndef AMBIENT_LIGHT_DISABLED { #if defined(DIFFUSE_TOON) //simplify for toon, as @@ -1777,6 +1860,8 @@ void main() { #endif } +#endif // !AMBIENT_LIGHT_DISABLED + #ifndef DISABLE_LIGHT_DIRECTIONAL for (uint i = uint(0); i < scene_data.directional_light_count; i++) { #if defined(USE_LIGHTMAP) && !defined(DISABLE_LIGHTMAP) @@ -1869,7 +1954,7 @@ void main() { alpha = min(alpha, clamp(length(ambient_light), 0.0, 1.0)); #if defined(ALPHA_SCISSOR_USED) - if (alpha < alpha_scissor) { + if (alpha < alpha_scissor_threshold) { discard; } #endif // !ALPHA_SCISSOR_USED @@ -1920,11 +2005,7 @@ void main() { fog.xy = unpackHalf2x16(fog_rg); fog.zw = unpackHalf2x16(fog_ba); -#ifndef DISABLE_FOG - if 
(scene_data.fog_enabled) { - frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); - } -#endif // !DISABLE_FOG + frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); #endif // !FOG_DISABLED // Tonemap before writing as we are writing to an sRGB framebuffer @@ -2131,11 +2212,7 @@ void main() { fog.xy = unpackHalf2x16(fog_rg); fog.zw = unpackHalf2x16(fog_ba); -#ifndef DISABLE_FOG - if (scene_data.fog_enabled) { - additive_light_color *= (1.0 - fog.a); - } -#endif // !DISABLE_FOG + additive_light_color *= (1.0 - fog.a); #endif // !FOG_DISABLED // Tonemap before writing as we are writing to an sRGB framebuffer diff --git a/drivers/gles3/shaders/skeleton.glsl b/drivers/gles3/shaders/skeleton.glsl index aad856a5a2..66befbc3b2 100644 --- a/drivers/gles3/shaders/skeleton.glsl +++ b/drivers/gles3/shaders/skeleton.glsl @@ -59,7 +59,7 @@ layout(location = 10) in highp uvec4 in_bone_attrib; layout(location = 11) in mediump vec4 in_weight_attrib; #endif -uniform mediump sampler2D skeleton_texture; // texunit:0 +uniform highp sampler2D skeleton_texture; // texunit:0 #endif /* clang-format on */ diff --git a/drivers/gles3/shaders/sky.glsl b/drivers/gles3/shaders/sky.glsl index 9de65ba960..f734e4b355 100644 --- a/drivers/gles3/shaders/sky.glsl +++ b/drivers/gles3/shaders/sky.glsl @@ -108,11 +108,11 @@ uniform float sky_energy_multiplier; uniform float luminance_multiplier; uniform float fog_aerial_perspective; -uniform vec3 fog_light_color; +uniform vec4 fog_light_color; uniform float fog_sun_scatter; uniform bool fog_enabled; uniform float fog_density; -uniform float z_far; +uniform float fog_sky_affect; uniform uint directional_light_count; #ifdef USE_MULTIVIEW @@ -135,6 +135,24 @@ vec3 interleaved_gradient_noise(vec2 pos) { } #endif +#if !defined(DISABLE_FOG) +vec4 fog_process(vec3 view, vec3 sky_color) { + vec3 fog_color = mix(fog_light_color.rgb, sky_color, fog_aerial_perspective); + + if (fog_sun_scatter > 0.001) { + vec4 sun_scatter = vec4(0.0); + float sun_total = 0.0; + 
for (uint i = 0u; i < directional_light_count; i++) { + vec3 light_color = directional_lights.data[i].color_size.xyz * directional_lights.data[i].direction_energy.w; + float light_amount = pow(max(dot(view, directional_lights.data[i].direction_energy.xyz), 0.0), 8.0); + fog_color += light_color * light_amount * fog_sun_scatter; + } + } + + return vec4(fog_color, 1.0); +} +#endif // !DISABLE_FOG + void main() { vec3 cube_normal; #ifdef USE_MULTIVIEW @@ -203,6 +221,21 @@ void main() { // Convert to Linear for tonemapping so color matches scene shader better color = srgb_to_linear(color); + +#if !defined(DISABLE_FOG) && !defined(USE_CUBEMAP_PASS) + + // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. + if (fog_enabled) { + vec4 fog = fog_process(cube_normal, color.rgb); + color.rgb = mix(color.rgb, fog.rgb, fog.a * fog_sky_affect); + } + + if (custom_fog.a > 0.0) { + color.rgb = mix(color.rgb, custom_fog.rgb, custom_fog.a); + } + +#endif // DISABLE_FOG + color *= exposure; #ifdef APPLY_TONEMAPPING color = apply_tonemapping(color, white); diff --git a/drivers/gles3/shaders/stdlib_inc.glsl b/drivers/gles3/shaders/stdlib_inc.glsl index 029084c34c..f88c218506 100644 --- a/drivers/gles3/shaders/stdlib_inc.glsl +++ b/drivers/gles3/shaders/stdlib_inc.glsl @@ -9,19 +9,17 @@ // Floating point pack/unpack functions are part of the GLSL ES 300 specification used by web and mobile. 
uint float2half(uint f) { - uint e = f & uint(0x7f800000); - if (e <= uint(0x38000000)) { - return uint(0); - } else { - return ((f >> uint(16)) & uint(0x8000)) | - (((e - uint(0x38000000)) >> uint(13)) & uint(0x7c00)) | - ((f >> uint(13)) & uint(0x03ff)); - } + uint b = f + uint(0x00001000); + uint e = (b & uint(0x7F800000)) >> 23; + uint m = b & uint(0x007FFFFF); + return (b & uint(0x80000000)) >> uint(16) | uint(e > uint(112)) * ((((e - uint(112)) << uint(10)) & uint(0x7C00)) | m >> uint(13)) | (uint(e < uint(113)) & uint(e > uint(101))) * ((((uint(0x007FF000) + m) >> (uint(125) - e)) + uint(1)) >> uint(1)) | uint(e > uint(143)) * uint(0x7FFF); } uint half2float(uint h) { - uint h_e = h & uint(0x7c00); - return ((h & uint(0x8000)) << uint(16)) | uint((h_e >> uint(10)) != uint(0)) * (((h_e + uint(0x1c000)) << uint(13)) | ((h & uint(0x03ff)) << uint(13))); + uint e = (h & uint(0x7C00)) >> uint(10); + uint m = (h & uint(0x03FF)) << uint(13); + uint v = m >> uint(23); + return (h & uint(0x8000)) << uint(16) | uint(e != uint(0)) * ((e + uint(112)) << uint(23) | m) | (uint(e == uint(0)) & uint(m != uint(0))) * ((v - uint(37)) << uint(23) | ((m << (uint(150) - v)) & uint(0x007FE000))); } uint godot_packHalf2x16(vec2 v) { diff --git a/drivers/gles3/storage/config.cpp b/drivers/gles3/storage/config.cpp index 1a14902c7c..2b3c19dbb8 100644 --- a/drivers/gles3/storage/config.cpp +++ b/drivers/gles3/storage/config.cpp @@ -35,6 +35,10 @@ #include "../rasterizer_gles3.h" #include "texture_storage.h" +#ifdef WEB_ENABLED +#include <emscripten/html5_webgl.h> +#endif + using namespace GLES3; #define _GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT 0x84FF @@ -44,6 +48,23 @@ Config *Config::singleton = nullptr; Config::Config() { singleton = this; +#ifdef WEB_ENABLED + // Starting with Emscripten 3.1.51, glGetStringi(GL_EXTENSIONS, i) will only ever return + // a fixed list of extensions, regardless of what additional extensions are enabled. 
This + // isn't very useful for us in determining which extensions we can rely on here. So, instead + // we use emscripten_webgl_get_supported_extensions() to get all supported extensions, which + // is what Emscripten 3.1.50 and earlier do. + { + char *extension_array_string = emscripten_webgl_get_supported_extensions(); + PackedStringArray extension_array = String((const char *)extension_array_string).split(" "); + extensions.reserve(extension_array.size() * 2); + for (const String &s : extension_array) { + extensions.insert(s); + extensions.insert("GL_" + s); + } + free(extension_array_string); + } +#else { GLint max_extensions = 0; glGetIntegerv(GL_NUM_EXTENSIONS, &max_extensions); @@ -55,6 +76,7 @@ Config::Config() { extensions.insert((const char *)s); } } +#endif bptc_supported = extensions.has("GL_ARB_texture_compression_bptc") || extensions.has("EXT_texture_compression_bptc"); astc_supported = extensions.has("GL_KHR_texture_compression_astc") || extensions.has("GL_OES_texture_compression_astc") || extensions.has("GL_KHR_texture_compression_astc_ldr") || extensions.has("GL_KHR_texture_compression_astc_hdr"); @@ -196,6 +218,8 @@ Config::Config() { //https://github.com/godotengine/godot/issues/92662#issuecomment-2161199477 //disable_particles_workaround = false; } + } else if (rendering_device_name == "PowerVR Rogue GE8320") { + disable_transform_feedback_shader_cache = true; } } diff --git a/drivers/gles3/storage/config.h b/drivers/gles3/storage/config.h index 0c9f9bc275..ff72fc5b58 100644 --- a/drivers/gles3/storage/config.h +++ b/drivers/gles3/storage/config.h @@ -96,6 +96,9 @@ public: bool disable_particles_workaround = false; // set to 'true' to disable 'GPUParticles' bool flip_xy_workaround = false; + // PowerVR GE 8320 workaround + bool disable_transform_feedback_shader_cache = false; + #ifdef ANDROID_ENABLED PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC eglFramebufferTextureMultiviewOVR = nullptr; PFNGLTEXSTORAGE3DMULTISAMPLEPROC eglTexStorage3DMultisample = 
nullptr; diff --git a/drivers/gles3/storage/light_storage.cpp b/drivers/gles3/storage/light_storage.cpp index f9547502f4..aab1aadf02 100644 --- a/drivers/gles3/storage/light_storage.cpp +++ b/drivers/gles3/storage/light_storage.cpp @@ -1046,6 +1046,9 @@ void LightStorage::lightmap_set_textures(RID p_lightmap, RID p_light, bool p_use lightmap->light_texture = p_light; lightmap->uses_spherical_harmonics = p_uses_spherical_haromics; + Vector3i light_texture_size = GLES3::TextureStorage::get_singleton()->texture_get_size(lightmap->light_texture); + lightmap->light_texture_size = Vector2i(light_texture_size.x, light_texture_size.y); + GLuint tex = GLES3::TextureStorage::get_singleton()->texture_get_texid(lightmap->light_texture); glBindTexture(GL_TEXTURE_2D_ARRAY, tex); glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); diff --git a/drivers/gles3/storage/light_storage.h b/drivers/gles3/storage/light_storage.h index b6e64c9492..ed00dd235f 100644 --- a/drivers/gles3/storage/light_storage.h +++ b/drivers/gles3/storage/light_storage.h @@ -180,6 +180,7 @@ struct Lightmap { bool interior = false; AABB bounds = AABB(Vector3(), Vector3(1, 1, 1)); float baked_exposure = 1.0; + Vector2i light_texture_size; int32_t array_index = -1; //unassigned PackedVector3Array points; PackedColorArray point_sh; @@ -202,7 +203,7 @@ struct LightmapInstance { class LightStorage : public RendererLightStorage { public: - enum ShadowAtlastQuadrant { + enum ShadowAtlastQuadrant : uint32_t { QUADRANT_SHIFT = 27, OMNI_LIGHT_FLAG = 1 << 26, SHADOW_INDEX_MASK = OMNI_LIGHT_FLAG - 1, diff --git a/drivers/gles3/storage/material_storage.cpp b/drivers/gles3/storage/material_storage.cpp index bacf607c66..a37eba3b15 100644 --- a/drivers/gles3/storage/material_storage.cpp +++ b/drivers/gles3/storage/material_storage.cpp @@ -586,11 +586,7 @@ void ShaderData::get_shader_uniform_list(List<PropertyInfo> *p_param_list) const if (E.value.scope != ShaderLanguage::ShaderNode::Uniform::SCOPE_LOCAL) { 
continue; } - if (E.value.texture_order >= 0) { - filtered_uniforms.push_back(Pair<StringName, int>(E.key, E.value.texture_order + 100000)); - } else { - filtered_uniforms.push_back(Pair<StringName, int>(E.key, E.value.order)); - } + filtered_uniforms.push_back(Pair<StringName, int>(E.key, E.value.prop_order)); } int uniform_count = filtered_uniforms.size(); sorter.sort(filtered_uniforms.ptr(), uniform_count); @@ -640,7 +636,7 @@ bool ShaderData::is_parameter_texture(const StringName &p_param) const { return false; } - return uniforms[p_param].texture_order >= 0; + return uniforms[p_param].is_texture(); } /////////////////////////////////////////////////////////////////////////// @@ -719,7 +715,7 @@ void MaterialData::update_uniform_buffer(const HashMap<StringName, ShaderLanguag bool uses_global_buffer = false; for (const KeyValue<StringName, ShaderLanguage::ShaderNode::Uniform> &E : p_uniforms) { - if (E.value.order < 0) { + if (E.value.is_texture()) { continue; // texture, does not go here } @@ -874,7 +870,8 @@ void MaterialData::update_textures(const HashMap<StringName, Variant> &p_paramet if (V->value.is_array()) { Array array = (Array)V->value; if (uniform_array_size > 0) { - for (int j = 0; j < array.size(); j++) { + int size = MIN(uniform_array_size, array.size()); + for (int j = 0; j < size; j++) { textures.push_back(array[j]); } } else { @@ -1111,15 +1108,15 @@ MaterialStorage::MaterialStorage() { global_shader_uniforms.buffer_size = MAX(16, (int)GLOBAL_GET("rendering/limits/global_shader_variables/buffer_size")); if (global_shader_uniforms.buffer_size * sizeof(GlobalShaderUniforms::Value) > uint32_t(Config::get_singleton()->max_uniform_buffer_size)) { + // Limit to maximum support UBO size. 
global_shader_uniforms.buffer_size = uint32_t(Config::get_singleton()->max_uniform_buffer_size) / sizeof(GlobalShaderUniforms::Value); - WARN_PRINT("Project setting \"rendering/limits/global_shader_variables/buffer_size\" exceeds maximum uniform buffer size of: " + itos(Config::get_singleton()->max_uniform_buffer_size / sizeof(GlobalShaderUniforms::Value)) + ". Falling back on maximum buffer size."); } global_shader_uniforms.buffer_values = memnew_arr(GlobalShaderUniforms::Value, global_shader_uniforms.buffer_size); memset(global_shader_uniforms.buffer_values, 0, sizeof(GlobalShaderUniforms::Value) * global_shader_uniforms.buffer_size); global_shader_uniforms.buffer_usage = memnew_arr(GlobalShaderUniforms::ValueUsage, global_shader_uniforms.buffer_size); - global_shader_uniforms.buffer_dirty_regions = memnew_arr(bool, global_shader_uniforms.buffer_size / GlobalShaderUniforms::BUFFER_DIRTY_REGION_SIZE); - memset(global_shader_uniforms.buffer_dirty_regions, 0, sizeof(bool) * global_shader_uniforms.buffer_size / GlobalShaderUniforms::BUFFER_DIRTY_REGION_SIZE); + global_shader_uniforms.buffer_dirty_regions = memnew_arr(bool, 1 + (global_shader_uniforms.buffer_size / GlobalShaderUniforms::BUFFER_DIRTY_REGION_SIZE)); + memset(global_shader_uniforms.buffer_dirty_regions, 0, sizeof(bool) * (1 + (global_shader_uniforms.buffer_size / GlobalShaderUniforms::BUFFER_DIRTY_REGION_SIZE))); glGenBuffers(1, &global_shader_uniforms.buffer); glBindBuffer(GL_UNIFORM_BUFFER, global_shader_uniforms.buffer); glBufferData(GL_UNIFORM_BUFFER, sizeof(GlobalShaderUniforms::Value) * global_shader_uniforms.buffer_size, nullptr, GL_DYNAMIC_DRAW); @@ -1276,11 +1273,12 @@ MaterialStorage::MaterialStorage() { actions.renames["CUSTOM2"] = "custom2_attrib"; actions.renames["CUSTOM3"] = "custom3_attrib"; actions.renames["OUTPUT_IS_SRGB"] = "SHADER_IS_SRGB"; + actions.renames["CLIP_SPACE_FAR"] = "SHADER_SPACE_FAR"; actions.renames["LIGHT_VERTEX"] = "light_vertex"; actions.renames["NODE_POSITION_WORLD"] 
= "model_matrix[3].xyz"; actions.renames["CAMERA_POSITION_WORLD"] = "scene_data.inv_view_matrix[3].xyz"; - actions.renames["CAMERA_DIRECTION_WORLD"] = "scene_data.view_matrix[3].xyz"; + actions.renames["CAMERA_DIRECTION_WORLD"] = "scene_data.inv_view_matrix[2].xyz"; actions.renames["CAMERA_VISIBLE_LAYERS"] = "scene_data.camera_visible_layers"; actions.renames["NODE_POSITION_VIEW"] = "(scene_data.view_matrix * model_matrix)[3].xyz"; @@ -1788,7 +1786,7 @@ void MaterialStorage::global_shader_parameter_add(const StringName &p_name, RS:: //is vector, allocate in buffer and update index gv.buffer_index = _global_shader_uniform_allocate(gv.buffer_elements); - ERR_FAIL_COND_MSG(gv.buffer_index < 0, vformat("Failed allocating global variable '%s' out of buffer memory. Consider increasing it in the Project Settings.", String(p_name))); + ERR_FAIL_COND_MSG(gv.buffer_index < 0, vformat("Failed allocating global variable '%s' out of buffer memory. Consider increasing rendering/limits/global_shader_variables/buffer_size in the Project Settings. Maximum items supported by this hardware is: %d.", String(p_name), Config::get_singleton()->max_uniform_buffer_size / sizeof(GlobalShaderUniforms::Value))); global_shader_uniforms.buffer_usage[gv.buffer_index].elements = gv.buffer_elements; _global_shader_uniform_store_in_buffer(gv.buffer_index, gv.type, gv.value); _global_shader_uniform_mark_buffer_dirty(gv.buffer_index, gv.buffer_elements); @@ -1999,7 +1997,7 @@ int32_t MaterialStorage::global_shader_parameters_instance_allocate(RID p_instan ERR_FAIL_COND_V(global_shader_uniforms.instance_buffer_pos.has(p_instance), -1); int32_t pos = _global_shader_uniform_allocate(ShaderLanguage::MAX_INSTANCE_UNIFORM_INDICES); global_shader_uniforms.instance_buffer_pos[p_instance] = pos; //save anyway - ERR_FAIL_COND_V_MSG(pos < 0, -1, "Too many instances using shader instance variables. 
Increase buffer size in Project Settings."); + ERR_FAIL_COND_V_MSG(pos < 0, -1, vformat("Too many instances using shader instance variables. Consider increasing rendering/limits/global_shader_variables/buffer_size in the Project Settings. Maximum items supported by this hardware is: %d.", Config::get_singleton()->max_uniform_buffer_size / sizeof(GlobalShaderUniforms::Value))); global_shader_uniforms.buffer_usage[pos].elements = ShaderLanguage::MAX_INSTANCE_UNIFORM_INDICES; return pos; } @@ -2079,7 +2077,7 @@ void MaterialStorage::global_shader_parameters_instance_update(RID p_instance, i void MaterialStorage::_update_global_shader_uniforms() { MaterialStorage *material_storage = MaterialStorage::get_singleton(); if (global_shader_uniforms.buffer_dirty_region_count > 0) { - uint32_t total_regions = global_shader_uniforms.buffer_size / GlobalShaderUniforms::BUFFER_DIRTY_REGION_SIZE; + uint32_t total_regions = 1 + (global_shader_uniforms.buffer_size / GlobalShaderUniforms::BUFFER_DIRTY_REGION_SIZE); if (total_regions / global_shader_uniforms.buffer_dirty_region_count <= 4) { // 25% of regions dirty, just update all buffer glBindBuffer(GL_UNIFORM_BUFFER, global_shader_uniforms.buffer); diff --git a/drivers/gles3/storage/mesh_storage.cpp b/drivers/gles3/storage/mesh_storage.cpp index d8a5b960b8..b55a2e0a8a 100644 --- a/drivers/gles3/storage/mesh_storage.cpp +++ b/drivers/gles3/storage/mesh_storage.cpp @@ -301,7 +301,7 @@ void MeshStorage::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) Vector<uint8_t> ir = new_surface.index_data; wr = wf_indices.ptrw(); - if (new_surface.vertex_count < (1 << 16)) { + if (new_surface.vertex_count <= 65536) { // Read 16 bit indices. 
const uint16_t *src_idx = (const uint16_t *)ir.ptr(); for (uint32_t i = 0; i + 5 < wf_index_count; i += 6) { @@ -1432,15 +1432,17 @@ void MeshStorage::update_mesh_instances() { /* MULTIMESH API */ -RID MeshStorage::multimesh_allocate() { +RID MeshStorage::_multimesh_allocate() { return multimesh_owner.allocate_rid(); } -void MeshStorage::multimesh_initialize(RID p_rid) { +void MeshStorage::_multimesh_initialize(RID p_rid) { multimesh_owner.initialize_rid(p_rid, MultiMesh()); } -void MeshStorage::multimesh_free(RID p_rid) { +void MeshStorage::_multimesh_free(RID p_rid) { + // Remove from interpolator. + _interpolation_data.notify_free_multimesh(p_rid); _update_dirty_multimeshes(); multimesh_allocate_data(p_rid, 0, RS::MULTIMESH_TRANSFORM_2D); MultiMesh *multimesh = multimesh_owner.get_or_null(p_rid); @@ -1448,7 +1450,7 @@ void MeshStorage::multimesh_free(RID p_rid) { multimesh_owner.free(p_rid); } -void MeshStorage::multimesh_allocate_data(RID p_multimesh, int p_instances, RS::MultimeshTransformFormat p_transform_format, bool p_use_colors, bool p_use_custom_data) { +void MeshStorage::_multimesh_allocate_data(RID p_multimesh, int p_instances, RS::MultimeshTransformFormat p_transform_format, bool p_use_colors, bool p_use_custom_data) { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL(multimesh); @@ -1495,13 +1497,13 @@ void MeshStorage::multimesh_allocate_data(RID p_multimesh, int p_instances, RS:: multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_MULTIMESH); } -int MeshStorage::multimesh_get_instance_count(RID p_multimesh) const { +int MeshStorage::_multimesh_get_instance_count(RID p_multimesh) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL_V(multimesh, 0); return multimesh->instances; } -void MeshStorage::multimesh_set_mesh(RID p_multimesh, RID p_mesh) { +void MeshStorage::_multimesh_set_mesh(RID p_multimesh, RID p_mesh) { MultiMesh *multimesh = 
multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL(multimesh); if (multimesh->mesh == p_mesh || p_mesh.is_null()) { @@ -1651,7 +1653,7 @@ void MeshStorage::_multimesh_re_create_aabb(MultiMesh *multimesh, const float *p multimesh->aabb = aabb; } -void MeshStorage::multimesh_instance_set_transform(RID p_multimesh, int p_index, const Transform3D &p_transform) { +void MeshStorage::_multimesh_instance_set_transform(RID p_multimesh, int p_index, const Transform3D &p_transform) { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL(multimesh); ERR_FAIL_INDEX(p_index, multimesh->instances); @@ -1681,7 +1683,7 @@ void MeshStorage::multimesh_instance_set_transform(RID p_multimesh, int p_index, _multimesh_mark_dirty(multimesh, p_index, true); } -void MeshStorage::multimesh_instance_set_transform_2d(RID p_multimesh, int p_index, const Transform2D &p_transform) { +void MeshStorage::_multimesh_instance_set_transform_2d(RID p_multimesh, int p_index, const Transform2D &p_transform) { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL(multimesh); ERR_FAIL_INDEX(p_index, multimesh->instances); @@ -1707,7 +1709,7 @@ void MeshStorage::multimesh_instance_set_transform_2d(RID p_multimesh, int p_ind _multimesh_mark_dirty(multimesh, p_index, true); } -void MeshStorage::multimesh_instance_set_color(RID p_multimesh, int p_index, const Color &p_color) { +void MeshStorage::_multimesh_instance_set_color(RID p_multimesh, int p_index, const Color &p_color) { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL(multimesh); ERR_FAIL_INDEX(p_index, multimesh->instances); @@ -1727,7 +1729,7 @@ void MeshStorage::multimesh_instance_set_color(RID p_multimesh, int p_index, con _multimesh_mark_dirty(multimesh, p_index, false); } -void MeshStorage::multimesh_instance_set_custom_data(RID p_multimesh, int p_index, const Color &p_color) { +void MeshStorage::_multimesh_instance_set_custom_data(RID p_multimesh, int p_index, 
const Color &p_color) { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL(multimesh); ERR_FAIL_INDEX(p_index, multimesh->instances); @@ -1746,27 +1748,27 @@ void MeshStorage::multimesh_instance_set_custom_data(RID p_multimesh, int p_inde _multimesh_mark_dirty(multimesh, p_index, false); } -RID MeshStorage::multimesh_get_mesh(RID p_multimesh) const { +RID MeshStorage::_multimesh_get_mesh(RID p_multimesh) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL_V(multimesh, RID()); return multimesh->mesh; } -void MeshStorage::multimesh_set_custom_aabb(RID p_multimesh, const AABB &p_aabb) { +void MeshStorage::_multimesh_set_custom_aabb(RID p_multimesh, const AABB &p_aabb) { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL(multimesh); multimesh->custom_aabb = p_aabb; multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); } -AABB MeshStorage::multimesh_get_custom_aabb(RID p_multimesh) const { +AABB MeshStorage::_multimesh_get_custom_aabb(RID p_multimesh) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL_V(multimesh, AABB()); return multimesh->custom_aabb; } -AABB MeshStorage::multimesh_get_aabb(RID p_multimesh) const { +AABB MeshStorage::_multimesh_get_aabb(RID p_multimesh) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL_V(multimesh, AABB()); if (multimesh->custom_aabb != AABB()) { @@ -1778,7 +1780,7 @@ AABB MeshStorage::multimesh_get_aabb(RID p_multimesh) const { return multimesh->aabb; } -Transform3D MeshStorage::multimesh_instance_get_transform(RID p_multimesh, int p_index) const { +Transform3D MeshStorage::_multimesh_instance_get_transform(RID p_multimesh, int p_index) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL_V(multimesh, Transform3D()); ERR_FAIL_INDEX_V(p_index, multimesh->instances, Transform3D()); @@ -1809,7 +1811,7 @@ Transform3D 
MeshStorage::multimesh_instance_get_transform(RID p_multimesh, int p return t; } -Transform2D MeshStorage::multimesh_instance_get_transform_2d(RID p_multimesh, int p_index) const { +Transform2D MeshStorage::_multimesh_instance_get_transform_2d(RID p_multimesh, int p_index) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL_V(multimesh, Transform2D()); ERR_FAIL_INDEX_V(p_index, multimesh->instances, Transform2D()); @@ -1834,7 +1836,7 @@ Transform2D MeshStorage::multimesh_instance_get_transform_2d(RID p_multimesh, in return t; } -Color MeshStorage::multimesh_instance_get_color(RID p_multimesh, int p_index) const { +Color MeshStorage::_multimesh_instance_get_color(RID p_multimesh, int p_index) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL_V(multimesh, Color()); ERR_FAIL_INDEX_V(p_index, multimesh->instances, Color()); @@ -1858,7 +1860,7 @@ Color MeshStorage::multimesh_instance_get_color(RID p_multimesh, int p_index) co return c; } -Color MeshStorage::multimesh_instance_get_custom_data(RID p_multimesh, int p_index) const { +Color MeshStorage::_multimesh_instance_get_custom_data(RID p_multimesh, int p_index) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL_V(multimesh, Color()); ERR_FAIL_INDEX_V(p_index, multimesh->instances, Color()); @@ -1882,7 +1884,7 @@ Color MeshStorage::multimesh_instance_get_custom_data(RID p_multimesh, int p_ind return c; } -void MeshStorage::multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_buffer) { +void MeshStorage::_multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_buffer) { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL(multimesh); @@ -1971,7 +1973,7 @@ void MeshStorage::multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_b } } -Vector<float> MeshStorage::multimesh_get_buffer(RID p_multimesh) const { +Vector<float> MeshStorage::_multimesh_get_buffer(RID 
p_multimesh) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL_V(multimesh, Vector<float>()); Vector<float> ret; @@ -2043,7 +2045,7 @@ Vector<float> MeshStorage::multimesh_get_buffer(RID p_multimesh) const { } } -void MeshStorage::multimesh_set_visible_instances(RID p_multimesh, int p_visible) { +void MeshStorage::_multimesh_set_visible_instances(RID p_multimesh, int p_visible) { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL(multimesh); ERR_FAIL_COND(p_visible < -1 || p_visible > multimesh->instances); @@ -2065,12 +2067,19 @@ void MeshStorage::multimesh_set_visible_instances(RID p_multimesh, int p_visible multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_MULTIMESH_VISIBLE_INSTANCES); } -int MeshStorage::multimesh_get_visible_instances(RID p_multimesh) const { +int MeshStorage::_multimesh_get_visible_instances(RID p_multimesh) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL_V(multimesh, 0); return multimesh->visible_instances; } +MeshStorage::MultiMeshInterpolator *MeshStorage::_multimesh_get_interpolator(RID p_multimesh) const { + MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); + ERR_FAIL_NULL_V_MSG(multimesh, nullptr, "Multimesh not found: " + itos(p_multimesh.get_id())); + + return &multimesh->interpolator; +} + void MeshStorage::_update_dirty_multimeshes() { while (multimesh_dirty_list) { MultiMesh *multimesh = multimesh_dirty_list; diff --git a/drivers/gles3/storage/mesh_storage.h b/drivers/gles3/storage/mesh_storage.h index d246e7725c..a2edbb9c48 100644 --- a/drivers/gles3/storage/mesh_storage.h +++ b/drivers/gles3/storage/mesh_storage.h @@ -205,6 +205,8 @@ struct MultiMesh { bool dirty = false; MultiMesh *dirty_list = nullptr; + RendererMeshStorage::MultiMeshInterpolator interpolator; + Dependency dependency; }; @@ -493,32 +495,34 @@ public: MultiMesh *get_multimesh(RID p_rid) { return 
multimesh_owner.get_or_null(p_rid); }; bool owns_multimesh(RID p_rid) { return multimesh_owner.owns(p_rid); }; - virtual RID multimesh_allocate() override; - virtual void multimesh_initialize(RID p_rid) override; - virtual void multimesh_free(RID p_rid) override; - virtual void multimesh_allocate_data(RID p_multimesh, int p_instances, RS::MultimeshTransformFormat p_transform_format, bool p_use_colors = false, bool p_use_custom_data = false) override; - virtual int multimesh_get_instance_count(RID p_multimesh) const override; - - virtual void multimesh_set_mesh(RID p_multimesh, RID p_mesh) override; - virtual void multimesh_instance_set_transform(RID p_multimesh, int p_index, const Transform3D &p_transform) override; - virtual void multimesh_instance_set_transform_2d(RID p_multimesh, int p_index, const Transform2D &p_transform) override; - virtual void multimesh_instance_set_color(RID p_multimesh, int p_index, const Color &p_color) override; - virtual void multimesh_instance_set_custom_data(RID p_multimesh, int p_index, const Color &p_color) override; - - virtual RID multimesh_get_mesh(RID p_multimesh) const override; - virtual void multimesh_set_custom_aabb(RID p_multimesh, const AABB &p_aabb) override; - virtual AABB multimesh_get_custom_aabb(RID p_multimesh) const override; - virtual AABB multimesh_get_aabb(RID p_multimesh) const override; - - virtual Transform3D multimesh_instance_get_transform(RID p_multimesh, int p_index) const override; - virtual Transform2D multimesh_instance_get_transform_2d(RID p_multimesh, int p_index) const override; - virtual Color multimesh_instance_get_color(RID p_multimesh, int p_index) const override; - virtual Color multimesh_instance_get_custom_data(RID p_multimesh, int p_index) const override; - virtual void multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_buffer) override; - virtual Vector<float> multimesh_get_buffer(RID p_multimesh) const override; - - virtual void multimesh_set_visible_instances(RID p_multimesh, 
int p_visible) override; - virtual int multimesh_get_visible_instances(RID p_multimesh) const override; + virtual RID _multimesh_allocate() override; + virtual void _multimesh_initialize(RID p_rid) override; + virtual void _multimesh_free(RID p_rid) override; + virtual void _multimesh_allocate_data(RID p_multimesh, int p_instances, RS::MultimeshTransformFormat p_transform_format, bool p_use_colors = false, bool p_use_custom_data = false) override; + virtual int _multimesh_get_instance_count(RID p_multimesh) const override; + + virtual void _multimesh_set_mesh(RID p_multimesh, RID p_mesh) override; + virtual void _multimesh_instance_set_transform(RID p_multimesh, int p_index, const Transform3D &p_transform) override; + virtual void _multimesh_instance_set_transform_2d(RID p_multimesh, int p_index, const Transform2D &p_transform) override; + virtual void _multimesh_instance_set_color(RID p_multimesh, int p_index, const Color &p_color) override; + virtual void _multimesh_instance_set_custom_data(RID p_multimesh, int p_index, const Color &p_color) override; + + virtual RID _multimesh_get_mesh(RID p_multimesh) const override; + virtual void _multimesh_set_custom_aabb(RID p_multimesh, const AABB &p_aabb) override; + virtual AABB _multimesh_get_custom_aabb(RID p_multimesh) const override; + virtual AABB _multimesh_get_aabb(RID p_multimesh) const override; + + virtual Transform3D _multimesh_instance_get_transform(RID p_multimesh, int p_index) const override; + virtual Transform2D _multimesh_instance_get_transform_2d(RID p_multimesh, int p_index) const override; + virtual Color _multimesh_instance_get_color(RID p_multimesh, int p_index) const override; + virtual Color _multimesh_instance_get_custom_data(RID p_multimesh, int p_index) const override; + virtual void _multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_buffer) override; + virtual Vector<float> _multimesh_get_buffer(RID p_multimesh) const override; + + virtual void _multimesh_set_visible_instances(RID 
p_multimesh, int p_visible) override; + virtual int _multimesh_get_visible_instances(RID p_multimesh) const override; + + virtual MultiMeshInterpolator *_multimesh_get_interpolator(RID p_multimesh) const override; void _update_dirty_multimeshes(); diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.cpp b/drivers/gles3/storage/render_scene_buffers_gles3.cpp index e4f1a01f68..c91547d2b1 100644 --- a/drivers/gles3/storage/render_scene_buffers_gles3.cpp +++ b/drivers/gles3/storage/render_scene_buffers_gles3.cpp @@ -194,7 +194,7 @@ void RenderSceneBuffersGLES3::_check_render_buffers() { ERR_FAIL_COND(view_count == 0); - bool use_internal_buffer = scaling_3d_mode != RS::VIEWPORT_SCALING_3D_MODE_OFF || needs_internal_buffers; + bool use_internal_buffer = scaling_3d_mode != RS::VIEWPORT_SCALING_3D_MODE_OFF || apply_color_adjustments_in_post; uint32_t depth_format_size = 3; bool use_multiview = view_count > 1; @@ -558,8 +558,8 @@ void RenderSceneBuffersGLES3::_clear_back_buffers() { } } -void RenderSceneBuffersGLES3::ensure_internal_buffers() { - needs_internal_buffers = true; +void RenderSceneBuffersGLES3::set_apply_color_adjustments_in_post(bool p_apply_in_post) { + apply_color_adjustments_in_post = p_apply_in_post; } void RenderSceneBuffersGLES3::check_glow_buffers() { diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.h b/drivers/gles3/storage/render_scene_buffers_gles3.h index 8273c18b8e..a7a676ad33 100644 --- a/drivers/gles3/storage/render_scene_buffers_gles3.h +++ b/drivers/gles3/storage/render_scene_buffers_gles3.h @@ -50,7 +50,7 @@ public: //bool use_taa = false; //bool use_debanding = false; uint32_t view_count = 1; - bool needs_internal_buffers = false; + bool apply_color_adjustments_in_post = false; RID render_target; @@ -106,12 +106,12 @@ public: virtual void set_fsr_sharpness(float p_fsr_sharpness) override{}; virtual void set_texture_mipmap_bias(float p_texture_mipmap_bias) override{}; virtual void set_use_debanding(bool p_use_debanding) 
override{}; + void set_apply_color_adjustments_in_post(bool p_apply_in_post); void free_render_buffer_data(); void check_backbuffer(bool p_need_color, bool p_need_depth); // Check if we need to initialize our backbuffer. void check_glow_buffers(); // Check if we need to initialize our glow buffers. - void ensure_internal_buffers(); GLuint get_render_fbo(); GLuint get_msaa3d_fbo() { diff --git a/drivers/gles3/storage/texture_storage.cpp b/drivers/gles3/storage/texture_storage.cpp index 2dcf623995..36393dde86 100644 --- a/drivers/gles3/storage/texture_storage.cpp +++ b/drivers/gles3/storage/texture_storage.cpp @@ -1030,10 +1030,8 @@ Ref<Image> TextureStorage::texture_2d_get(RID p_texture) const { if (texture->compressed) { glPixelStorei(GL_PACK_ALIGNMENT, 4); glGetCompressedTexImage(texture->target, i, &w[ofs]); - } else { glPixelStorei(GL_PACK_ALIGNMENT, 1); - glGetTexImage(texture->target, i, texture->gl_format_cache, texture->gl_type_cache, &w[ofs]); } } @@ -1391,8 +1389,22 @@ void TextureStorage::texture_debug_usage(List<RS::TextureInfo> *r_info) { tinfo.format = t->format; tinfo.width = t->alloc_width; tinfo.height = t->alloc_height; - tinfo.depth = t->depth; tinfo.bytes = t->total_data_size; + + switch (t->type) { + case Texture::TYPE_3D: + tinfo.depth = t->depth; + break; + + case Texture::TYPE_LAYERED: + tinfo.depth = t->layers; + break; + + default: + tinfo.depth = 0; + break; + } + r_info->push_back(tinfo); } } @@ -1493,17 +1505,15 @@ void TextureStorage::_texture_set_data(RID p_texture, const Ref<Image> &p_image, int tsize = 0; for (int i = 0; i < mipmaps; i++) { - int size, ofs; + int64_t size, ofs; img->get_mipmap_offset_and_size(i, ofs, size); if (compressed) { glPixelStorei(GL_UNPACK_ALIGNMENT, 4); if (texture->target == GL_TEXTURE_2D_ARRAY) { if (p_initialize) { - glCompressedTexImage3D(GL_TEXTURE_2D_ARRAY, i, internal_format, w, h, texture->layers, 0, - size * texture->layers, &read[ofs]); - } else { - glCompressedTexSubImage3D(GL_TEXTURE_2D_ARRAY, 
i, 0, 0, p_layer, w, h, 1, internal_format, size, &read[ofs]); + glCompressedTexImage3D(GL_TEXTURE_2D_ARRAY, i, internal_format, w, h, texture->layers, 0, size * texture->layers, nullptr); } + glCompressedTexSubImage3D(GL_TEXTURE_2D_ARRAY, i, 0, 0, p_layer, w, h, 1, internal_format, size, &read[ofs]); } else { glCompressedTexImage2D(blit_target, i, internal_format, w, h, 0, size, &read[ofs]); } @@ -1525,7 +1535,11 @@ void TextureStorage::_texture_set_data(RID p_texture, const Ref<Image> &p_image, h = MAX(1, h >> 1); } - texture->total_data_size = tsize; + if (texture->target == GL_TEXTURE_CUBE_MAP || texture->target == GL_TEXTURE_2D_ARRAY) { + texture->total_data_size = tsize * texture->layers; + } else { + texture->total_data_size = tsize; + } texture->stored_cube_sides |= (1 << p_layer); @@ -1682,6 +1696,14 @@ uint32_t TextureStorage::texture_get_texid(RID p_texture) const { return texture->tex_id; } +Vector3i TextureStorage::texture_get_size(RID p_texture) const { + Texture *texture = texture_owner.get_or_null(p_texture); + + ERR_FAIL_NULL_V(texture, Vector3i(0, 0, 0)); + + return Vector3i(texture->width, texture->height, texture->depth); +} + uint32_t TextureStorage::texture_get_width(RID p_texture) const { Texture *texture = texture_owner.get_or_null(p_texture); @@ -2123,7 +2145,7 @@ void TextureStorage::_update_render_target(RenderTarget *rt) { texture->layers = 1; } texture->gl_format_cache = rt->color_format; - texture->gl_type_cache = GL_UNSIGNED_BYTE; + texture->gl_type_cache = !rt->hdr ? 
GL_UNSIGNED_BYTE : GL_FLOAT; // to set HDR format size to 8 and keep 4 for LDR format texture->gl_internal_format_cache = rt->color_internal_format; texture->tex_id = rt->color; texture->width = rt->size.x; diff --git a/drivers/gles3/storage/texture_storage.h b/drivers/gles3/storage/texture_storage.h index 8a03d72b9b..5569abcc73 100644 --- a/drivers/gles3/storage/texture_storage.h +++ b/drivers/gles3/storage/texture_storage.h @@ -169,7 +169,7 @@ struct Texture { TYPE_3D }; - Type type; + Type type = TYPE_2D; RS::TextureLayeredType layered_type = RS::TEXTURE_LAYERED_2D_ARRAY; GLenum target = GL_TEXTURE_2D; @@ -553,6 +553,7 @@ public: void texture_set_data(RID p_texture, const Ref<Image> &p_image, int p_layer = 0); virtual Image::Format texture_get_format(RID p_texture) const override; uint32_t texture_get_texid(RID p_texture) const; + Vector3i texture_get_size(RID p_texture) const; uint32_t texture_get_width(RID p_texture) const; uint32_t texture_get_height(RID p_texture) const; uint32_t texture_get_depth(RID p_texture) const; diff --git a/drivers/metal/README.md b/drivers/metal/README.md new file mode 100644 index 0000000000..30cfa52360 --- /dev/null +++ b/drivers/metal/README.md @@ -0,0 +1,39 @@ +# Metal Rendering Device + +This document aims to describe the Metal rendering device implementation in Godot. + +## Future work / ideas + +* Use placement heaps +* Explicit hazard tracking +* [MetalFX] upscaling support? + +## Acknowledgments + +The Metal rendering owes a lot to the work of the [MoltenVK] project, which is a Vulkan implementation on top of Metal. +In accordance with the Apache 2.0 license, the following copyright notices have been included where applicable: + +``` +/**************************************************************************/ +/* */ +/* Portions of this code were derived from MoltenVK. */ +/* */ +/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. 
*/ +/* (http://www.brenwill.com) */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/**************************************************************************/ +``` + +[MoltenVK]: https://github.com/KhronosGroup/MoltenVK +[MetalFX]: https://developer.apple.com/documentation/metalfx?language=objc diff --git a/drivers/metal/SCsub b/drivers/metal/SCsub new file mode 100644 index 0000000000..30129b7806 --- /dev/null +++ b/drivers/metal/SCsub @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +Import("env") + +env_metal = env.Clone() + +# Thirdparty source files + +thirdparty_obj = [] + +thirdparty_dir = "#thirdparty/spirv-cross/" +thirdparty_sources = [ + "spirv_cfg.cpp", + "spirv_cross_util.cpp", + "spirv_cross.cpp", + "spirv_parser.cpp", + "spirv_msl.cpp", + "spirv_reflect.cpp", + "spirv_glsl.cpp", + "spirv_cross_parsed_ir.cpp", +] +thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + +env_metal.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "/include"]) + +# Must enable exceptions for SPIRV-Cross; otherwise, it will abort the process on errors. 
+if "-fno-exceptions" in env_metal["CXXFLAGS"]: + env_metal["CXXFLAGS"].remove("-fno-exceptions") +env_metal.Append(CXXFLAGS=["-fexceptions"]) + +env_thirdparty = env_metal.Clone() +env_thirdparty.disable_warnings() +env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources) +env_metal.drivers_sources += thirdparty_obj + +# Enable C++20 for the Objective-C++ Metal code, which uses C++20 concepts. +if "-std=gnu++17" in env_metal["CXXFLAGS"]: + env_metal["CXXFLAGS"].remove("-std=gnu++17") +env_metal.Append(CXXFLAGS=["-std=c++20"]) + +# Driver source files + +driver_obj = [] + +env_metal.add_source_files(driver_obj, "*.mm") +env.drivers_sources += driver_obj + +# Needed to force rebuilding the driver files when the thirdparty library is updated. +env.Depends(driver_obj, thirdparty_obj) diff --git a/drivers/metal/metal_device_properties.h b/drivers/metal/metal_device_properties.h new file mode 100644 index 0000000000..7467e8ceb4 --- /dev/null +++ b/drivers/metal/metal_device_properties.h @@ -0,0 +1,141 @@ +/**************************************************************************/ +/* metal_device_properties.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +/**************************************************************************/ +/* */ +/* Portions of this code were derived from MoltenVK. */ +/* */ +/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */ +/* (http://www.brenwill.com) */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. 
See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/**************************************************************************/
+
+#ifndef METAL_DEVICE_PROPERTIES_H
+#define METAL_DEVICE_PROPERTIES_H
+
+#import "servers/rendering/rendering_device.h"
+
+#import <Foundation/Foundation.h>
+#import <Metal/Metal.h>
+
+/** The buffer index to use for vertex content. */
+const static uint32_t VERT_CONTENT_BUFFER_INDEX = 0;
+const static uint32_t MAX_COLOR_ATTACHMENT_COUNT = 8; /**< Number of color attachment slots; ClearAttKey sizes its format table from this. */
+/** Texture sample counts. Each value is a distinct power of two, so values can be OR-ed into a mask (see MetalFeatures::supportedSampleCounts). */
+typedef NS_OPTIONS(NSUInteger, SampleCount) {
+	SampleCount1 = (1UL << 0),
+	SampleCount2 = (1UL << 1),
+	SampleCount4 = (1UL << 2),
+	SampleCount8 = (1UL << 3),
+	SampleCount16 = (1UL << 4),
+	SampleCount32 = (1UL << 5),
+	SampleCount64 = (1UL << 6),
+};
+/** Capabilities detected for a device by MetalDeviceProperties::init_features(). */
+struct API_AVAILABLE(macos(11.0), ios(14.0)) MetalFeatures {
+	uint32_t mslVersion; /**< MSL version in SPIRV-Cross make_msl_version() form, derived from mslVersionEnum. */
+	MTLGPUFamily highestFamily; /**< Highest MTLGPUFamilyApple* family the device reports support for. */
+	MTLLanguageVersion mslVersionEnum; /**< Language version reported by a default MTLCompileOptions. */
+	SampleCount supportedSampleCounts; /**< Bit mask of the sample counts the device supports. */
+	long hostMemoryPageSize; /**< Page size from sysconf(_SC_PAGESIZE). */
+	bool layeredRendering; /**< Set when the device supports MTLGPUFamilyApple5. */
+	bool multisampleLayeredRendering; /**< Set when the device supports MTLGPUFamilyApple7. */
+	bool quadPermute; /**< If true, quadgroup permutation functions (vote, ballot, shuffle) are supported in shaders. */
+	bool simdPermute; /**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */
+	bool simdReduction; /**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */
+	bool tessellationShader; /**< If true, tessellation shaders are supported. */
+	bool imageCubeArray; /**< If true, image cube arrays are supported. */
+};
+/** Device limits populated by MetalDeviceProperties::init_limits(). */
+struct MetalLimits {
+	uint64_t maxImageArrayLayers;
+	uint64_t maxFramebufferHeight;
+	uint64_t maxFramebufferWidth;
+	uint64_t maxImageDimension1D;
+	uint64_t maxImageDimension2D;
+	uint64_t maxImageDimension3D;
+	uint64_t maxImageDimensionCube;
+	uint64_t maxViewportDimensionX;
+	uint64_t maxViewportDimensionY;
+	MTLSize maxThreadsPerThreadGroup; /**< Copied from the device's maxThreadsPerThreadgroup. */
+	MTLSize maxComputeWorkGroupCount; /**< UINT32_MAX per axis; treated as having no effective limit. */
+	uint64_t maxBoundDescriptorSets; /**< SPIRV-Cross kMaxArgumentBuffers. */
+	uint64_t maxColorAttachments;
+	uint64_t maxTexturesPerArgumentBuffer;
+	uint64_t maxSamplersPerArgumentBuffer;
+	uint64_t maxBuffersPerArgumentBuffer;
+	uint64_t maxBufferLength; /**< Copied from the device's maxBufferLength. */
+	uint64_t minUniformBufferOffsetAlignment; /**< Assigned only on macOS (16) and non-Catalyst iOS (64) builds. */
+	uint64_t maxVertexDescriptorLayoutStride;
+	uint16_t maxViewports; /**< 16 when the device supports MTLGPUFamilyApple5, otherwise 1. */
+	uint32_t maxPerStageBufferCount; /**< The total number of per-stage Metal buffers available for shader uniform content and attributes. */
+	uint32_t maxPerStageTextureCount; /**< The total number of per-stage Metal textures available for shader uniform content. */
+	uint32_t maxPerStageSamplerCount; /**< The total number of per-stage Metal samplers available for shader uniform content. */
+	uint32_t maxVertexInputAttributes;
+	uint32_t maxVertexInputBindings;
+	uint32_t maxVertexInputBindingStride;
+	uint32_t maxDrawIndexedIndexValue; /**< UINT32_MAX - 1. */
+
+	uint32_t minSubgroupSize; /**< The minimum number of threads in a SIMD-group. */
+	uint32_t maxSubgroupSize; /**< The maximum number of threads in a SIMD-group. */
+	BitField<RDD::ShaderStage> subgroupSupportedShaderStages; /**< Shader stages in which subgroup operations are reported as available. */
+	BitField<RD::SubgroupOperations> subgroupSupportedOperations; /**< The subgroup operations supported by the device. */
+};
+/** Queries a Metal device at construction time and exposes the resulting features and limits. */
+class API_AVAILABLE(macos(11.0), ios(14.0)) MetalDeviceProperties {
+private:
+	void init_features(id<MTLDevice> p_device); /**< Fills `features`. */
+	void init_limits(id<MTLDevice> p_device); /**< Fills `limits`; reads `features`, so the constructor calls it after init_features(). */
+
+public:
+	MetalFeatures features;
+	MetalLimits limits;
+	/** Returns the sample count for p_samples if the device supports it, otherwise the next lower supported count, falling back to SampleCount1. */
+	SampleCount find_nearest_supported_sample_count(RenderingDevice::TextureSamples p_samples) const;
+
+	MetalDeviceProperties(id<MTLDevice> p_device);
+	~MetalDeviceProperties();
+
+private:
+	static const SampleCount sample_count[RenderingDevice::TextureSamples::TEXTURE_SAMPLES_MAX]; /**< Lookup table indexed by RenderingDevice::TextureSamples. */
+};
+
+#endif // METAL_DEVICE_PROPERTIES_H
diff --git a/drivers/metal/metal_device_properties.mm b/drivers/metal/metal_device_properties.mm
new file mode 100644
index 0000000000..857fa8c66e
--- /dev/null
+++ b/drivers/metal/metal_device_properties.mm
@@ -0,0 +1,327 @@
+/**************************************************************************/
+/* metal_device_properties.mm */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software.
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +/**************************************************************************/ +/* */ +/* Portions of this code were derived from MoltenVK. */ +/* */ +/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */ +/* (http://www.brenwill.com) */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/**************************************************************************/ + +#import "metal_device_properties.h" + +#import <Metal/Metal.h> +#import <spirv_cross.hpp> +#import <spirv_msl.hpp> + +// Common scaling multipliers. 
+#define KIBI (1024)
+#define MEBI (KIBI * KIBI)
+// SDKs older than macOS 14 / iOS 17 do not declare MTLGPUFamilyApple9, so it is defined here from its raw value.
+#if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 140000) || (TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED < 170000)
+#define MTLGPUFamilyApple9 (MTLGPUFamily)1009
+#endif
+/// Pre-decrement for MTLGPUFamily: steps to the previous family; stepping below Apple1 wraps around to Apple9.
+API_AVAILABLE(macos(11.0), ios(14.0))
+MTLGPUFamily &operator--(MTLGPUFamily &p_family) {
+	p_family = static_cast<MTLGPUFamily>(static_cast<int>(p_family) - 1);
+	if (p_family < MTLGPUFamilyApple1) {
+		p_family = MTLGPUFamilyApple9;
+	}
+
+	return p_family;
+}
+/// Detects the GPU family, host page size, supported sample counts, feature flags and MSL version of p_device.
+void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
+	features = {};
+	// Apple1 is the fallback, so only Apple9..Apple2 are probed. operator-- wraps Apple1 back to Apple9, so a `>= Apple1` bound would never terminate on a device that supports no Apple family.
+	features.highestFamily = MTLGPUFamilyApple1;
+	for (MTLGPUFamily family = MTLGPUFamilyApple9; family > MTLGPUFamilyApple1; --family) {
+		if ([p_device supportsFamily:family]) {
+			features.highestFamily = family;
+			break;
+		}
+	}
+
+	features.hostMemoryPageSize = sysconf(_SC_PAGESIZE);
+	// SampleCount values are the sample counts themselves (powers of two), so they double as mask bits.
+	for (SampleCount sc = SampleCount1; sc <= SampleCount64; sc <<= 1) {
+		if ([p_device supportsTextureSampleCount:sc]) {
+			features.supportedSampleCounts |= sc;
+		}
+	}
+
+	features.layeredRendering = [p_device supportsFamily:MTLGPUFamilyApple5];
+	features.multisampleLayeredRendering = [p_device supportsFamily:MTLGPUFamilyApple7];
+	features.tessellationShader = [p_device supportsFamily:MTLGPUFamilyApple3];
+	features.imageCubeArray = [p_device supportsFamily:MTLGPUFamilyApple3];
+	features.quadPermute = [p_device supportsFamily:MTLGPUFamilyApple4];
+	features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6];
+	features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7];
+
+	MTLCompileOptions *opts = [MTLCompileOptions new];
+	features.mslVersionEnum = opts.languageVersion; // By default, Metal uses the most recent language version.
+	// Translate the enum into SPIRV-Cross' numeric form. If no case matches, mslVersion keeps the 0 assigned by `features = {}`.
+#define setMSLVersion(m_maj, m_min) \
+	features.mslVersion = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::make_msl_version(m_maj, m_min)
+
+	switch (features.mslVersionEnum) {
+#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 180000
+		case MTLLanguageVersion3_2:
+			setMSLVersion(3, 2);
+			break;
+#endif
+#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 140000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 170000
+		case MTLLanguageVersion3_1:
+			setMSLVersion(3, 1);
+			break;
+#endif
+		case MTLLanguageVersion3_0:
+			setMSLVersion(3, 0);
+			break;
+		case MTLLanguageVersion2_4:
+			setMSLVersion(2, 4);
+			break;
+		case MTLLanguageVersion2_3:
+			setMSLVersion(2, 3);
+			break;
+		case MTLLanguageVersion2_2:
+			setMSLVersion(2, 2);
+			break;
+		case MTLLanguageVersion2_1:
+			setMSLVersion(2, 1);
+			break;
+		case MTLLanguageVersion2_0:
+			setMSLVersion(2, 0);
+			break;
+		case MTLLanguageVersion1_2:
+			setMSLVersion(1, 2);
+			break;
+		case MTLLanguageVersion1_1:
+			setMSLVersion(1, 1);
+			break;
+#if TARGET_OS_IPHONE && !TARGET_OS_MACCATALYST
+		case MTLLanguageVersion1_0:
+			setMSLVersion(1, 0);
+			break;
+#endif
+	}
+}
+/// Fills `limits` from fixed feature-set-table values and device queries; reads `features`, so init_features() must have run first.
+void MetalDeviceProperties::init_limits(id<MTLDevice> p_device) {
+	using std::max;
+	using std::min;
+
+	// FST: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
+
+	// FST: Maximum number of layers per 1D texture array, 2D texture array, or 3D texture.
+	limits.maxImageArrayLayers = 2048;
+	if ([p_device supportsFamily:MTLGPUFamilyApple3]) {
+		// FST: Maximum 2D texture width and height.
+		limits.maxFramebufferWidth = 16384;
+		limits.maxFramebufferHeight = 16384;
+		limits.maxViewportDimensionX = 16384;
+		limits.maxViewportDimensionY = 16384;
+		// FST: Maximum 1D texture width.
+		limits.maxImageDimension1D = 16384;
+		// FST: Maximum 2D texture width and height.
+		limits.maxImageDimension2D = 16384;
+		// FST: Maximum cube map texture width and height.
+		limits.maxImageDimensionCube = 16384;
+	} else {
+		// FST: Maximum 2D texture width and height.
+		limits.maxFramebufferWidth = 8192;
+		limits.maxFramebufferHeight = 8192;
+		limits.maxViewportDimensionX = 8192;
+		limits.maxViewportDimensionY = 8192;
+		// FST: Maximum 1D texture width.
+		limits.maxImageDimension1D = 8192;
+		// FST: Maximum 2D texture width and height.
+		limits.maxImageDimension2D = 8192;
+		// FST: Maximum cube map texture width and height.
+		limits.maxImageDimensionCube = 8192;
+	}
+	// FST: Maximum 3D texture width, height, and depth.
+	limits.maxImageDimension3D = 2048;
+
+	limits.maxThreadsPerThreadGroup = p_device.maxThreadsPerThreadgroup;
+	// No effective limits.
+	limits.maxComputeWorkGroupCount = { std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint32_t>::max() };
+	// https://github.com/KhronosGroup/MoltenVK/blob/568cc3acc0e2299931fdaecaaa1fc3ec5b4af281/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h#L85
+	limits.maxBoundDescriptorSets = SPIRV_CROSS_NAMESPACE::kMaxArgumentBuffers;
+	// FST: Maximum number of color render targets per render pass descriptor.
+	limits.maxColorAttachments = 8;
+
+	// Maximum number of textures the device can access, per stage, from an argument buffer.
+	if ([p_device supportsFamily:MTLGPUFamilyApple6]) {
+		limits.maxTexturesPerArgumentBuffer = 1'000'000;
+	} else if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
+		limits.maxTexturesPerArgumentBuffer = 96;
+	} else {
+		limits.maxTexturesPerArgumentBuffer = 31;
+	}
+
+	// Maximum number of samplers the device can access, per stage, from an argument buffer.
+	if ([p_device supportsFamily:MTLGPUFamilyApple6]) {
+		limits.maxSamplersPerArgumentBuffer = 1024;
+	} else {
+		limits.maxSamplersPerArgumentBuffer = 16;
+	}
+
+	// Maximum number of buffers the device can access, per stage, from an argument buffer.
+	if ([p_device supportsFamily:MTLGPUFamilyApple6]) {
+		limits.maxBuffersPerArgumentBuffer = std::numeric_limits<uint64_t>::max();
+	} else if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
+		limits.maxBuffersPerArgumentBuffer = 96;
+	} else {
+		limits.maxBuffersPerArgumentBuffer = 31;
+	}
+
+	limits.minSubgroupSize = limits.maxSubgroupSize = 1;
+	// These values were taken from MoltenVK.
+	if (features.simdPermute) {
+		limits.minSubgroupSize = 4;
+		limits.maxSubgroupSize = 32;
+	} else if (features.quadPermute) {
+		limits.minSubgroupSize = limits.maxSubgroupSize = 4;
+	}
+
+	limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_COMPUTE_BIT);
+	if (features.tessellationShader) {
+		limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_TESSELATION_CONTROL_BIT);
+	}
+	limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_FRAGMENT_BIT);
+
+	limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_BASIC_BIT);
+	if (features.simdPermute || features.quadPermute) {
+		limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_VOTE_BIT);
+		limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_BALLOT_BIT);
+		limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_SHUFFLE_BIT);
+		limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_SHUFFLE_RELATIVE_BIT);
+	}
+
+	if (features.simdReduction) {
+		limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_ARITHMETIC_BIT);
+	}
+
+	if (features.quadPermute) {
+		limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_QUAD_BIT);
+	}
+
+	limits.maxBufferLength = p_device.maxBufferLength;
+
+	// FST: Maximum size of vertex descriptor layout stride.
+	limits.maxVertexDescriptorLayoutStride = std::numeric_limits<uint64_t>::max();
+
+	// Maximum number of viewports.
+	if ([p_device supportsFamily:MTLGPUFamilyApple5]) {
+		limits.maxViewports = 16;
+	} else {
+		limits.maxViewports = 1;
+	}
+
+	limits.maxPerStageBufferCount = 31;
+	limits.maxPerStageSamplerCount = 16;
+	if ([p_device supportsFamily:MTLGPUFamilyApple6]) {
+		limits.maxPerStageTextureCount = 128;
+	} else if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
+		limits.maxPerStageTextureCount = 96;
+	} else {
+		limits.maxPerStageTextureCount = 31;
+	}
+
+	limits.maxVertexInputAttributes = 31;
+	limits.maxVertexInputBindings = 31;
+	limits.maxVertexInputBindingStride = (2 * KIBI);
+	// NOTE(review): minUniformBufferOffsetAlignment is assigned only in the two branches below; targets matching neither (e.g. Mac Catalyst) leave it unset. Confirm that is intended.
+#if TARGET_OS_IOS && !TARGET_OS_MACCATALYST
+	limits.minUniformBufferOffsetAlignment = 64;
+#endif
+
+#if TARGET_OS_OSX
+	// This is Apple Silicon specific.
+	limits.minUniformBufferOffsetAlignment = 16;
+#endif
+
+	limits.maxDrawIndexedIndexValue = std::numeric_limits<uint32_t>::max() - 1;
+}
+/// Queries p_device once; features are initialized before limits because init_limits() reads them.
+MetalDeviceProperties::MetalDeviceProperties(id<MTLDevice> p_device) {
+	init_features(p_device);
+	init_limits(p_device);
+}
+
+MetalDeviceProperties::~MetalDeviceProperties() {
+}
+/// Returns the sample count for p_samples if supported, otherwise the next lower supported count, falling back to SampleCount1.
+SampleCount MetalDeviceProperties::find_nearest_supported_sample_count(RenderingDevice::TextureSamples p_samples) const {
+	SampleCount supported = features.supportedSampleCounts;
+	if (supported & sample_count[p_samples]) {
+		return sample_count[p_samples];
+	}
+
+	SampleCount requested_sample_count = sample_count[p_samples];
+	// Find the nearest supported sample count.
+	while (requested_sample_count > SampleCount1) {
+		if (supported & requested_sample_count) {
+			return requested_sample_count;
+		}
+		requested_sample_count = (SampleCount)(requested_sample_count >> 1);
+	}
+
+	return SampleCount1;
+}
+
+// region static members
+// Indexed by RenderingDevice::TextureSamples.
+const SampleCount MetalDeviceProperties::sample_count[RenderingDevice::TextureSamples::TEXTURE_SAMPLES_MAX] = {
+	SampleCount1,
+	SampleCount2,
+	SampleCount4,
+	SampleCount8,
+	SampleCount16,
+	SampleCount32,
+	SampleCount64,
+};
+
+// endregion
diff --git a/drivers/metal/metal_objects.h b/drivers/metal/metal_objects.h
new file mode 100644
index 0000000000..97f33bb1e8
--- /dev/null
+++ b/drivers/metal/metal_objects.h
@@ -0,0 +1,909 @@
+/**************************************************************************/
+/* metal_objects.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software.
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +/**************************************************************************/ +/* */ +/* Portions of this code were derived from MoltenVK. */ +/* */ +/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */ +/* (http://www.brenwill.com) */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. 
*/
+/**************************************************************************/
+
+#ifndef METAL_OBJECTS_H
+#define METAL_OBJECTS_H
+
+#import "metal_device_properties.h"
+#import "metal_utils.h"
+#import "pixel_formats.h"
+
+#import "servers/rendering/rendering_device_driver.h"
+
+#import <CommonCrypto/CommonDigest.h>
+#import <Foundation/Foundation.h>
+#import <Metal/Metal.h>
+#import <QuartzCore/CAMetalLayer.h>
+#import <simd/simd.h>
+#import <zlib.h>
+#import <initializer_list>
+#import <optional>
+#import <spirv.hpp>
+
+// These types can be used in Vector and other containers that use
+// pointer operations not supported by ARC.
+namespace MTL {
+#define MTL_CLASS(name) \
+	class name { \
+	public: \
+		name(id<MTL##name> obj = nil) : m_obj(obj) {} \
+		operator id<MTL##name>() const { return m_obj; } \
+		id<MTL##name> m_obj; \
+	};
+
+MTL_CLASS(Texture)
+
+} //namespace MTL
+/// Bit flags naming shader stages; the values mirror the RDD::SHADER_STAGE_*_BIT constants.
+enum ShaderStageUsage : uint32_t {
+	None = 0,
+	Vertex = RDD::SHADER_STAGE_VERTEX_BIT,
+	Fragment = RDD::SHADER_STAGE_FRAGMENT_BIT,
+	TesselationControl = RDD::SHADER_STAGE_TESSELATION_CONTROL_BIT,
+	TesselationEvaluation = RDD::SHADER_STAGE_TESSELATION_EVALUATION_BIT,
+	Compute = RDD::SHADER_STAGE_COMPUTE_BIT,
+};
+/// OR-assigns the raw stage bits in p_b into p_a and returns p_a.
+_FORCE_INLINE_ ShaderStageUsage &operator|=(ShaderStageUsage &p_a, int p_b) {
+	p_a = ShaderStageUsage(uint32_t(p_a) | uint32_t(p_b));
+	return p_a;
+}
+/// Kind of pass an MDCommandBuffer is currently in (see MDCommandBuffer::type).
+enum class MDCommandBufferStateType {
+	None,
+	Render,
+	Compute,
+	Blit,
+};
+
+enum class MDPipelineType {
+	None,
+	Render,
+	Compute,
+};
+
+class MDRenderPass;
+class MDPipeline;
+class MDRenderPipeline;
+class MDComputePipeline;
+class MDFrameBuffer;
+class RenderingDeviceDriverMetal;
+class MDUniformSet;
+class MDShader;
+
+#pragma mark - Resource Factory
+/// Hashable key (sample count plus one pixel format per color/depth/stencil slot) used to look up clear pipeline states.
+struct ClearAttKey {
+	const static uint32_t COLOR_COUNT = MAX_COLOR_ATTACHMENT_COUNT;
+	const static uint32_t DEPTH_INDEX = COLOR_COUNT;
+	const static uint32_t STENCIL_INDEX = DEPTH_INDEX + 1;
+	const static uint32_t ATTACHMENT_COUNT = STENCIL_INDEX + 1;
+
+	uint16_t sample_count = 0;
+	uint16_t pixel_formats[ATTACHMENT_COUNT] = { 0 }; // Formats are narrowed to 16 bits; 0 marks a disabled slot.
+
+	_FORCE_INLINE_ void set_color_format(uint32_t p_idx, MTLPixelFormat p_fmt) { pixel_formats[p_idx] = p_fmt; }
+	_FORCE_INLINE_ void set_depth_format(MTLPixelFormat p_fmt) { pixel_formats[DEPTH_INDEX] = p_fmt; }
+	_FORCE_INLINE_ void set_stencil_format(MTLPixelFormat p_fmt) { pixel_formats[STENCIL_INDEX] = p_fmt; }
+	_FORCE_INLINE_ MTLPixelFormat depth_format() const { return (MTLPixelFormat)pixel_formats[DEPTH_INDEX]; }
+	_FORCE_INLINE_ MTLPixelFormat stencil_format() const { return (MTLPixelFormat)pixel_formats[STENCIL_INDEX]; }
+
+	_FORCE_INLINE_ bool is_enabled(uint32_t p_idx) const { return pixel_formats[p_idx] != 0; }
+	_FORCE_INLINE_ bool is_depth_enabled() const { return pixel_formats[DEPTH_INDEX] != 0; }
+	_FORCE_INLINE_ bool is_stencil_enabled() const { return pixel_formats[STENCIL_INDEX] != 0; }
+	// Byte-wise comparison; every member is a uint16_t, so the struct has no padding bytes to disturb it.
+	_FORCE_INLINE_ bool operator==(const ClearAttKey &p_rhs) const {
+		return memcmp(this, &p_rhs, sizeof(ClearAttKey)) == 0;
+	}
+
+	uint32_t hash() const {
+		uint32_t h = hash_murmur3_one_32(sample_count);
+		h = hash_murmur3_buffer(pixel_formats, ATTACHMENT_COUNT * sizeof(pixel_formats[0]), h);
+		return h;
+	}
+};
+/// Factory for clear render pipeline states and depth/stencil states.
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDResourceFactory {
+private:
+	RenderingDeviceDriverMetal *device_driver;
+
+	id<MTLFunction> new_func(NSString *p_source, NSString *p_name, NSError **p_error);
+	id<MTLFunction> new_clear_vert_func(ClearAttKey &p_key);
+	id<MTLFunction> new_clear_frag_func(ClearAttKey &p_key);
+	NSString *get_format_type_string(MTLPixelFormat p_fmt);
+
+public:
+	id<MTLRenderPipelineState> new_clear_pipeline_state(ClearAttKey &p_key, NSError **p_error);
+	id<MTLDepthStencilState> new_depth_stencil_state(bool p_use_depth, bool p_use_stencil);
+
+	MDResourceFactory(RenderingDeviceDriverMetal *p_device_driver) :
+			device_driver(p_device_driver) {}
+	~MDResourceFactory() = default;
+};
+/// Owns an MDResourceFactory and holds clear pipeline states keyed by ClearAttKey plus four depth/stencil states.
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDResourceCache {
+private:
+	typedef HashMap<ClearAttKey, id<MTLRenderPipelineState>, HashableHasher<ClearAttKey>> HashMap;
+	std::unique_ptr<MDResourceFactory> resource_factory;
+	HashMap clear_states;
+
+	struct {
+		id<MTLDepthStencilState> all;
+		id<MTLDepthStencilState> depth_only;
+		id<MTLDepthStencilState> stencil_only;
+		id<MTLDepthStencilState> none;
+	} clear_depth_stencil_state;
+
+public:
+	id<MTLRenderPipelineState> get_clear_render_pipeline_state(ClearAttKey &p_key, NSError **p_error);
+	id<MTLDepthStencilState> get_depth_stencil_state(bool p_use_depth, bool p_use_stencil);
+
+	explicit MDResourceCache(RenderingDeviceDriverMetal *p_device_driver) :
+			resource_factory(new MDResourceFactory(p_device_driver)) {}
+	~MDResourceCache() = default;
+};
+/// Wraps an id<MTLCommandBuffer> together with the render, compute and blit state tracked while encoding.
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDCommandBuffer {
+private:
+	RenderingDeviceDriverMetal *device_driver = nullptr;
+	id<MTLCommandQueue> queue = nil;
+	id<MTLCommandBuffer> commandBuffer = nil;
+
+	void _end_compute_dispatch();
+	void _end_blit();
+
+#pragma mark - Render
+
+	void _render_set_dirty_state();
+	void _render_bind_uniform_sets();
+
+	static void _populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView<Rect2i> p_rects);
+	static uint32_t _populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size);
+	void _end_render_pass();
+	void _render_clear_render_area();
+
+public:
+	MDCommandBufferStateType type = MDCommandBufferStateType::None;
+	// State specific to a render pass, including the dirty flags that record which parts must be re-applied.
+	struct RenderState {
+		MDRenderPass *pass = nullptr;
+		MDFrameBuffer *frameBuffer = nullptr;
+		MDRenderPipeline *pipeline = nullptr;
+		LocalVector<RDD::RenderPassClearValue> clear_values;
+		LocalVector<MTLViewport> viewports;
+		LocalVector<MTLScissorRect> scissors;
+		std::optional<Color> blend_constants;
+		uint32_t current_subpass = UINT32_MAX;
+		Rect2i render_area = {};
+		bool is_rendering_entire_area = false;
+		MTLRenderPassDescriptor *desc = nil;
+		id<MTLRenderCommandEncoder> encoder = nil;
+		id<MTLBuffer> __unsafe_unretained index_buffer = nil; // Buffer is owned by RDD.
+		MTLIndexType index_type = MTLIndexTypeUInt16;
+		uint32_t index_offset = 0;
+		LocalVector<id<MTLBuffer> __unsafe_unretained> vertex_buffers;
+		LocalVector<NSUInteger> vertex_offsets;
+		// clang-format off
+		enum DirtyFlag: uint8_t {
+			DIRTY_NONE     = 0b0000'0000,
+			DIRTY_PIPELINE = 0b0000'0001, //! pipeline state
+			DIRTY_UNIFORMS = 0b0000'0010, //! uniform sets
+			DIRTY_DEPTH    = 0b0000'0100, //! depth / stencil state
+			DIRTY_VERTEX   = 0b0000'1000, //! vertex buffers
+			DIRTY_VIEWPORT = 0b0001'0000, //! viewport rectangles
+			DIRTY_SCISSOR  = 0b0010'0000, //! scissor rectangles
+			DIRTY_BLEND    = 0b0100'0000, //! blend state
+			DIRTY_RASTER   = 0b1000'0000, //! encoder state like cull mode
+
+			DIRTY_ALL      = 0xff,
+		};
+		// clang-format on
+		BitField<DirtyFlag> dirty = DIRTY_NONE;
+
+		LocalVector<MDUniformSet *> uniform_sets;
+		// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
+		uint64_t uniform_set_mask = 0;
+		// Returns the state to its defaults and empties every tracked list.
+		_FORCE_INLINE_ void reset() {
+			pass = nil;
+			frameBuffer = nil;
+			pipeline = nil;
+			current_subpass = UINT32_MAX;
+			render_area = {};
+			is_rendering_entire_area = false;
+			desc = nil;
+			encoder = nil;
+			index_buffer = nil;
+			index_type = MTLIndexTypeUInt16;
+			dirty = DIRTY_NONE;
+			uniform_sets.clear();
+			uniform_set_mask = 0;
+			clear_values.clear();
+			viewports.clear();
+			scissors.clear();
+			blend_constants.reset();
+			vertex_buffers.clear();
+			vertex_offsets.clear();
+		}
+		// The mark_*_dirty() helpers below set their flag only when matching state has been recorded.
+		_FORCE_INLINE_ void mark_viewport_dirty() {
+			if (viewports.is_empty()) {
+				return;
+			}
+			dirty.set_flag(DirtyFlag::DIRTY_VIEWPORT);
+		}
+
+		_FORCE_INLINE_ void mark_scissors_dirty() {
+			if (scissors.is_empty()) {
+				return;
+			}
+			dirty.set_flag(DirtyFlag::DIRTY_SCISSOR);
+		}
+
+		_FORCE_INLINE_ void mark_vertex_dirty() {
+			if (vertex_buffers.is_empty()) {
+				return;
+			}
+			dirty.set_flag(DirtyFlag::DIRTY_VERTEX);
+		}
+		// Marks the listed set indices dirty; indices that are out of range or hold no set are ignored.
+		_FORCE_INLINE_ void mark_uniforms_dirty(std::initializer_list<uint32_t> l) {
+			if (uniform_sets.is_empty()) {
+				return;
+			}
+			for (uint32_t i : l) {
+				if (i < uniform_sets.size() && uniform_sets[i] != nullptr) {
+					uniform_set_mask |= 1ULL << i; // Shift as 64-bit: the mask is uint64_t, and `1 << i` would be an int shift.
+				}
+			}
+			dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
+		}
+		// Marks every bound (non-null) uniform set dirty.
+		_FORCE_INLINE_ void mark_uniforms_dirty(void) {
+			if (uniform_sets.is_empty()) {
+				return;
+			}
+			for (uint32_t i = 0; i < uniform_sets.size(); i++) {
+				if (uniform_sets[i] != nullptr) {
+					uniform_set_mask |= 1ULL << i; // Shift as 64-bit: the mask is uint64_t, and `1 << i` would be an int shift.
+				}
+			}
+			dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
+		}
+		// Clamps p_rect's origin into render_area and shrinks its size so it does not extend past the area's far edges.
+		MTLScissorRect clip_to_render_area(MTLScissorRect p_rect) const {
+			uint32_t raLeft = render_area.position.x;
+			uint32_t raRight = raLeft + render_area.size.width;
+			uint32_t raBottom = render_area.position.y;
+			uint32_t raTop = raBottom + render_area.size.height;
+
+			p_rect.x = CLAMP(p_rect.x, raLeft, MAX(raRight - 1, raLeft));
+			p_rect.y = CLAMP(p_rect.y, raBottom, MAX(raTop - 1, raBottom));
+			p_rect.width = MIN(p_rect.width, raRight - p_rect.x);
+			p_rect.height = MIN(p_rect.height, raTop - p_rect.y);
+
+			return p_rect;
+		}
+		// Rect2i overload of the clamp above, using signed arithmetic.
+		Rect2i clip_to_render_area(Rect2i p_rect) const {
+			int32_t raLeft = render_area.position.x;
+			int32_t raRight = raLeft + render_area.size.width;
+			int32_t raBottom = render_area.position.y;
+			int32_t raTop = raBottom + render_area.size.height;
+
+			p_rect.position.x = CLAMP(p_rect.position.x, raLeft, MAX(raRight - 1, raLeft));
+			p_rect.position.y = CLAMP(p_rect.position.y, raBottom, MAX(raTop - 1, raBottom));
+			p_rect.size.width = MIN(p_rect.size.width, raRight - p_rect.position.x);
+			p_rect.size.height = MIN(p_rect.size.height, raTop - p_rect.position.y);
+
+			return p_rect;
+		}
+
+	} render;
+
+	// State specific for a compute pass.
+	struct {
+		MDComputePipeline *pipeline = nullptr;
+		id<MTLComputeCommandEncoder> encoder = nil;
+		_FORCE_INLINE_ void reset() {
+			pipeline = nil;
+			encoder = nil;
+		}
+	} compute;
+
+	// State specific to a blit pass.
+	struct {
+		id<MTLBlitCommandEncoder> encoder = nil;
+		_FORCE_INLINE_ void reset() {
+			encoder = nil;
+		}
+	} blit;
+
+	_FORCE_INLINE_ id<MTLCommandBuffer> get_command_buffer() const {
+		return commandBuffer;
+	}
+
+	void begin();
+	void commit();
+	void end();
+
+	id<MTLBlitCommandEncoder> blit_command_encoder();
+	void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label);
+
+	void bind_pipeline(RDD::PipelineID p_pipeline);
+
+#pragma mark - Render Commands
+
+	void render_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index);
+	void render_clear_attachments(VectorView<RDD::AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects);
+	void render_set_viewport(VectorView<Rect2i> p_viewports);
+	void render_set_scissor(VectorView<Rect2i> p_scissors);
+	void render_set_blend_constants(const Color &p_constants);
+	void render_begin_pass(RDD::RenderPassID p_render_pass,
+			RDD::FramebufferID p_frameBuffer,
+			RDD::CommandBufferType p_cmd_buffer_type,
+			const Rect2i &p_rect,
+			VectorView<RDD::RenderPassClearValue> p_clear_values);
+	void render_next_subpass();
+	void render_draw(uint32_t p_vertex_count,
+			uint32_t p_instance_count,
+			uint32_t p_base_vertex,
+			uint32_t p_first_instance);
+	void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets);
+	void render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset);
+
+	void render_draw_indexed(uint32_t p_index_count,
+			uint32_t p_instance_count,
+			uint32_t p_first_index,
+			int32_t p_vertex_offset,
+			uint32_t p_first_instance);
+
+	void render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride);
+	void render_draw_indexed_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride);
+	void render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride);
+	void render_draw_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride);
+
+	void render_end_pass();
+
+#pragma mark - Compute Commands
+
+	void compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index);
+	void compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
+	void compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset);
+
+	MDCommandBuffer(id<MTLCommandQueue> p_queue, RenderingDeviceDriverMetal *p_device_driver) :
+			device_driver(p_device_driver), queue(p_queue) {
+		type = MDCommandBufferStateType::None;
+	}
+
+	MDCommandBuffer() = default;
+};
+// On SDKs older than macOS 14 / iOS 17 the MTLBindingAccess names are mapped onto the MTLArgumentAccess ones.
+#if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 140000) || (TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED < 170000)
+#define MTLBindingAccess MTLArgumentAccess
+#define MTLBindingAccessReadOnly MTLArgumentAccessReadOnly
+#define MTLBindingAccessReadWrite MTLArgumentAccessReadWrite
+#define MTLBindingAccessWriteOnly MTLArgumentAccessWriteOnly
+#endif
+/// Description of one shader binding, with helpers to build an MTLArgumentDescriptor and to (de)serialize it.
+struct API_AVAILABLE(macos(11.0), ios(14.0)) BindingInfo {
+	MTLDataType dataType = MTLDataTypeNone;
+	uint32_t index = 0;
+	MTLBindingAccess access = MTLBindingAccessReadOnly;
+	MTLResourceUsage usage = 0;
+	MTLTextureType textureType = MTLTextureType2D;
+	spv::ImageFormat imageFormat = spv::ImageFormatUnknown;
+	uint32_t arrayLength = 0;
+	bool isMultisampled = false;
+	// Copies dataType, index, access, textureType and arrayLength into a fresh descriptor; the other fields are not used here.
+	inline MTLArgumentDescriptor *new_argument_descriptor() const {
+		MTLArgumentDescriptor *desc = MTLArgumentDescriptor.argumentDescriptor;
+		desc.dataType = dataType;
+		desc.index = index;
+		desc.access = access;
+		desc.textureType = textureType;
+		desc.arrayLength = arrayLength;
+		return desc;
+	}
+	// NOTE(review): assumes the writer emits each of the 8 fields (including the bool and the spv enum) as 4 bytes. Confirm against the writer type.
+	size_t serialize_size() const {
+		return sizeof(uint32_t) * 8 /* 8 uint32_t fields */;
+	}
+	// serialize() and deserialize() must visit the fields in the same order.
+	template <typename W>
+	void serialize(W &p_writer) const {
+		p_writer.write((uint32_t)dataType);
+		p_writer.write(index);
+		p_writer.write((uint32_t)access);
+		p_writer.write((uint32_t)usage);
+		p_writer.write((uint32_t)textureType);
+		p_writer.write(imageFormat);
+		p_writer.write(arrayLength);
+		p_writer.write(isMultisampled);
+	}
+
+	template <typename R>
+	void deserialize(R &p_reader) {
+		p_reader.read((uint32_t &)dataType);
+		p_reader.read(index);
+		p_reader.read((uint32_t &)access);
+		p_reader.read((uint32_t &)usage);
+		p_reader.read((uint32_t &)textureType);
+		p_reader.read((uint32_t &)imageFormat);
+		p_reader.read(arrayLength);
+		p_reader.read(isMultisampled);
+	}
+};
+
+using RDC = RenderingDeviceCommons;
+
+typedef API_AVAILABLE(macos(11.0), ios(14.0)) HashMap<RDC::ShaderStage, BindingInfo> BindingInfoMap;
+/// One uniform within a set: its binding number, the stages that use it, and per-stage binding info.
+struct API_AVAILABLE(macos(11.0), ios(14.0)) UniformInfo {
+	uint32_t binding;
+	ShaderStageUsage active_stages = None;
+	BindingInfoMap bindings;
+	BindingInfoMap bindings_secondary;
+};
+/// The uniforms of one set together with per-stage offsets and argument encoders.
+struct API_AVAILABLE(macos(11.0), ios(14.0)) UniformSet {
+	LocalVector<UniformInfo> uniforms;
+	uint32_t buffer_size = 0;
+	HashMap<RDC::ShaderStage, uint32_t> offsets;
+	HashMap<RDC::ShaderStage, id<MTLArgumentEncoder>> encoders;
+};
+
+struct ShaderCacheEntry;
+
+enum class ShaderLoadStrategy {
+	DEFAULT,
+	LAZY,
+};
+
+/// A Metal shader library.
@interface MDLibrary : NSObject {
	// Cache entry this library was created for.
	ShaderCacheEntry *_entry;
};
- (id<MTLLibrary>)library;
- (NSError *)error;
- (void)setLabel:(NSString *)label;

+ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry
								  device:(id<MTLDevice>)device
								  source:(NSString *)source
								 options:(MTLCompileOptions *)options
								strategy:(ShaderLoadStrategy)strategy;
@end

/// A SHA-256 digest, usable as a hash map key.
struct SHA256Digest {
	unsigned char data[CC_SHA256_DIGEST_LENGTH];

	/// Returns a 32-bit hash of the digest, computed as the CRC-32 of all digest bytes.
	uint32_t hash() const {
		uint32_t c = crc32(0, data, CC_SHA256_DIGEST_LENGTH);
		return c;
	}

	/// Creates an all-zero digest.
	SHA256Digest() {
		bzero(data, CC_SHA256_DIGEST_LENGTH);
	}

	/// Creates the digest of the `p_length` bytes starting at `p_data`.
	SHA256Digest(const char *p_data, size_t p_length) {
		CC_SHA256(p_data, (CC_LONG)p_length, data);
	}

	/// Returns the first four digest bytes as a byte-swapped 32-bit value.
	_FORCE_INLINE_ uint32_t short_sha() const {
		// Copy the prefix out rather than dereferencing a casted pointer: `data` is a
		// const byte array, so reading it through a `uint32_t` lvalue would break strict
		// aliasing (and assume 4-byte alignment). The resulting value is unchanged.
		uint32_t prefix;
		memcpy(&prefix, data, sizeof(prefix));
		return __builtin_bswap32(prefix);
	}
};

template <>
struct HashMapComparatorDefault<SHA256Digest> {
	/// Two digests are equal when all of their bytes match.
	static bool compare(const SHA256Digest &p_lhs, const SHA256Digest &p_rhs) {
		return memcmp(p_lhs.data, p_rhs.data, CC_SHA256_DIGEST_LENGTH) == 0;
	}
};

/// A cache entry for a Metal shader library.
struct ShaderCacheEntry {
	RenderingDeviceDriverMetal &owner;
	/// A hash of the Metal shader source code.
	SHA256Digest key;
	CharString name;
	RD::ShaderStage stage = RD::SHADER_STAGE_VERTEX;
	/// This reference must be weak, to ensure that when the last strong reference to the library
	/// is released, the cache entry is freed.
	MDLibrary *__weak library = nil;

	/// Notify the cache that this entry is no longer needed.
+ void notify_free() const; + + ShaderCacheEntry(RenderingDeviceDriverMetal &p_owner, SHA256Digest p_key) : + owner(p_owner), key(p_key) { + } + ~ShaderCacheEntry() = default; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0)) MDShader { +public: + CharString name; + Vector<UniformSet> sets; + + virtual void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) = 0; + + MDShader(CharString p_name, Vector<UniformSet> p_sets) : + name(p_name), sets(p_sets) {} + virtual ~MDShader() = default; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0)) MDComputeShader final : public MDShader { +public: + struct { + uint32_t binding = -1; + uint32_t size = 0; + } push_constants; + MTLSize local = {}; + + MDLibrary *kernel; +#if DEV_ENABLED + CharString kernel_source; +#endif + + void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final; + + MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *p_kernel); +}; + +class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderShader final : public MDShader { +public: + struct { + struct { + int32_t binding = -1; + uint32_t size = 0; + } vert; + struct { + int32_t binding = -1; + uint32_t size = 0; + } frag; + } push_constants; + + MDLibrary *vert; + MDLibrary *frag; +#if DEV_ENABLED + CharString vert_source; + CharString frag_source; +#endif + + void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final; + + MDRenderShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *p_vert, MDLibrary *p_frag); +}; + +enum StageResourceUsage : uint32_t { + VertexRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_VERTEX * 2), + VertexWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_VERTEX * 2), + FragmentRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_FRAGMENT * 2), + FragmentWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_FRAGMENT * 2), + TesselationControlRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2), + 
TesselationControlWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2), + TesselationEvaluationRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2), + TesselationEvaluationWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2), + ComputeRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_COMPUTE * 2), + ComputeWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_COMPUTE * 2), +}; + +_FORCE_INLINE_ StageResourceUsage &operator|=(StageResourceUsage &p_a, uint32_t p_b) { + p_a = StageResourceUsage(uint32_t(p_a) | p_b); + return p_a; +} + +_FORCE_INLINE_ StageResourceUsage stage_resource_usage(RDC::ShaderStage p_stage, MTLResourceUsage p_usage) { + return StageResourceUsage(p_usage << (p_stage * 2)); +} + +_FORCE_INLINE_ MTLResourceUsage resource_usage_for_stage(StageResourceUsage p_usage, RDC::ShaderStage p_stage) { + return MTLResourceUsage((p_usage >> (p_stage * 2)) & 0b11); +} + +template <> +struct HashMapComparatorDefault<RDD::ShaderID> { + static bool compare(const RDD::ShaderID &p_lhs, const RDD::ShaderID &p_rhs) { + return p_lhs.id == p_rhs.id; + } +}; + +struct BoundUniformSet { + id<MTLBuffer> buffer; + HashMap<id<MTLResource>, StageResourceUsage> bound_resources; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0)) MDUniformSet { +public: + uint32_t index; + LocalVector<RDD::BoundUniform> uniforms; + HashMap<MDShader *, BoundUniformSet> bound_uniforms; + + BoundUniformSet &boundUniformSetForShader(MDShader *p_shader, id<MTLDevice> p_device); +}; + +enum class MDAttachmentType : uint8_t { + None = 0, + Color = 1 << 0, + Depth = 1 << 1, + Stencil = 1 << 2, +}; + +_FORCE_INLINE_ MDAttachmentType &operator|=(MDAttachmentType &p_a, MDAttachmentType p_b) { + flags::set(p_a, p_b); + return p_a; +} + +_FORCE_INLINE_ bool operator&(MDAttachmentType p_a, MDAttachmentType p_b) { + return uint8_t(p_a) & uint8_t(p_b); +} + +struct MDSubpass { + uint32_t subpass_index = 0; + 
LocalVector<RDD::AttachmentReference> input_references; + LocalVector<RDD::AttachmentReference> color_references; + RDD::AttachmentReference depth_stencil_reference; + LocalVector<RDD::AttachmentReference> resolve_references; + + MTLFmtCaps getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const; +}; + +struct API_AVAILABLE(macos(11.0), ios(14.0)) MDAttachment { +private: + uint32_t index = 0; + uint32_t firstUseSubpassIndex = 0; + uint32_t lastUseSubpassIndex = 0; + +public: + MTLPixelFormat format = MTLPixelFormatInvalid; + MDAttachmentType type = MDAttachmentType::None; + MTLLoadAction loadAction = MTLLoadActionDontCare; + MTLStoreAction storeAction = MTLStoreActionDontCare; + MTLLoadAction stencilLoadAction = MTLLoadActionDontCare; + MTLStoreAction stencilStoreAction = MTLStoreActionDontCare; + uint32_t samples = 1; + + /*! + * @brief Returns true if this attachment is first used in the given subpass. + * @param p_subpass + * @return + */ + _FORCE_INLINE_ bool isFirstUseOf(MDSubpass const &p_subpass) const { + return p_subpass.subpass_index == firstUseSubpassIndex; + } + + /*! + * @brief Returns true if this attachment is last used in the given subpass. + * @param p_subpass + * @return + */ + _FORCE_INLINE_ bool isLastUseOf(MDSubpass const &p_subpass) const { + return p_subpass.subpass_index == lastUseSubpassIndex; + } + + void linkToSubpass(MDRenderPass const &p_pass); + + MTLStoreAction getMTLStoreAction(MDSubpass const &p_subpass, + bool p_is_rendering_entire_area, + bool p_has_resolve, + bool p_can_resolve, + bool p_is_stencil) const; + bool configureDescriptor(MTLRenderPassAttachmentDescriptor *p_desc, + PixelFormats &p_pf, + MDSubpass const &p_subpass, + id<MTLTexture> p_attachment, + bool p_is_rendering_entire_area, + bool p_has_resolve, + bool p_can_resolve, + bool p_is_stencil) const; + /** Returns whether this attachment should be cleared in the subpass. 
*/ + bool shouldClear(MDSubpass const &p_subpass, bool p_is_stencil) const; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderPass { +public: + Vector<MDAttachment> attachments; + Vector<MDSubpass> subpasses; + + uint32_t get_sample_count() const { + return attachments.is_empty() ? 1 : attachments[0].samples; + } + + MDRenderPass(Vector<MDAttachment> &p_attachments, Vector<MDSubpass> &p_subpasses); +}; + +class API_AVAILABLE(macos(11.0), ios(14.0)) MDPipeline { +public: + MDPipelineType type; + + explicit MDPipeline(MDPipelineType p_type) : + type(p_type) {} + virtual ~MDPipeline() = default; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderPipeline final : public MDPipeline { +public: + id<MTLRenderPipelineState> state = nil; + id<MTLDepthStencilState> depth_stencil = nil; + uint32_t push_constant_size = 0; + uint32_t push_constant_stages_mask = 0; + SampleCount sample_count = SampleCount1; + + struct { + MTLCullMode cull_mode = MTLCullModeNone; + MTLTriangleFillMode fill_mode = MTLTriangleFillModeFill; + MTLDepthClipMode clip_mode = MTLDepthClipModeClip; + MTLWinding winding = MTLWindingClockwise; + MTLPrimitiveType render_primitive = MTLPrimitiveTypePoint; + + struct { + bool enabled = false; + } depth_test; + + struct { + bool enabled = false; + float depth_bias = 0.0; + float slope_scale = 0.0; + float clamp = 0.0; + _FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const { + if (!enabled) { + return; + } + [p_enc setDepthBias:depth_bias slopeScale:slope_scale clamp:clamp]; + } + } depth_bias; + + struct { + bool enabled = false; + uint32_t front_reference = 0; + uint32_t back_reference = 0; + _FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const { + if (!enabled) + return; + [p_enc setStencilFrontReferenceValue:front_reference backReferenceValue:back_reference]; + }; + } stencil; + + struct { + bool enabled = false; + float r = 0.0; + float g = 0.0; + float b = 0.0; + float a = 
0.0; + + _FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const { + //if (!enabled) + // return; + [p_enc setBlendColorRed:r green:g blue:b alpha:a]; + }; + } blend; + + _FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const { + [p_enc setCullMode:cull_mode]; + [p_enc setTriangleFillMode:fill_mode]; + [p_enc setDepthClipMode:clip_mode]; + [p_enc setFrontFacingWinding:winding]; + depth_bias.apply(p_enc); + stencil.apply(p_enc); + blend.apply(p_enc); + } + + } raster_state; + + MDRenderShader *shader = nil; + + MDRenderPipeline() : + MDPipeline(MDPipelineType::Render) {} + ~MDRenderPipeline() final = default; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0)) MDComputePipeline final : public MDPipeline { +public: + id<MTLComputePipelineState> state = nil; + struct { + MTLSize local = {}; + } compute_state; + + MDComputeShader *shader = nil; + + explicit MDComputePipeline(id<MTLComputePipelineState> p_state) : + MDPipeline(MDPipelineType::Compute), state(p_state) {} + ~MDComputePipeline() final = default; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0)) MDFrameBuffer { +public: + Vector<MTL::Texture> textures; + Size2i size; + MDFrameBuffer(Vector<MTL::Texture> p_textures, Size2i p_size) : + textures(p_textures), size(p_size) {} + MDFrameBuffer() {} + + virtual ~MDFrameBuffer() = default; +}; + +// These functions are used to convert between Objective-C objects and +// the RIDs used by Godot, respecting automatic reference counting. +namespace rid { + +// Converts an Objective-C object to a pointer, and incrementing the +// reference count. 
+_FORCE_INLINE_ +void *owned(id p_id) { + return (__bridge_retained void *)p_id; +} + +#define MAKE_ID(FROM, TO) \ + _FORCE_INLINE_ TO make(FROM p_obj) { return TO(owned(p_obj)); } + +MAKE_ID(id<MTLTexture>, RDD::TextureID) +MAKE_ID(id<MTLBuffer>, RDD::BufferID) +MAKE_ID(id<MTLSamplerState>, RDD::SamplerID) +MAKE_ID(MTLVertexDescriptor *, RDD::VertexFormatID) +MAKE_ID(id<MTLCommandQueue>, RDD::CommandPoolID) + +// Converts a pointer to an Objective-C object without changing the reference count. +_FORCE_INLINE_ +auto get(RDD::ID p_id) { + return (p_id.id) ? (__bridge ::id)(void *)p_id.id : nil; +} + +// Converts a pointer to an Objective-C object, and decrements the reference count. +_FORCE_INLINE_ +auto release(RDD::ID p_id) { + return (__bridge_transfer ::id)(void *)p_id.id; +} + +} // namespace rid + +#endif // METAL_OBJECTS_H diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm new file mode 100644 index 0000000000..abdcccf00c --- /dev/null +++ b/drivers/metal/metal_objects.mm @@ -0,0 +1,1581 @@ +/**************************************************************************/ +/* metal_objects.mm */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +/**************************************************************************/ +/* */ +/* Portions of this code were derived from MoltenVK. */ +/* */ +/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */ +/* (http://www.brenwill.com) */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. 
See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/**************************************************************************/ + +#import "metal_objects.h" + +#import "metal_utils.h" +#import "pixel_formats.h" +#import "rendering_device_driver_metal.h" + +#import <os/signpost.h> + +void MDCommandBuffer::begin() { + DEV_ASSERT(commandBuffer == nil); + commandBuffer = queue.commandBuffer; +} + +void MDCommandBuffer::end() { + switch (type) { + case MDCommandBufferStateType::None: + return; + case MDCommandBufferStateType::Render: + return render_end_pass(); + case MDCommandBufferStateType::Compute: + return _end_compute_dispatch(); + case MDCommandBufferStateType::Blit: + return _end_blit(); + } +} + +void MDCommandBuffer::commit() { + end(); + [commandBuffer commit]; + commandBuffer = nil; +} + +void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) { + MDPipeline *p = (MDPipeline *)(p_pipeline.id); + + // End current encoder if it is a compute encoder or blit encoder, + // as they do not have a defined end boundary in the RDD like render. + if (type == MDCommandBufferStateType::Compute) { + _end_compute_dispatch(); + } else if (type == MDCommandBufferStateType::Blit) { + _end_blit(); + } + + if (p->type == MDPipelineType::Render) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + MDRenderPipeline *rp = (MDRenderPipeline *)p; + + if (render.encoder == nil) { + // This condition occurs when there are no attachments when calling render_next_subpass() + // and is due to the SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS flag. + render.desc.defaultRasterSampleCount = static_cast<NSUInteger>(rp->sample_count); + +// NOTE(sgc): This is to test rdar://FB13605547 and will be deleted once fix is confirmed. 
+#if 0 + if (render.pipeline->sample_count == 4) { + static id<MTLTexture> tex = nil; + static id<MTLTexture> res_tex = nil; + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + Size2i sz = render.frameBuffer->size; + MTLTextureDescriptor *td = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm width:sz.width height:sz.height mipmapped:NO]; + td.textureType = MTLTextureType2DMultisample; + td.storageMode = MTLStorageModeMemoryless; + td.usage = MTLTextureUsageRenderTarget; + td.sampleCount = render.pipeline->sample_count; + tex = [device_driver->get_device() newTextureWithDescriptor:td]; + + td.textureType = MTLTextureType2D; + td.storageMode = MTLStorageModePrivate; + td.usage = MTLTextureUsageShaderWrite; + td.sampleCount = 1; + res_tex = [device_driver->get_device() newTextureWithDescriptor:td]; + }); + render.desc.colorAttachments[0].texture = tex; + render.desc.colorAttachments[0].loadAction = MTLLoadActionClear; + render.desc.colorAttachments[0].storeAction = MTLStoreActionMultisampleResolve; + + render.desc.colorAttachments[0].resolveTexture = res_tex; + } +#endif + render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:render.desc]; + } + + if (render.pipeline != rp) { + render.dirty.set_flag((RenderState::DirtyFlag)(RenderState::DIRTY_PIPELINE | RenderState::DIRTY_RASTER)); + // Mark all uniforms as dirty, as variants of a shader pipeline may have a different entry point ABI, + // due to setting force_active_argument_buffer_resources = true for spirv_cross::CompilerMSL::Options. + // As a result, uniform sets with the same layout will generate redundant binding warnings when + // capturing a Metal frame in Xcode. + // + // If we don't mark as dirty, then some bindings will generate a validation error. 
+ render.mark_uniforms_dirty(); + if (render.pipeline != nullptr && render.pipeline->depth_stencil != rp->depth_stencil) { + render.dirty.set_flag(RenderState::DIRTY_DEPTH); + } + render.pipeline = rp; + } + } else if (p->type == MDPipelineType::Compute) { + DEV_ASSERT(type == MDCommandBufferStateType::None); + type = MDCommandBufferStateType::Compute; + + compute.pipeline = (MDComputePipeline *)p; + compute.encoder = commandBuffer.computeCommandEncoder; + [compute.encoder setComputePipelineState:compute.pipeline->state]; + } +} + +id<MTLBlitCommandEncoder> MDCommandBuffer::blit_command_encoder() { + switch (type) { + case MDCommandBufferStateType::None: + break; + case MDCommandBufferStateType::Render: + render_end_pass(); + break; + case MDCommandBufferStateType::Compute: + _end_compute_dispatch(); + break; + case MDCommandBufferStateType::Blit: + return blit.encoder; + } + + type = MDCommandBufferStateType::Blit; + blit.encoder = commandBuffer.blitCommandEncoder; + return blit.encoder; +} + +void MDCommandBuffer::encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label) { + switch (type) { + case MDCommandBufferStateType::None: + break; + case MDCommandBufferStateType::Render: + render_end_pass(); + break; + case MDCommandBufferStateType::Compute: + _end_compute_dispatch(); + break; + case MDCommandBufferStateType::Blit: + _end_blit(); + break; + } + + id<MTLRenderCommandEncoder> enc = [commandBuffer renderCommandEncoderWithDescriptor:p_desc]; + if (p_label != nil) { + [enc pushDebugGroup:p_label]; + [enc popDebugGroup]; + } + [enc endEncoding]; +} + +#pragma mark - Render Commands + +void MDCommandBuffer::render_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + + MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id); + if (render.uniform_sets.size() <= set->index) { + uint32_t s = render.uniform_sets.size(); + 
render.uniform_sets.resize(set->index + 1); + // Set intermediate values to null. + std::fill(&render.uniform_sets[s], &render.uniform_sets[set->index] + 1, nullptr); + } + + if (render.uniform_sets[set->index] != set) { + render.dirty.set_flag(RenderState::DIRTY_UNIFORMS); + render.uniform_set_mask |= 1ULL << set->index; + render.uniform_sets[set->index] = set; + } +} + +void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + + uint32_t vertex_count = p_rects.size() * 6; + + simd::float4 vertices[vertex_count]; + simd::float4 clear_colors[ClearAttKey::ATTACHMENT_COUNT]; + + Size2i size = render.frameBuffer->size; + Rect2i render_area = render.clip_to_render_area({ { 0, 0 }, size }); + size = Size2i(render_area.position.x + render_area.size.width, render_area.position.y + render_area.size.height); + _populate_vertices(vertices, size, p_rects); + + ClearAttKey key; + key.sample_count = render.pass->get_sample_count(); + + float depth_value = 0; + uint32_t stencil_value = 0; + + for (uint32_t i = 0; i < p_attachment_clears.size(); i++) { + RDD::AttachmentClear const &attClear = p_attachment_clears[i]; + uint32_t attachment_index; + if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) { + attachment_index = attClear.color_attachment; + } else { + attachment_index = render.pass->subpasses[render.current_subpass].depth_stencil_reference.attachment; + } + + MDAttachment const &mda = render.pass->attachments[attachment_index]; + if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) { + key.set_color_format(attachment_index, mda.format); + clear_colors[attachment_index] = { + attClear.value.color.r, + attClear.value.color.g, + attClear.value.color.b, + attClear.value.color.a + }; + } + + if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT)) { + key.set_depth_format(mda.format); + depth_value = attClear.value.depth; + } + 
+ if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT)) { + key.set_stencil_format(mda.format); + stencil_value = attClear.value.stencil; + } + } + clear_colors[ClearAttKey::DEPTH_INDEX] = { + depth_value, + depth_value, + depth_value, + depth_value + }; + + id<MTLRenderCommandEncoder> enc = render.encoder; + + MDResourceCache &cache = device_driver->get_resource_cache(); + + [enc pushDebugGroup:@"ClearAttachments"]; + [enc setRenderPipelineState:cache.get_clear_render_pipeline_state(key, nil)]; + [enc setDepthStencilState:cache.get_depth_stencil_state( + key.is_depth_enabled(), + key.is_stencil_enabled())]; + [enc setStencilReferenceValue:stencil_value]; + [enc setCullMode:MTLCullModeNone]; + [enc setTriangleFillMode:MTLTriangleFillModeFill]; + [enc setDepthBias:0 slopeScale:0 clamp:0]; + [enc setViewport:{ 0, 0, (double)size.width, (double)size.height, 0.0, 1.0 }]; + [enc setScissorRect:{ 0, 0, (NSUInteger)size.width, (NSUInteger)size.height }]; + + [enc setVertexBytes:clear_colors length:sizeof(clear_colors) atIndex:0]; + [enc setFragmentBytes:clear_colors length:sizeof(clear_colors) atIndex:0]; + [enc setVertexBytes:vertices length:vertex_count * sizeof(vertices[0]) atIndex:device_driver->get_metal_buffer_index_for_vertex_attribute_binding(VERT_CONTENT_BUFFER_INDEX)]; + + [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:vertex_count]; + [enc popDebugGroup]; + + render.dirty.set_flag((RenderState::DirtyFlag)(RenderState::DIRTY_PIPELINE | RenderState::DIRTY_DEPTH | RenderState::DIRTY_RASTER)); + render.mark_uniforms_dirty({ 0 }); // Mark index 0 dirty, if there is already a binding for index 0. 
+ render.mark_viewport_dirty(); + render.mark_scissors_dirty(); + render.mark_vertex_dirty(); +} + +void MDCommandBuffer::_render_set_dirty_state() { + _render_bind_uniform_sets(); + + if (render.dirty.has_flag(RenderState::DIRTY_PIPELINE)) { + [render.encoder setRenderPipelineState:render.pipeline->state]; + } + + if (render.dirty.has_flag(RenderState::DIRTY_VIEWPORT)) { + [render.encoder setViewports:render.viewports.ptr() count:render.viewports.size()]; + } + + if (render.dirty.has_flag(RenderState::DIRTY_DEPTH)) { + [render.encoder setDepthStencilState:render.pipeline->depth_stencil]; + } + + if (render.dirty.has_flag(RenderState::DIRTY_RASTER)) { + render.pipeline->raster_state.apply(render.encoder); + } + + if (render.dirty.has_flag(RenderState::DIRTY_SCISSOR) && !render.scissors.is_empty()) { + size_t len = render.scissors.size(); + MTLScissorRect rects[len]; + for (size_t i = 0; i < len; i++) { + rects[i] = render.clip_to_render_area(render.scissors[i]); + } + [render.encoder setScissorRects:rects count:len]; + } + + if (render.dirty.has_flag(RenderState::DIRTY_BLEND) && render.blend_constants.has_value()) { + [render.encoder setBlendColorRed:render.blend_constants->r green:render.blend_constants->g blue:render.blend_constants->b alpha:render.blend_constants->a]; + } + + if (render.dirty.has_flag(RenderState::DIRTY_VERTEX)) { + uint32_t p_binding_count = render.vertex_buffers.size(); + uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1); + [render.encoder setVertexBuffers:render.vertex_buffers.ptr() + offsets:render.vertex_offsets.ptr() + withRange:NSMakeRange(first, p_binding_count)]; + } + + render.dirty.clear(); +} + +void MDCommandBuffer::render_set_viewport(VectorView<Rect2i> p_viewports) { + render.viewports.resize(p_viewports.size()); + for (uint32_t i = 0; i < p_viewports.size(); i += 1) { + Rect2i const &vp = p_viewports[i]; + render.viewports[i] = { + .originX = 
static_cast<double>(vp.position.x), + .originY = static_cast<double>(vp.position.y), + .width = static_cast<double>(vp.size.width), + .height = static_cast<double>(vp.size.height), + .znear = 0.0, + .zfar = 1.0, + }; + } + + render.dirty.set_flag(RenderState::DIRTY_VIEWPORT); +} + +void MDCommandBuffer::render_set_scissor(VectorView<Rect2i> p_scissors) { + render.scissors.resize(p_scissors.size()); + for (uint32_t i = 0; i < p_scissors.size(); i += 1) { + Rect2i const &vp = p_scissors[i]; + render.scissors[i] = { + .x = static_cast<NSUInteger>(vp.position.x), + .y = static_cast<NSUInteger>(vp.position.y), + .width = static_cast<NSUInteger>(vp.size.width), + .height = static_cast<NSUInteger>(vp.size.height), + }; + } + + render.dirty.set_flag(RenderState::DIRTY_SCISSOR); +} + +void MDCommandBuffer::render_set_blend_constants(const Color &p_constants) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + if (render.blend_constants != p_constants) { + render.blend_constants = p_constants; + render.dirty.set_flag(RenderState::DIRTY_BLEND); + } +} + +void MDCommandBuffer::_render_bind_uniform_sets() { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + if (!render.dirty.has_flag(RenderState::DIRTY_UNIFORMS)) { + return; + } + + render.dirty.clear_flag(RenderState::DIRTY_UNIFORMS); + uint64_t set_uniforms = render.uniform_set_mask; + render.uniform_set_mask = 0; + + id<MTLRenderCommandEncoder> enc = render.encoder; + MDRenderShader *shader = render.pipeline->shader; + id<MTLDevice> device = enc.device; + + while (set_uniforms != 0) { + // Find the index of the next set bit. + int index = __builtin_ctzll(set_uniforms); + // Clear the set bit. 
+ set_uniforms &= ~(1ULL << index); + MDUniformSet *set = render.uniform_sets[index]; + if (set == nullptr || set->index >= (uint32_t)shader->sets.size()) { + continue; + } + UniformSet const &set_info = shader->sets[set->index]; + + BoundUniformSet &bus = set->boundUniformSetForShader(shader, device); + + for (KeyValue<id<MTLResource>, StageResourceUsage> const &keyval : bus.bound_resources) { + MTLResourceUsage usage = resource_usage_for_stage(keyval.value, RDD::ShaderStage::SHADER_STAGE_VERTEX); + if (usage != 0) { + [enc useResource:keyval.key usage:usage stages:MTLRenderStageVertex]; + } + usage = resource_usage_for_stage(keyval.value, RDD::ShaderStage::SHADER_STAGE_FRAGMENT); + if (usage != 0) { + [enc useResource:keyval.key usage:usage stages:MTLRenderStageFragment]; + } + } + + // Set the buffer for the vertex stage. + { + uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_VERTEX); + if (offset) { + [enc setVertexBuffer:bus.buffer offset:*offset atIndex:set->index]; + } + } + // Set the buffer for the fragment stage. + { + uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_FRAGMENT); + if (offset) { + [enc setFragmentBuffer:bus.buffer offset:*offset atIndex:set->index]; + } + } + } +} + +void MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView<Rect2i> p_rects) { + uint32_t idx = 0; + for (uint32_t i = 0; i < p_rects.size(); i++) { + Rect2i const &rect = p_rects[i]; + idx = _populate_vertices(p_vertices, idx, rect, p_fb_size); + } +} + +uint32_t MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size) { + // Determine the positions of the four edges of the + // clear rectangle as a fraction of the attachment size. 
+ float leftPos = (float)(p_rect.position.x) / (float)p_fb_size.width; + float rightPos = (float)(p_rect.size.width) / (float)p_fb_size.width + leftPos; + float bottomPos = (float)(p_rect.position.y) / (float)p_fb_size.height; + float topPos = (float)(p_rect.size.height) / (float)p_fb_size.height + bottomPos; + + // Transform to clip-space coordinates, which are bounded by (-1.0 < p < 1.0) in clip-space. + leftPos = (leftPos * 2.0f) - 1.0f; + rightPos = (rightPos * 2.0f) - 1.0f; + bottomPos = (bottomPos * 2.0f) - 1.0f; + topPos = (topPos * 2.0f) - 1.0f; + + simd::float4 vtx; + + uint32_t idx = p_index; + vtx.z = 0.0; + vtx.w = (float)1; + + // Top left vertex - First triangle. + vtx.y = topPos; + vtx.x = leftPos; + p_vertices[idx++] = vtx; + + // Bottom left vertex. + vtx.y = bottomPos; + vtx.x = leftPos; + p_vertices[idx++] = vtx; + + // Bottom right vertex. + vtx.y = bottomPos; + vtx.x = rightPos; + p_vertices[idx++] = vtx; + + // Bottom right vertex - Second triangle. + p_vertices[idx++] = vtx; + + // Top right vertex. + vtx.y = topPos; + vtx.x = rightPos; + p_vertices[idx++] = vtx; + + // Top left vertex. 
+ vtx.y = topPos; + vtx.x = leftPos; + p_vertices[idx++] = vtx; + + return idx; +} + +void MDCommandBuffer::render_begin_pass(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_frameBuffer, RDD::CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RDD::RenderPassClearValue> p_clear_values) { + DEV_ASSERT(commandBuffer != nil); + end(); + + MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id); + MDFrameBuffer *fb = (MDFrameBuffer *)(p_frameBuffer.id); + + type = MDCommandBufferStateType::Render; + render.pass = pass; + render.current_subpass = UINT32_MAX; + render.render_area = p_rect; + render.clear_values.resize(p_clear_values.size()); + for (uint32_t i = 0; i < p_clear_values.size(); i++) { + render.clear_values[i] = p_clear_values[i]; + } + render.is_rendering_entire_area = (p_rect.position == Point2i(0, 0)) && p_rect.size == fb->size; + render.frameBuffer = fb; + render_next_subpass(); +} + +void MDCommandBuffer::_end_render_pass() { + MDFrameBuffer const &fb_info = *render.frameBuffer; + MDRenderPass const &pass_info = *render.pass; + MDSubpass const &subpass = pass_info.subpasses[render.current_subpass]; + + PixelFormats &pf = device_driver->get_pixel_formats(); + + for (uint32_t i = 0; i < subpass.resolve_references.size(); i++) { + uint32_t color_index = subpass.color_references[i].attachment; + uint32_t resolve_index = subpass.resolve_references[i].attachment; + DEV_ASSERT((color_index == RDD::AttachmentReference::UNUSED) == (resolve_index == RDD::AttachmentReference::UNUSED)); + if (color_index == RDD::AttachmentReference::UNUSED || !fb_info.textures[color_index]) { + continue; + } + + id<MTLTexture> resolve_tex = fb_info.textures[resolve_index]; + + CRASH_COND_MSG(!flags::all(pf.getCapabilities(resolve_tex.pixelFormat), kMTLFmtCapsResolve), "not implemented: unresolvable texture types"); + // see: 
https://github.com/KhronosGroup/MoltenVK/blob/d20d13fe2735adb845636a81522df1b9d89c0fba/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm#L407 + } + + [render.encoder endEncoding]; + render.encoder = nil; +} + +// Clears, inside render.render_area only, each attachment of the current subpass for which shouldClear() is true. +void MDCommandBuffer::_render_clear_render_area() { + MDRenderPass const &pass = *render.pass; + MDSubpass const &subpass = pass.subpasses[render.current_subpass]; + + // First determine attachments that should be cleared. + LocalVector<RDD::AttachmentClear> clears; + clears.reserve(subpass.color_references.size() + /* possible depth stencil clear */ 1); + + for (uint32_t i = 0; i < subpass.color_references.size(); i++) { + uint32_t idx = subpass.color_references[i].attachment; + if (idx != RDD::AttachmentReference::UNUSED && pass.attachments[idx].shouldClear(subpass, false)) { + clears.push_back({ .aspect = RDD::TEXTURE_ASPECT_COLOR_BIT, .color_attachment = idx, .value = render.clear_values[idx] }); + } + } + + // A subpass may have no depth/stencil attachment, so look the attachment up only after ds_index is known to be valid. + uint32_t ds_index = subpass.depth_stencil_reference.attachment; + if (ds_index != RDD::AttachmentReference::UNUSED) { + MDAttachment const &attachment = pass.attachments[ds_index]; + bool shouldClearDepth = attachment.shouldClear(subpass, false); + bool shouldClearStencil = attachment.shouldClear(subpass, true); + if (shouldClearDepth || shouldClearStencil) { + BitField<RDD::TextureAspectBits> bits; + if (shouldClearDepth && attachment.type & MDAttachmentType::Depth) { + bits.set_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT); + } + if (shouldClearStencil && attachment.type & MDAttachmentType::Stencil) { + bits.set_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT); + } + + clears.push_back({ .aspect = bits, .color_attachment = ds_index, .value = render.clear_values[ds_index] }); + } + } + + if (clears.is_empty()) { + return; + } + + render_clear_attachments(clears, { render.render_area }); +} + +void MDCommandBuffer::render_next_subpass() { + DEV_ASSERT(commandBuffer != nil); + + if (render.current_subpass == UINT32_MAX) { + 
render.current_subpass = 0; + } else { + _end_render_pass(); + render.current_subpass++; + } + + MDFrameBuffer const &fb = *render.frameBuffer; + MDRenderPass const &pass = *render.pass; + MDSubpass const &subpass = pass.subpasses[render.current_subpass]; + + MTLRenderPassDescriptor *desc = MTLRenderPassDescriptor.renderPassDescriptor; + PixelFormats &pf = device_driver->get_pixel_formats(); + + uint32_t attachmentCount = 0; + for (uint32_t i = 0; i < subpass.color_references.size(); i++) { + uint32_t idx = subpass.color_references[i].attachment; + if (idx == RDD::AttachmentReference::UNUSED) { + continue; + } + + attachmentCount += 1; + MTLRenderPassColorAttachmentDescriptor *ca = desc.colorAttachments[i]; + + uint32_t resolveIdx = subpass.resolve_references.is_empty() ? RDD::AttachmentReference::UNUSED : subpass.resolve_references[i].attachment; + bool has_resolve = resolveIdx != RDD::AttachmentReference::UNUSED; + bool can_resolve = true; + if (resolveIdx != RDD::AttachmentReference::UNUSED) { + id<MTLTexture> resolve_tex = fb.textures[resolveIdx]; + can_resolve = flags::all(pf.getCapabilities(resolve_tex.pixelFormat), kMTLFmtCapsResolve); + if (can_resolve) { + ca.resolveTexture = resolve_tex; + } else { + CRASH_NOW_MSG("unimplemented: using a texture format that is not supported for resolve"); + } + } + + MDAttachment const &attachment = pass.attachments[idx]; + + id<MTLTexture> tex = fb.textures[idx]; + if ((attachment.type & MDAttachmentType::Color)) { + if (attachment.configureDescriptor(ca, pf, subpass, tex, render.is_rendering_entire_area, has_resolve, can_resolve, false)) { + Color clearColor = render.clear_values[idx].color; + ca.clearColor = MTLClearColorMake(clearColor.r, clearColor.g, clearColor.b, clearColor.a); + } + } + } + + if (subpass.depth_stencil_reference.attachment != RDD::AttachmentReference::UNUSED) { + attachmentCount += 1; + uint32_t idx = subpass.depth_stencil_reference.attachment; + MDAttachment const &attachment = 
pass.attachments[idx]; + id<MTLTexture> tex = fb.textures[idx]; + if (attachment.type & MDAttachmentType::Depth) { + MTLRenderPassDepthAttachmentDescriptor *da = desc.depthAttachment; + if (attachment.configureDescriptor(da, pf, subpass, tex, render.is_rendering_entire_area, false, false, false)) { + da.clearDepth = render.clear_values[idx].depth; + } + } + + if (attachment.type & MDAttachmentType::Stencil) { + MTLRenderPassStencilAttachmentDescriptor *sa = desc.stencilAttachment; + if (attachment.configureDescriptor(sa, pf, subpass, tex, render.is_rendering_entire_area, false, false, true)) { + sa.clearStencil = render.clear_values[idx].stencil; + } + } + } + + desc.renderTargetWidth = MAX((NSUInteger)MIN(render.render_area.position.x + render.render_area.size.width, fb.size.width), 1u); + desc.renderTargetHeight = MAX((NSUInteger)MIN(render.render_area.position.y + render.render_area.size.height, fb.size.height), 1u); + + if (attachmentCount == 0) { + // If there are no attachments, delay the creation of the encoder, + // so we can use a matching sample count for the pipeline, by setting + // the defaultRasterSampleCount from the pipeline's sample count. + render.desc = desc; + } else { + render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:desc]; + + if (!render.is_rendering_entire_area) { + _render_clear_render_area(); + } + // With a new encoder, all state is dirty. 
+ render.dirty.set_flag(RenderState::DIRTY_ALL); + } +} + +void MDCommandBuffer::render_draw(uint32_t p_vertex_count, + uint32_t p_instance_count, + uint32_t p_base_vertex, + uint32_t p_first_instance) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + _render_set_dirty_state(); + + DEV_ASSERT(render.dirty == 0); + + id<MTLRenderCommandEncoder> enc = render.encoder; + + [enc drawPrimitives:render.pipeline->raster_state.render_primitive + vertexStart:p_base_vertex + vertexCount:p_vertex_count + instanceCount:p_instance_count + baseInstance:p_first_instance]; +} + +void MDCommandBuffer::render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + + render.vertex_buffers.resize(p_binding_count); + render.vertex_offsets.resize(p_binding_count); + + // Reverse the buffers, as their bindings are assigned in descending order. + for (uint32_t i = 0; i < p_binding_count; i += 1) { + render.vertex_buffers[i] = rid::get(p_buffers[p_binding_count - i - 1]); + render.vertex_offsets[i] = p_offsets[p_binding_count - i - 1]; + } + + if (render.encoder) { + uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1); + [render.encoder setVertexBuffers:render.vertex_buffers.ptr() + offsets:render.vertex_offsets.ptr() + withRange:NSMakeRange(first, p_binding_count)]; + } else { + render.dirty.set_flag(RenderState::DIRTY_VERTEX); + } +} + +void MDCommandBuffer::render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + + render.index_buffer = rid::get(p_buffer); + render.index_type = p_format == RDD::IndexBufferFormat::INDEX_BUFFER_FORMAT_UINT16 ? 
MTLIndexTypeUInt16 : MTLIndexTypeUInt32; + render.index_offset = p_offset; +} + +void MDCommandBuffer::render_draw_indexed(uint32_t p_index_count, + uint32_t p_instance_count, + uint32_t p_first_index, + int32_t p_vertex_offset, + uint32_t p_first_instance) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + _render_set_dirty_state(); + + id<MTLRenderCommandEncoder> enc = render.encoder; + + uint32_t index_offset = render.index_offset; + index_offset += p_first_index * (render.index_type == MTLIndexTypeUInt16 ? sizeof(uint16_t) : sizeof(uint32_t)); + + [enc drawIndexedPrimitives:render.pipeline->raster_state.render_primitive + indexCount:p_index_count + indexType:render.index_type + indexBuffer:render.index_buffer + indexBufferOffset:index_offset + instanceCount:p_instance_count + baseVertex:p_vertex_offset + baseInstance:p_first_instance]; +} + +void MDCommandBuffer::render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + _render_set_dirty_state(); + + id<MTLRenderCommandEncoder> enc = render.encoder; + + id<MTLBuffer> indirect_buffer = rid::get(p_indirect_buffer); + NSUInteger indirect_offset = p_offset; + + for (uint32_t i = 0; i < p_draw_count; i++) { + [enc drawIndexedPrimitives:render.pipeline->raster_state.render_primitive + indexType:render.index_type + indexBuffer:render.index_buffer + indexBufferOffset:0 + indirectBuffer:indirect_buffer + indirectBufferOffset:indirect_offset]; + indirect_offset += p_stride; + } +} + +void MDCommandBuffer::render_draw_indexed_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { + ERR_FAIL_MSG("not implemented"); +} + +void MDCommandBuffer::render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { + 
DEV_ASSERT(type == MDCommandBufferStateType::Render); + _render_set_dirty_state(); + + id<MTLRenderCommandEncoder> enc = render.encoder; + + id<MTLBuffer> indirect_buffer = rid::get(p_indirect_buffer); + NSUInteger indirect_offset = p_offset; + + for (uint32_t i = 0; i < p_draw_count; i++) { + [enc drawPrimitives:render.pipeline->raster_state.render_primitive + indirectBuffer:indirect_buffer + indirectBufferOffset:indirect_offset]; + indirect_offset += p_stride; + } +} + +void MDCommandBuffer::render_draw_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { + ERR_FAIL_MSG("not implemented"); +} + +void MDCommandBuffer::render_end_pass() { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + + [render.encoder endEncoding]; + render.reset(); + type = MDCommandBufferStateType::None; +} + +#pragma mark - Compute + +void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) { + DEV_ASSERT(type == MDCommandBufferStateType::Compute); + + id<MTLComputeCommandEncoder> enc = compute.encoder; + id<MTLDevice> device = enc.device; + + MDShader *shader = (MDShader *)(p_shader.id); + UniformSet const &set_info = shader->sets[p_set_index]; + + MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id); + BoundUniformSet &bus = set->boundUniformSetForShader(shader, device); + + for (KeyValue<id<MTLResource>, StageResourceUsage> &keyval : bus.bound_resources) { + MTLResourceUsage usage = resource_usage_for_stage(keyval.value, RDD::ShaderStage::SHADER_STAGE_COMPUTE); + if (usage != 0) { + [enc useResource:keyval.key usage:usage]; + } + } + + uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE); + if (offset) { + [enc setBuffer:bus.buffer offset:*offset atIndex:p_set_index]; + } +} + +void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t 
p_z_groups) { + DEV_ASSERT(type == MDCommandBufferStateType::Compute); + + MTLRegion region = MTLRegionMake3D(0, 0, 0, p_x_groups, p_y_groups, p_z_groups); + + id<MTLComputeCommandEncoder> enc = compute.encoder; + [enc dispatchThreadgroups:region.size threadsPerThreadgroup:compute.pipeline->compute_state.local]; +} + +void MDCommandBuffer::compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) { + DEV_ASSERT(type == MDCommandBufferStateType::Compute); + + id<MTLBuffer> indirectBuffer = rid::get(p_indirect_buffer); + + id<MTLComputeCommandEncoder> enc = compute.encoder; + [enc dispatchThreadgroupsWithIndirectBuffer:indirectBuffer indirectBufferOffset:p_offset threadsPerThreadgroup:compute.pipeline->compute_state.local]; +} + +void MDCommandBuffer::_end_compute_dispatch() { + DEV_ASSERT(type == MDCommandBufferStateType::Compute); + + [compute.encoder endEncoding]; + compute.reset(); + type = MDCommandBufferStateType::None; +} + +void MDCommandBuffer::_end_blit() { + DEV_ASSERT(type == MDCommandBufferStateType::Blit); + + [blit.encoder endEncoding]; + blit.reset(); + type = MDCommandBufferStateType::None; +} + +MDComputeShader::MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *p_kernel) : + MDShader(p_name, p_sets), kernel(p_kernel) { +} + +void MDComputeShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) { + DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Compute); + if (push_constants.binding == (uint32_t)-1) { + return; + } + + id<MTLComputeCommandEncoder> enc = p_cb->compute.encoder; + + void const *ptr = p_data.ptr(); + size_t length = p_data.size() * sizeof(uint32_t); + + [enc setBytes:ptr length:length atIndex:push_constants.binding]; +} + +MDRenderShader::MDRenderShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *_Nonnull p_vert, MDLibrary *_Nonnull p_frag) : + MDShader(p_name, p_sets), vert(p_vert), frag(p_frag) { +} + +void 
MDRenderShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) { + DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Render); + id<MTLRenderCommandEncoder> enc = p_cb->render.encoder; + + void const *ptr = p_data.ptr(); + size_t length = p_data.size() * sizeof(uint32_t); + + if (push_constants.vert.binding > -1) { + [enc setVertexBytes:ptr length:length atIndex:push_constants.vert.binding]; + } + + if (push_constants.frag.binding > -1) { + [enc setFragmentBytes:ptr length:length atIndex:push_constants.frag.binding]; + } +} + +BoundUniformSet &MDUniformSet::boundUniformSetForShader(MDShader *p_shader, id<MTLDevice> p_device) { + BoundUniformSet *sus = bound_uniforms.getptr(p_shader); + if (sus != nullptr) { + return *sus; + } + + UniformSet const &set = p_shader->sets[index]; + + HashMap<id<MTLResource>, StageResourceUsage> bound_resources; + auto add_usage = [&bound_resources](id<MTLResource> __unsafe_unretained res, RDD::ShaderStage stage, MTLResourceUsage usage) { + StageResourceUsage *sru = bound_resources.getptr(res); + if (sru == nullptr) { + bound_resources.insert(res, stage_resource_usage(stage, usage)); + } else { + *sru |= stage_resource_usage(stage, usage); + } + }; + id<MTLBuffer> enc_buffer = nil; + if (set.buffer_size > 0) { + MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceHazardTrackingModeTracked; + enc_buffer = [p_device newBufferWithLength:set.buffer_size options:options]; + for (KeyValue<RDC::ShaderStage, id<MTLArgumentEncoder>> const &kv : set.encoders) { + RDD::ShaderStage const stage = kv.key; + ShaderStageUsage const stage_usage = ShaderStageUsage(1 << stage); + id<MTLArgumentEncoder> const enc = kv.value; + + [enc setArgumentBuffer:enc_buffer offset:set.offsets[stage]]; + + for (uint32_t i = 0; i < uniforms.size(); i++) { + RDD::BoundUniform const &uniform = uniforms[i]; + UniformInfo ui = set.uniforms[i]; + + BindingInfo *bi = ui.bindings.getptr(stage); + if (bi == nullptr) { + // No 
binding for this stage. + continue; + } + + if ((ui.active_stages & stage_usage) == 0) { + // Not active for this state, so don't bind anything. + continue; + } + + switch (uniform.type) { + case RDD::UNIFORM_TYPE_SAMPLER: { + size_t count = uniform.ids.size(); + id<MTLSamplerState> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLSamplerState> __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + objects[j] = rid::get(uniform.ids[j].id); + } + [enc setSamplerStates:objects withRange:NSMakeRange(bi->index, count)]; + } break; + case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { + size_t count = uniform.ids.size() / 2; + id<MTLTexture> __unsafe_unretained *textures = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count); + id<MTLSamplerState> __unsafe_unretained *samplers = ALLOCA_ARRAY(id<MTLSamplerState> __unsafe_unretained, count); + for (uint32_t j = 0; j < count; j += 1) { + id<MTLSamplerState> sampler = rid::get(uniform.ids[j * 2 + 0]); + id<MTLTexture> texture = rid::get(uniform.ids[j * 2 + 1]); + samplers[j] = sampler; + textures[j] = texture; + add_usage(texture, stage, bi->usage); + } + BindingInfo *sbi = ui.bindings_secondary.getptr(stage); + if (sbi) { + [enc setSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)]; + } + [enc setTextures:textures + withRange:NSMakeRange(bi->index, count)]; + } break; + case RDD::UNIFORM_TYPE_TEXTURE: { + size_t count = uniform.ids.size(); + if (count == 1) { + id<MTLTexture> obj = rid::get(uniform.ids[0]); + [enc setTexture:obj atIndex:bi->index]; + add_usage(obj, stage, bi->usage); + } else { + id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + id<MTLTexture> obj = rid::get(uniform.ids[j]); + objects[j] = obj; + add_usage(obj, stage, bi->usage); + } + [enc setTextures:objects withRange:NSMakeRange(bi->index, count)]; + } + } break; + case RDD::UNIFORM_TYPE_IMAGE: { + size_t count = 
uniform.ids.size(); + if (count == 1) { + id<MTLTexture> obj = rid::get(uniform.ids[0]); + [enc setTexture:obj atIndex:bi->index]; + add_usage(obj, stage, bi->usage); + BindingInfo *sbi = ui.bindings_secondary.getptr(stage); + if (sbi) { + id<MTLTexture> tex = obj.parentTexture ? obj.parentTexture : obj; + id<MTLBuffer> buf = tex.buffer; + if (buf) { + [enc setBuffer:buf offset:tex.bufferOffset atIndex:sbi->index]; + } + } + } else { + id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + id<MTLTexture> obj = rid::get(uniform.ids[j]); + objects[j] = obj; + add_usage(obj, stage, bi->usage); + } + [enc setTextures:objects withRange:NSMakeRange(bi->index, count)]; + } + } break; + case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: { + ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER"); + } break; + case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: { + ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER"); + } break; + case RDD::UNIFORM_TYPE_IMAGE_BUFFER: { + CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER"); + } break; + case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: { + id<MTLBuffer> buffer = rid::get(uniform.ids[0]); + [enc setBuffer:buffer offset:0 atIndex:bi->index]; + add_usage(buffer, stage, bi->usage); + } break; + case RDD::UNIFORM_TYPE_STORAGE_BUFFER: { + id<MTLBuffer> buffer = rid::get(uniform.ids[0]); + [enc setBuffer:buffer offset:0 atIndex:bi->index]; + add_usage(buffer, stage, bi->usage); + } break; + case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: { + size_t count = uniform.ids.size(); + if (count == 1) { + id<MTLTexture> obj = rid::get(uniform.ids[0]); + [enc setTexture:obj atIndex:bi->index]; + add_usage(obj, stage, bi->usage); + } else { + id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + id<MTLTexture> obj = rid::get(uniform.ids[j]); + objects[j] = obj; 
+ add_usage(obj, stage, bi->usage); + } + [enc setTextures:objects withRange:NSMakeRange(bi->index, count)]; + } + } break; + default: { + DEV_ASSERT(false); + } + } + } + } + } + + BoundUniformSet bs = { .buffer = enc_buffer, .bound_resources = bound_resources }; + bound_uniforms.insert(p_shader, bs); + return bound_uniforms.get(p_shader); +} + +MTLFmtCaps MDSubpass::getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const { + MTLFmtCaps caps = kMTLFmtCapsNone; + + for (RDD::AttachmentReference const &ar : input_references) { + if (ar.attachment == p_index) { + flags::set(caps, kMTLFmtCapsRead); + break; + } + } + + for (RDD::AttachmentReference const &ar : color_references) { + if (ar.attachment == p_index) { + flags::set(caps, kMTLFmtCapsColorAtt); + break; + } + } + + for (RDD::AttachmentReference const &ar : resolve_references) { + if (ar.attachment == p_index) { + flags::set(caps, kMTLFmtCapsResolve); + break; + } + } + + if (depth_stencil_reference.attachment == p_index) { + flags::set(caps, kMTLFmtCapsDSAtt); + } + + return caps; +} + +void MDAttachment::linkToSubpass(const MDRenderPass &p_pass) { + firstUseSubpassIndex = UINT32_MAX; + lastUseSubpassIndex = 0; + + for (MDSubpass const &subpass : p_pass.subpasses) { + MTLFmtCaps reqCaps = subpass.getRequiredFmtCapsForAttachmentAt(index); + if (reqCaps) { + firstUseSubpassIndex = MIN(subpass.subpass_index, firstUseSubpassIndex); + lastUseSubpassIndex = MAX(subpass.subpass_index, lastUseSubpassIndex); + } + } +} + +MTLStoreAction MDAttachment::getMTLStoreAction(MDSubpass const &p_subpass, + bool p_is_rendering_entire_area, + bool p_has_resolve, + bool p_can_resolve, + bool p_is_stencil) const { + if (!p_is_rendering_entire_area || !isLastUseOf(p_subpass)) { + return p_has_resolve && p_can_resolve ? MTLStoreActionStoreAndMultisampleResolve : MTLStoreActionStore; + } + + switch (p_is_stencil ? stencilStoreAction : storeAction) { + case MTLStoreActionStore: + return p_has_resolve && p_can_resolve ? 
MTLStoreActionStoreAndMultisampleResolve : MTLStoreActionStore; + case MTLStoreActionDontCare: + return p_has_resolve ? (p_can_resolve ? MTLStoreActionMultisampleResolve : MTLStoreActionStore) : MTLStoreActionDontCare; + + default: + return MTLStoreActionStore; + } +} + +bool MDAttachment::configureDescriptor(MTLRenderPassAttachmentDescriptor *p_desc, + PixelFormats &p_pf, + MDSubpass const &p_subpass, + id<MTLTexture> p_attachment, + bool p_is_rendering_entire_area, + bool p_has_resolve, + bool p_can_resolve, + bool p_is_stencil) const { + p_desc.texture = p_attachment; + + MTLLoadAction load; + if (!p_is_rendering_entire_area || !isFirstUseOf(p_subpass)) { + load = MTLLoadActionLoad; + } else { + load = p_is_stencil ? stencilLoadAction : loadAction; + } + + p_desc.loadAction = load; + + MTLPixelFormat mtlFmt = p_attachment.pixelFormat; + bool isDepthFormat = p_pf.isDepthFormat(mtlFmt); + bool isStencilFormat = p_pf.isStencilFormat(mtlFmt); + if (isStencilFormat && !p_is_stencil && !isDepthFormat) { + p_desc.storeAction = MTLStoreActionDontCare; + } else { + p_desc.storeAction = getMTLStoreAction(p_subpass, p_is_rendering_entire_area, p_has_resolve, p_can_resolve, p_is_stencil); + } + + return load == MTLLoadActionClear; +} + +bool MDAttachment::shouldClear(const MDSubpass &p_subpass, bool p_is_stencil) const { + // If the subpass is not the first subpass to use this attachment, don't clear this attachment. + if (p_subpass.subpass_index != firstUseSubpassIndex) { + return false; + } + return (p_is_stencil ? 
stencilLoadAction : loadAction) == MTLLoadActionClear; +} + +MDRenderPass::MDRenderPass(Vector<MDAttachment> &p_attachments, Vector<MDSubpass> &p_subpasses) : + attachments(p_attachments), subpasses(p_subpasses) { + for (MDAttachment &att : attachments) { + att.linkToSubpass(*this); + } +} + +#pragma mark - Resource Factory + +// Compiles p_source into a library and returns the function named p_name from it. +// If compilation reports an error and p_error is not nil, the error is stored in *p_error. +id<MTLFunction> MDResourceFactory::new_func(NSString *p_source, NSString *p_name, NSError **p_error) { + id<MTLFunction> func = nil; + NSError *err = nil; + @autoreleasepool { + MTLCompileOptions *options = [MTLCompileOptions new]; + id<MTLDevice> device = device_driver->get_device(); + id<MTLLibrary> mtlLib = [device newLibraryWithSource:p_source + options:options + error:&err]; + func = [mtlLib newFunctionWithName:p_name]; + } + // Write the out-parameter only after the pool above has drained: a value stored through an + // autoreleasing pointer inside the pool would be released before the caller could read it. + if (err != nil && p_error != nil) { + *p_error = err; + } + return func; +} + +id<MTLFunction> MDResourceFactory::new_clear_vert_func(ClearAttKey &p_key) { + @autoreleasepool { + NSString *msl = [NSString stringWithFormat:@R"( +#include <metal_stdlib> +using namespace metal; + +typedef struct { + float4 a_position [[attribute(0)]]; +} AttributesPos; + +typedef struct { + float4 colors[9]; +} ClearColorsIn; + +typedef struct { + float4 v_position [[position]]; + uint layer; +} VaryingsPos; + +vertex VaryingsPos vertClear(AttributesPos attributes [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) { + VaryingsPos varyings; + varyings.v_position = float4(attributes.a_position.x, -attributes.a_position.y, ccIn.colors[%d].r, 1.0); + varyings.layer = uint(attributes.a_position.w); + return varyings; +} +)", + ClearAttKey::DEPTH_INDEX]; + + return new_func(msl, @"vertClear", nil); + } +} + +id<MTLFunction> MDResourceFactory::new_clear_frag_func(ClearAttKey &p_key) { + @autoreleasepool { + NSMutableString *msl = [NSMutableString stringWithCapacity:2048]; + + [msl appendFormat:@R"( +#include <metal_stdlib> +using namespace metal; + +typedef struct { + float4 v_position [[position]]; +} VaryingsPos; + +typedef struct { + float4 colors[9]; +} ClearColorsIn; + 
+typedef struct { +)"]; + + for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) { + if (p_key.is_enabled(caIdx)) { + NSString *typeStr = get_format_type_string((MTLPixelFormat)p_key.pixel_formats[caIdx]); + [msl appendFormat:@" %@4 color%u [[color(%u)]];\n", typeStr, caIdx, caIdx]; + } + } + [msl appendFormat:@R"(} ClearColorsOut; + +fragment ClearColorsOut fragClear(VaryingsPos varyings [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) { + + ClearColorsOut ccOut; +)"]; + for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) { + if (p_key.is_enabled(caIdx)) { + NSString *typeStr = get_format_type_string((MTLPixelFormat)p_key.pixel_formats[caIdx]); + [msl appendFormat:@" ccOut.color%u = %@4(ccIn.colors[%u]);\n", caIdx, typeStr, caIdx]; + } + } + [msl appendString:@R"( return ccOut; +})"]; + + return new_func(msl, @"fragClear", nil); + } +} + +NSString *MDResourceFactory::get_format_type_string(MTLPixelFormat p_fmt) { + switch (device_driver->get_pixel_formats().getFormatType(p_fmt)) { + case MTLFormatType::ColorInt8: + case MTLFormatType::ColorInt16: + return @"short"; + case MTLFormatType::ColorUInt8: + case MTLFormatType::ColorUInt16: + return @"ushort"; + case MTLFormatType::ColorInt32: + return @"int"; + case MTLFormatType::ColorUInt32: + return @"uint"; + case MTLFormatType::ColorHalf: + return @"half"; + case MTLFormatType::ColorFloat: + case MTLFormatType::DepthStencil: + case MTLFormatType::Compressed: + return @"float"; + case MTLFormatType::None: + return @"unexpected_MTLPixelFormatInvalid"; + } +} + +id<MTLDepthStencilState> MDResourceFactory::new_depth_stencil_state(bool p_use_depth, bool p_use_stencil) { + MTLDepthStencilDescriptor *dsDesc = [MTLDepthStencilDescriptor new]; + dsDesc.depthCompareFunction = MTLCompareFunctionAlways; + dsDesc.depthWriteEnabled = p_use_depth; + + if (p_use_stencil) { + MTLStencilDescriptor *sDesc = [MTLStencilDescriptor new]; + sDesc.stencilCompareFunction = MTLCompareFunctionAlways; + 
sDesc.stencilFailureOperation = MTLStencilOperationReplace; + sDesc.depthFailureOperation = MTLStencilOperationReplace; + sDesc.depthStencilPassOperation = MTLStencilOperationReplace; + + dsDesc.frontFaceStencil = sDesc; + dsDesc.backFaceStencil = sDesc; + } else { + dsDesc.frontFaceStencil = nil; + dsDesc.backFaceStencil = nil; + } + + return [device_driver->get_device() newDepthStencilStateWithDescriptor:dsDesc]; +} + +id<MTLRenderPipelineState> MDResourceFactory::new_clear_pipeline_state(ClearAttKey &p_key, NSError **p_error) { + PixelFormats &pixFmts = device_driver->get_pixel_formats(); + + id<MTLFunction> vtxFunc = new_clear_vert_func(p_key); + id<MTLFunction> fragFunc = new_clear_frag_func(p_key); + MTLRenderPipelineDescriptor *plDesc = [MTLRenderPipelineDescriptor new]; + plDesc.label = @"ClearRenderAttachments"; + plDesc.vertexFunction = vtxFunc; + plDesc.fragmentFunction = fragFunc; + plDesc.rasterSampleCount = p_key.sample_count; + plDesc.inputPrimitiveTopology = MTLPrimitiveTopologyClassTriangle; + + for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) { + MTLRenderPipelineColorAttachmentDescriptor *colorDesc = plDesc.colorAttachments[caIdx]; + colorDesc.pixelFormat = (MTLPixelFormat)p_key.pixel_formats[caIdx]; + colorDesc.writeMask = p_key.is_enabled(caIdx) ? MTLColorWriteMaskAll : MTLColorWriteMaskNone; + } + + MTLPixelFormat mtlDepthFormat = p_key.depth_format(); + if (pixFmts.isDepthFormat(mtlDepthFormat)) { + plDesc.depthAttachmentPixelFormat = mtlDepthFormat; + } + + MTLPixelFormat mtlStencilFormat = p_key.stencil_format(); + if (pixFmts.isStencilFormat(mtlStencilFormat)) { + plDesc.stencilAttachmentPixelFormat = mtlStencilFormat; + } + + MTLVertexDescriptor *vtxDesc = plDesc.vertexDescriptor; + + // Vertex attribute descriptors. 
+ MTLVertexAttributeDescriptorArray *vaDescArray = vtxDesc.attributes; + MTLVertexAttributeDescriptor *vaDesc; + NSUInteger vtxBuffIdx = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(VERT_CONTENT_BUFFER_INDEX); + NSUInteger vtxStride = 0; + + // Vertex location. + vaDesc = vaDescArray[0]; + vaDesc.format = MTLVertexFormatFloat4; + vaDesc.bufferIndex = vtxBuffIdx; + vaDesc.offset = vtxStride; + vtxStride += sizeof(simd::float4); + + // Vertex attribute buffer. + MTLVertexBufferLayoutDescriptorArray *vbDescArray = vtxDesc.layouts; + MTLVertexBufferLayoutDescriptor *vbDesc = vbDescArray[vtxBuffIdx]; + vbDesc.stepFunction = MTLVertexStepFunctionPerVertex; + vbDesc.stepRate = 1; + vbDesc.stride = vtxStride; + + return [device_driver->get_device() newRenderPipelineStateWithDescriptor:plDesc error:p_error]; +}

/// Returns the clear pipeline state cached for `p_key`, creating and caching it on first use.
/// `p_error` is forwarded to the resource factory when a new state has to be built.
id<MTLRenderPipelineState> MDResourceCache::get_clear_render_pipeline_state(ClearAttKey &p_key, NSError **p_error) {
	HashMap::ConstIterator it = clear_states.find(p_key);
	if (it != clear_states.end()) {
		return it->value;
	}

	id<MTLRenderPipelineState> state = resource_factory->new_clear_pipeline_state(p_key, p_error);
	// NOTE(review): the result is stored even if it is nil, so a failed creation
	// would be served from the cache afterwards without `p_error` being set again.
	clear_states[p_key] = state;
	return state;
}

/// Returns the depth/stencil state used for clears with the given combination of
/// depth and stencil usage, creating it through the resource factory on first use.
id<MTLDepthStencilState> MDResourceCache::get_depth_stencil_state(bool p_use_depth, bool p_use_stencil) {
	// Select the cache slot matching the requested combination.
	id<MTLDepthStencilState> __strong *val;
	if (p_use_depth && p_use_stencil) {
		val = &clear_depth_stencil_state.all;
	} else if (p_use_depth) {
		val = &clear_depth_stencil_state.depth_only;
	} else if (p_use_stencil) {
		val = &clear_depth_stencil_state.stencil_only;
	} else {
		val = &clear_depth_stencil_state.none;
	}
	DEV_ASSERT(val != nullptr);

	if (*val == nil) {
		*val = resource_factory->new_depth_stencil_state(p_use_depth, p_use_stencil);
	}
	return *val;
}

// Short stage labels, indexed by RD::ShaderStage; used in the shader compile signposts below.
static const char *SHADER_STAGE_NAMES[] = {
	[RD::SHADER_STAGE_VERTEX] = "vert",
	[RD::SHADER_STAGE_FRAGMENT] = "frag",
	[RD::SHADER_STAGE_TESSELATION_CONTROL] = "tess_ctrl",
	[RD::SHADER_STAGE_TESSELATION_EVALUATION] = "tess_eval",
	[RD::SHADER_STAGE_COMPUTE] = "comp",
};

/// Asks the owning cache to release the entry stored under this entry's key.
void ShaderCacheEntry::notify_free() const {
	owner.shader_cache_free_entry(key);
}

@interface MDLibrary ()
- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry;
@end

/// Loads the MTLLibrary when the library is first accessed.
@interface MDLazyLibrary : MDLibrary {
	id<MTLLibrary> _library;
	NSError *_error;
	std::shared_mutex _mu;
	bool _loaded;
	id<MTLDevice> _device;
	NSString *_source;
	MTLCompileOptions *_options;
}
- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
							device:(id<MTLDevice>)device
							source:(NSString *)source
						   options:(MTLCompileOptions *)options;
@end

/// Loads the MTLLibrary immediately on initialization, using an asynchronous API.
@interface MDImmediateLibrary : MDLibrary {
	id<MTLLibrary> _library;
	NSError *_error;
	std::mutex _cv_mutex;
	std::condition_variable _cv;
	std::atomic<bool> _complete;
	bool _ready;
}
- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
							device:(id<MTLDevice>)device
							source:(NSString *)source
						   options:(MTLCompileOptions *)options;
@end

@implementation MDLibrary

/// Creates the concrete MDLibrary subclass for the requested load strategy.
/// LAZY yields an MDLazyLibrary; DEFAULT and any other value yield an MDImmediateLibrary.
+ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry
								  device:(id<MTLDevice>)device
								  source:(NSString *)source
								 options:(MTLCompileOptions *)options
								strategy:(ShaderLoadStrategy)strategy {
	switch (strategy) {
		case ShaderLoadStrategy::DEFAULT:
			[[fallthrough]];
		default:
			return [[MDImmediateLibrary alloc] initWithCacheEntry:entry device:device source:source options:options];
		case ShaderLoadStrategy::LAZY:
			return [[MDLazyLibrary alloc] initWithCacheEntry:entry device:device source:source options:options];
	}
}

// The base class provides no library; the subclasses in this file override both accessors.
- (id<MTLLibrary>)library {
	CRASH_NOW_MSG("Not implemented");
	return nil;
}

- (NSError *)error {
	CRASH_NOW_MSG("Not implemented");
	return nil;
}

- (void)setLabel:(NSString *)label {
}

/// Stores the cache entry and registers this library on it.
- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry {
	self = [super init];
	_entry = entry;
	_entry->library = self;
	return self;
}

- (void)dealloc {
	// Let the owning cache drop the entry that refers to this library.
	_entry->notify_free();
}

@end

@implementation MDImmediateLibrary

/// Starts compiling `source` right away through the asynchronous Metal API.
/// The `library` and `error` accessors block until the completion handler has run.
- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
							device:(id<MTLDevice>)device
							source:(NSString *)source
						   options:(MTLCompileOptions *)options {
	self = [super initWithCacheEntry:entry];
	_complete = false;
	_ready = false;

	__block os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)self;
	os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile",
			"shader_name=%{public}s stage=%{public}s hash=%X",
			entry->name.get_data(), SHADER_STAGE_NAMES[entry->stage], entry->key.short_sha());

	[device newLibraryWithSource:source
						 options:options
			   completionHandler:^(id<MTLLibrary> library, NSError *error) {
				   os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile");
				   self->_library = library;
				   self->_error = error;
				   if (error) {
					   // vformat() performs the printf-style "%s" substitution; String::format()
					   // expects an Array/Dictionary with "{}" placeholders and would leave the
					   // message unformatted.
					   ERR_PRINT(vformat(U"Error compiling shader %s: %s", entry->name.get_data(), error.localizedDescription.UTF8String));
				   }

				   {
					   // `_ready` is the condition-variable predicate, so it is set under the mutex.
					   std::lock_guard<std::mutex> lock(self->_cv_mutex);
					   _ready = true;
				   }
				   _cv.notify_all();
				   // Lets later accessor calls skip the mutex entirely.
				   _complete = true;
			   }];
	return self;
}

/// Returns the compiled library, waiting for the completion handler if it has not run yet.
- (id<MTLLibrary>)library {
	if (!_complete) {
		std::unique_lock<std::mutex> lock(_cv_mutex);
		_cv.wait(lock, [&] { return _ready; });
	}
	return _library;
}

/// Returns the compile error (if any), waiting for the completion handler if it has not run yet.
- (NSError *)error {
	if (!_complete) {
		std::unique_lock<std::mutex> lock(_cv_mutex);
		_cv.wait(lock, [&] { return _ready; });
	}
	return _error;
}

@end

@implementation MDLazyLibrary

/// Keeps the device, source and options so the library can be compiled on first access.
- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
							device:(id<MTLDevice>)device
							source:(NSString *)source
						   options:(MTLCompileOptions *)options {
	self = [super initWithCacheEntry:entry];
	_device = device;
	_source = source;
	_options = options;

	return self;
}

/// Compiles the library synchronously the first time it is called; later calls return early.
- (void)load {
	{
		// Fast path: only a shared lock is needed to observe that loading already happened.
		std::shared_lock<std::shared_mutex> lock(_mu);
		if (_loaded) {
			return;
		}
	}

	// Re-check under the exclusive lock; another thread may have loaded in between.
	std::unique_lock<std::shared_mutex> lock(_mu);
	if (_loaded) {
		return;
	}

	__block os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)self;
	os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile",
			"shader_name=%{public}s stage=%{public}s hash=%X",
			_entry->name.get_data(), SHADER_STAGE_NAMES[_entry->stage], _entry->key.short_sha());
	NSError *error = nil;
	_library = [_device newLibraryWithSource:_source options:_options error:&error];
	// Keep the compile error so that -error reports it; it was previously discarded,
	// which made -error always return nil for lazily loaded libraries.
	_error = error;
	os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile");
	// The compile inputs are no longer needed once the library has been built.
	_device = nil;
	_source = nil;
	_options = nil;
	_loaded = true;
}

- (id<MTLLibrary>)library {
	[self load];
	return _library;
}

- (NSError *)error {
	[self load];
	return _error;
}

@end
diff --git a/drivers/metal/metal_utils.h b/drivers/metal/metal_utils.h new file mode 100644 index 0000000000..f3ee395d04 --- /dev/null +++ b/drivers/metal/metal_utils.h @@ -0,0 +1,101 @@ +/**************************************************************************/ +/* metal_utils.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/

#ifndef METAL_UTILS_H
#define METAL_UTILS_H

#import <os/log.h>

// Standard headers for std::function, std::underlying_type_t and std::move used below.
#include <functional>
#include <type_traits>
#include <utility>

#pragma mark - Boolean flags

namespace flags {

/*! Sets the flags within the value parameter specified by the mask parameter. */
template <typename Tv, typename Tm>
void set(Tv &p_value, Tm p_mask) {
	using T = std::underlying_type_t<Tv>;
	p_value = static_cast<Tv>(static_cast<T>(p_value) | static_cast<T>(p_mask));
}

/*! Clears the flags within the value parameter specified by the mask parameter. */
template <typename Tv, typename Tm>
void clear(Tv &p_value, Tm p_mask) {
	using T = std::underlying_type_t<Tv>;
	p_value = static_cast<Tv>(static_cast<T>(p_value) & ~static_cast<T>(p_mask));
}

/*! Returns whether the specified value has any of the bits specified in mask set to 1. */
template <typename Tv, typename Tm>
static constexpr bool any(Tv p_value, const Tm p_mask) { return ((p_value & p_mask) != 0); }

/*! Returns whether the specified value has all of the bits specified in mask set to 1. */
template <typename Tv, typename Tm>
static constexpr bool all(Tv p_value, const Tm p_mask) { return ((p_value & p_mask) == p_mask); }

} //namespace flags

#pragma mark - Alignment and Offsets

/*! Returns whether the value is a non-zero power of two. */
static constexpr bool is_power_of_two(uint64_t p_value) {
	return p_value && ((p_value & (p_value - 1)) == 0);
}

/*!
 * Rounds p_value up to the next multiple of p_alignment.
 * p_alignment must be a power of two; an alignment of 0 means "no alignment"
 * and returns p_value unchanged.
 */
static constexpr uint64_t round_up_to_alignment(uint64_t p_value, uint64_t p_alignment) {
	// Handle the explicitly supported "no alignment" case before asserting, so that
	// dev and release builds agree: is_power_of_two(0) is false and would otherwise
	// trip the assertion before this early-out could be reached.
	if (p_alignment == 0) {
		return p_value;
	}

	DEV_ASSERT(is_power_of_two(p_alignment));

	uint64_t mask = p_alignment - 1;
	uint64_t aligned_value = (p_value + mask) & ~mask;

	return aligned_value;
}

/*!
 * Runs the stored callable when the object is destroyed.
 * Intended to be used through the DEFER macro below.
 */
class Defer {
public:
	// The callable is taken by value and moved into place to avoid a second copy.
	Defer(std::function<void()> func) :
			func_(std::move(func)) {}
	~Defer() { func_(); }

private:
	std::function<void()> func_;
};

#define CONCAT_INTERNAL(x, y) x##y
#define CONCAT(x, y) CONCAT_INTERNAL(x, y)
// Declares a uniquely named (per source line) Defer bound to a const reference;
// usage: DEFER([&]() { ... });
#define DEFER const Defer &CONCAT(defer__, __LINE__) = Defer

extern os_log_t LOG_DRIVER;
// Used for dynamic tracing.
extern os_log_t LOG_INTERVALS;

#endif // METAL_UTILS_H
diff --git a/drivers/metal/pixel_formats.h b/drivers/metal/pixel_formats.h new file mode 100644 index 0000000000..167c3d5600 --- /dev/null +++ b/drivers/metal/pixel_formats.h @@ -0,0 +1,416 @@ +/**************************************************************************/ +/* pixel_formats.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md).
*/ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +/**************************************************************************/ +/* */ +/* Portions of this code were derived from MoltenVK. */ +/* */ +/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */ +/* (http://www.brenwill.com) */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. 
*/ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/**************************************************************************/

#ifndef PIXEL_FORMATS_H
#define PIXEL_FORMATS_H

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"

#import "servers/rendering/rendering_device.h"

#import <Metal/Metal.h>

// Sizes of the descriptor and lookup tables declared in PixelFormats below.
static const uint32_t _mtlPixelFormatCount = 256;
static const uint32_t _mtlPixelFormatCoreCount = MTLPixelFormatX32_Stencil8 + 2; // The actual last enum value is not available on iOS.
static const uint32_t _mtlVertexFormatCount = MTLVertexFormatHalf + 1;

#pragma mark -
#pragma mark Metal format capabilities

/*! Bit flags describing what a Metal format can be used for. */
typedef enum : uint16_t {

	kMTLFmtCapsNone = 0,
	/*! The format can be used in a shader read operation. */
	kMTLFmtCapsRead = (1 << 0),
	/*! The format can be used in a shader filter operation during sampling. */
	kMTLFmtCapsFilter = (1 << 1),
	/*! The format can be used in a shader write operation. */
	kMTLFmtCapsWrite = (1 << 2),
	/*! The format can be used with atomic operations. */
	kMTLFmtCapsAtomic = (1 << 3),
	/*! The format can be used as a color attachment. */
	kMTLFmtCapsColorAtt = (1 << 4),
	/*! The format can be used as a depth-stencil attachment. */
	kMTLFmtCapsDSAtt = (1 << 5),
	/*! The format can be used with blend operations. */
	kMTLFmtCapsBlend = (1 << 6),
	/*! The format can be used as a destination for multisample antialias (MSAA) data. */
	kMTLFmtCapsMSAA = (1 << 7),
	/*! The format can be used as a resolve attachment. */
	kMTLFmtCapsResolve = (1 << 8),
	/*! Presumably: the format can be used as a vertex attribute format (undocumented in the original; confirm). */
	kMTLFmtCapsVertex = (1 << 9),

	// Shorthand combinations of the capability bits above.
	kMTLFmtCapsRF = (kMTLFmtCapsRead | kMTLFmtCapsFilter),
	kMTLFmtCapsRC = (kMTLFmtCapsRead | kMTLFmtCapsColorAtt),
	kMTLFmtCapsRCB = (kMTLFmtCapsRC | kMTLFmtCapsBlend),
	kMTLFmtCapsRCM = (kMTLFmtCapsRC | kMTLFmtCapsMSAA),
	kMTLFmtCapsRCMB = (kMTLFmtCapsRCM | kMTLFmtCapsBlend),
	kMTLFmtCapsRWC = (kMTLFmtCapsRC | kMTLFmtCapsWrite),
	kMTLFmtCapsRWCB = (kMTLFmtCapsRWC | kMTLFmtCapsBlend),
	kMTLFmtCapsRWCM = (kMTLFmtCapsRWC | kMTLFmtCapsMSAA),
	kMTLFmtCapsRWCMB = (kMTLFmtCapsRWCM | kMTLFmtCapsBlend),
	kMTLFmtCapsRFCMRB = (kMTLFmtCapsRCMB | kMTLFmtCapsFilter | kMTLFmtCapsResolve),
	kMTLFmtCapsRFWCMB = (kMTLFmtCapsRWCMB | kMTLFmtCapsFilter),
	kMTLFmtCapsAll = (kMTLFmtCapsRFWCMB | kMTLFmtCapsResolve),

	// Combinations for depth-stencil formats.
	kMTLFmtCapsDRM = (kMTLFmtCapsDSAtt | kMTLFmtCapsRead | kMTLFmtCapsMSAA),
	kMTLFmtCapsDRFM = (kMTLFmtCapsDRM | kMTLFmtCapsFilter),
	kMTLFmtCapsDRMR = (kMTLFmtCapsDRM | kMTLFmtCapsResolve),
	kMTLFmtCapsDRFMR = (kMTLFmtCapsDRFM | kMTLFmtCapsResolve),

	kMTLFmtCapsChromaSubsampling = kMTLFmtCapsRF,
	kMTLFmtCapsMultiPlanar = kMTLFmtCapsChromaSubsampling,
} MTLFmtCaps;

/*! Combines two capability sets. */
inline MTLFmtCaps operator|(MTLFmtCaps p_left, MTLFmtCaps p_right) {
	return static_cast<MTLFmtCaps>(static_cast<uint32_t>(p_left) | p_right);
}

/*! Adds the capabilities in p_right to p_left and returns p_left. */
inline MTLFmtCaps &operator|=(MTLFmtCaps &p_left, MTLFmtCaps p_right) {
	return (p_left = p_left | p_right);
}

#pragma mark -
#pragma mark Metal view classes

/*!
 * Groups Metal pixel formats into classes. PixelFormats::getCapabilities() uses the class,
 * when asked for extended capabilities, to merge the capabilities of all formats sharing it.
 */
enum class MTLViewClass : uint8_t {
	None,
	Color8,
	Color16,
	Color32,
	Color64,
	Color128,
	PVRTC_RGB_2BPP,
	PVRTC_RGB_4BPP,
	PVRTC_RGBA_2BPP,
	PVRTC_RGBA_4BPP,
	EAC_R11,
	EAC_RG11,
	EAC_RGBA8,
	ETC2_RGB8,
	ETC2_RGB8A1,
	ASTC_4x4,
	ASTC_5x4,
	ASTC_5x5,
	ASTC_6x5,
	ASTC_6x6,
	ASTC_8x5,
	ASTC_8x6,
	ASTC_8x8,
	ASTC_10x5,
	ASTC_10x6,
	ASTC_10x8,
	ASTC_10x10,
	ASTC_12x10,
	ASTC_12x12,
	BC1_RGBA,
	BC2_RGBA,
	BC3_RGBA,
	BC4_R,
	BC5_RG,
	BC6H_RGB,
	BC7_RGBA,
	Depth24_Stencil8,
	Depth32_Stencil8,
	BGRA10_XR,
	BGR10_XR
};

#pragma mark -
#pragma mark Format descriptors

/** Enumerates the data type of a format. */
enum class MTLFormatType {
	None, /**< Format type is unknown. */
	ColorHalf, /**< A 16-bit floating point color. */
	ColorFloat, /**< A 32-bit floating point color. */
	ColorInt8, /**< A signed 8-bit integer color. */
	ColorUInt8, /**< An unsigned 8-bit integer color. */
	ColorInt16, /**< A signed 16-bit integer color. */
	ColorUInt16, /**< An unsigned 16-bit integer color. */
	ColorInt32, /**< A signed 32-bit integer color. */
	ColorUInt32, /**< An unsigned 32-bit integer color. */
	DepthStencil, /**< A depth and stencil value. */
	Compressed, /**< A block-compressed color. */
};

/** A width/height pair; used below as the size, in texels, of a format's texel block. */
typedef struct Extent2D {
	uint32_t width;
	uint32_t height;
} Extent2D;

/** Describes the properties of a DataFormat, including the corresponding Metal pixel and vertex format. */
typedef struct DataFormatDesc {
	RD::DataFormat dataFormat;
	MTLPixelFormat mtlPixelFormat;
	MTLPixelFormat mtlPixelFormatSubstitute;
	MTLVertexFormat mtlVertexFormat;
	MTLVertexFormat mtlVertexFormatSubstitute;
	uint8_t chromaSubsamplingPlaneCount;
	uint8_t chromaSubsamplingComponentBits;
	Extent2D blockTexelSize;
	uint32_t bytesPerBlock;
	MTLFormatType formatType;
	const char *name;
	bool hasReportedSubstitution;

	/** Bytes per block divided by the number of texels in a block; may be fractional. */
	inline double bytesPerTexel() const { return (double)bytesPerBlock / (double)(blockTexelSize.width * blockTexelSize.height); }

	/** True if a Metal pixel format is assigned, or the format has more than one chroma-subsampling plane. */
	inline bool isSupported() const { return (mtlPixelFormat != MTLPixelFormatInvalid || chromaSubsamplingPlaneCount > 1); }
	inline bool isSupportedOrSubstitutable() const { return isSupported() || (mtlPixelFormatSubstitute != MTLPixelFormatInvalid); }

	inline bool vertexIsSupported() const { return (mtlVertexFormat != MTLVertexFormatInvalid); }
	inline bool vertexIsSupportedOrSubstitutable() const { return vertexIsSupported() || (mtlVertexFormatSubstitute != MTLVertexFormatInvalid); }
} DataFormatDesc;

/** Describes the properties of a MTLPixelFormat or MTLVertexFormat. */
typedef struct MTLFormatDesc {
	// The same descriptor type serves both pixel and vertex format tables.
	union {
		MTLPixelFormat mtlPixelFormat;
		MTLVertexFormat mtlVertexFormat;
	};
	RD::DataFormat dataFormat;
	MTLFmtCaps mtlFmtCaps;
	MTLViewClass mtlViewClass;
	MTLPixelFormat mtlPixelFormatLinear;
	const char *name = nullptr;

	/** True if the format is valid and has at least one capability. */
	inline bool isSupported() const { return (mtlPixelFormat != MTLPixelFormatInvalid) && (mtlFmtCaps != kMTLFmtCapsNone); }
} MTLFormatDesc;

/** Maps between RenderingDevice DataFormats and Metal pixel/vertex formats, and reports their properties. */
class API_AVAILABLE(macos(11.0), ios(14.0)) PixelFormats {
	using DataFormat = RD::DataFormat;

public:
	/** Returns whether the DataFormat is supported by the GPU bound to this instance. */
	bool isSupported(DataFormat p_format);

	/** Returns whether the DataFormat is supported by this implementation, or can be substituted by one that is. */
	bool isSupportedOrSubstitutable(DataFormat p_format);

	/** Returns whether the specified Metal MTLPixelFormat can be used as a depth format. */
	_FORCE_INLINE_ bool isDepthFormat(MTLPixelFormat p_format) {
		switch (p_format) {
			case MTLPixelFormatDepth32Float:
			case MTLPixelFormatDepth16Unorm:
			case MTLPixelFormatDepth32Float_Stencil8:
#if TARGET_OS_OSX
			case MTLPixelFormatDepth24Unorm_Stencil8:
#endif
				return true;
			default:
				return false;
		}
	}

	/** Returns whether the specified Metal MTLPixelFormat can be used as a stencil format. */
	_FORCE_INLINE_ bool isStencilFormat(MTLPixelFormat p_format) {
		switch (p_format) {
			case MTLPixelFormatStencil8:
#if TARGET_OS_OSX
			case MTLPixelFormatDepth24Unorm_Stencil8:
			case MTLPixelFormatX24_Stencil8:
#endif
			case MTLPixelFormatDepth32Float_Stencil8:
			case MTLPixelFormatX32_Stencil8:
				return true;
			default:
				return false;
		}
	}

	/** Returns whether the specified Metal MTLPixelFormat is a PVRTC format. */
	bool isPVRTCFormat(MTLPixelFormat p_format);

	/** Returns the format type corresponding to the specified Godot pixel format. */
	MTLFormatType getFormatType(DataFormat p_format);

	/** Returns the format type corresponding to the specified Metal MTLPixelFormat. */
	MTLFormatType getFormatType(MTLPixelFormat p_formt);

	/**
	 * Returns the Metal MTLPixelFormat corresponding to the specified Godot pixel format,
	 * or returns MTLPixelFormatInvalid if no corresponding MTLPixelFormat exists.
	 */
	MTLPixelFormat getMTLPixelFormat(DataFormat p_format);

	/**
	 * Returns the DataFormat corresponding to the specified Metal MTLPixelFormat,
	 * or returns DATA_FORMAT_MAX if no corresponding DataFormat exists.
	 */
	DataFormat getDataFormat(MTLPixelFormat p_format);

	/**
	 * Returns the size, in bytes, of a texel block of the specified Godot pixel format.
	 * For uncompressed formats, the returned value corresponds to the size in bytes of a single texel.
	 */
	uint32_t getBytesPerBlock(DataFormat p_format);

	/**
	 * Returns the size, in bytes, of a texel block of the specified Metal format.
	 * For uncompressed formats, the returned value corresponds to the size in bytes of a single texel.
	 */
	uint32_t getBytesPerBlock(MTLPixelFormat p_format);

	/** Returns the number of planes of the specified chroma-subsampling (YCbCr) DataFormat. */
	uint8_t getChromaSubsamplingPlaneCount(DataFormat p_format);

	/** Returns the number of bits per channel of the specified chroma-subsampling (YCbCr) DataFormat. */
	uint8_t getChromaSubsamplingComponentBits(DataFormat p_format);

	/**
	 * Returns the size, in bytes, of a texel of the specified Godot format.
	 * The returned value may be fractional for certain compressed formats.
	 */
	float getBytesPerTexel(DataFormat p_format);

	/**
	 * Returns the size, in bytes, of a texel of the specified Metal format.
	 * The returned value may be fractional for certain compressed formats.
	 */
	float getBytesPerTexel(MTLPixelFormat p_format);

	/**
	 * Returns the size, in bytes, of a row of texels of the specified Godot pixel format.
	 *
	 * For compressed formats, this takes into consideration the compression block size,
	 * and p_texels_per_row should specify the width in texels, not blocks. The result is rounded
	 * up if p_texels_per_row is not an integer multiple of the compression block width.
	 */
	size_t getBytesPerRow(DataFormat p_format, uint32_t p_texels_per_row);

	/**
	 * Returns the size, in bytes, of a row of texels of the specified Metal format.
	 *
	 * For compressed formats, this takes into consideration the compression block size,
	 * and p_texels_per_row should specify the width in texels, not blocks. The result is rounded
	 * up if p_texels_per_row is not an integer multiple of the compression block width.
	 */
	size_t getBytesPerRow(MTLPixelFormat p_format, uint32_t p_texels_per_row);

	/**
	 * Returns the size, in bytes, of a texture layer of the specified Godot pixel format.
	 *
	 * For compressed formats, this takes into consideration the compression block size,
	 * and p_texel_rows_per_layer should specify the height in texels, not blocks. The result is
	 * rounded up if p_texel_rows_per_layer is not an integer multiple of the compression block height.
	 */
	size_t getBytesPerLayer(DataFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer);

	/**
	 * Returns the size, in bytes, of a texture layer of the specified Metal format.
	 *
	 * For compressed formats, this takes into consideration the compression block size,
	 * and p_texel_rows_per_layer should specify the height in texels, not blocks. The result is
	 * rounded up if p_texel_rows_per_layer is not an integer multiple of the compression block height.
	 */
	size_t getBytesPerLayer(MTLPixelFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer);

	/** Returns the Metal format capabilities supported by the specified Godot format, without substitution. */
	MTLFmtCaps getCapabilities(DataFormat p_format, bool p_extended = false);

	/** Returns the Metal format capabilities supported by the specified Metal format. */
	MTLFmtCaps getCapabilities(MTLPixelFormat p_format, bool p_extended = false);

	/**
	 * Returns the Metal MTLVertexFormat corresponding to the specified
	 * DataFormat as used as a vertex attribute format.
	 */
	MTLVertexFormat getMTLVertexFormat(DataFormat p_format);

#pragma mark Construction

	explicit PixelFormats(id<MTLDevice> p_device);

protected:
	id<MTLDevice> device;

	// Descriptor lookups.
	DataFormatDesc &getDataFormatDesc(DataFormat p_format);
	DataFormatDesc &getDataFormatDesc(MTLPixelFormat p_format);
	MTLFormatDesc &getMTLPixelFormatDesc(MTLPixelFormat p_format);
	MTLFormatDesc &getMTLVertexFormatDesc(MTLVertexFormat p_format);
	// Table construction, called from the constructor.
	void initDataFormatCapabilities();
	void initMTLPixelFormatCapabilities();
	void initMTLVertexFormatCapabilities();
	void buildMTLFormatMaps();
	void buildDFFormatMaps();
	void modifyMTLFormatCapabilities();
	void modifyMTLFormatCapabilities(id<MTLDevice> p_device);
	void addMTLPixelFormatCapabilities(id<MTLDevice> p_device,
			MTLFeatureSet p_feature_set,
			MTLPixelFormat p_format,
			MTLFmtCaps p_caps);
	void addMTLPixelFormatCapabilities(id<MTLDevice> p_device,
			MTLGPUFamily p_family,
			MTLPixelFormat p_format,
			MTLFmtCaps p_caps);
	void disableMTLPixelFormatCapabilities(MTLPixelFormat p_format,
			MTLFmtCaps p_caps);
	void disableAllMTLPixelFormatCapabilities(MTLPixelFormat p_format);
	void addMTLVertexFormatCapabilities(id<MTLDevice> p_device,
			MTLFeatureSet p_feature_set,
			MTLVertexFormat p_format,
			MTLFmtCaps p_caps);

	DataFormatDesc _dataFormatDescriptions[RD::DATA_FORMAT_MAX];
	MTLFormatDesc _mtlPixelFormatDescriptions[_mtlPixelFormatCount];
	MTLFormatDesc _mtlVertexFormatDescriptions[_mtlVertexFormatCount];

	// Most Metal formats have small values and are mapped by simple lookup array.
	// Outliers are mapped by a map.
	uint16_t _mtlFormatDescIndicesByMTLPixelFormatsCore[_mtlPixelFormatCoreCount];
	HashMap<uint32_t, uint32_t> _mtlFormatDescIndicesByMTLPixelFormatsExt;

	uint16_t _mtlFormatDescIndicesByMTLVertexFormats[_mtlVertexFormatCount];
};

#pragma clang diagnostic pop

#endif // PIXEL_FORMATS_H
diff --git a/drivers/metal/pixel_formats.mm b/drivers/metal/pixel_formats.mm new file mode 100644 index 0000000000..ac737b3f0a --- /dev/null +++ b/drivers/metal/pixel_formats.mm @@ -0,0 +1,1298 @@ +/**************************************************************************/ +/* pixel_formats.mm */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software.
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +/**************************************************************************/ +/* */ +/* Portions of this code were derived from MoltenVK. */ +/* */ +/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */ +/* (http://www.brenwill.com) */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. 
*/ +/**************************************************************************/

#import "pixel_formats.h"

#import "metal_utils.h"

// Formats that are missing from a platform's SDK are mapped to the invalid format
// (or to a stand-in) so the tables in this file can name them unconditionally.
#if TARGET_OS_IPHONE || TARGET_OS_TV
#if !(__IPHONE_OS_VERSION_MAX_ALLOWED >= 160400) // iOS/tvOS 16.4
#define MTLPixelFormatBC1_RGBA MTLPixelFormatInvalid
#define MTLPixelFormatBC1_RGBA_sRGB MTLPixelFormatInvalid
#define MTLPixelFormatBC2_RGBA MTLPixelFormatInvalid
#define MTLPixelFormatBC2_RGBA_sRGB MTLPixelFormatInvalid
#define MTLPixelFormatBC3_RGBA MTLPixelFormatInvalid
#define MTLPixelFormatBC3_RGBA_sRGB MTLPixelFormatInvalid
#define MTLPixelFormatBC4_RUnorm MTLPixelFormatInvalid
#define MTLPixelFormatBC4_RSnorm MTLPixelFormatInvalid
#define MTLPixelFormatBC5_RGUnorm MTLPixelFormatInvalid
#define MTLPixelFormatBC5_RGSnorm MTLPixelFormatInvalid
#define MTLPixelFormatBC6H_RGBUfloat MTLPixelFormatInvalid
#define MTLPixelFormatBC6H_RGBFloat MTLPixelFormatInvalid
#define MTLPixelFormatBC7_RGBAUnorm MTLPixelFormatInvalid
#define MTLPixelFormatBC7_RGBAUnorm_sRGB MTLPixelFormatInvalid
#endif

#define MTLPixelFormatDepth16Unorm_Stencil8 MTLPixelFormatDepth32Float_Stencil8
#define MTLPixelFormatDepth24Unorm_Stencil8 MTLPixelFormatInvalid
#define MTLPixelFormatX24_Stencil8 MTLPixelFormatInvalid
#endif

#if TARGET_OS_TV
#define MTLPixelFormatASTC_4x4_HDR MTLPixelFormatInvalid
#define MTLPixelFormatASTC_5x4_HDR MTLPixelFormatInvalid
#define MTLPixelFormatASTC_5x5_HDR MTLPixelFormatInvalid
#define MTLPixelFormatASTC_6x5_HDR MTLPixelFormatInvalid
#define MTLPixelFormatASTC_6x6_HDR MTLPixelFormatInvalid
#define MTLPixelFormatASTC_8x5_HDR MTLPixelFormatInvalid
#define MTLPixelFormatASTC_8x6_HDR MTLPixelFormatInvalid
#define MTLPixelFormatASTC_8x8_HDR MTLPixelFormatInvalid
#define MTLPixelFormatASTC_10x5_HDR MTLPixelFormatInvalid
#define MTLPixelFormatASTC_10x6_HDR MTLPixelFormatInvalid
#define MTLPixelFormatASTC_10x8_HDR MTLPixelFormatInvalid
#define MTLPixelFormatASTC_10x10_HDR MTLPixelFormatInvalid
#define MTLPixelFormatASTC_12x10_HDR MTLPixelFormatInvalid
#define MTLPixelFormatASTC_12x12_HDR MTLPixelFormatInvalid
#endif

#if !((__MAC_OS_X_VERSION_MAX_ALLOWED >= 140000) || (__IPHONE_OS_VERSION_MAX_ALLOWED >= 170000)) // Xcode 15
#define MTLVertexFormatFloatRG11B10 MTLVertexFormatInvalid
#define MTLVertexFormatFloatRGB9E5 MTLVertexFormatInvalid
#endif

/** Selects and returns one of the values, based on the platform OS. */
_FORCE_INLINE_ constexpr MTLFmtCaps select_platform_caps(MTLFmtCaps p_macOS_val, MTLFmtCaps p_iOS_val) {
#if (TARGET_OS_IOS || TARGET_OS_TV) && !TARGET_OS_MACCATALYST
	return p_iOS_val;
#elif TARGET_OS_OSX
	return p_macOS_val;
#else
#error "unsupported platform"
#endif
}

/** Zero-fills p_count consecutive objects of type T starting at p_val. */
template <typename T>
void clear(T *p_val, size_t p_count = 1) {
	memset(p_val, 0, sizeof(T) * p_count);
}

#pragma mark -
#pragma mark PixelFormats

bool PixelFormats::isSupported(DataFormat p_format) {
	return getDataFormatDesc(p_format).isSupported();
}

bool PixelFormats::isSupportedOrSubstitutable(DataFormat p_format) {
	return getDataFormatDesc(p_format).isSupportedOrSubstitutable();
}

bool PixelFormats::isPVRTCFormat(MTLPixelFormat p_format) {
	switch (p_format) {
		case MTLPixelFormatPVRTC_RGBA_2BPP:
		case MTLPixelFormatPVRTC_RGBA_2BPP_sRGB:
		case MTLPixelFormatPVRTC_RGBA_4BPP:
		case MTLPixelFormatPVRTC_RGBA_4BPP_sRGB:
		case MTLPixelFormatPVRTC_RGB_2BPP:
		case MTLPixelFormatPVRTC_RGB_2BPP_sRGB:
		case MTLPixelFormatPVRTC_RGB_4BPP:
		case MTLPixelFormatPVRTC_RGB_4BPP_sRGB:
			return true;
		default:
			return false;
	}
}

MTLFormatType PixelFormats::getFormatType(DataFormat p_format) {
	return getDataFormatDesc(p_format).formatType;
}

MTLFormatType PixelFormats::getFormatType(MTLPixelFormat p_format) {
	return getDataFormatDesc(p_format).formatType;
}

MTLPixelFormat PixelFormats::getMTLPixelFormat(DataFormat p_format) {
	DataFormatDesc &dfDesc = getDataFormatDesc(p_format);
	MTLPixelFormat mtlPixFmt = dfDesc.mtlPixelFormat;

	// If the MTLPixelFormat is not supported but DataFormat is valid,
	// attempt to substitute a different format.
	if (mtlPixFmt == MTLPixelFormatInvalid && p_format != RD::DATA_FORMAT_MAX && dfDesc.chromaSubsamplingPlaneCount <= 1) {
		mtlPixFmt = dfDesc.mtlPixelFormatSubstitute;
	}

	return mtlPixFmt;
}

RD::DataFormat PixelFormats::getDataFormat(MTLPixelFormat p_format) {
	return getMTLPixelFormatDesc(p_format).dataFormat;
}

uint32_t PixelFormats::getBytesPerBlock(DataFormat p_format) {
	return getDataFormatDesc(p_format).bytesPerBlock;
}

uint32_t PixelFormats::getBytesPerBlock(MTLPixelFormat p_format) {
	return getDataFormatDesc(p_format).bytesPerBlock;
}

uint8_t PixelFormats::getChromaSubsamplingPlaneCount(DataFormat p_format) {
	return getDataFormatDesc(p_format).chromaSubsamplingPlaneCount;
}

uint8_t PixelFormats::getChromaSubsamplingComponentBits(DataFormat p_format) {
	return getDataFormatDesc(p_format).chromaSubsamplingComponentBits;
}

float PixelFormats::getBytesPerTexel(DataFormat p_format) {
	return getDataFormatDesc(p_format).bytesPerTexel();
}

float PixelFormats::getBytesPerTexel(MTLPixelFormat p_format) {
	return getDataFormatDesc(p_format).bytesPerTexel();
}

// Row size: number of blocks needed to cover the row (rounded up) times the block size.
size_t PixelFormats::getBytesPerRow(DataFormat p_format, uint32_t p_texels_per_row) {
	DataFormatDesc &dfDesc = getDataFormatDesc(p_format);
	return Math::division_round_up(p_texels_per_row, dfDesc.blockTexelSize.width) * dfDesc.bytesPerBlock;
}

size_t PixelFormats::getBytesPerRow(MTLPixelFormat p_format, uint32_t p_texels_per_row) {
	DataFormatDesc &dfDesc = getDataFormatDesc(p_format);
	return Math::division_round_up(p_texels_per_row, dfDesc.blockTexelSize.width) * dfDesc.bytesPerBlock;
}

// Layer size: number of block rows needed to cover the layer (rounded up) times the row size.
size_t PixelFormats::getBytesPerLayer(DataFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer) {
	return Math::division_round_up(p_texel_rows_per_layer, getDataFormatDesc(p_format).blockTexelSize.height) * p_bytes_per_row;
}

size_t PixelFormats::getBytesPerLayer(MTLPixelFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer) {
	return Math::division_round_up(p_texel_rows_per_layer, getDataFormatDesc(p_format).blockTexelSize.height) * p_bytes_per_row;
}

MTLFmtCaps PixelFormats::getCapabilities(DataFormat p_format, bool p_extended) {
	return getCapabilities(getDataFormatDesc(p_format).mtlPixelFormat, p_extended);
}

MTLFmtCaps PixelFormats::getCapabilities(MTLPixelFormat p_format, bool p_extended) {
	MTLFormatDesc &mtlDesc = getMTLPixelFormatDesc(p_format);
	MTLFmtCaps caps = mtlDesc.mtlFmtCaps;
	if (!p_extended || mtlDesc.mtlViewClass == MTLViewClass::None) {
		return caps;
	}
	// Now get caps of all formats in the view class.
	for (MTLFormatDesc &otherDesc : _mtlPixelFormatDescriptions) {
		if (otherDesc.mtlViewClass == mtlDesc.mtlViewClass) {
			caps |= otherDesc.mtlFmtCaps;
		}
	}
	return caps;
}

// Returns the vertex format for p_format. When the format has no direct vertex format,
// a warning is printed and the substitute format (if any) is returned instead.
MTLVertexFormat PixelFormats::getMTLVertexFormat(DataFormat p_format) {
	DataFormatDesc &dfDesc = getDataFormatDesc(p_format);
	MTLVertexFormat format = dfDesc.mtlVertexFormat;

	if (format == MTLVertexFormatInvalid) {
		String errMsg;
		errMsg += "DataFormat ";
		errMsg += dfDesc.name;
		errMsg += " is not supported for vertex buffers on this device.";

		if (dfDesc.vertexIsSupportedOrSubstitutable()) {
			format = dfDesc.mtlVertexFormatSubstitute;

			DataFormatDesc &dfDescSubs = getDataFormatDesc(getMTLVertexFormatDesc(format).dataFormat);
			errMsg += " Using DataFormat ";
			errMsg += dfDescSubs.name;
			errMsg += " instead.";
		}
		WARN_PRINT(errMsg);
	}

	return format;
}

DataFormatDesc &PixelFormats::getDataFormatDesc(DataFormat p_format) {
	CRASH_BAD_INDEX_MSG(p_format, RD::DATA_FORMAT_MAX, "Attempting to describe an invalid DataFormat");
	return _dataFormatDescriptions[p_format];
}

DataFormatDesc &PixelFormats::getDataFormatDesc(MTLPixelFormat p_format) {
	return getDataFormatDesc(getMTLPixelFormatDesc(p_format).dataFormat);
}

// Return a reference to the Metal format descriptor corresponding to the MTLPixelFormat.
MTLFormatDesc &PixelFormats::getMTLPixelFormatDesc(MTLPixelFormat p_format) {
	uint16_t fmtIdx = 0;
	if (p_format < _mtlPixelFormatCoreCount) {
		fmtIdx = _mtlFormatDescIndicesByMTLPixelFormatsCore[p_format];
	} else {
		// Look the format up without inserting: HashMap::operator[] would add a
		// zero entry to the map for every unknown format queried through this getter.
		// An unknown format still resolves to descriptor index 0, as before.
		const uint32_t *extIdx = _mtlFormatDescIndicesByMTLPixelFormatsExt.getptr(p_format);
		if (extIdx != nullptr) {
			fmtIdx = *extIdx;
		}
	}
	return _mtlPixelFormatDescriptions[fmtIdx];
}

// Return a reference to the Metal format descriptor corresponding to the MTLVertexFormat.
MTLFormatDesc &PixelFormats::getMTLVertexFormatDesc(MTLVertexFormat p_format) {
	uint16_t fmtIdx = (p_format < _mtlVertexFormatCount) ? _mtlFormatDescIndicesByMTLVertexFormats[p_format] : 0;
	return _mtlVertexFormatDescriptions[fmtIdx];
}

// Builds the Metal format tables first, then the DataFormat tables.
PixelFormats::PixelFormats(id<MTLDevice> p_device) :
		device(p_device) {
	initMTLPixelFormatCapabilities();
	initMTLVertexFormatCapabilities();
	buildMTLFormatMaps();
	modifyMTLFormatCapabilities();

	initDataFormatCapabilities();
	buildDFFormatMaps();
}

// Fills the _dataFormatDescriptions entry for RD::DATA_FORMAT_<DATA_FMT>, including the
// chroma-subsampling plane count (CSPC) and component bits (CSCB).
#define addDfFormatDescFull(DATA_FMT, MTL_FMT, MTL_FMT_ALT, MTL_VTX_FMT, MTL_VTX_FMT_ALT, CSPC, CSCB, BLK_W, BLK_H, BLK_BYTE_CNT, MVK_FMT_TYPE) \
	CRASH_BAD_INDEX_MSG(RD::DATA_FORMAT_##DATA_FMT, RD::DATA_FORMAT_MAX, "Attempting to describe too many DataFormats"); \
	_dataFormatDescriptions[RD::DATA_FORMAT_##DATA_FMT] = { RD::DATA_FORMAT_##DATA_FMT, MTLPixelFormat##MTL_FMT, MTLPixelFormat##MTL_FMT_ALT, MTLVertexFormat##MTL_VTX_FMT, MTLVertexFormat##MTL_VTX_FMT_ALT, \
		CSPC, CSCB, { BLK_W, BLK_H }, BLK_BYTE_CNT, MTLFormatType::MVK_FMT_TYPE, "DATA_FORMAT_" #DATA_FMT, false }

// Same as addDfFormatDescFull with both chroma-subsampling fields set to 0.
#define addDataFormatDesc(DATA_FMT, MTL_FMT, MTL_FMT_ALT, MTL_VTX_FMT, MTL_VTX_FMT_ALT, BLK_W, BLK_H, BLK_BYTE_CNT, MVK_FMT_TYPE) \
	addDfFormatDescFull(DATA_FMT, MTL_FMT, MTL_FMT_ALT, MTL_VTX_FMT, MTL_VTX_FMT_ALT, 0, 0, BLK_W, BLK_H, BLK_BYTE_CNT, MVK_FMT_TYPE)

+#define addDfFormatDescChromaSubsampling(DATA_FMT, MTL_FMT, CSPC, CSCB, BLK_W,
BLK_H, BLK_BYTE_CNT) \
	addDfFormatDescFull(DATA_FMT, MTL_FMT, Invalid, Invalid, Invalid, CSPC, CSCB, BLK_W, BLK_H, BLK_BYTE_CNT, ColorFloat)

// Fills _dataFormatDescriptions with one descriptor per RenderingDevice DataFormat.
//
// The table is cleared first. Each entry is then stored at the index of its own
// DataFormat enum value, so the order of the statements below does not affect
// where an entry lands.
//
// Argument columns of addDataFormatDesc(), as named by the macro parameters:
//   DataFormat suffix, MTLPixelFormat suffix, alternate MTLPixelFormat suffix,
//   MTLVertexFormat suffix, alternate MTLVertexFormat suffix,
//   block width, block height, bytes per block, MTLFormatType member.
// `Invalid` expands to MTLPixelFormatInvalid / MTLVertexFormatInvalid through token pasting.
//
// Argument columns of addDfFormatDescChromaSubsampling():
//   DataFormat suffix, MTLPixelFormat suffix, CSPC, CSCB, block width, block height, bytes per block.
// NOTE(review): CSPC/CSCB presumably initialize chromaSubsamplingPlaneCount and
// chromaSubsamplingComponentBits; the initializer is positional, so confirm against
// the DataFormatDesc declaration.
void PixelFormats::initDataFormatCapabilities() {
	clear(_dataFormatDescriptions, RD::DATA_FORMAT_MAX);

	addDataFormatDesc(R4G4_UNORM_PACK8, Invalid, Invalid, Invalid, Invalid, 1, 1, 1, ColorFloat);
	addDataFormatDesc(R4G4B4A4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);
	addDataFormatDesc(B4G4R4A4_UNORM_PACK16, Invalid, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);

	addDataFormatDesc(R5G6B5_UNORM_PACK16, B5G6R5Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);
	addDataFormatDesc(B5G6R5_UNORM_PACK16, Invalid, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);
	addDataFormatDesc(R5G5B5A1_UNORM_PACK16, A1BGR5Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);
	addDataFormatDesc(B5G5R5A1_UNORM_PACK16, Invalid, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);
	addDataFormatDesc(A1R5G5B5_UNORM_PACK16, BGR5A1Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);

	addDataFormatDesc(R8_UNORM, R8Unorm, Invalid, UCharNormalized, UChar2Normalized, 1, 1, 1, ColorFloat);
	addDataFormatDesc(R8_SNORM, R8Snorm, Invalid, CharNormalized, Char2Normalized, 1, 1, 1, ColorFloat);
	addDataFormatDesc(R8_USCALED, Invalid, Invalid, UChar, UChar2, 1, 1, 1, ColorFloat);
	addDataFormatDesc(R8_SSCALED, Invalid, Invalid, Char, Char2, 1, 1, 1, ColorFloat);
	addDataFormatDesc(R8_UINT, R8Uint, Invalid, UChar, UChar2, 1, 1, 1, ColorUInt8);
	addDataFormatDesc(R8_SINT, R8Sint, Invalid, Char, Char2, 1, 1, 1, ColorInt8);
	addDataFormatDesc(R8_SRGB, R8Unorm_sRGB, Invalid, UCharNormalized, UChar2Normalized, 1, 1, 1, ColorFloat);

	addDataFormatDesc(R8G8_UNORM, RG8Unorm, Invalid, UChar2Normalized, Invalid, 1, 1, 2, ColorFloat);
	addDataFormatDesc(R8G8_SNORM, RG8Snorm, Invalid, Char2Normalized, Invalid, 1, 1, 2, ColorFloat);
	addDataFormatDesc(R8G8_USCALED, Invalid, Invalid, UChar2, Invalid, 1, 1, 2, ColorFloat);
	addDataFormatDesc(R8G8_SSCALED, Invalid, Invalid, Char2, Invalid, 1, 1, 2, ColorFloat);
	addDataFormatDesc(R8G8_UINT, RG8Uint, Invalid, UChar2, Invalid, 1, 1, 2, ColorUInt8);
	addDataFormatDesc(R8G8_SINT, RG8Sint, Invalid, Char2, Invalid, 1, 1, 2, ColorInt8);
	addDataFormatDesc(R8G8_SRGB, RG8Unorm_sRGB, Invalid, UChar2Normalized, Invalid, 1, 1, 2, ColorFloat);

	addDataFormatDesc(R8G8B8_UNORM, Invalid, Invalid, UChar3Normalized, Invalid, 1, 1, 3, ColorFloat);
	addDataFormatDesc(R8G8B8_SNORM, Invalid, Invalid, Char3Normalized, Invalid, 1, 1, 3, ColorFloat);
	addDataFormatDesc(R8G8B8_USCALED, Invalid, Invalid, UChar3, Invalid, 1, 1, 3, ColorFloat);
	addDataFormatDesc(R8G8B8_SSCALED, Invalid, Invalid, Char3, Invalid, 1, 1, 3, ColorFloat);
	addDataFormatDesc(R8G8B8_UINT, Invalid, Invalid, UChar3, Invalid, 1, 1, 3, ColorUInt8);
	addDataFormatDesc(R8G8B8_SINT, Invalid, Invalid, Char3, Invalid, 1, 1, 3, ColorInt8);
	addDataFormatDesc(R8G8B8_SRGB, Invalid, Invalid, UChar3Normalized, Invalid, 1, 1, 3, ColorFloat);

	addDataFormatDesc(B8G8R8_UNORM, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorFloat);
	addDataFormatDesc(B8G8R8_SNORM, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorFloat);
	addDataFormatDesc(B8G8R8_USCALED, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorFloat);
	addDataFormatDesc(B8G8R8_SSCALED, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorFloat);
	addDataFormatDesc(B8G8R8_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorUInt8);
	addDataFormatDesc(B8G8R8_SINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorInt8);
	addDataFormatDesc(B8G8R8_SRGB, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorFloat);

	addDataFormatDesc(R8G8B8A8_UNORM, RGBA8Unorm, Invalid, UChar4Normalized, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(R8G8B8A8_SNORM, RGBA8Snorm, Invalid, Char4Normalized, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(R8G8B8A8_USCALED, Invalid, Invalid, UChar4, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(R8G8B8A8_SSCALED, Invalid, Invalid, Char4, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(R8G8B8A8_UINT, RGBA8Uint, Invalid, UChar4, Invalid, 1, 1, 4, ColorUInt8);
	addDataFormatDesc(R8G8B8A8_SINT, RGBA8Sint, Invalid, Char4, Invalid, 1, 1, 4, ColorInt8);
	addDataFormatDesc(R8G8B8A8_SRGB, RGBA8Unorm_sRGB, Invalid, UChar4Normalized, Invalid, 1, 1, 4, ColorFloat);

	addDataFormatDesc(B8G8R8A8_UNORM, BGRA8Unorm, Invalid, UChar4Normalized_BGRA, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(B8G8R8A8_SNORM, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(B8G8R8A8_USCALED, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(B8G8R8A8_SSCALED, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(B8G8R8A8_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorUInt8);
	addDataFormatDesc(B8G8R8A8_SINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorInt8);
	addDataFormatDesc(B8G8R8A8_SRGB, BGRA8Unorm_sRGB, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);

	addDataFormatDesc(A8B8G8R8_UNORM_PACK32, RGBA8Unorm, Invalid, UChar4Normalized, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(A8B8G8R8_SNORM_PACK32, RGBA8Snorm, Invalid, Char4Normalized, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(A8B8G8R8_USCALED_PACK32, Invalid, Invalid, UChar4, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(A8B8G8R8_SSCALED_PACK32, Invalid, Invalid, Char4, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(A8B8G8R8_UINT_PACK32, RGBA8Uint, Invalid, UChar4, Invalid, 1, 1, 4, ColorUInt8);
	addDataFormatDesc(A8B8G8R8_SINT_PACK32, RGBA8Sint, Invalid, Char4, Invalid, 1, 1, 4, ColorInt8);
	addDataFormatDesc(A8B8G8R8_SRGB_PACK32, RGBA8Unorm_sRGB, Invalid, UChar4Normalized, Invalid, 1, 1, 4, ColorFloat);

	addDataFormatDesc(A2R10G10B10_UNORM_PACK32, BGR10A2Unorm, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(A2R10G10B10_SNORM_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(A2R10G10B10_USCALED_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(A2R10G10B10_SSCALED_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(A2R10G10B10_UINT_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorUInt16);
	addDataFormatDesc(A2R10G10B10_SINT_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorInt16);

	addDataFormatDesc(A2B10G10R10_UNORM_PACK32, RGB10A2Unorm, Invalid, UInt1010102Normalized, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(A2B10G10R10_SNORM_PACK32, Invalid, Invalid, Int1010102Normalized, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(A2B10G10R10_USCALED_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(A2B10G10R10_SSCALED_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(A2B10G10R10_UINT_PACK32, RGB10A2Uint, Invalid, Invalid, Invalid, 1, 1, 4, ColorUInt16);
	addDataFormatDesc(A2B10G10R10_SINT_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorInt16);

	addDataFormatDesc(R16_UNORM, R16Unorm, Invalid, UShortNormalized, UShort2Normalized, 1, 1, 2, ColorFloat);
	addDataFormatDesc(R16_SNORM, R16Snorm, Invalid, ShortNormalized, Short2Normalized, 1, 1, 2, ColorFloat);
	addDataFormatDesc(R16_USCALED, Invalid, Invalid, UShort, UShort2, 1, 1, 2, ColorFloat);
	addDataFormatDesc(R16_SSCALED, Invalid, Invalid, Short, Short2, 1, 1, 2, ColorFloat);
	addDataFormatDesc(R16_UINT, R16Uint, Invalid, UShort, UShort2, 1, 1, 2, ColorUInt16);
	addDataFormatDesc(R16_SINT, R16Sint, Invalid, Short, Short2, 1, 1, 2, ColorInt16);
	addDataFormatDesc(R16_SFLOAT, R16Float, Invalid, Half, Half2, 1, 1, 2, ColorFloat);

	addDataFormatDesc(R16G16_UNORM, RG16Unorm, Invalid, UShort2Normalized, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(R16G16_SNORM, RG16Snorm, Invalid, Short2Normalized, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(R16G16_USCALED, Invalid, Invalid, UShort2, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(R16G16_SSCALED, Invalid, Invalid, Short2, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(R16G16_UINT, RG16Uint, Invalid, UShort2, Invalid, 1, 1, 4, ColorUInt16);
	addDataFormatDesc(R16G16_SINT, RG16Sint, Invalid, Short2, Invalid, 1, 1, 4, ColorInt16);
	addDataFormatDesc(R16G16_SFLOAT, RG16Float, Invalid, Half2, Invalid, 1, 1, 4, ColorFloat);

	addDataFormatDesc(R16G16B16_UNORM, Invalid, Invalid, UShort3Normalized, Invalid, 1, 1, 6, ColorFloat);
	addDataFormatDesc(R16G16B16_SNORM, Invalid, Invalid, Short3Normalized, Invalid, 1, 1, 6, ColorFloat);
	addDataFormatDesc(R16G16B16_USCALED, Invalid, Invalid, UShort3, Invalid, 1, 1, 6, ColorFloat);
	addDataFormatDesc(R16G16B16_SSCALED, Invalid, Invalid, Short3, Invalid, 1, 1, 6, ColorFloat);
	addDataFormatDesc(R16G16B16_UINT, Invalid, Invalid, UShort3, Invalid, 1, 1, 6, ColorUInt16);
	addDataFormatDesc(R16G16B16_SINT, Invalid, Invalid, Short3, Invalid, 1, 1, 6, ColorInt16);
	addDataFormatDesc(R16G16B16_SFLOAT, Invalid, Invalid, Half3, Invalid, 1, 1, 6, ColorFloat);

	addDataFormatDesc(R16G16B16A16_UNORM, RGBA16Unorm, Invalid, UShort4Normalized, Invalid, 1, 1, 8, ColorFloat);
	addDataFormatDesc(R16G16B16A16_SNORM, RGBA16Snorm, Invalid, Short4Normalized, Invalid, 1, 1, 8, ColorFloat);
	addDataFormatDesc(R16G16B16A16_USCALED, Invalid, Invalid, UShort4, Invalid, 1, 1, 8, ColorFloat);
	addDataFormatDesc(R16G16B16A16_SSCALED, Invalid, Invalid, Short4, Invalid, 1, 1, 8, ColorFloat);
	addDataFormatDesc(R16G16B16A16_UINT, RGBA16Uint, Invalid, UShort4, Invalid, 1, 1, 8, ColorUInt16);
	addDataFormatDesc(R16G16B16A16_SINT, RGBA16Sint, Invalid, Short4, Invalid, 1, 1, 8, ColorInt16);
	addDataFormatDesc(R16G16B16A16_SFLOAT, RGBA16Float, Invalid, Half4, Invalid, 1, 1, 8, ColorFloat);

	addDataFormatDesc(R32_UINT, R32Uint, Invalid, UInt, Invalid, 1, 1, 4, ColorUInt32);
	addDataFormatDesc(R32_SINT, R32Sint, Invalid, Int, Invalid, 1, 1, 4, ColorInt32);
	addDataFormatDesc(R32_SFLOAT, R32Float, Invalid, Float, Invalid, 1, 1, 4, ColorFloat);

	addDataFormatDesc(R32G32_UINT, RG32Uint, Invalid, UInt2, Invalid, 1, 1, 8, ColorUInt32);
	addDataFormatDesc(R32G32_SINT, RG32Sint, Invalid, Int2, Invalid, 1, 1, 8, ColorInt32);
	addDataFormatDesc(R32G32_SFLOAT, RG32Float, Invalid, Float2, Invalid, 1, 1, 8, ColorFloat);

	addDataFormatDesc(R32G32B32_UINT, Invalid, Invalid, UInt3, Invalid, 1, 1, 12, ColorUInt32);
	addDataFormatDesc(R32G32B32_SINT, Invalid, Invalid, Int3, Invalid, 1, 1, 12, ColorInt32);
	addDataFormatDesc(R32G32B32_SFLOAT, Invalid, Invalid, Float3, Invalid, 1, 1, 12, ColorFloat);

	addDataFormatDesc(R32G32B32A32_UINT, RGBA32Uint, Invalid, UInt4, Invalid, 1, 1, 16, ColorUInt32);
	addDataFormatDesc(R32G32B32A32_SINT, RGBA32Sint, Invalid, Int4, Invalid, 1, 1, 16, ColorInt32);
	addDataFormatDesc(R32G32B32A32_SFLOAT, RGBA32Float, Invalid, Float4, Invalid, 1, 1, 16, ColorFloat);

	// None of the 64-bit-per-component formats has a Metal pixel or vertex format in this table.
	addDataFormatDesc(R64_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 8, ColorFloat);
	addDataFormatDesc(R64_SINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 8, ColorFloat);
	addDataFormatDesc(R64_SFLOAT, Invalid, Invalid, Invalid, Invalid, 1, 1, 8, ColorFloat);

	addDataFormatDesc(R64G64_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 16, ColorFloat);
	addDataFormatDesc(R64G64_SINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 16, ColorFloat);
	addDataFormatDesc(R64G64_SFLOAT, Invalid, Invalid, Invalid, Invalid, 1, 1, 16, ColorFloat);

	addDataFormatDesc(R64G64B64_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 24, ColorFloat);
	addDataFormatDesc(R64G64B64_SINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 24, ColorFloat);
	addDataFormatDesc(R64G64B64_SFLOAT, Invalid, Invalid, Invalid, Invalid, 1, 1, 24, ColorFloat);

	addDataFormatDesc(R64G64B64A64_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 32, ColorFloat);
	addDataFormatDesc(R64G64B64A64_SINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 32, ColorFloat);
	addDataFormatDesc(R64G64B64A64_SFLOAT, Invalid, Invalid, Invalid, Invalid, 1, 1, 32, ColorFloat);

	addDataFormatDesc(B10G11R11_UFLOAT_PACK32, RG11B10Float, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
	addDataFormatDesc(E5B9G9R9_UFLOAT_PACK32, RGB9E5Float, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);

	// Depth/stencil formats. Several of them list an alternate Metal pixel format in the third column.
	addDataFormatDesc(D32_SFLOAT, Depth32Float, Invalid, Invalid, Invalid, 1, 1, 4, DepthStencil);
	addDataFormatDesc(D32_SFLOAT_S8_UINT, Depth32Float_Stencil8, Invalid, Invalid, Invalid, 1, 1, 5, DepthStencil);

	addDataFormatDesc(S8_UINT, Stencil8, Invalid, Invalid, Invalid, 1, 1, 1, DepthStencil);

	addDataFormatDesc(D16_UNORM, Depth16Unorm, Depth32Float, Invalid, Invalid, 1, 1, 2, DepthStencil);
	addDataFormatDesc(D16_UNORM_S8_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, DepthStencil);
	addDataFormatDesc(D24_UNORM_S8_UINT, Depth24Unorm_Stencil8, Depth32Float_Stencil8, Invalid, Invalid, 1, 1, 4, DepthStencil);

	addDataFormatDesc(X8_D24_UNORM_PACK32, Invalid, Depth24Unorm_Stencil8, Invalid, Invalid, 1, 1, 4, DepthStencil);

// The BC entries are wrapped in a suppression of clang's -Wunguarded-availability warning.
// NOTE(review): presumably because the MTLPixelFormatBC* constants carry availability
// annotations on some deployment targets; confirm against the SDK headers.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunguarded-availability"

	addDataFormatDesc(BC1_RGB_UNORM_BLOCK, BC1_RGBA, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
	addDataFormatDesc(BC1_RGB_SRGB_BLOCK, BC1_RGBA_sRGB, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
	addDataFormatDesc(BC1_RGBA_UNORM_BLOCK, BC1_RGBA, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
	addDataFormatDesc(BC1_RGBA_SRGB_BLOCK, BC1_RGBA_sRGB, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);

	addDataFormatDesc(BC2_UNORM_BLOCK, BC2_RGBA, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
	addDataFormatDesc(BC2_SRGB_BLOCK, BC2_RGBA_sRGB, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);

	addDataFormatDesc(BC3_UNORM_BLOCK, BC3_RGBA, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
	addDataFormatDesc(BC3_SRGB_BLOCK, BC3_RGBA_sRGB, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);

	addDataFormatDesc(BC4_UNORM_BLOCK, BC4_RUnorm, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
	addDataFormatDesc(BC4_SNORM_BLOCK, BC4_RSnorm, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);

	addDataFormatDesc(BC5_UNORM_BLOCK, BC5_RGUnorm, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
	addDataFormatDesc(BC5_SNORM_BLOCK, BC5_RGSnorm, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);

	addDataFormatDesc(BC6H_UFLOAT_BLOCK, BC6H_RGBUfloat, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
	addDataFormatDesc(BC6H_SFLOAT_BLOCK, BC6H_RGBFloat, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);

	addDataFormatDesc(BC7_UNORM_BLOCK, BC7_RGBAUnorm, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
	addDataFormatDesc(BC7_SRGB_BLOCK, BC7_RGBAUnorm_sRGB, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);

#pragma clang diagnostic pop

	addDataFormatDesc(ETC2_R8G8B8_UNORM_BLOCK, ETC2_RGB8, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
	addDataFormatDesc(ETC2_R8G8B8_SRGB_BLOCK, ETC2_RGB8_sRGB, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
	addDataFormatDesc(ETC2_R8G8B8A1_UNORM_BLOCK, ETC2_RGB8A1, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
	addDataFormatDesc(ETC2_R8G8B8A1_SRGB_BLOCK, ETC2_RGB8A1_sRGB, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);

	addDataFormatDesc(ETC2_R8G8B8A8_UNORM_BLOCK, EAC_RGBA8, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
	addDataFormatDesc(ETC2_R8G8B8A8_SRGB_BLOCK, EAC_RGBA8_sRGB, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);

	addDataFormatDesc(EAC_R11_UNORM_BLOCK, EAC_R11Unorm, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
	addDataFormatDesc(EAC_R11_SNORM_BLOCK, EAC_R11Snorm, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);

	addDataFormatDesc(EAC_R11G11_UNORM_BLOCK, EAC_RG11Unorm, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
	addDataFormatDesc(EAC_R11G11_SNORM_BLOCK, EAC_RG11Snorm, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);

	// ASTC: the block width/height columns follow the footprint in the format name; every block is 16 bytes.
	addDataFormatDesc(ASTC_4x4_UNORM_BLOCK, ASTC_4x4_LDR, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
	addDataFormatDesc(ASTC_4x4_SRGB_BLOCK, ASTC_4x4_sRGB, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
	addDataFormatDesc(ASTC_5x4_UNORM_BLOCK, ASTC_5x4_LDR, Invalid, Invalid, Invalid, 5, 4, 16, Compressed);
	addDataFormatDesc(ASTC_5x4_SRGB_BLOCK, ASTC_5x4_sRGB, Invalid, Invalid, Invalid, 5, 4, 16, Compressed);
	addDataFormatDesc(ASTC_5x5_UNORM_BLOCK, ASTC_5x5_LDR, Invalid, Invalid, Invalid, 5, 5, 16, Compressed);
	addDataFormatDesc(ASTC_5x5_SRGB_BLOCK, ASTC_5x5_sRGB, Invalid, Invalid, Invalid, 5, 5, 16, Compressed);
	addDataFormatDesc(ASTC_6x5_UNORM_BLOCK, ASTC_6x5_LDR, Invalid, Invalid, Invalid, 6, 5, 16, Compressed);
	addDataFormatDesc(ASTC_6x5_SRGB_BLOCK, ASTC_6x5_sRGB, Invalid, Invalid, Invalid, 6, 5, 16, Compressed);
	addDataFormatDesc(ASTC_6x6_UNORM_BLOCK, ASTC_6x6_LDR, Invalid, Invalid, Invalid, 6, 6, 16, Compressed);
	addDataFormatDesc(ASTC_6x6_SRGB_BLOCK, ASTC_6x6_sRGB, Invalid, Invalid, Invalid, 6, 6, 16, Compressed);
	addDataFormatDesc(ASTC_8x5_UNORM_BLOCK, ASTC_8x5_LDR, Invalid, Invalid, Invalid, 8, 5, 16, Compressed);
	addDataFormatDesc(ASTC_8x5_SRGB_BLOCK, ASTC_8x5_sRGB, Invalid, Invalid, Invalid, 8, 5, 16, Compressed);
	addDataFormatDesc(ASTC_8x6_UNORM_BLOCK, ASTC_8x6_LDR, Invalid, Invalid, Invalid, 8, 6, 16, Compressed);
	addDataFormatDesc(ASTC_8x6_SRGB_BLOCK, ASTC_8x6_sRGB, Invalid, Invalid, Invalid, 8, 6, 16, Compressed);
	addDataFormatDesc(ASTC_8x8_UNORM_BLOCK, ASTC_8x8_LDR, Invalid, Invalid, Invalid, 8, 8, 16, Compressed);
	addDataFormatDesc(ASTC_8x8_SRGB_BLOCK, ASTC_8x8_sRGB, Invalid, Invalid, Invalid, 8, 8, 16, Compressed);
	addDataFormatDesc(ASTC_10x5_UNORM_BLOCK, ASTC_10x5_LDR, Invalid, Invalid, Invalid, 10, 5, 16, Compressed);
	addDataFormatDesc(ASTC_10x5_SRGB_BLOCK, ASTC_10x5_sRGB, Invalid, Invalid, Invalid, 10, 5, 16, Compressed);
	addDataFormatDesc(ASTC_10x6_UNORM_BLOCK, ASTC_10x6_LDR, Invalid, Invalid, Invalid, 10, 6, 16, Compressed);
	addDataFormatDesc(ASTC_10x6_SRGB_BLOCK, ASTC_10x6_sRGB, Invalid, Invalid, Invalid, 10, 6, 16, Compressed);
	addDataFormatDesc(ASTC_10x8_UNORM_BLOCK, ASTC_10x8_LDR, Invalid, Invalid, Invalid, 10, 8, 16, Compressed);
	addDataFormatDesc(ASTC_10x8_SRGB_BLOCK, ASTC_10x8_sRGB, Invalid, Invalid, Invalid, 10, 8, 16, Compressed);
	addDataFormatDesc(ASTC_10x10_UNORM_BLOCK, ASTC_10x10_LDR, Invalid, Invalid, Invalid, 10, 10, 16, Compressed);
	addDataFormatDesc(ASTC_10x10_SRGB_BLOCK, ASTC_10x10_sRGB, Invalid, Invalid, Invalid, 10, 10, 16, Compressed);
	addDataFormatDesc(ASTC_12x10_UNORM_BLOCK, ASTC_12x10_LDR, Invalid, Invalid, Invalid, 12, 10, 16, Compressed);
	addDataFormatDesc(ASTC_12x10_SRGB_BLOCK, ASTC_12x10_sRGB, Invalid, Invalid, Invalid, 12, 10, 16, Compressed);
	addDataFormatDesc(ASTC_12x12_UNORM_BLOCK, ASTC_12x12_LDR, Invalid, Invalid, Invalid, 12, 12, 16, Compressed);
	addDataFormatDesc(ASTC_12x12_SRGB_BLOCK, ASTC_12x12_sRGB, Invalid, Invalid, Invalid, 12, 12, 16, Compressed);

	// Chroma-subsampled and padded 10/12/16-bit formats. The macro fixes the
	// format type to ColorFloat and leaves the substitute pixel format and both
	// vertex formats Invalid.
	addDfFormatDescChromaSubsampling(G8B8G8R8_422_UNORM, GBGR422, 1, 8, 2, 1, 4);
	addDfFormatDescChromaSubsampling(B8G8R8G8_422_UNORM, BGRG422, 1, 8, 2, 1, 4);
	addDfFormatDescChromaSubsampling(G8_B8_R8_3PLANE_420_UNORM, Invalid, 3, 8, 2, 2, 6);
	addDfFormatDescChromaSubsampling(G8_B8R8_2PLANE_420_UNORM, Invalid, 2, 8, 2, 2, 6);
	addDfFormatDescChromaSubsampling(G8_B8_R8_3PLANE_422_UNORM, Invalid, 3, 8, 2, 1, 4);
	addDfFormatDescChromaSubsampling(G8_B8R8_2PLANE_422_UNORM, Invalid, 2, 8, 2, 1, 4);
	addDfFormatDescChromaSubsampling(G8_B8_R8_3PLANE_444_UNORM, Invalid, 3, 8, 1, 1, 3);
	addDfFormatDescChromaSubsampling(R10X6_UNORM_PACK16, R16Unorm, 0, 10, 1, 1, 2);
	addDfFormatDescChromaSubsampling(R10X6G10X6_UNORM_2PACK16, RG16Unorm, 0, 10, 1, 1, 4);
	addDfFormatDescChromaSubsampling(R10X6G10X6B10X6A10X6_UNORM_4PACK16, RGBA16Unorm, 0, 10, 1, 1, 8);
	addDfFormatDescChromaSubsampling(G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, Invalid, 1, 10, 2, 1, 8);
	addDfFormatDescChromaSubsampling(B10X6G10X6R10X6G10X6_422_UNORM_4PACK16, Invalid, 1, 10, 2, 1, 8);
	addDfFormatDescChromaSubsampling(G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16, Invalid, 3, 10, 2, 2, 12);
	addDfFormatDescChromaSubsampling(G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, Invalid, 2, 10, 2, 2, 12);
	addDfFormatDescChromaSubsampling(G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16, Invalid, 3, 10, 2, 1, 8);
	addDfFormatDescChromaSubsampling(G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, Invalid, 2, 10, 2, 1, 8);
	addDfFormatDescChromaSubsampling(G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16, Invalid, 3, 10, 1, 1, 6);
	addDfFormatDescChromaSubsampling(R12X4_UNORM_PACK16, R16Unorm, 0, 12, 1, 1, 2);
	addDfFormatDescChromaSubsampling(R12X4G12X4_UNORM_2PACK16, RG16Unorm, 0, 12, 1, 1, 4);
	addDfFormatDescChromaSubsampling(R12X4G12X4B12X4A12X4_UNORM_4PACK16, RGBA16Unorm, 0, 12, 1, 1, 8);
	addDfFormatDescChromaSubsampling(G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, Invalid, 1, 12, 2, 1, 8);
	addDfFormatDescChromaSubsampling(B12X4G12X4R12X4G12X4_422_UNORM_4PACK16, Invalid, 1, 12, 2, 1, 8);
	addDfFormatDescChromaSubsampling(G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16, Invalid, 3, 12, 2, 2, 12);
	addDfFormatDescChromaSubsampling(G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, Invalid, 2, 12, 2, 2, 12);
	addDfFormatDescChromaSubsampling(G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16, Invalid, 3, 12, 2, 1, 8);
	addDfFormatDescChromaSubsampling(G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, Invalid, 2, 12, 2, 1, 8);
	addDfFormatDescChromaSubsampling(G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16, Invalid, 3, 12, 1, 1, 6);
	addDfFormatDescChromaSubsampling(G16B16G16R16_422_UNORM, Invalid, 1, 16, 2, 1, 8);
	addDfFormatDescChromaSubsampling(B16G16R16G16_422_UNORM, Invalid, 1, 16, 2, 1, 8);
	addDfFormatDescChromaSubsampling(G16_B16_R16_3PLANE_420_UNORM, Invalid, 3, 16, 2, 2, 12);
	addDfFormatDescChromaSubsampling(G16_B16R16_2PLANE_420_UNORM, Invalid, 2, 16, 2, 2, 12);
	addDfFormatDescChromaSubsampling(G16_B16_R16_3PLANE_422_UNORM, Invalid, 3, 16, 2, 1, 8);
	addDfFormatDescChromaSubsampling(G16_B16R16_2PLANE_422_UNORM, Invalid, 2, 16, 2, 1, 8);
	addDfFormatDescChromaSubsampling(G16_B16_R16_3PLANE_444_UNORM, Invalid, 3, 16, 1, 1, 6);
}

// Table-building helpers for initMTLPixelFormatCapabilities(). Each expansion
// bounds-checks the local `fmtIdx` against _mtlPixelFormatCount and then appends
// one descriptor at `fmtIdx++`, so these macros require a `fmtIdx` variable in
// scope and the order of invocations determines each entry's index.
//
// Note the parameter order: IOS_CAPS comes before MACOS_CAPS in the macro
// signature, while select_platform_caps() receives them as (macOS, iOS).
// The DataFormat slot of every entry starts out as RD::DATA_FORMAT_MAX.
#define addMTLPixelFormatDescFull(MTL_FMT, VIEW_CLASS, IOS_CAPS, MACOS_CAPS, MTL_FMT_LINEAR) \
	CRASH_BAD_INDEX_MSG(fmtIdx, _mtlPixelFormatCount, "Adding too many pixel formats"); \
	_mtlPixelFormatDescriptions[fmtIdx++] = { .mtlPixelFormat = MTLPixelFormat##MTL_FMT, RD::DATA_FORMAT_MAX, select_platform_caps(kMTLFmtCaps##MACOS_CAPS, kMTLFmtCaps##IOS_CAPS), MTLViewClass::VIEW_CLASS, MTLPixelFormat##MTL_FMT_LINEAR, "MTLPixelFormat" #MTL_FMT }

// Non-sRGB entry: the format is passed as its own MTL_FMT_LINEAR argument.
#define addMTLPixelFormatDesc(MTL_FMT, VIEW_CLASS, IOS_CAPS, MACOS_CAPS) \
	addMTLPixelFormatDescFull(MTL_FMT, VIEW_CLASS, IOS_CAPS, MACOS_CAPS, MTL_FMT)

// sRGB entry: the caller names the corresponding linear format explicitly.
#define addMTLPixelFormatDescSRGB(MTL_FMT, VIEW_CLASS, IOS_CAPS, MACOS_CAPS, MTL_FMT_LINEAR) \
	addMTLPixelFormatDescFull(MTL_FMT, VIEW_CLASS, IOS_CAPS, MACOS_CAPS, MTL_FMT_LINEAR)

// Fills _mtlPixelFormatDescriptions with one descriptor per known Metal pixel
// format. Arguments per entry: Metal format suffix, MTLViewClass member,
// iOS capability set, macOS capability set (and, for the SRGB variant, the
// linear counterpart).
void PixelFormats::initMTLPixelFormatCapabilities() {
	clear(_mtlPixelFormatDescriptions, _mtlPixelFormatCount);

	uint32_t fmtIdx = 0;

	// When adding to this list, make sure _mtlPixelFormatCount is large enough for the format count.

	// MTLPixelFormatInvalid must come first.
	addMTLPixelFormatDesc(Invalid, None, None, None);

	// Ordinary 8-bit pixel formats.
	addMTLPixelFormatDesc(A8Unorm, Color8, RF, RF);
	addMTLPixelFormatDesc(R8Unorm, Color8, All, All);
	addMTLPixelFormatDescSRGB(R8Unorm_sRGB, Color8, RFCMRB, None, R8Unorm);
	addMTLPixelFormatDesc(R8Snorm, Color8, RFWCMB, All);
	addMTLPixelFormatDesc(R8Uint, Color8, RWCM, RWCM);
	addMTLPixelFormatDesc(R8Sint, Color8, RWCM, RWCM);

	// Ordinary 16-bit pixel formats.
// (Continuation of PixelFormats::initMTLPixelFormatCapabilities: ordinary 16-bit pixel formats.)
// Arguments per entry: Metal format suffix, MTLViewClass member, iOS capability set,
// macOS capability set (plus the linear counterpart for the SRGB variant). Entries are
// appended at `fmtIdx++`, so their order determines their index in the descriptor table.
	addMTLPixelFormatDesc(R16Unorm, Color16, RFWCMB, All);
	addMTLPixelFormatDesc(R16Snorm, Color16, RFWCMB, All);
	addMTLPixelFormatDesc(R16Uint, Color16, RWCM, RWCM);
	addMTLPixelFormatDesc(R16Sint, Color16, RWCM, RWCM);
	addMTLPixelFormatDesc(R16Float, Color16, All, All);

	addMTLPixelFormatDesc(RG8Unorm, Color16, All, All);
	addMTLPixelFormatDescSRGB(RG8Unorm_sRGB, Color16, RFCMRB, None, RG8Unorm);
	addMTLPixelFormatDesc(RG8Snorm, Color16, RFWCMB, All);
	addMTLPixelFormatDesc(RG8Uint, Color16, RWCM, RWCM);
	addMTLPixelFormatDesc(RG8Sint, Color16, RWCM, RWCM);

	// Packed 16-bit pixel formats.
	addMTLPixelFormatDesc(B5G6R5Unorm, Color16, RFCMRB, None);
	addMTLPixelFormatDesc(A1BGR5Unorm, Color16, RFCMRB, None);
	addMTLPixelFormatDesc(ABGR4Unorm, Color16, RFCMRB, None);
	addMTLPixelFormatDesc(BGR5A1Unorm, Color16, RFCMRB, None);

	// Ordinary 32-bit pixel formats.
	addMTLPixelFormatDesc(R32Uint, Color32, RC, RWCM);
	addMTLPixelFormatDesc(R32Sint, Color32, RC, RWCM);
	addMTLPixelFormatDesc(R32Float, Color32, RCMB, All);

	addMTLPixelFormatDesc(RG16Unorm, Color32, RFWCMB, All);
	addMTLPixelFormatDesc(RG16Snorm, Color32, RFWCMB, All);
	addMTLPixelFormatDesc(RG16Uint, Color32, RWCM, RWCM);
	addMTLPixelFormatDesc(RG16Sint, Color32, RWCM, RWCM);
	addMTLPixelFormatDesc(RG16Float, Color32, All, All);

	addMTLPixelFormatDesc(RGBA8Unorm, Color32, All, All);
	addMTLPixelFormatDescSRGB(RGBA8Unorm_sRGB, Color32, RFCMRB, RFCMRB, RGBA8Unorm);
	addMTLPixelFormatDesc(RGBA8Snorm, Color32, RFWCMB, All);
	addMTLPixelFormatDesc(RGBA8Uint, Color32, RWCM, RWCM);
	addMTLPixelFormatDesc(RGBA8Sint, Color32, RWCM, RWCM);

	addMTLPixelFormatDesc(BGRA8Unorm, Color32, All, All);
	addMTLPixelFormatDescSRGB(BGRA8Unorm_sRGB, Color32, RFCMRB, RFCMRB, BGRA8Unorm);

	// Packed 32-bit pixel formats.
	addMTLPixelFormatDesc(RGB10A2Unorm, Color32, RFCMRB, All);
	addMTLPixelFormatDesc(RGB10A2Uint, Color32, RCM, RWCM);
	addMTLPixelFormatDesc(RG11B10Float, Color32, RFCMRB, All);
	addMTLPixelFormatDesc(RGB9E5Float, Color32, RFCMRB, RF);

	// Ordinary 64-bit pixel formats.
	addMTLPixelFormatDesc(RG32Uint, Color64, RC, RWCM);
	addMTLPixelFormatDesc(RG32Sint, Color64, RC, RWCM);
	addMTLPixelFormatDesc(RG32Float, Color64, RCB, All);

	addMTLPixelFormatDesc(RGBA16Unorm, Color64, RFWCMB, All);
	addMTLPixelFormatDesc(RGBA16Snorm, Color64, RFWCMB, All);
	addMTLPixelFormatDesc(RGBA16Uint, Color64, RWCM, RWCM);
	addMTLPixelFormatDesc(RGBA16Sint, Color64, RWCM, RWCM);
	addMTLPixelFormatDesc(RGBA16Float, Color64, All, All);

	// Ordinary 128-bit pixel formats.
	addMTLPixelFormatDesc(RGBA32Uint, Color128, RC, RWCM);
	addMTLPixelFormatDesc(RGBA32Sint, Color128, RC, RWCM);
	addMTLPixelFormatDesc(RGBA32Float, Color128, RC, All);

	// Compressed pixel formats.
	addMTLPixelFormatDesc(PVRTC_RGBA_2BPP, PVRTC_RGBA_2BPP, RF, None);
	addMTLPixelFormatDescSRGB(PVRTC_RGBA_2BPP_sRGB, PVRTC_RGBA_2BPP, RF, None, PVRTC_RGBA_2BPP);
	addMTLPixelFormatDesc(PVRTC_RGBA_4BPP, PVRTC_RGBA_4BPP, RF, None);
	addMTLPixelFormatDescSRGB(PVRTC_RGBA_4BPP_sRGB, PVRTC_RGBA_4BPP, RF, None, PVRTC_RGBA_4BPP);

	addMTLPixelFormatDesc(ETC2_RGB8, ETC2_RGB8, RF, None);
	addMTLPixelFormatDescSRGB(ETC2_RGB8_sRGB, ETC2_RGB8, RF, None, ETC2_RGB8);
	addMTLPixelFormatDesc(ETC2_RGB8A1, ETC2_RGB8A1, RF, None);
	addMTLPixelFormatDescSRGB(ETC2_RGB8A1_sRGB, ETC2_RGB8A1, RF, None, ETC2_RGB8A1);
	addMTLPixelFormatDesc(EAC_RGBA8, EAC_RGBA8, RF, None);
	addMTLPixelFormatDescSRGB(EAC_RGBA8_sRGB, EAC_RGBA8, RF, None, EAC_RGBA8);
	addMTLPixelFormatDesc(EAC_R11Unorm, EAC_R11, RF, None);
	addMTLPixelFormatDesc(EAC_R11Snorm, EAC_R11, RF, None);
	addMTLPixelFormatDesc(EAC_RG11Unorm, EAC_RG11, RF, None);
	addMTLPixelFormatDesc(EAC_RG11Snorm, EAC_RG11, RF, None);

	// ASTC entries are all registered here with the `None` capability set on both platforms.
	// NOTE(review): presumably the real capabilities are applied later per device
	// (the constructor calls modifyMTLFormatCapabilities() after this function); confirm there.
	addMTLPixelFormatDesc(ASTC_4x4_LDR, ASTC_4x4, None, None);
	addMTLPixelFormatDescSRGB(ASTC_4x4_sRGB, ASTC_4x4, None, None, ASTC_4x4_LDR);
	addMTLPixelFormatDesc(ASTC_4x4_HDR, ASTC_4x4, None, None);
	addMTLPixelFormatDesc(ASTC_5x4_LDR, ASTC_5x4, None, None);
	addMTLPixelFormatDescSRGB(ASTC_5x4_sRGB, ASTC_5x4, None, None, ASTC_5x4_LDR);
	addMTLPixelFormatDesc(ASTC_5x4_HDR, ASTC_5x4, None, None);
	addMTLPixelFormatDesc(ASTC_5x5_LDR, ASTC_5x5, None, None);
	addMTLPixelFormatDescSRGB(ASTC_5x5_sRGB, ASTC_5x5, None, None, ASTC_5x5_LDR);
	addMTLPixelFormatDesc(ASTC_5x5_HDR, ASTC_5x5, None, None);
	addMTLPixelFormatDesc(ASTC_6x5_LDR, ASTC_6x5, None, None);
	addMTLPixelFormatDescSRGB(ASTC_6x5_sRGB, ASTC_6x5, None, None, ASTC_6x5_LDR);
	addMTLPixelFormatDesc(ASTC_6x5_HDR, ASTC_6x5, None, None);
	addMTLPixelFormatDesc(ASTC_6x6_LDR, ASTC_6x6, None, None);
	addMTLPixelFormatDescSRGB(ASTC_6x6_sRGB, ASTC_6x6, None, None, ASTC_6x6_LDR);
	addMTLPixelFormatDesc(ASTC_6x6_HDR, ASTC_6x6, None, None);
	addMTLPixelFormatDesc(ASTC_8x5_LDR, ASTC_8x5, None, None);
	addMTLPixelFormatDescSRGB(ASTC_8x5_sRGB, ASTC_8x5, None, None, ASTC_8x5_LDR);
	addMTLPixelFormatDesc(ASTC_8x5_HDR, ASTC_8x5, None, None);
	addMTLPixelFormatDesc(ASTC_8x6_LDR, ASTC_8x6, None, None);
	addMTLPixelFormatDescSRGB(ASTC_8x6_sRGB, ASTC_8x6, None, None, ASTC_8x6_LDR);
	addMTLPixelFormatDesc(ASTC_8x6_HDR, ASTC_8x6, None, None);
	addMTLPixelFormatDesc(ASTC_8x8_LDR, ASTC_8x8, None, None);
	addMTLPixelFormatDescSRGB(ASTC_8x8_sRGB, ASTC_8x8, None, None, ASTC_8x8_LDR);
	addMTLPixelFormatDesc(ASTC_8x8_HDR, ASTC_8x8, None, None);
	addMTLPixelFormatDesc(ASTC_10x5_LDR, ASTC_10x5, None, None);
	addMTLPixelFormatDescSRGB(ASTC_10x5_sRGB, ASTC_10x5, None, None, ASTC_10x5_LDR);
	addMTLPixelFormatDesc(ASTC_10x5_HDR, ASTC_10x5, None, None);
	addMTLPixelFormatDesc(ASTC_10x6_LDR, ASTC_10x6, None, None);
	addMTLPixelFormatDescSRGB(ASTC_10x6_sRGB, ASTC_10x6, None, None, ASTC_10x6_LDR);
	addMTLPixelFormatDesc(ASTC_10x6_HDR, ASTC_10x6, None, None);
	addMTLPixelFormatDesc(ASTC_10x8_LDR, ASTC_10x8, None, None);
	addMTLPixelFormatDescSRGB(ASTC_10x8_sRGB, ASTC_10x8, None, None, ASTC_10x8_LDR);
	addMTLPixelFormatDesc(ASTC_10x8_HDR, ASTC_10x8, None, None);
	addMTLPixelFormatDesc(ASTC_10x10_LDR, ASTC_10x10, None, None);
	addMTLPixelFormatDescSRGB(ASTC_10x10_sRGB, ASTC_10x10, None, None, ASTC_10x10_LDR);
	addMTLPixelFormatDesc(ASTC_10x10_HDR, ASTC_10x10, None, None);
	addMTLPixelFormatDesc(ASTC_12x10_LDR, ASTC_12x10, None, None);
	addMTLPixelFormatDescSRGB(ASTC_12x10_sRGB, ASTC_12x10, None, None, ASTC_12x10_LDR);
	addMTLPixelFormatDesc(ASTC_12x10_HDR, ASTC_12x10, None, None);
	addMTLPixelFormatDesc(ASTC_12x12_LDR, ASTC_12x12, None, None);
	addMTLPixelFormatDescSRGB(ASTC_12x12_sRGB, ASTC_12x12, None, None, ASTC_12x12_LDR);
	addMTLPixelFormatDesc(ASTC_12x12_HDR, ASTC_12x12, None, None);

// The BC entries are wrapped in a suppression of clang's -Wunguarded-availability warning.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunguarded-availability"

	addMTLPixelFormatDesc(BC1_RGBA, BC1_RGBA, RF, RF);
	addMTLPixelFormatDescSRGB(BC1_RGBA_sRGB, BC1_RGBA, RF, RF, BC1_RGBA);
	addMTLPixelFormatDesc(BC2_RGBA, BC2_RGBA, RF, RF);
	addMTLPixelFormatDescSRGB(BC2_RGBA_sRGB, BC2_RGBA, RF, RF, BC2_RGBA);
	addMTLPixelFormatDesc(BC3_RGBA, BC3_RGBA, RF, RF);
	addMTLPixelFormatDescSRGB(BC3_RGBA_sRGB, BC3_RGBA, RF, RF, BC3_RGBA);
	addMTLPixelFormatDesc(BC4_RUnorm, BC4_R, RF, RF);
	addMTLPixelFormatDesc(BC4_RSnorm, BC4_R, RF, RF);
	addMTLPixelFormatDesc(BC5_RGUnorm, BC5_RG, RF, RF);
	addMTLPixelFormatDesc(BC5_RGSnorm, BC5_RG, RF, RF);
	addMTLPixelFormatDesc(BC6H_RGBUfloat, BC6H_RGB, RF, RF);
	addMTLPixelFormatDesc(BC6H_RGBFloat, BC6H_RGB, RF, RF);
	addMTLPixelFormatDesc(BC7_RGBAUnorm, BC7_RGBA, RF, RF);
	addMTLPixelFormatDescSRGB(BC7_RGBAUnorm_sRGB, BC7_RGBA, RF, RF, BC7_RGBAUnorm);

#pragma clang diagnostic pop

	// YUV pixel formats.
	addMTLPixelFormatDesc(GBGR422, None, RF, RF);
	addMTLPixelFormatDesc(BGRG422, None, RF, RF);

	// Extended range and wide color pixel formats.
	addMTLPixelFormatDesc(BGRA10_XR, BGRA10_XR, None, None);
	addMTLPixelFormatDescSRGB(BGRA10_XR_sRGB, BGRA10_XR, None, None, BGRA10_XR);
	addMTLPixelFormatDesc(BGR10_XR, BGR10_XR, None, None);
	addMTLPixelFormatDescSRGB(BGR10_XR_sRGB, BGR10_XR, None, None, BGR10_XR);
	addMTLPixelFormatDesc(BGR10A2Unorm, Color32, None, None);

	// Depth and stencil pixel formats.
	addMTLPixelFormatDesc(Depth16Unorm, None, None, None);
	addMTLPixelFormatDesc(Depth32Float, None, DRM, DRFMR);
	addMTLPixelFormatDesc(Stencil8, None, DRM, DRMR);
	addMTLPixelFormatDesc(Depth24Unorm_Stencil8, Depth24_Stencil8, None, None);
	addMTLPixelFormatDesc(Depth32Float_Stencil8, Depth32_Stencil8, DRM, DRFMR);
	addMTLPixelFormatDesc(X24_Stencil8, Depth24_Stencil8, None, DRMR);
	addMTLPixelFormatDesc(X32_Stencil8, Depth32_Stencil8, DRM, DRMR);

	// When adding to this list, make sure _mtlPixelFormatCount is large enough for the format count.
}

// Table-building helper for initMTLVertexFormatCapabilities(). Bounds-checks the
// local `fmtIdx` against _mtlVertexFormatCount and appends one descriptor at
// `fmtIdx++`. As with the pixel-format macros, IOS_CAPS precedes MACOS_CAPS in
// the signature while select_platform_caps() receives (macOS, iOS). Vertex
// entries always use MTLViewClass::None and MTLPixelFormatInvalid.
#define addMTLVertexFormatDesc(MTL_VTX_FMT, IOS_CAPS, MACOS_CAPS) \
	CRASH_BAD_INDEX_MSG(fmtIdx, _mtlVertexFormatCount, "Attempting to describe too many MTLVertexFormats"); \
	_mtlVertexFormatDescriptions[fmtIdx++] = { .mtlVertexFormat = MTLVertexFormat##MTL_VTX_FMT, RD::DATA_FORMAT_MAX, select_platform_caps(kMTLFmtCaps##MACOS_CAPS, kMTLFmtCaps##IOS_CAPS), MTLViewClass::None, MTLPixelFormatInvalid, "MTLVertexFormat" #MTL_VTX_FMT }

void PixelFormats::initMTLVertexFormatCapabilities() {
	clear(_mtlVertexFormatDescriptions, _mtlVertexFormatCount);

	uint32_t fmtIdx = 0;

	// When adding to this list, make sure _mtlVertexFormatCount is large enough for the format count.

	// MTLVertexFormatInvalid must come first.
	addMTLVertexFormatDesc(Invalid, None, None);

	addMTLVertexFormatDesc(UChar2Normalized, Vertex, Vertex);
	addMTLVertexFormatDesc(Char2Normalized, Vertex, Vertex);
	addMTLVertexFormatDesc(UChar2, Vertex, Vertex);
	addMTLVertexFormatDesc(Char2, Vertex, Vertex);

	addMTLVertexFormatDesc(UChar3Normalized, Vertex, Vertex);
	addMTLVertexFormatDesc(Char3Normalized, Vertex, Vertex);
	addMTLVertexFormatDesc(UChar3, Vertex, Vertex);
	addMTLVertexFormatDesc(Char3, Vertex, Vertex);

	addMTLVertexFormatDesc(UChar4Normalized, Vertex, Vertex);
	addMTLVertexFormatDesc(Char4Normalized, Vertex, Vertex);
	addMTLVertexFormatDesc(UChar4, Vertex, Vertex);
	addMTLVertexFormatDesc(Char4, Vertex, Vertex);

	addMTLVertexFormatDesc(UInt1010102Normalized, Vertex, Vertex);
	addMTLVertexFormatDesc(Int1010102Normalized, Vertex, Vertex);

	addMTLVertexFormatDesc(UShort2Normalized, Vertex, Vertex);
	addMTLVertexFormatDesc(Short2Normalized, Vertex, Vertex);
	addMTLVertexFormatDesc(UShort2, Vertex, Vertex);
	addMTLVertexFormatDesc(Short2, Vertex, Vertex);
	addMTLVertexFormatDesc(Half2, Vertex, Vertex);

	addMTLVertexFormatDesc(UShort3Normalized, Vertex, Vertex);
	addMTLVertexFormatDesc(Short3Normalized, Vertex, Vertex);
	addMTLVertexFormatDesc(UShort3, Vertex, Vertex);
	addMTLVertexFormatDesc(Short3, Vertex, Vertex);
	addMTLVertexFormatDesc(Half3, Vertex, Vertex);

	addMTLVertexFormatDesc(UShort4Normalized, Vertex, Vertex);
	addMTLVertexFormatDesc(Short4Normalized, Vertex, Vertex);
	addMTLVertexFormatDesc(UShort4, Vertex, Vertex);
	addMTLVertexFormatDesc(Short4, Vertex, Vertex);
	addMTLVertexFormatDesc(Half4, Vertex, Vertex);

	addMTLVertexFormatDesc(UInt, Vertex, Vertex);
	addMTLVertexFormatDesc(Int, Vertex, Vertex);
	addMTLVertexFormatDesc(Float, Vertex, Vertex);

	addMTLVertexFormatDesc(UInt2, Vertex, Vertex);
	addMTLVertexFormatDesc(Int2, Vertex, Vertex);
	addMTLVertexFormatDesc(Float2, Vertex, Vertex);

	addMTLVertexFormatDesc(UInt3, Vertex,
Vertex); + addMTLVertexFormatDesc(Int3, Vertex, Vertex); + addMTLVertexFormatDesc(Float3, Vertex, Vertex); + + addMTLVertexFormatDesc(UInt4, Vertex, Vertex); + addMTLVertexFormatDesc(Int4, Vertex, Vertex); + addMTLVertexFormatDesc(Float4, Vertex, Vertex); + + addMTLVertexFormatDesc(UCharNormalized, None, None); + addMTLVertexFormatDesc(CharNormalized, None, None); + addMTLVertexFormatDesc(UChar, None, None); + addMTLVertexFormatDesc(Char, None, None); + + addMTLVertexFormatDesc(UShortNormalized, None, None); + addMTLVertexFormatDesc(ShortNormalized, None, None); + addMTLVertexFormatDesc(UShort, None, None); + addMTLVertexFormatDesc(Short, None, None); + addMTLVertexFormatDesc(Half, None, None); + + addMTLVertexFormatDesc(UChar4Normalized_BGRA, None, None); + + // When adding to this list, be sure to ensure _mtlVertexFormatCount is large enough for the format count. +} + +void PixelFormats::buildMTLFormatMaps() { + // Set all MTLPixelFormats and MTLVertexFormats to undefined/invalid. + clear(_mtlFormatDescIndicesByMTLPixelFormatsCore, _mtlPixelFormatCoreCount); + clear(_mtlFormatDescIndicesByMTLVertexFormats, _mtlVertexFormatCount); + + // Build lookup table for MTLPixelFormat specs. + // For most Metal format values, which are small and consecutive, use a simple lookup array. + // For outlier format values, which can be large, use a map. + for (uint32_t fmtIdx = 0; fmtIdx < _mtlPixelFormatCount; fmtIdx++) { + MTLPixelFormat fmt = _mtlPixelFormatDescriptions[fmtIdx].mtlPixelFormat; + if (fmt) { + if (fmt < _mtlPixelFormatCoreCount) { + _mtlFormatDescIndicesByMTLPixelFormatsCore[fmt] = fmtIdx; + } else { + _mtlFormatDescIndicesByMTLPixelFormatsExt[fmt] = fmtIdx; + } + } + } + + // Build lookup table for MTLVertexFormat specs. 
+ for (uint32_t fmtIdx = 0; fmtIdx < _mtlVertexFormatCount; fmtIdx++) { + MTLVertexFormat fmt = _mtlVertexFormatDescriptions[fmtIdx].mtlVertexFormat; + if (fmt) { + _mtlFormatDescIndicesByMTLVertexFormats[fmt] = fmtIdx; + } + } +} + +// If the device supports the feature set, add additional capabilities to a MTLPixelFormat. +void PixelFormats::addMTLPixelFormatCapabilities(id<MTLDevice> p_device, + MTLFeatureSet p_feature_set, + MTLPixelFormat p_format, + MTLFmtCaps p_caps) { + if ([p_device supportsFeatureSet:p_feature_set]) { + flags::set(getMTLPixelFormatDesc(p_format).mtlFmtCaps, p_caps); + } +} + +// If the device supports the GPU family, add additional capabilities to a MTLPixelFormat. +void PixelFormats::addMTLPixelFormatCapabilities(id<MTLDevice> p_device, + MTLGPUFamily p_family, + MTLPixelFormat p_format, + MTLFmtCaps p_caps) { + if ([p_device supportsFamily:p_family]) { + flags::set(getMTLPixelFormatDesc(p_format).mtlFmtCaps, p_caps); + } +} + +// Disable capability flags in the Metal pixel format. +void PixelFormats::disableMTLPixelFormatCapabilities(MTLPixelFormat p_format, + MTLFmtCaps p_caps) { + flags::clear(getMTLPixelFormatDesc(p_format).mtlFmtCaps, p_caps); +} + +void PixelFormats::disableAllMTLPixelFormatCapabilities(MTLPixelFormat p_format) { + getMTLPixelFormatDesc(p_format).mtlFmtCaps = kMTLFmtCapsNone; +} + +// If the device supports the feature set, add additional capabilities to a MTLVertexFormat. +void PixelFormats::addMTLVertexFormatCapabilities(id<MTLDevice> p_device, + MTLFeatureSet p_feature_set, + MTLVertexFormat p_format, + MTLFmtCaps p_caps) { + if ([p_device supportsFeatureSet:p_feature_set]) { + flags::set(getMTLVertexFormatDesc(p_format).mtlFmtCaps, p_caps); + } +} + +void PixelFormats::modifyMTLFormatCapabilities() { + modifyMTLFormatCapabilities(device); +} + +// If the supportsBCTextureCompression query is available, use it. 
bool supports_bc_texture_compression(id<MTLDevice> p_device) {
	// The property is only declared by SDKs that know about it, and must additionally be
	// guarded at runtime. Anything older is treated as "no BC support".
#if (TARGET_OS_OSX || (TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED >= 160400))
	if (@available(macOS 11.0, iOS 16.4, *)) {
		return p_device.supportsBCTextureCompression;
	}
#endif
	return false;
}

// Shorthand used only by modifyMTLFormatCapabilities() below. Every macro expects a local
// named `p_device` to be in scope, and all of them are #undef'd right after the function.

#define addFeatSetMTLPixFmtCaps(FEAT_SET, MTL_FMT, CAPS) \
	addMTLPixelFormatCapabilities(p_device, MTLFeatureSet_##FEAT_SET, MTLPixelFormat##MTL_FMT, kMTLFmtCaps##CAPS)

#define addFeatSetMTLVtxFmtCaps(FEAT_SET, MTL_FMT, CAPS) \
	addMTLVertexFormatCapabilities(p_device, MTLFeatureSet_##FEAT_SET, MTLVertexFormat##MTL_FMT, kMTLFmtCaps##CAPS)

#define addGPUMTLPixFmtCaps(GPU_FAM, MTL_FMT, CAPS) \
	addMTLPixelFormatCapabilities(p_device, MTLGPUFamily##GPU_FAM, MTLPixelFormat##MTL_FMT, kMTLFmtCaps##CAPS)

#define disableAllMTLPixFmtCaps(MTL_FMT) \
	disableAllMTLPixelFormatCapabilities(MTLPixelFormat##MTL_FMT)

#define disableMTLPixFmtCaps(MTL_FMT, CAPS) \
	disableMTLPixelFormatCapabilities(MTLPixelFormat##MTL_FMT, kMTLFmtCaps##CAPS)

// Adjusts the statically declared per-format capability flags to what `p_device` reports:
// first removes capabilities the device lacks (BC compression, 32-bit MSAA, 32-bit float
// filtering), then adds the capabilities unlocked by each feature set / GPU family, and
// finally (iOS simulator only) removes what the simulator cannot do.
void PixelFormats::modifyMTLFormatCapabilities(id<MTLDevice> p_device) {
	if (!supports_bc_texture_compression(p_device)) {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunguarded-availability"

		disableAllMTLPixFmtCaps(BC1_RGBA);
		disableAllMTLPixFmtCaps(BC1_RGBA_sRGB);
		disableAllMTLPixFmtCaps(BC2_RGBA);
		disableAllMTLPixFmtCaps(BC2_RGBA_sRGB);
		disableAllMTLPixFmtCaps(BC3_RGBA);
		disableAllMTLPixFmtCaps(BC3_RGBA_sRGB);
		disableAllMTLPixFmtCaps(BC4_RUnorm);
		disableAllMTLPixFmtCaps(BC4_RSnorm);
		disableAllMTLPixFmtCaps(BC5_RGUnorm);
		disableAllMTLPixFmtCaps(BC5_RGSnorm);
		disableAllMTLPixFmtCaps(BC6H_RGBUfloat);
		disableAllMTLPixFmtCaps(BC6H_RGBFloat);
		disableAllMTLPixFmtCaps(BC7_RGBAUnorm);
		disableAllMTLPixFmtCaps(BC7_RGBAUnorm_sRGB);

#pragma clang diagnostic pop
	}

	if (!p_device.supports32BitMSAA) {
		disableMTLPixFmtCaps(R32Uint, MSAA);
		disableMTLPixFmtCaps(R32Uint, Resolve);
		disableMTLPixFmtCaps(R32Sint, MSAA);
		disableMTLPixFmtCaps(R32Sint, Resolve);
		disableMTLPixFmtCaps(R32Float, MSAA);
		disableMTLPixFmtCaps(R32Float, Resolve);
		disableMTLPixFmtCaps(RG32Uint, MSAA);
		disableMTLPixFmtCaps(RG32Uint, Resolve);
		disableMTLPixFmtCaps(RG32Sint, MSAA);
		disableMTLPixFmtCaps(RG32Sint, Resolve);
		disableMTLPixFmtCaps(RG32Float, MSAA);
		disableMTLPixFmtCaps(RG32Float, Resolve);
		disableMTLPixFmtCaps(RGBA32Uint, MSAA);
		disableMTLPixFmtCaps(RGBA32Uint, Resolve);
		disableMTLPixFmtCaps(RGBA32Sint, MSAA);
		disableMTLPixFmtCaps(RGBA32Sint, Resolve);
		disableMTLPixFmtCaps(RGBA32Float, MSAA);
		disableMTLPixFmtCaps(RGBA32Float, Resolve);
	}

	if (!p_device.supports32BitFloatFiltering) {
		disableMTLPixFmtCaps(R32Float, Filter);
		disableMTLPixFmtCaps(RG32Float, Filter);
		disableMTLPixFmtCaps(RGBA32Float, Filter);
	}

#if TARGET_OS_OSX
	addGPUMTLPixFmtCaps(Apple1, R32Uint, Atomic);
	addGPUMTLPixFmtCaps(Apple1, R32Sint, Atomic);

	if (p_device.isDepth24Stencil8PixelFormatSupported) {
		addGPUMTLPixFmtCaps(Apple1, Depth24Unorm_Stencil8, DRFMR);
	}

	addFeatSetMTLPixFmtCaps(macOS_GPUFamily1_v2, Depth16Unorm, DRFMR);

	addFeatSetMTLPixFmtCaps(macOS_GPUFamily1_v3, BGR10A2Unorm, RFCMRB);

	addGPUMTLPixFmtCaps(Apple5, R8Unorm_sRGB, All);

	addGPUMTLPixFmtCaps(Apple5, RG8Unorm_sRGB, All);

	addGPUMTLPixFmtCaps(Apple5, B5G6R5Unorm, RFCMRB);
	addGPUMTLPixFmtCaps(Apple5, A1BGR5Unorm, RFCMRB);
	addGPUMTLPixFmtCaps(Apple5, ABGR4Unorm, RFCMRB);
	addGPUMTLPixFmtCaps(Apple5, BGR5A1Unorm, RFCMRB);

	addGPUMTLPixFmtCaps(Apple5, RGBA8Unorm_sRGB, All);
	addGPUMTLPixFmtCaps(Apple5, BGRA8Unorm_sRGB, All);

	// Blending is actually supported for this format, but format channels cannot be individually write-enabled during blending.
	// Disabling blending is the least-intrusive way to handle this in a Godot-friendly way.
	addGPUMTLPixFmtCaps(Apple5, RGB9E5Float, All);
	disableMTLPixFmtCaps(RGB9E5Float, Blend);

	addGPUMTLPixFmtCaps(Apple5, PVRTC_RGBA_2BPP, RF);
	addGPUMTLPixFmtCaps(Apple5, PVRTC_RGBA_2BPP_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple5, PVRTC_RGBA_4BPP, RF);
	addGPUMTLPixFmtCaps(Apple5, PVRTC_RGBA_4BPP_sRGB, RF);

	addGPUMTLPixFmtCaps(Apple5, ETC2_RGB8, RF);
	addGPUMTLPixFmtCaps(Apple5, ETC2_RGB8_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple5, ETC2_RGB8A1, RF);
	addGPUMTLPixFmtCaps(Apple5, ETC2_RGB8A1_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple5, EAC_RGBA8, RF);
	addGPUMTLPixFmtCaps(Apple5, EAC_RGBA8_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple5, EAC_R11Unorm, RF);
	addGPUMTLPixFmtCaps(Apple5, EAC_R11Snorm, RF);
	addGPUMTLPixFmtCaps(Apple5, EAC_RG11Unorm, RF);
	addGPUMTLPixFmtCaps(Apple5, EAC_RG11Snorm, RF);

	addGPUMTLPixFmtCaps(Apple5, ASTC_4x4_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_4x4_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_4x4_HDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_5x4_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_5x4_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_5x4_HDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_5x5_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_5x5_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_5x5_HDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_6x5_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_6x5_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_6x5_HDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_6x6_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_6x6_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_6x6_HDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_8x5_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_8x5_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_8x5_HDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_8x6_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_8x6_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_8x6_HDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_8x8_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_8x8_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_8x8_HDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_10x5_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_10x5_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_10x5_HDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_10x6_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_10x6_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_10x6_HDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_10x8_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_10x8_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_10x8_HDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_10x10_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_10x10_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_10x10_HDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_12x10_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_12x10_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_12x10_HDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_12x12_LDR, RF);
	addGPUMTLPixFmtCaps(Apple5, ASTC_12x12_sRGB, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_12x12_HDR, RF);

	addGPUMTLPixFmtCaps(Apple5, BGRA10_XR, All);
	addGPUMTLPixFmtCaps(Apple5, BGRA10_XR_sRGB, All);
	addGPUMTLPixFmtCaps(Apple5, BGR10_XR, All);
	addGPUMTLPixFmtCaps(Apple5, BGR10_XR_sRGB, All);

	addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, UCharNormalized, Vertex);
	addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, CharNormalized, Vertex);
	addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, UChar, Vertex);
	addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, Char, Vertex);
	addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, UShortNormalized, Vertex);
	addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, ShortNormalized, Vertex);
	addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, UShort, Vertex);
	addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, Short, Vertex);
	addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, Half, Vertex);
	addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, UChar4Normalized_BGRA, Vertex);
#endif

#if TARGET_OS_IOS && !TARGET_OS_MACCATALYST
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v3, R8Unorm_sRGB, All);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, R8Unorm_sRGB, All);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, R8Snorm, All);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v3, RG8Unorm_sRGB, All);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, RG8Unorm_sRGB, All);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, RG8Snorm, All);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, R32Uint, RWC);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, R32Uint, Atomic);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, R32Sint, RWC);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, R32Sint, Atomic);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, R32Float, RWCMB);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v3, RGBA8Unorm_sRGB, All);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, RGBA8Unorm_sRGB, All);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, RGBA8Snorm, All);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v3, BGRA8Unorm_sRGB, All);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, BGRA8Unorm_sRGB, All);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, RGB10A2Unorm, All);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, RGB10A2Uint, RWCM);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, RG11B10Float, All);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, RGB9E5Float, All);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, RG32Uint, RWC);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, RG32Sint, RWC);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, RG32Float, RWCB);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, RGBA32Uint, RWC);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, RGBA32Sint, RWC);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, RGBA32Float, RWC);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_4x4_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_4x4_sRGB, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_5x4_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_5x4_sRGB, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_5x5_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_5x5_sRGB, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_6x5_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_6x5_sRGB, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_6x6_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_6x6_sRGB, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_8x5_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_8x5_sRGB, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_8x6_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_8x6_sRGB, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_8x8_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_8x8_sRGB, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x5_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x5_sRGB, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x6_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x6_sRGB, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x8_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x8_sRGB, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x10_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x10_sRGB, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_12x10_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_12x10_sRGB, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_12x12_LDR, RF);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_12x12_sRGB, RF);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, Depth32Float, DRMR);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, Depth32Float_Stencil8, DRMR);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, Stencil8, DRMR);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v2, BGRA10_XR, All);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v2, BGRA10_XR_sRGB, All);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v2, BGR10_XR, All);
	addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v2, BGR10_XR_sRGB, All);

	addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v4, BGR10A2Unorm, All);

	addGPUMTLPixFmtCaps(Apple6, ASTC_4x4_HDR, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_5x4_HDR, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_5x5_HDR, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_6x5_HDR, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_6x6_HDR, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_8x5_HDR, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_8x6_HDR, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_8x8_HDR, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_10x5_HDR, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_10x6_HDR, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_10x8_HDR, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_10x10_HDR, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_12x10_HDR, RF);
	addGPUMTLPixFmtCaps(Apple6, ASTC_12x12_HDR, RF);

	addGPUMTLPixFmtCaps(Apple1, Depth16Unorm, DRFM);
	addGPUMTLPixFmtCaps(Apple3, Depth16Unorm, DRFMR);

	// Vertex formats.
	addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, UCharNormalized, Vertex);
	addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, CharNormalized, Vertex);
	addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, UChar, Vertex);
	addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, Char, Vertex);
	addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, UShortNormalized, Vertex);
	addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, ShortNormalized, Vertex);
	addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, UShort, Vertex);
	addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, Short, Vertex);
	addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, Half, Vertex);
	addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, UChar4Normalized_BGRA, Vertex);

// Disable for iOS simulator last.
#if TARGET_OS_SIMULATOR
	// Query the device that was passed in; this function has no other device handle in scope.
	if (![p_device supportsFamily:MTLGPUFamilyApple5]) {
		disableAllMTLPixFmtCaps(R8Unorm_sRGB);
		disableAllMTLPixFmtCaps(RG8Unorm_sRGB);
		disableAllMTLPixFmtCaps(B5G6R5Unorm);
		disableAllMTLPixFmtCaps(A1BGR5Unorm);
		disableAllMTLPixFmtCaps(ABGR4Unorm);
		disableAllMTLPixFmtCaps(BGR5A1Unorm);

		disableAllMTLPixFmtCaps(BGRA10_XR);
		disableAllMTLPixFmtCaps(BGRA10_XR_sRGB);
		disableAllMTLPixFmtCaps(BGR10_XR);
		disableAllMTLPixFmtCaps(BGR10_XR_sRGB);

		disableAllMTLPixFmtCaps(GBGR422);
		disableAllMTLPixFmtCaps(BGRG422);

		disableMTLPixFmtCaps(RGB9E5Float, ColorAtt);

		disableMTLPixFmtCaps(R8Unorm_sRGB, Write);
		disableMTLPixFmtCaps(RG8Unorm_sRGB, Write);
		disableMTLPixFmtCaps(RGBA8Unorm_sRGB, Write);
		disableMTLPixFmtCaps(BGRA8Unorm_sRGB, Write);
		disableMTLPixFmtCaps(PVRTC_RGBA_2BPP_sRGB, Write);
		disableMTLPixFmtCaps(PVRTC_RGBA_4BPP_sRGB, Write);
		disableMTLPixFmtCaps(ETC2_RGB8_sRGB, Write);
		disableMTLPixFmtCaps(ETC2_RGB8A1_sRGB, Write);
		disableMTLPixFmtCaps(EAC_RGBA8_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_4x4_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_5x4_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_5x5_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_6x5_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_6x6_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_8x5_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_8x6_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_8x8_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_10x5_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_10x6_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_10x8_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_10x10_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_12x10_sRGB, Write);
		disableMTLPixFmtCaps(ASTC_12x12_sRGB, Write);
	}
#endif
#endif
}

// Retire every helper macro defined above so none of them leaks into the rest of the file.
#undef addFeatSetMTLPixFmtCaps
#undef addGPUMTLPixFmtCaps
#undef disableMTLPixFmtCaps
#undef disableAllMTLPixFmtCaps
#undef addFeatSetMTLVtxFmtCaps

// Populates the DataFormat lookup maps and connects Godot and Metal pixel formats to one-another.
void PixelFormats::buildDFFormatMaps() {
	// For each described DataFormat: link the primary Metal pixel/vertex format back to it
	// (first DataFormat to claim a Metal format wins), and reset to Invalid any primary or
	// substitute Metal format whose descriptor reports it is not supported.
	for (uint32_t slot = 0; slot < RD::DATA_FORMAT_MAX; slot++) {
		DataFormatDesc &df_desc = _dataFormatDescriptions[slot];
		DataFormat data_format = df_desc.dataFormat;

		// DATA_FORMAT_MAX marks a slot that was never described; nothing to link.
		if (data_format == RD::DATA_FORMAT_MAX) {
			continue;
		}

		// Primary pixel format: set the back reference, then validate.
		if (df_desc.mtlPixelFormat) {
			MTLFormatDesc &pixel_desc = getMTLPixelFormatDesc(df_desc.mtlPixelFormat);
			if (pixel_desc.dataFormat == RD::DATA_FORMAT_MAX) {
				pixel_desc.dataFormat = data_format;
			}
			if (!pixel_desc.isSupported()) {
				df_desc.mtlPixelFormat = MTLPixelFormatInvalid;
			}
		}

		// Substitute pixel format: validated only, no back reference is written.
		if (df_desc.mtlPixelFormatSubstitute) {
			MTLFormatDesc &pixel_subst_desc = getMTLPixelFormatDesc(df_desc.mtlPixelFormatSubstitute);
			if (!pixel_subst_desc.isSupported()) {
				df_desc.mtlPixelFormatSubstitute = MTLPixelFormatInvalid;
			}
		}

		// Primary vertex format: set the back reference, then validate.
		if (df_desc.mtlVertexFormat) {
			MTLFormatDesc &vertex_desc = getMTLVertexFormatDesc(df_desc.mtlVertexFormat);
			if (vertex_desc.dataFormat == RD::DATA_FORMAT_MAX) {
				vertex_desc.dataFormat = data_format;
			}
			if (!vertex_desc.isSupported()) {
				df_desc.mtlVertexFormat = MTLVertexFormatInvalid;
			}
		}

		// Substitute vertex format: validated only, no back reference is written.
		if (df_desc.mtlVertexFormatSubstitute) {
			MTLFormatDesc &vertex_subst_desc = getMTLVertexFormatDesc(df_desc.mtlVertexFormatSubstitute);
			if (!vertex_subst_desc.isSupported()) {
				df_desc.mtlVertexFormatSubstitute = MTLVertexFormatInvalid;
			}
		}
	}
}
diff --git a/drivers/metal/rendering_context_driver_metal.h b/drivers/metal/rendering_context_driver_metal.h
new file mode 100644
index 0000000000..0363ab111a
--- /dev/null
+++ b/drivers/metal/rendering_context_driver_metal.h
@@ -0,0 +1,206 @@
/**************************************************************************/
/*  rendering_context_driver_metal.h                                      */
/**************************************************************************/
/*                         This file is part of:                          */
/*                             GODOT ENGINE                               */
/*                        https://godotengine.org                         */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
/*                                                                        */
/* Permission is hereby granted, free of charge, to any person obtaining  */
/* a copy of this software and associated documentation files (the        */
/* "Software"), to deal in the Software without restriction, including    */
/* without limitation the rights to use, copy, modify, merge, publish,    */
/* distribute, sublicense, and/or sell copies of the Software, and to     */
/* permit persons to whom the Software is furnished to do so, subject to  */
/* the following conditions:                                              */
/*                                                                        */
/* The above copyright notice and this permission notice shall be         */
/* included in all copies or substantial portions of the Software.        */
/*                                                                        */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
/**************************************************************************/

#ifndef RENDERING_CONTEXT_DRIVER_METAL_H
#define RENDERING_CONTEXT_DRIVER_METAL_H

#ifdef METAL_ENABLED

#import "rendering_device_driver_metal.h"

#import "servers/rendering/rendering_context_driver.h"

#import <CoreGraphics/CGGeometry.h>
#import <Metal/Metal.h>
#import <QuartzCore/CALayer.h>

@class CAMetalLayer;
@protocol CAMetalDrawable;
class PixelFormats;
class MDResourceCache;

// Metal implementation of RenderingContextDriver. It exposes a single device and owns the
// presentation surfaces, each of which wraps a CAMetalLayer.
class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingContextDriverMetal : public RenderingContextDriver {
protected:
	id<MTLDevice> metal_device = nil;
	Device device; // There is only one device on Apple Silicon.

public:
	Error initialize() final override;
	const Device &device_get(uint32_t p_device_index) const final override;
	uint32_t device_get_count() const final override;
	bool device_supports_present(uint32_t p_device_index, SurfaceID p_surface) const final override { return true; }
	RenderingDeviceDriver *driver_create() final override;
	void driver_free(RenderingDeviceDriver *p_driver) final override;
	SurfaceID surface_create(const void *p_platform_data) final override;
	void surface_set_size(SurfaceID p_surface, uint32_t p_width, uint32_t p_height) final override;
	void surface_set_vsync_mode(SurfaceID p_surface, DisplayServer::VSyncMode p_vsync_mode) final override;
	DisplayServer::VSyncMode surface_get_vsync_mode(SurfaceID p_surface) const final override;
	uint32_t surface_get_width(SurfaceID p_surface) const final override;
	uint32_t surface_get_height(SurfaceID p_surface) const final override;
	void surface_set_needs_resize(SurfaceID p_surface, bool p_needs_resize) final override;
	bool surface_get_needs_resize(SurfaceID p_surface) const final override;
	void surface_destroy(SurfaceID p_surface) final override;
	bool is_debug_utils_enabled() const final override { return true; }

#pragma mark - Metal-specific methods

	// Platform-specific data for the Windows embedded in this driver.
	struct WindowPlatformData {
		CAMetalLayer *__unsafe_unretained layer;
	};

	// Abstract presentation target: holds the size / vsync state and defines the
	// resize, acquire and present operations that a concrete surface implements.
	class Surface {
	protected:
		id<MTLDevice> device;

	public:
		uint32_t width = 0;
		uint32_t height = 0;
		DisplayServer::VSyncMode vsync_mode = DisplayServer::VSYNC_ENABLED;
		bool needs_resize = false;

		Surface(id<MTLDevice> p_device) :
				device(p_device) {}
		virtual ~Surface() = default;

		MTLPixelFormat get_pixel_format() const { return MTLPixelFormatBGRA8Unorm; }
		virtual Error resize(uint32_t p_desired_framebuffer_count) = 0;
		virtual RDD::FramebufferID acquire_next_frame_buffer() = 0;
		virtual void present(MDCommandBuffer *p_cmd_buffer) = 0;
	};

	// Surface backed by a CAMetalLayer. Acquired drawables are kept in a small ring buffer:
	// `rear` is the slot filled by the most recent acquire, `front` is the next slot to
	// present, and `count` is the number of acquired-but-not-yet-presented drawables.
	class SurfaceLayer : public Surface {
		CAMetalLayer *__unsafe_unretained layer = nil;
		LocalVector<MDFrameBuffer> frame_buffers;
		LocalVector<id<MTLDrawable>> drawables;
		uint32_t rear = -1; // Deliberately wraps to UINT32_MAX so the first `rear + 1` lands on slot 0.
		uint32_t front = 0;
		uint32_t count = 0;

	public:
		SurfaceLayer(CAMetalLayer *p_layer, id<MTLDevice> p_device) :
				Surface(p_device), layer(p_layer) {
			layer.allowsNextDrawableTimeout = YES;
			layer.framebufferOnly = YES;
			layer.opaque = OS::get_singleton()->is_layered_allowed() ? NO : YES;
			layer.pixelFormat = get_pixel_format();
			layer.device = p_device;
		}

		~SurfaceLayer() override {
			layer = nil;
		}

		// Applies the current width/height and vsync mode to the layer and sizes the ring
		// buffer for `p_desired_framebuffer_count` drawables (clamped to 3).
		// Returns ERR_SKIP without touching the layer when either dimension is zero.
		Error resize(uint32_t p_desired_framebuffer_count) override final {
			if (width == 0 || height == 0) {
				// Very likely the window is minimized, don't create a swap chain.
				return ERR_SKIP;
			}

			CGSize drawableSize = CGSizeMake(width, height);
			CGSize current = layer.drawableSize;
			if (!CGSizeEqualToSize(current, drawableSize)) {
				layer.drawableSize = drawableSize;
			}

			// Metal supports a maximum of 3 drawables.
			p_desired_framebuffer_count = MIN(3U, p_desired_framebuffer_count);
			layer.maximumDrawableCount = p_desired_framebuffer_count;

#if TARGET_OS_OSX
			// Display sync is only supported on macOS.
			switch (vsync_mode) {
				case DisplayServer::VSYNC_MAILBOX:
				case DisplayServer::VSYNC_ADAPTIVE:
				case DisplayServer::VSYNC_ENABLED:
					layer.displaySyncEnabled = YES;
					break;
				case DisplayServer::VSYNC_DISABLED:
					layer.displaySyncEnabled = NO;
					break;
			}
#endif
			drawables.resize(p_desired_framebuffer_count);
			frame_buffers.resize(p_desired_framebuffer_count);
			for (uint32_t i = 0; i < p_desired_framebuffer_count; i++) {
				// Reserve space for the drawable texture.
				frame_buffers[i].textures.resize(1);
			}

			return OK;
		}

		// Obtains the layer's next drawable and returns the frame buffer wrapping its texture.
		// Returns an empty FramebufferID when every ring slot is already in use (this also
		// covers a ring that was never sized) or when the layer has no drawable to hand out.
		RDD::FramebufferID acquire_next_frame_buffer() override final {
			if (count == frame_buffers.size()) {
				return RDD::FramebufferID();
			}

			// Ask for the drawable before touching the ring state. The constructor enables
			// allowsNextDrawableTimeout, so this can come back nil; bailing out here leaves
			// `rear` and `count` unchanged, so a later present() never pops an empty slot.
			id<CAMetalDrawable> drawable = layer.nextDrawable;
			ERR_FAIL_NULL_V_MSG(drawable, RDD::FramebufferID(), "no drawable available");

			rear = (rear + 1) % frame_buffers.size();
			count++;

			MDFrameBuffer &frame_buffer = frame_buffers[rear];
			frame_buffer.size = Size2i(width, height);

			drawables[rear] = drawable;
			frame_buffer.textures.write[0] = drawable.texture;

			return RDD::FramebufferID(&frame_buffer);
		}

		// Pops the oldest acquired drawable off the ring and schedules it for presentation on
		// `p_cmd_buffer`. Does nothing when no drawable is pending.
		void present(MDCommandBuffer *p_cmd_buffer) override final {
			if (count == 0) {
				return;
			}

			// Release texture and drawable.
			frame_buffers[front].textures.write[0] = nil;
			id<MTLDrawable> drawable = drawables[front];
			drawables[front] = nil;

			count--;
			front = (front + 1) % frame_buffers.size();

			[p_cmd_buffer->get_command_buffer() presentDrawable:drawable];
		}
	};

	id<MTLDevice> get_metal_device() const { return metal_device; }

#pragma mark - Initialization

	RenderingContextDriverMetal();
	~RenderingContextDriverMetal() override;
};

#endif // METAL_ENABLED

#endif // RENDERING_CONTEXT_DRIVER_METAL_H
diff --git a/drivers/metal/rendering_context_driver_metal.mm b/drivers/metal/rendering_context_driver_metal.mm
new file mode 100644
index 0000000000..b257d7142a
--- /dev/null
+++ b/drivers/metal/rendering_context_driver_metal.mm
@@ -0,0 +1,134 @@
/**************************************************************************/
/*  rendering_context_driver_metal.mm                                     */
/**************************************************************************/
/*                         This file is part of:                          */
/*                             GODOT ENGINE                               */
/*                        https://godotengine.org                         */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
/*                                                                        */
/* Permission is hereby granted, free of charge, to any person obtaining  */
/* a copy of this software and associated documentation files (the        */
/* "Software"), to deal in the Software without restriction, including    */
/* without limitation the rights to use, copy, modify, merge, publish,    */
/* distribute, sublicense, and/or sell copies of the Software, and to     */
/* permit persons to whom the Software is furnished to do so, subject to  */
/* the following conditions:                                              */
/*                                                                        */
/* The above copyright notice and this permission notice shall be         */
/* included in all copies or substantial portions of the Software.
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#import "rendering_context_driver_metal.h" + +@protocol MTLDeviceEx <MTLDevice> +#if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 130300 +- (void)setShouldMaximizeConcurrentCompilation:(BOOL)v; +#endif +@end + +RenderingContextDriverMetal::RenderingContextDriverMetal() { +} + +RenderingContextDriverMetal::~RenderingContextDriverMetal() { +} + +Error RenderingContextDriverMetal::initialize() { + metal_device = MTLCreateSystemDefaultDevice(); +#if TARGET_OS_OSX + if (@available(macOS 13.3, *)) { + [id<MTLDeviceEx>(metal_device) setShouldMaximizeConcurrentCompilation:YES]; + } +#endif + device.type = DEVICE_TYPE_INTEGRATED_GPU; + device.vendor = VENDOR_APPLE; + device.workarounds = Workarounds(); + + MetalDeviceProperties props(metal_device); + int version = (int)props.features.highestFamily - (int)MTLGPUFamilyApple1 + 1; + device.name = vformat("%s (Apple%d)", metal_device.name.UTF8String, version); + + return OK; +} + +const RenderingContextDriver::Device &RenderingContextDriverMetal::device_get(uint32_t p_device_index) const { + DEV_ASSERT(p_device_index < 1); + return device; +} + +uint32_t RenderingContextDriverMetal::device_get_count() const { + return 1; +} + +RenderingDeviceDriver *RenderingContextDriverMetal::driver_create() { + return memnew(RenderingDeviceDriverMetal(this)); +} + +void RenderingContextDriverMetal::driver_free(RenderingDeviceDriver *p_driver) { + 
memdelete(p_driver); +} + +RenderingContextDriver::SurfaceID RenderingContextDriverMetal::surface_create(const void *p_platform_data) { + const WindowPlatformData *wpd = (const WindowPlatformData *)(p_platform_data); + Surface *surface = memnew(SurfaceLayer(wpd->layer, metal_device)); + + return SurfaceID(surface); +} + +void RenderingContextDriverMetal::surface_set_size(SurfaceID p_surface, uint32_t p_width, uint32_t p_height) { + Surface *surface = (Surface *)(p_surface); + if (surface->width == p_width && surface->height == p_height) { + return; + } + surface->width = p_width; + surface->height = p_height; + surface->needs_resize = true; +} + +void RenderingContextDriverMetal::surface_set_vsync_mode(SurfaceID p_surface, DisplayServer::VSyncMode p_vsync_mode) { + Surface *surface = (Surface *)(p_surface); + if (surface->vsync_mode == p_vsync_mode) { + return; + } + surface->vsync_mode = p_vsync_mode; + surface->needs_resize = true; +} + +DisplayServer::VSyncMode RenderingContextDriverMetal::surface_get_vsync_mode(SurfaceID p_surface) const { + Surface *surface = (Surface *)(p_surface); + return surface->vsync_mode; +} + +uint32_t RenderingContextDriverMetal::surface_get_width(SurfaceID p_surface) const { + Surface *surface = (Surface *)(p_surface); + return surface->width; +} + +uint32_t RenderingContextDriverMetal::surface_get_height(SurfaceID p_surface) const { + Surface *surface = (Surface *)(p_surface); + return surface->height; +} + +void RenderingContextDriverMetal::surface_set_needs_resize(SurfaceID p_surface, bool p_needs_resize) { + Surface *surface = (Surface *)(p_surface); + surface->needs_resize = p_needs_resize; +} + +bool RenderingContextDriverMetal::surface_get_needs_resize(SurfaceID p_surface) const { + Surface *surface = (Surface *)(p_surface); + return surface->needs_resize; +} + +void RenderingContextDriverMetal::surface_destroy(SurfaceID p_surface) { + Surface *surface = (Surface *)(p_surface); + memdelete(surface); +} diff --git 
a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h new file mode 100644 index 0000000000..7c23624e43 --- /dev/null +++ b/drivers/metal/rendering_device_driver_metal.h @@ -0,0 +1,437 @@ +/**************************************************************************/ +/* rendering_device_driver_metal.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#ifndef RENDERING_DEVICE_DRIVER_METAL_H +#define RENDERING_DEVICE_DRIVER_METAL_H + +#import "metal_objects.h" + +#import "servers/rendering/rendering_device_driver.h" + +#import <Metal/Metal.h> +#import <spirv.hpp> +#import <variant> + +#ifdef DEBUG_ENABLED +#ifndef _DEBUG +#define _DEBUG +#endif +#endif + +class RenderingContextDriverMetal; + +class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public RenderingDeviceDriver { + friend struct ShaderCacheEntry; + + template <typename T> + using Result = std::variant<T, Error>; + +#pragma mark - Generic + + RenderingContextDriverMetal *context_driver = nullptr; + RenderingContextDriver::Device context_device; + id<MTLDevice> device = nil; + + uint32_t version_major = 2; + uint32_t version_minor = 0; + MetalDeviceProperties *metal_device_properties = nullptr; + PixelFormats *pixel_formats = nullptr; + std::unique_ptr<MDResourceCache> resource_cache; + + RDD::Capabilities capabilities; + RDD::MultiviewCapabilities multiview_capabilities; + + id<MTLBinaryArchive> archive = nil; + uint32_t archive_count = 0; + + id<MTLCommandQueue> device_queue = nil; + id<MTLCaptureScope> device_scope = nil; + + String pipeline_cache_id; + + Error _create_device(); + Error _check_capabilities(); + +#pragma mark - Shader Cache + + ShaderLoadStrategy _shader_load_strategy = ShaderLoadStrategy::DEFAULT; + + /** + * The shader cache is a map of hashes of the Metal source to shader cache entries. + * + * To prevent unbounded growth of the cache, cache entries are automatically freed when + * there are no more references to the MDLibrary associated with the cache entry. 
+ */ + HashMap<SHA256Digest, ShaderCacheEntry *, HashableHasher<SHA256Digest>> _shader_cache; + void shader_cache_free_entry(const SHA256Digest &key); + +public: + Error initialize(uint32_t p_device_index, uint32_t p_frame_count) override final; + +#pragma mark - Memory + +#pragma mark - Buffers + +public: + virtual BufferID buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) override final; + virtual bool buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) override final; + virtual void buffer_free(BufferID p_buffer) override final; + virtual uint64_t buffer_get_allocation_size(BufferID p_buffer) override final; + virtual uint8_t *buffer_map(BufferID p_buffer) override final; + virtual void buffer_unmap(BufferID p_buffer) override final; + +#pragma mark - Texture + +private: + // Returns true if the texture is a valid linear format. + Result<bool> is_valid_linear(TextureFormat const &p_format) const; + void _get_sub_resource(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) const; + +public: + virtual TextureID texture_create(const TextureFormat &p_format, const TextureView &p_view) override final; + virtual TextureID texture_create_from_extension(uint64_t p_native_texture, TextureType p_type, DataFormat p_format, uint32_t p_array_layers, bool p_depth_stencil) override final; + virtual TextureID texture_create_shared(TextureID p_original_texture, const TextureView &p_view) override final; + virtual TextureID texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) override final; + virtual void texture_free(TextureID p_texture) override final; + virtual uint64_t texture_get_allocation_size(TextureID p_texture) override final; + virtual void texture_get_copyable_layout(TextureID p_texture, const TextureSubresource 
&p_subresource, TextureCopyableLayout *r_layout) override final; + virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) override final; + virtual void texture_unmap(TextureID p_texture) override final; + virtual BitField<TextureUsageBits> texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final; + virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) override final; + +#pragma mark - Sampler + +public: + virtual SamplerID sampler_create(const SamplerState &p_state) final override; + virtual void sampler_free(SamplerID p_sampler) final override; + virtual bool sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_filter) override final; + +#pragma mark - Vertex Array + +private: +public: + virtual VertexFormatID vertex_format_create(VectorView<VertexAttribute> p_vertex_attribs) override final; + virtual void vertex_format_free(VertexFormatID p_vertex_format) override final; + +#pragma mark - Barriers + + virtual void command_pipeline_barrier( + CommandBufferID p_cmd_buffer, + BitField<PipelineStageBits> p_src_stages, + BitField<PipelineStageBits> p_dst_stages, + VectorView<MemoryBarrier> p_memory_barriers, + VectorView<BufferBarrier> p_buffer_barriers, + VectorView<TextureBarrier> p_texture_barriers) override final; + +#pragma mark - Fences + +private: + struct Fence { + dispatch_semaphore_t semaphore; + Fence() : + semaphore(dispatch_semaphore_create(0)) {} + }; + +public: + virtual FenceID fence_create() override final; + virtual Error fence_wait(FenceID p_fence) override final; + virtual void fence_free(FenceID p_fence) override final; + +#pragma mark - Semaphores + +public: + virtual SemaphoreID semaphore_create() override final; + virtual void semaphore_free(SemaphoreID p_semaphore) override final; + +#pragma mark - Commands + // ----- QUEUE FAMILY ----- + + virtual CommandQueueFamilyID 
command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface = 0) override final; + + // ----- QUEUE ----- +public: + virtual CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) override final; + virtual Error command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) override final; + virtual void command_queue_free(CommandQueueID p_cmd_queue) override final; + + // ----- POOL ----- + + virtual CommandPoolID command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) override final; + virtual void command_pool_free(CommandPoolID p_cmd_pool) override final; + + // ----- BUFFER ----- + +private: + // Used to maintain references. + Vector<MDCommandBuffer *> command_buffers; + +public: + virtual CommandBufferID command_buffer_create(CommandPoolID p_cmd_pool) override final; + virtual bool command_buffer_begin(CommandBufferID p_cmd_buffer) override final; + virtual bool command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) override final; + virtual void command_buffer_end(CommandBufferID p_cmd_buffer) override final; + virtual void command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) override final; + +#pragma mark - Swapchain + +private: + struct SwapChain { + RenderingContextDriver::SurfaceID surface = RenderingContextDriver::SurfaceID(); + RenderPassID render_pass; + RDD::DataFormat data_format = DATA_FORMAT_MAX; + SwapChain() : + render_pass(nullptr) {} + }; + + void _swap_chain_release(SwapChain *p_swap_chain); + void _swap_chain_release_buffers(SwapChain 
*p_swap_chain); + +public: + virtual SwapChainID swap_chain_create(RenderingContextDriver::SurfaceID p_surface) override final; + virtual Error swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) override final; + virtual FramebufferID swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) override final; + virtual RenderPassID swap_chain_get_render_pass(SwapChainID p_swap_chain) override final; + virtual DataFormat swap_chain_get_format(SwapChainID p_swap_chain) override final; + virtual void swap_chain_free(SwapChainID p_swap_chain) override final; + +#pragma mark - Frame Buffer + + virtual FramebufferID framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height) override final; + virtual void framebuffer_free(FramebufferID p_framebuffer) override final; + +#pragma mark - Shader + +private: + // Serialization types need access to private state. 
+ + friend struct ShaderStageData; + friend struct SpecializationConstantData; + friend struct UniformData; + friend struct ShaderBinaryData; + friend struct PushConstantData; + +private: + Error _reflect_spirv16(VectorView<ShaderStageSPIRVData> p_spirv, ShaderReflection &r_reflection); + +public: + virtual String shader_get_binary_cache_key() override final; + virtual Vector<uint8_t> shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) override final; + virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name) override final; + virtual void shader_free(ShaderID p_shader) override final; + virtual void shader_destroy_modules(ShaderID p_shader) override final; + +#pragma mark - Uniform Set + +public: + virtual UniformSetID uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index) override final; + virtual void uniform_set_free(UniformSetID p_uniform_set) override final; + +#pragma mark - Commands + + virtual void command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final; + +#pragma mark Transfer + +private: + enum class CopySource { + Buffer, + Texture, + }; + void _copy_texture_buffer(CommandBufferID p_cmd_buffer, + CopySource p_source, + TextureID p_texture, + BufferID p_buffer, + VectorView<BufferTextureCopyRegion> p_regions); + +public: + virtual void command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) override final; + virtual void command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView<BufferCopyRegion> p_regions) override final; + + virtual void command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout 
p_dst_texture_layout, VectorView<TextureCopyRegion> p_regions) override final; + virtual void command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) override final; + virtual void command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) override final; + + virtual void command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<BufferTextureCopyRegion> p_regions) override final; + virtual void command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView<BufferTextureCopyRegion> p_regions) override final; + +#pragma mark Pipeline + +private: + Result<id<MTLFunction>> _create_function(MDLibrary *p_library, NSString *p_name, VectorView<PipelineSpecializationConstant> &p_specialization_constants); + +public: + virtual void pipeline_free(PipelineID p_pipeline_id) override final; + + // ----- BINDING ----- + + virtual void command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_first_index, VectorView<uint32_t> p_data) override final; + + // ----- CACHE ----- +private: + String _pipeline_get_cache_path() const; + +public: + virtual bool pipeline_cache_create(const Vector<uint8_t> &p_data) override final; + virtual void pipeline_cache_free() override final; + virtual size_t pipeline_cache_query_size() override final; + virtual Vector<uint8_t> pipeline_cache_serialize() override final; + +#pragma mark Rendering + + // ----- SUBPASS ----- + + virtual RenderPassID render_pass_create(VectorView<Attachment> p_attachments, 
VectorView<Subpass> p_subpasses, VectorView<SubpassDependency> p_subpass_dependencies, uint32_t p_view_count) override final; + virtual void render_pass_free(RenderPassID p_render_pass) override final; + + // ----- COMMANDS ----- + +public: + virtual void command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RenderPassClearValue> p_clear_values) override final; + virtual void command_end_render_pass(CommandBufferID p_cmd_buffer) override final; + virtual void command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) override final; + virtual void command_render_set_viewport(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_viewports) override final; + virtual void command_render_set_scissor(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_scissors) override final; + virtual void command_render_clear_attachments(CommandBufferID p_cmd_buffer, VectorView<AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) override final; + + // Binding. + virtual void command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final; + virtual void command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final; + + // Drawing. 
+ virtual void command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) override final; + virtual void command_render_draw_indexed(CommandBufferID p_cmd_buffer, uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index, int32_t p_vertex_offset, uint32_t p_first_instance) override final; + virtual void command_render_draw_indexed_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final; + virtual void command_render_draw_indexed_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final; + virtual void command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final; + virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final; + + // Buffer binding. + virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) override final; + virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) override final; + + // Dynamic state. 
+ virtual void command_render_set_blend_constants(CommandBufferID p_cmd_buffer, const Color &p_constants) override final; + virtual void command_render_set_line_width(CommandBufferID p_cmd_buffer, float p_width) override final; + + // ----- PIPELINE ----- + + virtual PipelineID render_pipeline_create( + ShaderID p_shader, + VertexFormatID p_vertex_format, + RenderPrimitive p_render_primitive, + PipelineRasterizationState p_rasterization_state, + PipelineMultisampleState p_multisample_state, + PipelineDepthStencilState p_depth_stencil_state, + PipelineColorBlendState p_blend_state, + VectorView<int32_t> p_color_attachments, + BitField<PipelineDynamicStateFlags> p_dynamic_state, + RenderPassID p_render_pass, + uint32_t p_render_subpass, + VectorView<PipelineSpecializationConstant> p_specialization_constants) override final; + +#pragma mark - Compute + + // ----- COMMANDS ----- + + // Binding. + virtual void command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final; + virtual void command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final; + + // Dispatching. + virtual void command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) override final; + virtual void command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) override final; + + // ----- PIPELINE ----- + + virtual PipelineID compute_pipeline_create(ShaderID p_shader, VectorView<PipelineSpecializationConstant> p_specialization_constants) override final; + +#pragma mark - Queries + + // ----- TIMESTAMP ----- + + // Basic. 
+ virtual QueryPoolID timestamp_query_pool_create(uint32_t p_query_count) override final; + virtual void timestamp_query_pool_free(QueryPoolID p_pool_id) override final; + virtual void timestamp_query_pool_get_results(QueryPoolID p_pool_id, uint32_t p_query_count, uint64_t *r_results) override final; + virtual uint64_t timestamp_query_result_to_time(uint64_t p_result) override final; + + // Commands. + virtual void command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) override final; + virtual void command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) override final; + +#pragma mark - Labels + + virtual void command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) override final; + virtual void command_end_label(CommandBufferID p_cmd_buffer) override final; + +#pragma mark - Debug + + virtual void command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) override final; + +#pragma mark - Submission + + virtual void begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) override final; + virtual void end_segment() override final; + +#pragma mark - Miscellaneous + + virtual void set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) override final; + virtual uint64_t get_resource_native_handle(DriverResource p_type, ID p_driver_id) override final; + virtual uint64_t get_total_memory_used() override final; + virtual uint64_t limit_get(Limit p_limit) override final; + virtual uint64_t api_trait_get(ApiTrait p_trait) override final; + virtual bool has_feature(Features p_feature) override final; + virtual const MultiviewCapabilities &get_multiview_capabilities() override final; + virtual String get_api_name() const override final { return "Metal"; }; + virtual String get_api_version() const override final; + virtual String get_pipeline_cache_uuid() const override final; + virtual const 
Capabilities &get_capabilities() const override final; + virtual bool is_composite_alpha_supported(CommandQueueID p_queue) const override final; + + // Metal-specific. + id<MTLDevice> get_device() const { return device; } + PixelFormats &get_pixel_formats() const { return *pixel_formats; } + MDResourceCache &get_resource_cache() const { return *resource_cache; } + MetalDeviceProperties const &get_device_properties() const { return *metal_device_properties; } + + _FORCE_INLINE_ uint32_t get_metal_buffer_index_for_vertex_attribute_binding(uint32_t p_binding) { + return (metal_device_properties->limits.maxPerStageBufferCount - 1) - p_binding; + } + + size_t get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const; + size_t get_texel_buffer_alignment_for_format(MTLPixelFormat p_format) const; + + /******************/ + RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver); + ~RenderingDeviceDriverMetal(); +}; + +#endif // RENDERING_DEVICE_DRIVER_METAL_H diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm new file mode 100644 index 0000000000..9d691a0d23 --- /dev/null +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -0,0 +1,3965 @@ +/**************************************************************************/ +/* rendering_device_driver_metal.mm */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +/**************************************************************************/ +/* */ +/* Portions of this code were derived from MoltenVK. */ +/* */ +/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */ +/* (http://www.brenwill.com) */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. 
See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/**************************************************************************/ + +#import "rendering_device_driver_metal.h" + +#import "pixel_formats.h" +#import "rendering_context_driver_metal.h" + +#import "core/io/compression.h" +#import "core/io/marshalls.h" +#import "core/string/ustring.h" +#import "core/templates/hash_map.h" + +#import <Metal/MTLTexture.h> +#import <Metal/Metal.h> +#import <os/log.h> +#import <os/signpost.h> +#import <spirv_msl.hpp> +#import <spirv_parser.hpp> + +#pragma mark - Logging + +os_log_t LOG_DRIVER; +// Used for dynamic tracing. +os_log_t LOG_INTERVALS; + +__attribute__((constructor)) static void InitializeLogging(void) { + LOG_DRIVER = os_log_create("org.godotengine.godot.metal", OS_LOG_CATEGORY_POINTS_OF_INTEREST); + LOG_INTERVALS = os_log_create("org.godotengine.godot.metal", "events"); +} + +/*****************/ +/**** GENERIC ****/ +/*****************/ + +// RDD::CompareOperator == VkCompareOp. 
+static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NEVER, MTLCompareFunctionNever)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS, MTLCompareFunctionLess)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_EQUAL, MTLCompareFunctionEqual)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS_OR_EQUAL, MTLCompareFunctionLessEqual)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER, MTLCompareFunctionGreater)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NOT_EQUAL, MTLCompareFunctionNotEqual)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER_OR_EQUAL, MTLCompareFunctionGreaterEqual)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_ALWAYS, MTLCompareFunctionAlways)); + +_FORCE_INLINE_ MTLSize mipmapLevelSizeFromTexture(id<MTLTexture> p_tex, NSUInteger p_level) { + MTLSize lvlSize; + lvlSize.width = MAX(p_tex.width >> p_level, 1UL); + lvlSize.height = MAX(p_tex.height >> p_level, 1UL); + lvlSize.depth = MAX(p_tex.depth >> p_level, 1UL); + return lvlSize; +} + +_FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_level) { + if (p_level == 0) { + return p_size; + } + + MTLSize lvlSize; + lvlSize.width = MAX(p_size.width >> p_level, 1UL); + lvlSize.height = MAX(p_size.height >> p_level, 1UL); + lvlSize.depth = MAX(p_size.depth >> p_level, 1UL); + return lvlSize; +} + +_FORCE_INLINE_ static bool operator==(MTLSize p_a, MTLSize p_b) { + return p_a.width == p_b.width && p_a.height == p_b.height && p_a.depth == p_b.depth; +} + +/*****************/ +/**** BUFFERS ****/ +/*****************/ + +RDD::BufferID RenderingDeviceDriverMetal::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) { + MTLResourceOptions options = MTLResourceHazardTrackingModeTracked; + switch (p_allocation_type) { + case MEMORY_ALLOCATION_TYPE_CPU: + options |= MTLResourceStorageModeShared; + break; + case MEMORY_ALLOCATION_TYPE_GPU: + options |= MTLResourceStorageModePrivate; + break; + } + 
+ id<MTLBuffer> obj = [device newBufferWithLength:p_size options:options]; + ERR_FAIL_NULL_V_MSG(obj, BufferID(), "Can't create buffer of size: " + itos(p_size)); + return rid::make(obj); +} + +bool RenderingDeviceDriverMetal::buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) { + // Nothing to do. + return true; +} + +void RenderingDeviceDriverMetal::buffer_free(BufferID p_buffer) { + rid::release(p_buffer); +} + +uint64_t RenderingDeviceDriverMetal::buffer_get_allocation_size(BufferID p_buffer) { + id<MTLBuffer> obj = rid::get(p_buffer); + return obj.allocatedSize; +} + +uint8_t *RenderingDeviceDriverMetal::buffer_map(BufferID p_buffer) { + id<MTLBuffer> obj = rid::get(p_buffer); + ERR_FAIL_COND_V_MSG(obj.storageMode != MTLStorageModeShared, nullptr, "Unable to map private buffers"); + return (uint8_t *)obj.contents; +} + +void RenderingDeviceDriverMetal::buffer_unmap(BufferID p_buffer) { + // Nothing to do. +} + +#pragma mark - Texture + +#pragma mark - Format Conversions + +static const MTLTextureType TEXTURE_TYPE[RD::TEXTURE_TYPE_MAX] = { + MTLTextureType1D, + MTLTextureType2D, + MTLTextureType3D, + MTLTextureTypeCube, + MTLTextureType1DArray, + MTLTextureType2DArray, + MTLTextureTypeCubeArray, +}; + +RenderingDeviceDriverMetal::Result<bool> RenderingDeviceDriverMetal::is_valid_linear(TextureFormat const &p_format) const { + if (!flags::any(p_format.usage_bits, TEXTURE_USAGE_CPU_READ_BIT)) { + return false; + } + + PixelFormats &pf = *pixel_formats; + MTLFormatType ft = pf.getFormatType(p_format.format); + + // Requesting a linear format, which has further restrictions, similar to Vulkan + // when specifying VK_IMAGE_TILING_LINEAR. 
+ + ERR_FAIL_COND_V_MSG(p_format.texture_type != TEXTURE_TYPE_2D, ERR_CANT_CREATE, "Linear (TEXTURE_USAGE_CPU_READ_BIT) textures must be 2D"); + ERR_FAIL_COND_V_MSG(ft != MTLFormatType::DepthStencil, ERR_CANT_CREATE, "Linear (TEXTURE_USAGE_CPU_READ_BIT) textures must not be a depth/stencil format"); + ERR_FAIL_COND_V_MSG(ft != MTLFormatType::Compressed, ERR_CANT_CREATE, "Linear (TEXTURE_USAGE_CPU_READ_BIT) textures must not be a compressed format"); + ERR_FAIL_COND_V_MSG(p_format.mipmaps != 1, ERR_CANT_CREATE, "Linear (TEXTURE_USAGE_CPU_READ_BIT) textures must have 1 mipmap level"); + ERR_FAIL_COND_V_MSG(p_format.array_layers != 1, ERR_CANT_CREATE, "Linear (TEXTURE_USAGE_CPU_READ_BIT) textures must have 1 array layer"); + ERR_FAIL_COND_V_MSG(p_format.samples != TEXTURE_SAMPLES_1, ERR_CANT_CREATE, "Linear (TEXTURE_USAGE_CPU_READ_BIT) textures must have 1 sample"); + + return true; +} + +RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p_format, const TextureView &p_view) { + MTLTextureDescriptor *desc = [MTLTextureDescriptor new]; + desc.textureType = TEXTURE_TYPE[p_format.texture_type]; + + PixelFormats &formats = *pixel_formats; + desc.pixelFormat = formats.getMTLPixelFormat(p_format.format); + MTLFmtCaps format_caps = formats.getCapabilities(desc.pixelFormat); + + desc.width = p_format.width; + desc.height = p_format.height; + desc.depth = p_format.depth; + desc.mipmapLevelCount = p_format.mipmaps; + + if (p_format.texture_type == TEXTURE_TYPE_1D_ARRAY || + p_format.texture_type == TEXTURE_TYPE_2D_ARRAY) { + desc.arrayLength = p_format.array_layers; + } else if (p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY) { + desc.arrayLength = p_format.array_layers / 6; + } + + // TODO(sgc): Evaluate lossy texture support (perhaps as a project option?) 
+ // https://developer.apple.com/videos/play/tech-talks/10876?time=459 + // desc.compressionType = MTLTextureCompressionTypeLossy; + + if (p_format.samples > TEXTURE_SAMPLES_1) { + SampleCount supported = (*metal_device_properties).find_nearest_supported_sample_count(p_format.samples); + + if (supported > SampleCount1) { + bool ok = p_format.texture_type == TEXTURE_TYPE_2D || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY; + if (ok) { + switch (p_format.texture_type) { + case TEXTURE_TYPE_2D: + desc.textureType = MTLTextureType2DMultisample; + break; + case TEXTURE_TYPE_2D_ARRAY: + desc.textureType = MTLTextureType2DMultisampleArray; + break; + default: + break; + } + desc.sampleCount = (NSUInteger)supported; + if (p_format.mipmaps > 1) { + // For a buffer-backed or multi-sample texture, the value must be 1. + WARN_PRINT("mipmaps == 1 for multi-sample textures"); + desc.mipmapLevelCount = 1; + } + } else { + WARN_PRINT("Unsupported multi-sample texture type; disabling multi-sample"); + } + } + } + + static const MTLTextureSwizzle COMPONENT_SWIZZLE[TEXTURE_SWIZZLE_MAX] = { + static_cast<MTLTextureSwizzle>(255), // IDENTITY + MTLTextureSwizzleZero, + MTLTextureSwizzleOne, + MTLTextureSwizzleRed, + MTLTextureSwizzleGreen, + MTLTextureSwizzleBlue, + MTLTextureSwizzleAlpha, + }; + + MTLTextureSwizzleChannels swizzle = MTLTextureSwizzleChannelsMake( + p_view.swizzle_r != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_r] : MTLTextureSwizzleRed, + p_view.swizzle_g != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_g] : MTLTextureSwizzleGreen, + p_view.swizzle_b != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_b] : MTLTextureSwizzleBlue, + p_view.swizzle_a != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_a] : MTLTextureSwizzleAlpha); + + // Represents a swizzle operation that is a no-op. 
+ static MTLTextureSwizzleChannels IDENTITY_SWIZZLE = { + .red = MTLTextureSwizzleRed, + .green = MTLTextureSwizzleGreen, + .blue = MTLTextureSwizzleBlue, + .alpha = MTLTextureSwizzleAlpha, + }; + + bool no_swizzle = memcmp(&IDENTITY_SWIZZLE, &swizzle, sizeof(MTLTextureSwizzleChannels)) == 0; + if (!no_swizzle) { + desc.swizzle = swizzle; + } + + // Usage. + MTLResourceOptions options = MTLResourceCPUCacheModeDefaultCache | MTLResourceHazardTrackingModeTracked; + if (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) { + options |= MTLResourceStorageModeShared; + } else { + options |= MTLResourceStorageModePrivate; + } + desc.resourceOptions = options; + + if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) { + desc.usage |= MTLTextureUsageShaderRead; + } + + if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) { + desc.usage |= MTLTextureUsageShaderWrite; + } + + if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_ATOMIC_BIT) { + desc.usage |= MTLTextureUsageShaderWrite; + } + + bool can_be_attachment = flags::any(format_caps, (kMTLFmtCapsColorAtt | kMTLFmtCapsDSAtt)); + + if (flags::any(p_format.usage_bits, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) && + can_be_attachment) { + desc.usage |= MTLTextureUsageRenderTarget; + } + + if (p_format.usage_bits & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT) { + desc.usage |= MTLTextureUsageShaderRead; + } + + if (p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) { + ERR_FAIL_V_MSG(RDD::TextureID(), "unsupported: TEXTURE_USAGE_VRS_ATTACHMENT_BIT"); + } + + if (flags::any(p_format.usage_bits, TEXTURE_USAGE_CAN_UPDATE_BIT | TEXTURE_USAGE_CAN_COPY_TO_BIT) && + can_be_attachment && no_swizzle) { + // Per MoltenVK, can be cleared as a render attachment. + desc.usage |= MTLTextureUsageRenderTarget; + } + if (p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_FROM_BIT) { + // Covered by blits. + } + + // Create texture views with a different component layout. 
+ if (!p_format.shareable_formats.is_empty()) { + desc.usage |= MTLTextureUsagePixelFormatView; + } + + // Allocate memory. + + bool is_linear; + { + Result<bool> is_linear_or_err = is_valid_linear(p_format); + ERR_FAIL_COND_V(std::holds_alternative<Error>(is_linear_or_err), TextureID()); + is_linear = std::get<bool>(is_linear_or_err); + } + + // Check if it is a linear format for atomic operations and therefore needs a buffer, + // as generally Metal does not support atomic operations on textures. + bool needs_buffer = is_linear || (p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D && flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && (p_format.format == DATA_FORMAT_R32_UINT || p_format.format == DATA_FORMAT_R32_SINT)); + + id<MTLTexture> obj = nil; + if (needs_buffer) { + // Linear textures are restricted to 2D textures, a single mipmap level and a single array layer. + MTLPixelFormat pixel_format = desc.pixelFormat; + size_t row_alignment = get_texel_buffer_alignment_for_format(p_format.format); + size_t bytes_per_row = formats.getBytesPerRow(pixel_format, p_format.width); + bytes_per_row = round_up_to_alignment(bytes_per_row, row_alignment); + size_t bytes_per_layer = formats.getBytesPerLayer(pixel_format, bytes_per_row, p_format.height); + size_t byte_count = bytes_per_layer * p_format.depth * p_format.array_layers; + + id<MTLBuffer> buf = [device newBufferWithLength:byte_count options:options]; + obj = [buf newTextureWithDescriptor:desc offset:0 bytesPerRow:bytes_per_row]; + } else { + obj = [device newTextureWithDescriptor:desc]; + } + ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create texture."); + + return rid::make(obj); +} + +RDD::TextureID RenderingDeviceDriverMetal::texture_create_from_extension(uint64_t p_native_texture, TextureType p_type, DataFormat p_format, uint32_t p_array_layers, bool p_depth_stencil) { + ERR_FAIL_V_MSG(RDD::TextureID(), "not implemented"); +} + +RDD::TextureID 
RenderingDeviceDriverMetal::texture_create_shared(TextureID p_original_texture, const TextureView &p_view) { + id<MTLTexture> src_texture = rid::get(p_original_texture); + +#if DEV_ENABLED + if (src_texture.sampleCount > 1) { + // TODO(sgc): is it ok to create a shared texture from a multi-sample texture? + WARN_PRINT("Is it safe to create a shared texture from multi-sample texture?"); + } +#endif + + MTLPixelFormat format = pixel_formats->getMTLPixelFormat(p_view.format); + + static const MTLTextureSwizzle component_swizzle[TEXTURE_SWIZZLE_MAX] = { + static_cast<MTLTextureSwizzle>(255), // IDENTITY + MTLTextureSwizzleZero, + MTLTextureSwizzleOne, + MTLTextureSwizzleRed, + MTLTextureSwizzleGreen, + MTLTextureSwizzleBlue, + MTLTextureSwizzleAlpha, + }; + +#define SWIZZLE(C, CHAN) (p_view.swizzle_##C != TEXTURE_SWIZZLE_IDENTITY ? component_swizzle[p_view.swizzle_##C] : MTLTextureSwizzle##CHAN) + MTLTextureSwizzleChannels swizzle = MTLTextureSwizzleChannelsMake( + SWIZZLE(r, Red), + SWIZZLE(g, Green), + SWIZZLE(b, Blue), + SWIZZLE(a, Alpha)); +#undef SWIZZLE + id<MTLTexture> obj = [src_texture newTextureViewWithPixelFormat:format + textureType:src_texture.textureType + levels:NSMakeRange(0, src_texture.mipmapLevelCount) + slices:NSMakeRange(0, src_texture.arrayLength) + swizzle:swizzle]; + ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create shared texture"); + return rid::make(obj); +} + +RDD::TextureID RenderingDeviceDriverMetal::texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) { + id<MTLTexture> src_texture = rid::get(p_original_texture); + + static const MTLTextureType VIEW_TYPES[] = { + MTLTextureType1D, // MTLTextureType1D + MTLTextureType1D, // MTLTextureType1DArray + MTLTextureType2D, // MTLTextureType2D + MTLTextureType2D, // MTLTextureType2DArray + MTLTextureType2D, // MTLTextureType2DMultisample + 
MTLTextureType2D, // MTLTextureTypeCube + MTLTextureType2D, // MTLTextureTypeCubeArray + MTLTextureType2D, // MTLTextureType3D + MTLTextureType2D, // MTLTextureType2DMultisampleArray + }; + + MTLTextureType textureType = VIEW_TYPES[src_texture.textureType]; + switch (p_slice_type) { + case TEXTURE_SLICE_2D: { + textureType = MTLTextureType2D; + } break; + case TEXTURE_SLICE_3D: { + textureType = MTLTextureType3D; + } break; + case TEXTURE_SLICE_CUBEMAP: { + textureType = MTLTextureTypeCube; + } break; + case TEXTURE_SLICE_2D_ARRAY: { + textureType = MTLTextureType2DArray; + } break; + case TEXTURE_SLICE_MAX: { + ERR_FAIL_V_MSG(TextureID(), "Invalid texture slice type"); + } break; + } + + MTLPixelFormat format = pixel_formats->getMTLPixelFormat(p_view.format); + + static const MTLTextureSwizzle component_swizzle[TEXTURE_SWIZZLE_MAX] = { + static_cast<MTLTextureSwizzle>(255), // IDENTITY + MTLTextureSwizzleZero, + MTLTextureSwizzleOne, + MTLTextureSwizzleRed, + MTLTextureSwizzleGreen, + MTLTextureSwizzleBlue, + MTLTextureSwizzleAlpha, + }; + +#define SWIZZLE(C, CHAN) (p_view.swizzle_##C != TEXTURE_SWIZZLE_IDENTITY ? 
component_swizzle[p_view.swizzle_##C] : MTLTextureSwizzle##CHAN) + MTLTextureSwizzleChannels swizzle = MTLTextureSwizzleChannelsMake( + SWIZZLE(r, Red), + SWIZZLE(g, Green), + SWIZZLE(b, Blue), + SWIZZLE(a, Alpha)); +#undef SWIZZLE + id<MTLTexture> obj = [src_texture newTextureViewWithPixelFormat:format + textureType:textureType + levels:NSMakeRange(p_mipmap, p_mipmaps) + slices:NSMakeRange(p_layer, p_layers) + swizzle:swizzle]; + ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create shared texture"); + return rid::make(obj); +} + +void RenderingDeviceDriverMetal::texture_free(TextureID p_texture) { + rid::release(p_texture); +} + +uint64_t RenderingDeviceDriverMetal::texture_get_allocation_size(TextureID p_texture) { + id<MTLTexture> obj = rid::get(p_texture); + return obj.allocatedSize; +} + +void RenderingDeviceDriverMetal::_get_sub_resource(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) const { + id<MTLTexture> obj = rid::get(p_texture); + + *r_layout = {}; + + PixelFormats &pf = *pixel_formats; + + size_t row_alignment = get_texel_buffer_alignment_for_format(obj.pixelFormat); + size_t offset = 0; + size_t array_layers = obj.arrayLength; + MTLSize size = MTLSizeMake(obj.width, obj.height, obj.depth); + MTLPixelFormat pixel_format = obj.pixelFormat; + + // First skip over the mipmap levels. + for (uint32_t mipLvl = 0; mipLvl < p_subresource.mipmap; mipLvl++) { + MTLSize mip_size = mipmapLevelSizeFromSize(size, mipLvl); + size_t bytes_per_row = pf.getBytesPerRow(pixel_format, mip_size.width); + bytes_per_row = round_up_to_alignment(bytes_per_row, row_alignment); + size_t bytes_per_layer = pf.getBytesPerLayer(pixel_format, bytes_per_row, mip_size.height); + offset += bytes_per_layer * mip_size.depth * array_layers; + } + + // Get current mipmap. 
+ MTLSize mip_size = mipmapLevelSizeFromSize(size, p_subresource.mipmap); + size_t bytes_per_row = pf.getBytesPerRow(pixel_format, mip_size.width); + bytes_per_row = round_up_to_alignment(bytes_per_row, row_alignment); + size_t bytes_per_layer = pf.getBytesPerLayer(pixel_format, bytes_per_row, mip_size.height); + r_layout->size = bytes_per_layer * mip_size.depth; + r_layout->offset = offset + (r_layout->size * p_subresource.layer - 1); + r_layout->depth_pitch = bytes_per_layer; + r_layout->row_pitch = bytes_per_row; + r_layout->layer_pitch = r_layout->size * array_layers; +} + +void RenderingDeviceDriverMetal::texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) { + id<MTLTexture> obj = rid::get(p_texture); + *r_layout = {}; + + if ((obj.resourceOptions & MTLResourceStorageModePrivate) != 0) { + MTLSize sz = MTLSizeMake(obj.width, obj.height, obj.depth); + + PixelFormats &pf = *pixel_formats; + DataFormat format = pf.getDataFormat(obj.pixelFormat); + if (p_subresource.mipmap > 0) { + r_layout->offset = get_image_format_required_size(format, sz.width, sz.height, sz.depth, p_subresource.mipmap); + } + + sz = mipmapLevelSizeFromSize(sz, p_subresource.mipmap); + + uint32_t bw = 0, bh = 0; + get_compressed_image_format_block_dimensions(format, bw, bh); + uint32_t sbw = 0, sbh = 0; + r_layout->size = get_image_format_required_size(format, sz.width, sz.height, sz.depth, 1, &sbw, &sbh); + r_layout->row_pitch = r_layout->size / ((sbh / bh) * sz.depth); + r_layout->depth_pitch = r_layout->size / sz.depth; + r_layout->layer_pitch = r_layout->size / obj.arrayLength; + } else { + CRASH_NOW_MSG("need to calculate layout for shared texture"); + } +} + +uint8_t *RenderingDeviceDriverMetal::texture_map(TextureID p_texture, const TextureSubresource &p_subresource) { + id<MTLTexture> obj = rid::get(p_texture); + ERR_FAIL_NULL_V_MSG(obj.buffer, nullptr, "texture is not created from a buffer"); + + 
TextureCopyableLayout layout; + _get_sub_resource(p_texture, p_subresource, &layout); + return (uint8_t *)(obj.buffer.contents) + layout.offset; + PixelFormats &pf = *pixel_formats; + + size_t row_alignment = get_texel_buffer_alignment_for_format(obj.pixelFormat); + size_t offset = 0; + size_t array_layers = obj.arrayLength; + MTLSize size = MTLSizeMake(obj.width, obj.height, obj.depth); + MTLPixelFormat pixel_format = obj.pixelFormat; + + // First skip over the mipmap levels. + for (uint32_t mipLvl = 0; mipLvl < p_subresource.mipmap; mipLvl++) { + MTLSize mipExtent = mipmapLevelSizeFromSize(size, mipLvl); + size_t bytes_per_row = pf.getBytesPerRow(pixel_format, mipExtent.width); + bytes_per_row = round_up_to_alignment(bytes_per_row, row_alignment); + size_t bytes_per_layer = pf.getBytesPerLayer(pixel_format, bytes_per_row, mipExtent.height); + offset += bytes_per_layer * mipExtent.depth * array_layers; + } + + if (p_subresource.layer > 1) { + // Calculate offset to desired layer. + MTLSize mipExtent = mipmapLevelSizeFromSize(size, p_subresource.mipmap); + size_t bytes_per_row = pf.getBytesPerRow(pixel_format, mipExtent.width); + bytes_per_row = round_up_to_alignment(bytes_per_row, row_alignment); + size_t bytes_per_layer = pf.getBytesPerLayer(pixel_format, bytes_per_row, mipExtent.height); + offset += bytes_per_layer * mipExtent.depth * (p_subresource.layer - 1); + } + + // TODO: Confirm with rendering team that there is no other way Godot may attempt to map a texture with multiple mipmaps or array layers. + + // NOTE: It is not possible to create a buffer-backed texture with mipmaps or array layers, + // as noted in the is_valid_linear function, so the offset calculation SHOULD always be zero. + // Given that, this code should be simplified. + + return (uint8_t *)(obj.buffer.contents) + offset; +} + +void RenderingDeviceDriverMetal::texture_unmap(TextureID p_texture) { + // Nothing to do. 
+} + +BitField<RDD::TextureUsageBits> RenderingDeviceDriverMetal::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) { + PixelFormats &pf = *pixel_formats; + if (pf.getMTLPixelFormat(p_format) == MTLPixelFormatInvalid) { + return 0; + } + + MTLFmtCaps caps = pf.getCapabilities(p_format); + + // Everything supported by default makes an all-or-nothing check easier for the caller. + BitField<RDD::TextureUsageBits> supported = INT64_MAX; + supported.clear_flag(TEXTURE_USAGE_VRS_ATTACHMENT_BIT); // No VRS support for Metal. + + if (!flags::any(caps, kMTLFmtCapsColorAtt)) { + supported.clear_flag(TEXTURE_USAGE_COLOR_ATTACHMENT_BIT); + } + if (!flags::any(caps, kMTLFmtCapsDSAtt)) { + supported.clear_flag(TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); + } + if (!flags::any(caps, kMTLFmtCapsRead)) { + supported.clear_flag(TEXTURE_USAGE_SAMPLING_BIT); + } + if (!flags::any(caps, kMTLFmtCapsAtomic)) { + supported.clear_flag(TEXTURE_USAGE_STORAGE_ATOMIC_BIT); + } + + return supported; +} + +bool RenderingDeviceDriverMetal::texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) { + r_raw_reinterpretation = false; + return true; +} + +#pragma mark - Sampler + +static const MTLCompareFunction COMPARE_OPERATORS[RD::COMPARE_OP_MAX] = { + MTLCompareFunctionNever, + MTLCompareFunctionLess, + MTLCompareFunctionEqual, + MTLCompareFunctionLessEqual, + MTLCompareFunctionGreater, + MTLCompareFunctionNotEqual, + MTLCompareFunctionGreaterEqual, + MTLCompareFunctionAlways, +}; + +static const MTLStencilOperation STENCIL_OPERATIONS[RD::STENCIL_OP_MAX] = { + MTLStencilOperationKeep, + MTLStencilOperationZero, + MTLStencilOperationReplace, + MTLStencilOperationIncrementClamp, + MTLStencilOperationDecrementClamp, + MTLStencilOperationInvert, + MTLStencilOperationIncrementWrap, + MTLStencilOperationDecrementWrap, +}; + +static const MTLBlendFactor BLEND_FACTORS[RD::BLEND_FACTOR_MAX] = { + MTLBlendFactorZero, + 
MTLBlendFactorOne, + MTLBlendFactorSourceColor, + MTLBlendFactorOneMinusSourceColor, + MTLBlendFactorDestinationColor, + MTLBlendFactorOneMinusDestinationColor, + MTLBlendFactorSourceAlpha, + MTLBlendFactorOneMinusSourceAlpha, + MTLBlendFactorDestinationAlpha, + MTLBlendFactorOneMinusDestinationAlpha, + MTLBlendFactorBlendColor, + MTLBlendFactorOneMinusBlendColor, + MTLBlendFactorBlendAlpha, + MTLBlendFactorOneMinusBlendAlpha, + MTLBlendFactorSourceAlphaSaturated, + MTLBlendFactorSource1Color, + MTLBlendFactorOneMinusSource1Color, + MTLBlendFactorSource1Alpha, + MTLBlendFactorOneMinusSource1Alpha, +}; +static const MTLBlendOperation BLEND_OPERATIONS[RD::BLEND_OP_MAX] = { + MTLBlendOperationAdd, + MTLBlendOperationSubtract, + MTLBlendOperationReverseSubtract, + MTLBlendOperationMin, + MTLBlendOperationMax, +}; + +static const API_AVAILABLE(macos(11.0), ios(14.0)) MTLSamplerAddressMode ADDRESS_MODES[RD::SAMPLER_REPEAT_MODE_MAX] = { + MTLSamplerAddressModeRepeat, + MTLSamplerAddressModeMirrorRepeat, + MTLSamplerAddressModeClampToEdge, + MTLSamplerAddressModeClampToBorderColor, + MTLSamplerAddressModeMirrorClampToEdge, +}; + +static const API_AVAILABLE(macos(11.0), ios(14.0)) MTLSamplerBorderColor SAMPLER_BORDER_COLORS[RD::SAMPLER_BORDER_COLOR_MAX] = { + MTLSamplerBorderColorTransparentBlack, + MTLSamplerBorderColorTransparentBlack, + MTLSamplerBorderColorOpaqueBlack, + MTLSamplerBorderColorOpaqueBlack, + MTLSamplerBorderColorOpaqueWhite, + MTLSamplerBorderColorOpaqueWhite, +}; + +RDD::SamplerID RenderingDeviceDriverMetal::sampler_create(const SamplerState &p_state) { + MTLSamplerDescriptor *desc = [MTLSamplerDescriptor new]; + desc.supportArgumentBuffers = YES; + + desc.magFilter = p_state.mag_filter == SAMPLER_FILTER_LINEAR ? MTLSamplerMinMagFilterLinear : MTLSamplerMinMagFilterNearest; + desc.minFilter = p_state.min_filter == SAMPLER_FILTER_LINEAR ? 
MTLSamplerMinMagFilterLinear : MTLSamplerMinMagFilterNearest;
	desc.mipFilter = p_state.mip_filter == SAMPLER_FILTER_LINEAR ? MTLSamplerMipFilterLinear : MTLSamplerMipFilterNearest;

	desc.sAddressMode = ADDRESS_MODES[p_state.repeat_u];
	desc.tAddressMode = ADDRESS_MODES[p_state.repeat_v];
	desc.rAddressMode = ADDRESS_MODES[p_state.repeat_w];

	if (p_state.use_anisotropy) {
		desc.maxAnisotropy = p_state.anisotropy_max;
	}

	desc.compareFunction = COMPARE_OPERATORS[p_state.compare_op];

	desc.lodMinClamp = p_state.min_lod;
	desc.lodMaxClamp = p_state.max_lod;

	desc.borderColor = SAMPLER_BORDER_COLORS[p_state.border_color];

	desc.normalizedCoordinates = !p_state.unnormalized_uvw;

	if (p_state.lod_bias != 0.0) {
		WARN_VERBOSE("Metal does not support LOD bias for samplers.");
	}

	id<MTLSamplerState> obj = [device newSamplerStateWithDescriptor:desc];
	ERR_FAIL_NULL_V_MSG(obj, SamplerID(), "newSamplerStateWithDescriptor failed");
	return rid::make(obj);
}

// Releases the object referenced by `p_sampler`.
void RenderingDeviceDriverMetal::sampler_free(SamplerID p_sampler) {
	rid::release(p_sampler);
}

// Reports whether textures of `p_format` can be sampled with `p_filter`.
// Nearest filtering is always accepted; linear filtering requires the format's
// capabilities to include kMTLFmtCapsFilter.
bool RenderingDeviceDriverMetal::sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_filter) {
	switch (p_filter) {
		case SAMPLER_FILTER_NEAREST:
			return true;
		case SAMPLER_FILTER_LINEAR: {
			MTLFmtCaps caps = pixel_formats->getCapabilities(p_format);
			return flags::any(caps, kMTLFmtCapsFilter);
		}
	}
	// Reached only for a filter value outside the enumerators handled above.
	// Without this, control would flow off the end of a non-void function (undefined behavior).
	return false;
}

#pragma mark - Vertex Array

RDD::VertexFormatID RenderingDeviceDriverMetal::vertex_format_create(VectorView<VertexAttribute> p_vertex_attribs) {
	MTLVertexDescriptor *desc = MTLVertexDescriptor.vertexDescriptor;

	for (uint32_t i = 0; i < p_vertex_attribs.size(); i++) {
		VertexAttribute const &vf = p_vertex_attribs[i];

		ERR_FAIL_COND_V_MSG(get_format_vertex_size(vf.format) == 0, VertexFormatID(),
				"Data format for attachment (" + itos(i) + "), '" + FORMAT_NAMES[vf.format] + "', is not valid for a vertex array.");

desc.attributes[vf.location].format = pixel_formats->getMTLVertexFormat(vf.format); + desc.attributes[vf.location].offset = vf.offset; + uint32_t idx = get_metal_buffer_index_for_vertex_attribute_binding(i); + desc.attributes[vf.location].bufferIndex = idx; + if (vf.stride == 0) { + desc.layouts[idx].stepFunction = MTLVertexStepFunctionConstant; + desc.layouts[idx].stepRate = 0; + desc.layouts[idx].stride = pixel_formats->getBytesPerBlock(vf.format); + } else { + desc.layouts[idx].stepFunction = vf.frequency == VERTEX_FREQUENCY_VERTEX ? MTLVertexStepFunctionPerVertex : MTLVertexStepFunctionPerInstance; + desc.layouts[idx].stepRate = 1; + desc.layouts[idx].stride = vf.stride; + } + } + + return rid::make(desc); +} + +void RenderingDeviceDriverMetal::vertex_format_free(VertexFormatID p_vertex_format) { + rid::release(p_vertex_format); +} + +#pragma mark - Barriers + +void RenderingDeviceDriverMetal::command_pipeline_barrier( + CommandBufferID p_cmd_buffer, + BitField<PipelineStageBits> p_src_stages, + BitField<PipelineStageBits> p_dst_stages, + VectorView<MemoryBarrier> p_memory_barriers, + VectorView<BufferBarrier> p_buffer_barriers, + VectorView<TextureBarrier> p_texture_barriers) { + WARN_PRINT_ONCE("not implemented"); +} + +#pragma mark - Fences + +RDD::FenceID RenderingDeviceDriverMetal::fence_create() { + Fence *fence = memnew(Fence); + return FenceID(fence); +} + +Error RenderingDeviceDriverMetal::fence_wait(FenceID p_fence) { + Fence *fence = (Fence *)(p_fence.id); + + // Wait forever, so this function is infallible. + dispatch_semaphore_wait(fence->semaphore, DISPATCH_TIME_FOREVER); + + return OK; +} + +void RenderingDeviceDriverMetal::fence_free(FenceID p_fence) { + Fence *fence = (Fence *)(p_fence.id); + memdelete(fence); +} + +#pragma mark - Semaphores + +RDD::SemaphoreID RenderingDeviceDriverMetal::semaphore_create() { + // Metal doesn't use semaphores, as their purpose within Godot is to ensure ordering of command buffer execution. 
+ return SemaphoreID(1); +} + +void RenderingDeviceDriverMetal::semaphore_free(SemaphoreID p_semaphore) { +} + +#pragma mark - Queues + +RDD::CommandQueueFamilyID RenderingDeviceDriverMetal::command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface) { + if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_GRAPHICS_BIT) || (p_surface != 0)) { + return CommandQueueFamilyID(COMMAND_QUEUE_FAMILY_GRAPHICS_BIT); + } else if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_COMPUTE_BIT)) { + return CommandQueueFamilyID(COMMAND_QUEUE_FAMILY_COMPUTE_BIT); + } else if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_TRANSFER_BIT)) { + return CommandQueueFamilyID(COMMAND_QUEUE_FAMILY_TRANSFER_BIT); + } else { + return CommandQueueFamilyID(); + } +} + +RDD::CommandQueueID RenderingDeviceDriverMetal::command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue) { + return CommandQueueID(1); +} + +Error RenderingDeviceDriverMetal::command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID>, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID>, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) { + uint32_t size = p_cmd_buffers.size(); + if (size == 0) { + return OK; + } + + for (uint32_t i = 0; i < size - 1; i++) { + MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[i].id); + cmd_buffer->commit(); + } + + // The last command buffer will signal the fence and semaphores. 
+ MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[size - 1].id); + Fence *fence = (Fence *)(p_cmd_fence.id); + if (fence != nullptr) { + [cmd_buffer->get_command_buffer() addCompletedHandler:^(id<MTLCommandBuffer> buffer) { + dispatch_semaphore_signal(fence->semaphore); + }]; + } + + for (uint32_t i = 0; i < p_swap_chains.size(); i++) { + SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id); + RenderingContextDriverMetal::Surface *metal_surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface); + metal_surface->present(cmd_buffer); + } + + cmd_buffer->commit(); + + if (p_swap_chains.size() > 0) { + // Used as a signal that we're presenting, so this is the end of a frame. + [device_scope endScope]; + [device_scope beginScope]; + } + + return OK; +} + +void RenderingDeviceDriverMetal::command_queue_free(CommandQueueID p_cmd_queue) { +} + +#pragma mark - Command Buffers + +// ----- POOL ----- + +RDD::CommandPoolID RenderingDeviceDriverMetal::command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) { + DEV_ASSERT(p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY); + return rid::make(device_queue); +} + +void RenderingDeviceDriverMetal::command_pool_free(CommandPoolID p_cmd_pool) { + rid::release(p_cmd_pool); +} + +// ----- BUFFER ----- + +RDD::CommandBufferID RenderingDeviceDriverMetal::command_buffer_create(CommandPoolID p_cmd_pool) { + id<MTLCommandQueue> queue = rid::get(p_cmd_pool); + MDCommandBuffer *obj = new MDCommandBuffer(queue, this); + command_buffers.push_back(obj); + return CommandBufferID(obj); +} + +bool RenderingDeviceDriverMetal::command_buffer_begin(CommandBufferID p_cmd_buffer) { + MDCommandBuffer *obj = (MDCommandBuffer *)(p_cmd_buffer.id); + obj->begin(); + return true; +} + +bool RenderingDeviceDriverMetal::command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) { + ERR_FAIL_V_MSG(false, "not 
implemented"); +} + +void RenderingDeviceDriverMetal::command_buffer_end(CommandBufferID p_cmd_buffer) { + MDCommandBuffer *obj = (MDCommandBuffer *)(p_cmd_buffer.id); + obj->end(); +} + +void RenderingDeviceDriverMetal::command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) { + ERR_FAIL_MSG("not implemented"); +} + +#pragma mark - Swap Chain + +void RenderingDeviceDriverMetal::_swap_chain_release(SwapChain *p_swap_chain) { + _swap_chain_release_buffers(p_swap_chain); +} + +void RenderingDeviceDriverMetal::_swap_chain_release_buffers(SwapChain *p_swap_chain) { +} + +RDD::SwapChainID RenderingDeviceDriverMetal::swap_chain_create(RenderingContextDriver::SurfaceID p_surface) { + RenderingContextDriverMetal::Surface const *surface = (RenderingContextDriverMetal::Surface *)(p_surface); + + // Create the render pass that will be used to draw to the swap chain's framebuffers. + RDD::Attachment attachment; + attachment.format = pixel_formats->getDataFormat(surface->get_pixel_format()); + attachment.samples = RDD::TEXTURE_SAMPLES_1; + attachment.load_op = RDD::ATTACHMENT_LOAD_OP_CLEAR; + attachment.store_op = RDD::ATTACHMENT_STORE_OP_STORE; + + RDD::Subpass subpass; + RDD::AttachmentReference color_ref; + color_ref.attachment = 0; + color_ref.aspect.set_flag(RDD::TEXTURE_ASPECT_COLOR_BIT); + subpass.color_references.push_back(color_ref); + + RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1); + ERR_FAIL_COND_V(!render_pass, SwapChainID()); + + // Create the empty swap chain until it is resized. 
+ SwapChain *swap_chain = memnew(SwapChain); + swap_chain->surface = p_surface; + swap_chain->data_format = attachment.format; + swap_chain->render_pass = render_pass; + return SwapChainID(swap_chain); +} + +Error RenderingDeviceDriverMetal::swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) { + DEV_ASSERT(p_cmd_queue.id != 0); + DEV_ASSERT(p_swap_chain.id != 0); + + SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id); + RenderingContextDriverMetal::Surface *surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface); + surface->resize(p_desired_framebuffer_count); + + // Once everything's been created correctly, indicate the surface no longer needs to be resized. + context_driver->surface_set_needs_resize(swap_chain->surface, false); + + return OK; +} + +RDD::FramebufferID RenderingDeviceDriverMetal::swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) { + DEV_ASSERT(p_cmd_queue.id != 0); + DEV_ASSERT(p_swap_chain.id != 0); + + SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id); + if (context_driver->surface_get_needs_resize(swap_chain->surface)) { + r_resize_required = true; + return FramebufferID(); + } + + RenderingContextDriverMetal::Surface *metal_surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface); + return metal_surface->acquire_next_frame_buffer(); +} + +RDD::RenderPassID RenderingDeviceDriverMetal::swap_chain_get_render_pass(SwapChainID p_swap_chain) { + const SwapChain *swap_chain = (const SwapChain *)(p_swap_chain.id); + return swap_chain->render_pass; +} + +RDD::DataFormat RenderingDeviceDriverMetal::swap_chain_get_format(SwapChainID p_swap_chain) { + const SwapChain *swap_chain = (const SwapChain *)(p_swap_chain.id); + return swap_chain->data_format; +} + +void RenderingDeviceDriverMetal::swap_chain_free(SwapChainID p_swap_chain) { + SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id); + 
/// Builds a framebuffer of `p_width` x `p_height` from the given attachments.
///
/// Each attachment texture is resolved and stored in order. In dev builds a
/// warning is printed when a texture is nil or when a multisampled attachment
/// of the render pass does not match the texture's sample count; the texture
/// is stored either way.
RDD::FramebufferID RenderingDeviceDriverMetal::framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height) {
	MDRenderPass *render_pass = (MDRenderPass *)(p_render_pass.id);

	Vector<MTL::Texture> resolved;
	resolved.resize(p_attachments.size());

	for (uint32_t idx = 0; idx < p_attachments.size(); idx++) {
		MDAttachment const &expected = render_pass->attachments[idx];
		id<MTLTexture> texture = rid::get(p_attachments[idx]);

		if (texture == nil) {
#if DEV_ENABLED
			WARN_PRINT("Invalid texture for attachment " + itos(idx));
#endif
		}

		// Only multisampled attachments are validated against the texture.
		if (expected.samples > 1 && texture.sampleCount != expected.samples) {
#if DEV_ENABLED
			WARN_PRINT("Mismatched sample count for attachment " + itos(idx) + "; expected " + itos(expected.samples) + ", got " + itos(texture.sampleCount));
#endif
		}

		resolved.write[idx] = texture;
	}

	// Released with `delete` in framebuffer_free().
	return FramebufferID(new MDFrameBuffer(resolved, Size2i(p_width, p_height)));
}
+ uint64_t pos = 0; + +public: + BufWriter(uint8_t *p_data, uint64_t p_length) : + data(p_data), length(p_length) {} + + template <Serializable T> + void write(T const &p_value) { + p_value.serialize(*this); + } + + _FORCE_INLINE_ void write(uint32_t p_value) { + DEV_ASSERT(pos + sizeof(uint32_t) <= length); + pos += encode_uint32(p_value, data + pos); + } + + _FORCE_INLINE_ void write(RD::ShaderStage p_value) { + write((uint32_t)p_value); + } + + _FORCE_INLINE_ void write(bool p_value) { + DEV_ASSERT(pos + sizeof(uint8_t) <= length); + *(data + pos) = p_value ? 1 : 0; + pos += 1; + } + + _FORCE_INLINE_ void write(int p_value) { + write((uint32_t)p_value); + } + + _FORCE_INLINE_ void write(uint64_t p_value) { + DEV_ASSERT(pos + sizeof(uint64_t) <= length); + pos += encode_uint64(p_value, data + pos); + } + + _FORCE_INLINE_ void write(float p_value) { + DEV_ASSERT(pos + sizeof(float) <= length); + pos += encode_float(p_value, data + pos); + } + + _FORCE_INLINE_ void write(double p_value) { + DEV_ASSERT(pos + sizeof(double) <= length); + pos += encode_double(p_value, data + pos); + } + + void write_compressed(CharString const &p_string) { + write(p_string.length()); // Uncompressed size. + + DEV_ASSERT(pos + sizeof(uint32_t) + Compression::get_max_compressed_buffer_size(p_string.length(), Compression::MODE_ZSTD) <= length); + + // Save pointer for compressed size. + uint8_t *dst_size_ptr = data + pos; // Compressed size. 
/// Sequential reader over a serialized shader binary buffer.
///
/// Every `read` overload first verifies that enough bytes remain. On the first
/// failure `status` is set and all subsequent reads become no-ops, so callers
/// can perform a whole sequence of reads and inspect `status` once at the end.
/// A failed read leaves its output argument untouched.
class BufReader {
	uint8_t const *data = nullptr;
	uint64_t length = 0;
	uint64_t pos = 0;

	/// Returns true when no error has been recorded and `p_size` more bytes
	/// are available; records SHORT_BUFFER otherwise.
	bool check_length(size_t p_size) {
		if (status != Status::OK) {
			return false;
		}

		if (pos + p_size > length) {
			status = Status::SHORT_BUFFER;
			return false;
		}
		return true;
	}

#define CHECK(p_size)          \
	if (!check_length(p_size)) \
	return

public:
	enum class Status {
		OK,
		SHORT_BUFFER,
		BAD_COMPRESSION,
	};

	Status status = Status::OK;

	BufReader(uint8_t const *p_data, uint64_t p_length) :
			data(p_data), length(p_length) {}

	template <Deserializable T>
	void read(T &p_value) {
		p_value.deserialize(*this);
	}

	_FORCE_INLINE_ void read(uint32_t &p_val) {
		CHECK(sizeof(uint32_t));

		p_val = decode_uint32(data + pos);
		pos += sizeof(uint32_t);
	}

	_FORCE_INLINE_ void read(RD::ShaderStage &p_val) {
		// Seed with the current value so a failed read leaves p_val unchanged
		// instead of assigning an indeterminate value.
		uint32_t val = (uint32_t)p_val;
		read(val);
		p_val = (RD::ShaderStage)val;
	}

	_FORCE_INLINE_ void read(bool &p_val) {
		CHECK(sizeof(uint8_t));

		p_val = *(data + pos) > 0;
		pos += 1;
	}

	_FORCE_INLINE_ void read(uint64_t &p_val) {
		CHECK(sizeof(uint64_t));

		p_val = decode_uint64(data + pos);
		pos += sizeof(uint64_t);
	}

	_FORCE_INLINE_ void read(float &p_val) {
		CHECK(sizeof(float));

		p_val = decode_float(data + pos);
		pos += sizeof(float);
	}

	_FORCE_INLINE_ void read(double &p_val) {
		CHECK(sizeof(double));

		p_val = decode_double(data + pos);
		pos += sizeof(double);
	}

	/// Reads a length-prefixed string and NUL-terminates it.
	void read(CharString &p_val) {
		uint32_t len = 0; // Initialized: the length read may fail.
		read(len);
		CHECK(len);
		p_val.resize(len + 1 /* NUL */);
		memcpy(p_val.ptrw(), data + pos, len);
		p_val.set(len, 0);
		pos += len;
	}

	/// Reads a ZSTD-compressed string stored as
	/// [uncompressed size][compressed size][compressed bytes].
	void read_compressed(CharString &p_val) {
		uint32_t len = 0;
		read(len);
		uint32_t comp_size = 0;
		read(comp_size);

		CHECK(comp_size);

		// `len` is not bounded by the buffer size; reject the value for which
		// `len + 1` would wrap around to an empty destination.
		if (len == UINT32_MAX) {
			status = Status::BAD_COMPRESSION;
			return;
		}

		p_val.resize(len + 1 /* NUL */);
		uint32_t bytes = (uint32_t)Compression::decompress(reinterpret_cast<uint8_t *>(p_val.ptrw()), len, data + pos, comp_size, Compression::MODE_ZSTD);
		if (bytes != len) {
			status = Status::BAD_COMPRESSION;
			return;
		}
		p_val.set(len, 0);
		pos += comp_size;
	}

	/// Reads a length-prefixed byte array.
	void read(LocalVector<uint8_t> &p_val) {
		uint32_t len = 0;
		read(len);
		CHECK(len);
		p_val.resize(len);
		memcpy(p_val.ptr(), data + pos, len);
		pos += len;
	}

	/// Reads a count-prefixed array of elements.
	template <typename T>
	void read(LocalVector<T> &p_val) {
		uint32_t len = 0;
		read(len);
		// Lower bound only (at least `len` bytes must remain); each element
		// read performs its own check.
		CHECK(len);
		p_val.resize(len);
		for (uint32_t i = 0; i < len; i++) {
			read(p_val[i]);
		}
	}

	/// Reads a count-prefixed sequence of key/value pairs.
	template <typename K, typename V>
	void read(HashMap<K, V> &p_map) {
		uint32_t len = 0;
		read(len);
		CHECK(len);
		p_map.reserve(len);
		for (uint32_t i = 0; i < len; i++) {
			K key{};
			read(key);
			V value{};
			read(value);
			if (status != Status::OK) {
				// Do not insert an entry built from a failed read.
				return;
			}
			p_map[key] = value;
		}
	}

#undef CHECK
};
/// Serializable description of a single uniform within a uniform set.
struct API_AVAILABLE(macos(11.0), ios(14.0)) UniformData {
	RD::UniformType type = RD::UniformType::UNIFORM_TYPE_MAX;
	uint32_t binding = UINT32_MAX;
	bool writable = false;
	uint32_t length = UINT32_MAX;
	ShaderStageUsage stages = ShaderStageUsage::None;
	// Specifies the stages the uniform data is
	// used by the Metal shader.
	ShaderStageUsage active_stages = ShaderStageUsage::None;
	BindingInfoMap bindings;
	BindingInfoMap bindings_secondary;

	/// Number of bytes reserved for serialize(). This is an upper bound:
	/// `writable` is counted as 4 bytes here.
	size_t serialize_size() const {
		size_t size = 0;
		size += sizeof(uint32_t); // type
		size += sizeof(uint32_t); // binding
		size += sizeof(uint32_t); // writable
		size += sizeof(uint32_t); // length
		size += sizeof(uint32_t); // stages
		size += sizeof(uint32_t); // active_stages
		size += sizeof(uint32_t); // bindings.size()
		size += sizeof(uint32_t) * bindings.size(); // Total size of keys.
		for (KeyValue<RD::ShaderStage, BindingInfo> const &e : bindings) {
			size += e.value.serialize_size();
		}
		size += sizeof(uint32_t); // bindings_secondary.size()
		size += sizeof(uint32_t) * bindings_secondary.size(); // Total size of keys.
		for (KeyValue<RD::ShaderStage, BindingInfo> const &e : bindings_secondary) {
			size += e.value.serialize_size();
		}
		return size;
	}

	void serialize(BufWriter &p_writer) const {
		p_writer.write((uint32_t)type);
		p_writer.write(binding);
		p_writer.write(writable);
		p_writer.write(length);
		// Enums are written explicitly as 32-bit values to match deserialize().
		p_writer.write((uint32_t)stages);
		p_writer.write((uint32_t)active_stages);
		p_writer.write(bindings);
		p_writer.write(bindings_secondary);
	}

	void deserialize(BufReader &p_reader) {
		// Enum members are read through uint32_t temporaries rather than by
		// casting the member to `uint32_t &`. Each temporary is seeded with the
		// current value, so the member is rewritten with its own value when the
		// reader does not store anything.
		uint32_t raw_type = (uint32_t)type;
		p_reader.read(raw_type);
		type = (RD::UniformType)raw_type;

		p_reader.read(binding);
		p_reader.read(writable);
		p_reader.read(length);

		uint32_t raw_stages = (uint32_t)stages;
		p_reader.read(raw_stages);
		stages = (ShaderStageUsage)raw_stages;

		uint32_t raw_active_stages = (uint32_t)active_stages;
		p_reader.read(raw_active_stages);
		active_stages = (ShaderStageUsage)raw_active_stages;

		p_reader.read(bindings);
		p_reader.read(bindings_secondary);
	}
};
p_writer.write(msl_binding); + } + + void deserialize(BufReader &p_reader) { + p_reader.read(size); + p_reader.read((uint32_t &)stages); + p_reader.read((uint32_t &)used_stages); + p_reader.read(msl_binding); + } +}; + +struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData { + CharString shader_name; + // The Metal language version specified when compiling SPIR-V to MSL. + // Format is major * 10000 + minor * 100 + patch. + uint32_t msl_version = UINT32_MAX; + uint32_t vertex_input_mask = UINT32_MAX; + uint32_t fragment_output_mask = UINT32_MAX; + uint32_t spirv_specialization_constants_ids_mask = UINT32_MAX; + uint32_t is_compute = UINT32_MAX; + ComputeSize compute_local_size; + PushConstantData push_constant; + LocalVector<ShaderStageData> stages; + LocalVector<SpecializationConstantData> constants; + LocalVector<UniformSetData> uniforms; + + MTLLanguageVersion get_msl_version() const { + uint32_t major = msl_version / 10000; + uint32_t minor = (msl_version / 100) % 100; + return MTLLanguageVersion((major << 0x10) + minor); + } + + size_t serialize_size() const { + size_t size = 0; + size += sizeof(uint32_t) + shader_name.length(); // shader_name + size += sizeof(uint32_t); // msl_version + size += sizeof(uint32_t); // vertex_input_mask + size += sizeof(uint32_t); // fragment_output_mask + size += sizeof(uint32_t); // spirv_specialization_constants_ids_mask + size += sizeof(uint32_t); // is_compute + size += compute_local_size.serialize_size(); // compute_local_size + size += push_constant.serialize_size(); // push_constant + size += sizeof(uint32_t); // stages.size() + for (ShaderStageData const &e : stages) { + size += e.serialize_size(); + } + size += sizeof(uint32_t); // constants.size() + for (SpecializationConstantData const &e : constants) { + size += e.serialize_size(); + } + size += sizeof(uint32_t); // uniforms.size() + for (UniformSetData const &e : uniforms) { + size += e.serialize_size(); + } + return size; + } + + void serialize(BufWriter 
/// Reflects every SPIR-V stage in `p_spirv` into `r_reflection` using SPIRV-Cross.
///
/// For each stage this collects uniforms (merged per set/binding across
/// stages), the push constant block size, the vertex input and fragment output
/// location masks, compute local size and specialization constants. Uniform
/// sets are sorted before returning.
///
/// @param p_spirv       One entry per shader stage.
/// @param r_reflection  Reset on entry, then filled with the merged reflection.
/// @return OK on success; ERR_CANT_CREATE if a stage fails to parse; FAILED on
///         any validation error (duplicate stage, conflicting uniforms or
///         constants, unsupported resource kinds, ...).
Error RenderingDeviceDriverMetal::_reflect_spirv16(VectorView<ShaderStageSPIRVData> p_spirv, ShaderReflection &r_reflection) {
	using namespace spirv_cross;
	using spirv_cross::Resource;

	r_reflection = {};

	for (uint32_t i = 0; i < p_spirv.size(); i++) {
		ShaderStageSPIRVData const &v = p_spirv[i];
		ShaderStage stage = v.shader_stage;
		uint32_t const *const ir = reinterpret_cast<uint32_t const *const>(v.spirv.ptr());
		size_t word_count = v.spirv.size() / sizeof(uint32_t);
		Parser parser(ir, word_count);
		try {
			parser.parse();
		} catch (CompilerError &e) {
			ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Failed to parse IR at stage " + String(SHADER_STAGE_NAMES[stage]) + ": " + e.what());
		}

		ShaderStage stage_flag = (ShaderStage)(1 << p_spirv[i].shader_stage);

		if (p_spirv[i].shader_stage == SHADER_STAGE_COMPUTE) {
			r_reflection.is_compute = true;
			ERR_FAIL_COND_V_MSG(p_spirv.size() != 1, FAILED,
					"Compute shaders can only receive one stage, dedicated to compute.");
		}
		ERR_FAIL_COND_V_MSG(r_reflection.stages.has_flag(stage_flag), FAILED,
				"Stage " + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + " submitted more than once.");

		ParsedIR &pir = parser.get_parsed_ir();
		using BT = SPIRType::BaseType;

		Compiler compiler(std::move(pir));

		if (r_reflection.is_compute) {
			r_reflection.compute_local_size[0] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 0);
			r_reflection.compute_local_size[1] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 1);
			r_reflection.compute_local_size[2] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 2);
		}

		// Parse bindings.

		// Returns the decoration value, or (uint32_t)-1 when the decoration is absent.
		auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) {
			uint32_t res = -1;
			if (compiler.has_decoration(id, decoration)) {
				res = compiler.get_decoration(id, decoration);
			}
			return res;
		};

		// Always clearer than a boolean.
		enum class Writable {
			No,
			Maybe,
		};

		// clang-format off
		enum {
			SPIRV_WORD_SIZE = sizeof(uint32_t),
			SPIRV_DATA_ALIGNMENT = 4 * SPIRV_WORD_SIZE,
		};
		// clang-format on

		// Adds every resource in `resources` to the reflection, merging with a
		// uniform already registered for the same set/binding by another stage.
		// Returns OK, or FAILED on the first validation error.
		auto process_uniforms = [&r_reflection, &compiler, &get_decoration, stage, stage_flag](SmallVector<Resource> &resources, Writable writable, std::function<RDD::UniformType(SPIRType const &)> uniform_type) {
			for (Resource const &res : resources) {
				ShaderUniform uniform;

				std::string const &name = compiler.get_name(res.id);
				uint32_t set = get_decoration(res.id, spv::DecorationDescriptorSet);
				ERR_FAIL_COND_V_MSG(set == (uint32_t)-1, FAILED, "No descriptor set found");
				ERR_FAIL_COND_V_MSG(set >= MAX_UNIFORM_SETS, FAILED, "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' uses a set (" + itos(set) + ") index larger than what is supported (" + itos(MAX_UNIFORM_SETS) + ").");

				uniform.binding = get_decoration(res.id, spv::DecorationBinding);
				ERR_FAIL_COND_V_MSG(uniform.binding == (uint32_t)-1, FAILED, "No binding found");

				SPIRType const &a_type = compiler.get_type(res.type_id);
				uniform.type = uniform_type(a_type);

				// Update length.
				switch (a_type.basetype) {
					case BT::Struct: {
						if (uniform.type == UNIFORM_TYPE_STORAGE_BUFFER) {
							// Consistent with spirv_reflect.
							uniform.length = 0;
						} else {
							uniform.length = round_up_to_alignment(compiler.get_declared_struct_size(a_type), SPIRV_DATA_ALIGNMENT);
						}
					} break;
					case BT::Image:
					case BT::Sampler:
					case BT::SampledImage: {
						uniform.length = 1;
						for (uint32_t const &a : a_type.array) {
							uniform.length *= a;
						}
					} break;
					default:
						break;
				}

				// Update writable.
				if (writable == Writable::Maybe) {
					if (a_type.basetype == BT::Struct) {
						Bitset flags = compiler.get_buffer_block_flags(res.id);
						uniform.writable = !compiler.has_decoration(res.id, spv::DecorationNonWritable) && !flags.get(spv::DecorationNonWritable);
					} else if (a_type.basetype == BT::Image) {
						if (a_type.image.access == spv::AccessQualifierMax) {
							uniform.writable = !compiler.has_decoration(res.id, spv::DecorationNonWritable);
						} else {
							uniform.writable = a_type.image.access != spv::AccessQualifierReadOnly;
						}
					}
				}

				if (set < (uint32_t)r_reflection.uniform_sets.size()) {
					// Check if this already exists.
					bool exists = false;
					for (uint32_t k = 0; k < r_reflection.uniform_sets[set].size(); k++) {
						if (r_reflection.uniform_sets[set][k].binding == uniform.binding) {
							// Already exists, verify that it's the same type.
							ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].type != uniform.type, FAILED,
									"On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different uniform type.");

							// Also, verify that it's the same size.
							ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].length != uniform.length, FAILED,
									"On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different uniform size.");

							// Also, verify that it has the same writability.
							ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].writable != uniform.writable, FAILED,
									"On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different writability.");

							// Just append stage mask and continue.
							r_reflection.uniform_sets.write[set].write[k].stages.set_flag(stage_flag);
							exists = true;
							break;
						}
					}

					if (exists) {
						continue; // Merged.
					}
				}

				uniform.stages.set_flag(stage_flag);

				if (set >= (uint32_t)r_reflection.uniform_sets.size()) {
					r_reflection.uniform_sets.resize(set + 1);
				}

				r_reflection.uniform_sets.write[set].push_back(uniform);
			}

			return OK;
		};

		ShaderResources resources = compiler.get_shader_resources();

		// Each call has already reported its error; propagate the failure
		// instead of continuing with a partially built reflection.
		Error err = process_uniforms(resources.uniform_buffers, Writable::No, [](SPIRType const &a_type) {
			DEV_ASSERT(a_type.basetype == BT::Struct);
			return UNIFORM_TYPE_UNIFORM_BUFFER;
		});
		if (err != OK) {
			return err;
		}

		err = process_uniforms(resources.storage_buffers, Writable::Maybe, [](SPIRType const &a_type) {
			DEV_ASSERT(a_type.basetype == BT::Struct);
			return UNIFORM_TYPE_STORAGE_BUFFER;
		});
		if (err != OK) {
			return err;
		}

		err = process_uniforms(resources.storage_images, Writable::Maybe, [](SPIRType const &a_type) {
			DEV_ASSERT(a_type.basetype == BT::Image);
			if (a_type.image.dim == spv::DimBuffer) {
				return UNIFORM_TYPE_IMAGE_BUFFER;
			} else {
				return UNIFORM_TYPE_IMAGE;
			}
		});
		if (err != OK) {
			return err;
		}

		err = process_uniforms(resources.sampled_images, Writable::No, [](SPIRType const &a_type) {
			DEV_ASSERT(a_type.basetype == BT::SampledImage);
			return UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
		});
		if (err != OK) {
			return err;
		}

		err = process_uniforms(resources.separate_images, Writable::No, [](SPIRType const &a_type) {
			DEV_ASSERT(a_type.basetype == BT::Image);
			if (a_type.image.dim == spv::DimBuffer) {
				return UNIFORM_TYPE_TEXTURE_BUFFER;
			} else {
				return UNIFORM_TYPE_TEXTURE;
			}
		});
		if (err != OK) {
			return err;
		}

		err = process_uniforms(resources.separate_samplers, Writable::No, [](SPIRType const &a_type) {
			DEV_ASSERT(a_type.basetype == BT::Sampler);
			return UNIFORM_TYPE_SAMPLER;
		});
		if (err != OK) {
			return err;
		}

		err = process_uniforms(resources.subpass_inputs, Writable::No, [](SPIRType const &a_type) {
			DEV_ASSERT(a_type.basetype == BT::Image && a_type.image.dim == spv::DimSubpassData);
			return UNIFORM_TYPE_INPUT_ATTACHMENT;
		});
		if (err != OK) {
			return err;
		}

		if (!resources.push_constant_buffers.empty()) {
			// There can be only one push constant block.
			Resource const &res = resources.push_constant_buffers.front();

			size_t push_constant_size = round_up_to_alignment(compiler.get_declared_struct_size(compiler.get_type(res.base_type_id)), SPIRV_DATA_ALIGNMENT);
			ERR_FAIL_COND_V_MSG(r_reflection.push_constant_size && r_reflection.push_constant_size != push_constant_size, FAILED,
					"Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "': Push constant block must be the same across shader stages.");

			r_reflection.push_constant_size = push_constant_size;
			r_reflection.push_constant_stages.set_flag(stage_flag);
		}

		ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), FAILED, "Atomic counters not supported");
		ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), FAILED, "Acceleration structures not supported");
		ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), FAILED, "Shader record buffers not supported");

		if (stage == SHADER_STAGE_VERTEX && !resources.stage_inputs.empty()) {
			for (Resource const &res : resources.stage_inputs) {
				uint32_t loc = get_decoration(res.id, spv::DecorationLocation);
				if (loc != (uint32_t)-1) {
					// Unsigned shift: location 31 must not shift into the sign bit.
					r_reflection.vertex_input_mask |= 1u << loc;
				}
			}
		}

		if (stage == SHADER_STAGE_FRAGMENT && !resources.stage_outputs.empty()) {
			for (Resource const &res : resources.stage_outputs) {
				uint32_t loc = get_decoration(res.id, spv::DecorationLocation);
				uint32_t built_in = spv::BuiltIn(get_decoration(res.id, spv::DecorationBuiltIn));
				if (loc != (uint32_t)-1 && built_in != spv::BuiltInFragDepth) {
					r_reflection.fragment_output_mask |= 1u << loc;
				}
			}
		}

		// Specialization constants.
		for (SpecializationConstant const &constant : compiler.get_specialization_constants()) {
			int32_t existing = -1;
			ShaderSpecializationConstant sconst;
			SPIRConstant &spc = compiler.get_constant(constant.id);
			SPIRType const &spct = compiler.get_type(spc.constant_type);

			sconst.constant_id = constant.constant_id;
			sconst.int_value = 0;

			switch (spct.basetype) {
				case BT::Boolean: {
					sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL;
					sconst.bool_value = spc.scalar() != 0;
				} break;
				case BT::Int:
				case BT::UInt: {
					sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT;
					sconst.int_value = spc.scalar();
				} break;
				case BT::Float: {
					sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT;
					sconst.float_value = spc.scalar_f32();
				} break;
				default:
					ERR_FAIL_V_MSG(FAILED, "Unsupported specialization constant type");
			}
			sconst.stages.set_flag(stage_flag);

			for (uint32_t k = 0; k < r_reflection.specialization_constants.size(); k++) {
				if (r_reflection.specialization_constants[k].constant_id == sconst.constant_id) {
					ERR_FAIL_COND_V_MSG(r_reflection.specialization_constants[k].type != sconst.type, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their types differ.");
					ERR_FAIL_COND_V_MSG(r_reflection.specialization_constants[k].int_value != sconst.int_value, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their default values differ.");
					existing = k;
					break;
				}
			}

			// `existing` is -1 when not found; index 0 is a valid match and
			// must be merged rather than appended again.
			if (existing >= 0) {
				r_reflection.specialization_constants.write[existing].stages.set_flag(stage_flag);
			} else {
				r_reflection.specialization_constants.push_back(sconst);
			}
		}

		r_reflection.stages.set_flag(stage_flag);
	}

	// Sort all uniform_sets.
	for (uint32_t i = 0; i < r_reflection.uniform_sets.size(); i++) {
		r_reflection.uniform_sets.write[i].sort();
	}

	return OK;
}
(ShaderStageUsage)(uint8_t)spirv_uniform.stages; + binding.length = spirv_uniform.length; + set.uniforms.push_back(binding); + } + bin_data.uniforms.push_back(set); + } + + for (const ShaderSpecializationConstant &spirv_sc : spirv_data.specialization_constants) { + SpecializationConstantData spec_constant{}; + spec_constant.type = spirv_sc.type; + spec_constant.constant_id = spirv_sc.constant_id; + spec_constant.int_value = spirv_sc.int_value; + spec_constant.stages = (ShaderStageUsage)(uint8_t)spirv_sc.stages; + bin_data.constants.push_back(spec_constant); + bin_data.spirv_specialization_constants_ids_mask |= (1 << spirv_sc.constant_id); + } + + // Reflection using SPIRV-Cross: + // https://github.com/KhronosGroup/SPIRV-Cross/wiki/Reflection-API-user-guide + + CompilerMSL::Options msl_options{}; + msl_options.set_msl_version(version_major, version_minor); + if (version_major == 3 && version_minor >= 1) { + // TODO(sgc): Restrict to Metal 3.0 for now, until bugs in SPIRV-cross image atomics are resolved. + msl_options.set_msl_version(3, 0); + } + bin_data.msl_version = msl_options.msl_version; +#if TARGET_OS_OSX + msl_options.platform = CompilerMSL::Options::macOS; +#else + msl_options.platform = CompilerMSL::Options::iOS; +#endif + +#if TARGET_OS_IOS + msl_options.ios_use_simdgroup_functions = (*metal_device_properties).features.simdPermute; +#endif + + msl_options.argument_buffers = true; + msl_options.force_active_argument_buffer_resources = true; // Same as MoltenVK when using argument buffers. + // msl_options.pad_argument_buffer_resources = true; // Same as MoltenVK when using argument buffers. + msl_options.texture_buffer_native = true; // Enable texture buffer support. 
+ msl_options.use_framebuffer_fetch_subpasses = false; + msl_options.pad_fragment_output_components = true; + msl_options.r32ui_alignment_constant_id = R32UI_ALIGNMENT_CONSTANT_ID; + msl_options.agx_manual_cube_grad_fixup = true; + + CompilerGLSL::Options options{}; + options.vertex.flip_vert_y = true; +#if DEV_ENABLED + options.emit_line_directives = true; +#endif + + for (uint32_t i = 0; i < p_spirv.size(); i++) { + ShaderStageSPIRVData const &v = p_spirv[i]; + ShaderStage stage = v.shader_stage; + char const *stage_name = SHADER_STAGE_NAMES[stage]; + uint32_t const *const ir = reinterpret_cast<uint32_t const *const>(v.spirv.ptr()); + size_t word_count = v.spirv.size() / sizeof(uint32_t); + Parser parser(ir, word_count); + try { + parser.parse(); + } catch (CompilerError &e) { + ERR_FAIL_V_MSG(Result(), "Failed to parse IR at stage " + String(SHADER_STAGE_NAMES[stage]) + ": " + e.what()); + } + + CompilerMSL compiler(std::move(parser.get_parsed_ir())); + compiler.set_msl_options(msl_options); + compiler.set_common_options(options); + + std::unordered_set<VariableID> active = compiler.get_active_interface_variables(); + ShaderResources resources = compiler.get_shader_resources(); + + std::string source = compiler.compile(); + + ERR_FAIL_COND_V_MSG(compiler.get_entry_points_and_stages().size() != 1, Result(), "Expected a single entry point and stage."); + + EntryPoint &entry_point_stage = compiler.get_entry_points_and_stages().front(); + SPIREntryPoint &entry_point = compiler.get_entry_point(entry_point_stage.name, entry_point_stage.execution_model); + + // Process specialization constants. 
+ if (!compiler.get_specialization_constants().empty()) { + for (SpecializationConstant const &constant : compiler.get_specialization_constants()) { + LocalVector<SpecializationConstantData>::Iterator res = bin_data.constants.begin(); + while (res != bin_data.constants.end()) { + if (res->constant_id == constant.constant_id) { + res->used_stages |= 1 << stage; + break; + } + ++res; + } + if (res == bin_data.constants.end()) { + WARN_PRINT(String(stage_name) + ": unable to find constant_id: " + itos(constant.constant_id)); + } + } + } + + // Process bindings. + + LocalVector<UniformSetData> &uniform_sets = bin_data.uniforms; + using BT = SPIRType::BaseType; + + // Always clearer than a boolean. + enum class Writable { + No, + Maybe, + }; + + // Returns a std::optional containing the value of the + // decoration, if it exists. + auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) { + uint32_t res = -1; + if (compiler.has_decoration(id, decoration)) { + res = compiler.get_decoration(id, decoration); + } + return res; + }; + + auto descriptor_bindings = [&compiler, &active, &uniform_sets, stage, &get_decoration](SmallVector<Resource> &resources, Writable writable) { + for (Resource const &res : resources) { + uint32_t dset = get_decoration(res.id, spv::DecorationDescriptorSet); + uint32_t dbin = get_decoration(res.id, spv::DecorationBinding); + UniformData *found = nullptr; + if (dset != (uint32_t)-1 && dbin != (uint32_t)-1 && dset < uniform_sets.size()) { + UniformSetData &set = uniform_sets[dset]; + LocalVector<UniformData>::Iterator pos = set.uniforms.begin(); + while (pos != set.uniforms.end()) { + if (dbin == pos->binding) { + found = &(*pos); + break; + } + ++pos; + } + } + + ERR_FAIL_NULL_V_MSG(found, ERR_CANT_CREATE, "UniformData not found"); + + bool is_active = active.find(res.id) != active.end(); + if (is_active) { + found->active_stages |= 1 << stage; + } + + BindingInfo primary{}; + + SPIRType const &a_type = 
compiler.get_type(res.type_id); + BT basetype = a_type.basetype; + + switch (basetype) { + case BT::Struct: { + primary.dataType = MTLDataTypePointer; + } break; + + case BT::Image: + case BT::SampledImage: { + primary.dataType = MTLDataTypeTexture; + } break; + + case BT::Sampler: { + primary.dataType = MTLDataTypeSampler; + } break; + + default: { + ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unexpected BaseType"); + } break; + } + + // Find array length. + if (basetype == BT::Image || basetype == BT::SampledImage) { + primary.arrayLength = 1; + for (uint32_t const &a : a_type.array) { + primary.arrayLength *= a; + } + primary.isMultisampled = a_type.image.ms; + + SPIRType::ImageType const &image = a_type.image; + primary.imageFormat = image.format; + + switch (image.dim) { + case spv::Dim1D: { + if (image.arrayed) { + primary.textureType = MTLTextureType1DArray; + } else { + primary.textureType = MTLTextureType1D; + } + } break; + case spv::DimSubpassData: { + DISPATCH_FALLTHROUGH; + } + case spv::Dim2D: { + if (image.arrayed && image.ms) { + primary.textureType = MTLTextureType2DMultisampleArray; + } else if (image.arrayed) { + primary.textureType = MTLTextureType2DArray; + } else if (image.ms) { + primary.textureType = MTLTextureType2DMultisample; + } else { + primary.textureType = MTLTextureType2D; + } + } break; + case spv::Dim3D: { + primary.textureType = MTLTextureType3D; + } break; + case spv::DimCube: { + if (image.arrayed) { + primary.textureType = MTLTextureTypeCube; + } + } break; + case spv::DimRect: { + } break; + case spv::DimBuffer: { + // VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER + primary.textureType = MTLTextureTypeTextureBuffer; + } break; + case spv::DimMax: { + // Add all enumerations to silence the compiler warning + // and generate future warnings, should a new one be added. + } break; + } + } + + // Update writable. 
+ if (writable == Writable::Maybe) { + if (basetype == BT::Struct) { + Bitset flags = compiler.get_buffer_block_flags(res.id); + if (!flags.get(spv::DecorationNonWritable)) { + if (flags.get(spv::DecorationNonReadable)) { + primary.access = MTLBindingAccessWriteOnly; + } else { + primary.access = MTLBindingAccessReadWrite; + } + } + } else if (basetype == BT::Image) { + switch (a_type.image.access) { + case spv::AccessQualifierWriteOnly: + primary.access = MTLBindingAccessWriteOnly; + break; + case spv::AccessQualifierReadWrite: + primary.access = MTLBindingAccessReadWrite; + break; + case spv::AccessQualifierReadOnly: + break; + case spv::AccessQualifierMax: + DISPATCH_FALLTHROUGH; + default: + if (!compiler.has_decoration(res.id, spv::DecorationNonWritable)) { + if (compiler.has_decoration(res.id, spv::DecorationNonReadable)) { + primary.access = MTLBindingAccessWriteOnly; + } else { + primary.access = MTLBindingAccessReadWrite; + } + } + break; + } + } + } + + switch (primary.access) { + case MTLBindingAccessReadOnly: + primary.usage = MTLResourceUsageRead; + break; + case MTLBindingAccessWriteOnly: + primary.usage = MTLResourceUsageWrite; + break; + case MTLBindingAccessReadWrite: + primary.usage = MTLResourceUsageRead | MTLResourceUsageWrite; + break; + } + + primary.index = compiler.get_automatic_msl_resource_binding(res.id); + + found->bindings[stage] = primary; + + // A sampled image contains two bindings, the primary + // is to the image, and the secondary is to the associated sampler. + if (basetype == BT::SampledImage) { + uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id); + if (binding != (uint32_t)-1) { + found->bindings_secondary[stage] = BindingInfo{ + .dataType = MTLDataTypeSampler, + .index = binding, + .access = MTLBindingAccessReadOnly, + }; + } + } + + // An image may have a secondary binding if it is used + // for atomic operations. 
+ if (basetype == BT::Image) { + uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id); + if (binding != (uint32_t)-1) { + found->bindings_secondary[stage] = BindingInfo{ + .dataType = MTLDataTypePointer, + .index = binding, + .access = MTLBindingAccessReadWrite, + }; + } + } + } + return Error::OK; + }; + + if (!resources.uniform_buffers.empty()) { + Error err = descriptor_bindings(resources.uniform_buffers, Writable::No); + ERR_FAIL_COND_V(err != OK, Result()); + } + if (!resources.storage_buffers.empty()) { + Error err = descriptor_bindings(resources.storage_buffers, Writable::Maybe); + ERR_FAIL_COND_V(err != OK, Result()); + } + if (!resources.storage_images.empty()) { + Error err = descriptor_bindings(resources.storage_images, Writable::Maybe); + ERR_FAIL_COND_V(err != OK, Result()); + } + if (!resources.sampled_images.empty()) { + Error err = descriptor_bindings(resources.sampled_images, Writable::No); + ERR_FAIL_COND_V(err != OK, Result()); + } + if (!resources.separate_images.empty()) { + Error err = descriptor_bindings(resources.separate_images, Writable::No); + ERR_FAIL_COND_V(err != OK, Result()); + } + if (!resources.separate_samplers.empty()) { + Error err = descriptor_bindings(resources.separate_samplers, Writable::No); + ERR_FAIL_COND_V(err != OK, Result()); + } + if (!resources.subpass_inputs.empty()) { + Error err = descriptor_bindings(resources.subpass_inputs, Writable::No); + ERR_FAIL_COND_V(err != OK, Result()); + } + + if (!resources.push_constant_buffers.empty()) { + for (Resource const &res : resources.push_constant_buffers) { + uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id); + if (binding != (uint32_t)-1) { + bin_data.push_constant.used_stages |= 1 << stage; + bin_data.push_constant.msl_binding[stage] = binding; + } + } + } + + ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), Result(), "Atomic counters not supported"); + ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), 
Result(), "Acceleration structures not supported"); + ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), Result(), "Shader record buffers not supported"); + + if (!resources.stage_inputs.empty()) { + for (Resource const &res : resources.stage_inputs) { + uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id); + if (binding != (uint32_t)-1) { + bin_data.vertex_input_mask |= 1 << binding; + } + } + } + + ShaderStageData stage_data; + stage_data.stage = v.shader_stage; + stage_data.entry_point_name = entry_point.name.c_str(); + stage_data.source = source.c_str(); + bin_data.stages.push_back(stage_data); + } + + size_t vec_size = bin_data.serialize_size() + 8; + + ::Vector<uint8_t> ret; + ret.resize(vec_size); + BufWriter writer(ret.ptrw(), vec_size); + const uint8_t HEADER[4] = { 'G', 'M', 'S', 'L' }; + writer.write(*(uint32_t *)HEADER); + writer.write(SHADER_BINARY_VERSION); + bin_data.serialize(writer); + ret.resize(writer.get_pos()); + + return ret; +} + +void RenderingDeviceDriverMetal::shader_cache_free_entry(const SHA256Digest &key) { + if (ShaderCacheEntry **pentry = _shader_cache.getptr(key); pentry != nullptr) { + ShaderCacheEntry *entry = *pentry; + _shader_cache.erase(key); + entry->library = nil; + memdelete(entry); + } +} + +RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name) { + r_shader_desc = {}; // Driver-agnostic. 
+ + const uint8_t *binptr = p_shader_binary.ptr(); + uint32_t binsize = p_shader_binary.size(); + + BufReader reader(binptr, binsize); + uint8_t header[4]; + reader.read((uint32_t &)header); + ERR_FAIL_COND_V_MSG(memcmp(header, "GMSL", 4) != 0, ShaderID(), "Invalid header"); + uint32_t version = 0; + reader.read(version); + ERR_FAIL_COND_V_MSG(version != SHADER_BINARY_VERSION, ShaderID(), "Invalid shader binary version"); + + ShaderBinaryData binary_data; + binary_data.deserialize(reader); + switch (reader.status) { + case BufReader::Status::OK: + break; + case BufReader::Status::BAD_COMPRESSION: + ERR_FAIL_V_MSG(ShaderID(), "Invalid compressed data"); + case BufReader::Status::SHORT_BUFFER: + ERR_FAIL_V_MSG(ShaderID(), "Unexpected end of buffer"); + } + + MTLCompileOptions *options = [MTLCompileOptions new]; + options.languageVersion = binary_data.get_msl_version(); + HashMap<ShaderStage, MDLibrary *> libraries; + + for (ShaderStageData &shader_data : binary_data.stages) { + SHA256Digest key = SHA256Digest(shader_data.source.ptr(), shader_data.source.length()); + + if (ShaderCacheEntry **p = _shader_cache.getptr(key); p != nullptr) { + libraries[shader_data.stage] = (*p)->library; + continue; + } + + NSString *source = [[NSString alloc] initWithBytes:(void *)shader_data.source.ptr() + length:shader_data.source.length() + encoding:NSUTF8StringEncoding]; + + ShaderCacheEntry *cd = memnew(ShaderCacheEntry(*this, key)); + cd->name = binary_data.shader_name; + cd->stage = shader_data.stage; + + MDLibrary *library = [MDLibrary newLibraryWithCacheEntry:cd + device:device + source:source + options:options + strategy:_shader_load_strategy]; + _shader_cache[key] = cd; + libraries[shader_data.stage] = library; + } + + Vector<UniformSet> uniform_sets; + uniform_sets.resize(binary_data.uniforms.size()); + + r_shader_desc.uniform_sets.resize(binary_data.uniforms.size()); + + // Create sets. 
+ for (UniformSetData &uniform_set : binary_data.uniforms) { + UniformSet &set = uniform_sets.write[uniform_set.index]; + set.uniforms.resize(uniform_set.uniforms.size()); + + Vector<ShaderUniform> &uset = r_shader_desc.uniform_sets.write[uniform_set.index]; + uset.resize(uniform_set.uniforms.size()); + + for (uint32_t i = 0; i < uniform_set.uniforms.size(); i++) { + UniformData &uniform = uniform_set.uniforms[i]; + + ShaderUniform su; + su.type = uniform.type; + su.writable = uniform.writable; + su.length = uniform.length; + su.binding = uniform.binding; + su.stages = uniform.stages; + uset.write[i] = su; + + UniformInfo ui; + ui.binding = uniform.binding; + ui.active_stages = uniform.active_stages; + for (KeyValue<RDC::ShaderStage, BindingInfo> &kv : uniform.bindings) { + ui.bindings.insert(kv.key, kv.value); + } + for (KeyValue<RDC::ShaderStage, BindingInfo> &kv : uniform.bindings_secondary) { + ui.bindings_secondary.insert(kv.key, kv.value); + } + set.uniforms[i] = ui; + } + } + for (UniformSetData &uniform_set : binary_data.uniforms) { + UniformSet &set = uniform_sets.write[uniform_set.index]; + + // Make encoders. + for (ShaderStageData const &stage_data : binary_data.stages) { + ShaderStage stage = stage_data.stage; + NSMutableArray<MTLArgumentDescriptor *> *descriptors = [NSMutableArray new]; + + for (UniformInfo const &uniform : set.uniforms) { + BindingInfo const *binding_info = uniform.bindings.getptr(stage); + if (binding_info == nullptr) + continue; + + [descriptors addObject:binding_info->new_argument_descriptor()]; + BindingInfo const *secondary_binding_info = uniform.bindings_secondary.getptr(stage); + if (secondary_binding_info != nullptr) { + [descriptors addObject:secondary_binding_info->new_argument_descriptor()]; + } + } + + if (descriptors.count == 0) { + // No bindings. + continue; + } + // Sort by index. 
+ [descriptors sortUsingComparator:^NSComparisonResult(MTLArgumentDescriptor *a, MTLArgumentDescriptor *b) { + if (a.index < b.index) { + return NSOrderedAscending; + } else if (a.index > b.index) { + return NSOrderedDescending; + } else { + return NSOrderedSame; + } + }]; + + id<MTLArgumentEncoder> enc = [device newArgumentEncoderWithArguments:descriptors]; + set.encoders[stage] = enc; + set.offsets[stage] = set.buffer_size; + set.buffer_size += enc.encodedLength; + } + } + + r_shader_desc.specialization_constants.resize(binary_data.constants.size()); + for (uint32_t i = 0; i < binary_data.constants.size(); i++) { + SpecializationConstantData &c = binary_data.constants[i]; + + ShaderSpecializationConstant sc; + sc.type = c.type; + sc.constant_id = c.constant_id; + sc.int_value = c.int_value; + sc.stages = c.stages; + r_shader_desc.specialization_constants.write[i] = sc; + } + + MDShader *shader = nullptr; + if (binary_data.is_compute) { + MDComputeShader *cs = new MDComputeShader(binary_data.shader_name, uniform_sets, libraries[ShaderStage::SHADER_STAGE_COMPUTE]); + + uint32_t *binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_COMPUTE); + if (binding) { + cs->push_constants.size = binary_data.push_constant.size; + cs->push_constants.binding = *binding; + } + + cs->local = MTLSizeMake(binary_data.compute_local_size.x, binary_data.compute_local_size.y, binary_data.compute_local_size.z); +#if DEV_ENABLED + cs->kernel_source = binary_data.stages[0].source; +#endif + shader = cs; + } else { + MDRenderShader *rs = new MDRenderShader(binary_data.shader_name, uniform_sets, libraries[ShaderStage::SHADER_STAGE_VERTEX], libraries[ShaderStage::SHADER_STAGE_FRAGMENT]); + + uint32_t *vert_binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_VERTEX); + if (vert_binding) { + rs->push_constants.vert.size = binary_data.push_constant.size; + rs->push_constants.vert.binding = *vert_binding; + } + uint32_t *frag_binding = 
binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_FRAGMENT); + if (frag_binding) { + rs->push_constants.frag.size = binary_data.push_constant.size; + rs->push_constants.frag.binding = *frag_binding; + } + +#if DEV_ENABLED + for (ShaderStageData &stage_data : binary_data.stages) { + if (stage_data.stage == ShaderStage::SHADER_STAGE_VERTEX) { + rs->vert_source = stage_data.source; + } else if (stage_data.stage == ShaderStage::SHADER_STAGE_FRAGMENT) { + rs->frag_source = stage_data.source; + } + } +#endif + shader = rs; + } + + r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask; + r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask; + r_shader_desc.is_compute = binary_data.is_compute; + r_shader_desc.compute_local_size[0] = binary_data.compute_local_size.x; + r_shader_desc.compute_local_size[1] = binary_data.compute_local_size.y; + r_shader_desc.compute_local_size[2] = binary_data.compute_local_size.z; + r_shader_desc.push_constant_size = binary_data.push_constant.size; + + return ShaderID(shader); +} + +void RenderingDeviceDriverMetal::shader_free(ShaderID p_shader) { + MDShader *obj = (MDShader *)p_shader.id; + delete obj; +} + +void RenderingDeviceDriverMetal::shader_destroy_modules(ShaderID p_shader) { + // TODO. 
+} + +/*********************/ +/**** UNIFORM SET ****/ +/*********************/ + +RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index) { + MDUniformSet *set = new MDUniformSet(); + Vector<BoundUniform> bound_uniforms; + bound_uniforms.resize(p_uniforms.size()); + for (uint32_t i = 0; i < p_uniforms.size(); i += 1) { + bound_uniforms.write[i] = p_uniforms[i]; + } + set->uniforms = bound_uniforms; + set->index = p_set_index; + + return UniformSetID(set); +} + +void RenderingDeviceDriverMetal::uniform_set_free(UniformSetID p_uniform_set) { + MDUniformSet *obj = (MDUniformSet *)p_uniform_set.id; + delete obj; +} + +void RenderingDeviceDriverMetal::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { +} + +#pragma mark - Transfer + +void RenderingDeviceDriverMetal::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) { + MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); + id<MTLBuffer> buffer = rid::get(p_buffer); + + id<MTLBlitCommandEncoder> blit = cmd->blit_command_encoder(); + [blit fillBuffer:buffer + range:NSMakeRange(p_offset, p_size) + value:0]; +} + +void RenderingDeviceDriverMetal::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView<BufferCopyRegion> p_regions) { + MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); + id<MTLBuffer> src = rid::get(p_src_buffer); + id<MTLBuffer> dst = rid::get(p_dst_buffer); + + id<MTLBlitCommandEncoder> blit = cmd->blit_command_encoder(); + + for (uint32_t i = 0; i < p_regions.size(); i++) { + BufferCopyRegion region = p_regions[i]; + [blit copyFromBuffer:src + sourceOffset:region.src_offset + toBuffer:dst + destinationOffset:region.dst_offset + size:region.size]; + } +} + +MTLSize MTLSizeFromVector3i(Vector3i p_size) { + return 
MTLSizeMake(p_size.x, p_size.y, p_size.z); +} + +MTLOrigin MTLOriginFromVector3i(Vector3i p_origin) { + return MTLOriginMake(p_origin.x, p_origin.y, p_origin.z); +} + +// Clamps the size so that the sum of the origin and size do not exceed the maximum size. +static inline MTLSize clampMTLSize(MTLSize p_size, MTLOrigin p_origin, MTLSize p_max_size) { + MTLSize clamped; + clamped.width = MIN(p_size.width, p_max_size.width - p_origin.x); + clamped.height = MIN(p_size.height, p_max_size.height - p_origin.y); + clamped.depth = MIN(p_size.depth, p_max_size.depth - p_origin.z); + return clamped; +} + +void RenderingDeviceDriverMetal::command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<TextureCopyRegion> p_regions) { + MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); + id<MTLTexture> src = rid::get(p_src_texture); + id<MTLTexture> dst = rid::get(p_dst_texture); + + id<MTLBlitCommandEncoder> blit = cmd->blit_command_encoder(); + PixelFormats &pf = *pixel_formats; + + MTLPixelFormat src_fmt = src.pixelFormat; + bool src_is_compressed = pf.getFormatType(src_fmt) == MTLFormatType::Compressed; + MTLPixelFormat dst_fmt = dst.pixelFormat; + bool dst_is_compressed = pf.getFormatType(dst_fmt) == MTLFormatType::Compressed; + + // Validate copy. + if (src.sampleCount != dst.sampleCount || pf.getBytesPerBlock(src_fmt) != pf.getBytesPerBlock(dst_fmt)) { + ERR_FAIL_MSG("Cannot copy between incompatible pixel formats, such as formats of different pixel sizes, or between images with different sample counts."); + } + + // If source and destination have different formats and at least one is compressed, a temporary buffer is required. 
+ bool need_tmp_buffer = (src_fmt != dst_fmt) && (src_is_compressed || dst_is_compressed); + if (need_tmp_buffer) { + ERR_FAIL_MSG("not implemented: copy with intermediate buffer"); + } + + if (src_fmt != dst_fmt) { + // Map the source pixel format to the dst through a texture view on the source texture. + src = [src newTextureViewWithPixelFormat:dst_fmt]; + } + + for (uint32_t i = 0; i < p_regions.size(); i++) { + TextureCopyRegion region = p_regions[i]; + + MTLSize extent = MTLSizeFromVector3i(region.size); + + // If copies can be performed using direct texture-texture copying, do so. + uint32_t src_level = region.src_subresources.mipmap; + uint32_t src_base_layer = region.src_subresources.base_layer; + MTLSize src_extent = mipmapLevelSizeFromTexture(src, src_level); + uint32_t dst_level = region.dst_subresources.mipmap; + uint32_t dst_base_layer = region.dst_subresources.base_layer; + MTLSize dst_extent = mipmapLevelSizeFromTexture(dst, dst_level); + + // All layers may be copied at once, if the extent completely covers both images. + if (src_extent == extent && dst_extent == extent) { + [blit copyFromTexture:src + sourceSlice:src_base_layer + sourceLevel:src_level + toTexture:dst + destinationSlice:dst_base_layer + destinationLevel:dst_level + sliceCount:region.src_subresources.layer_count + levelCount:1]; + } else { + MTLOrigin src_origin = MTLOriginFromVector3i(region.src_offset); + MTLSize src_size = clampMTLSize(extent, src_origin, src_extent); + uint32_t layer_count = 0; + if ((src.textureType == MTLTextureType3D) != (dst.textureType == MTLTextureType3D)) { + // In this case (exactly one of the two textures is 3D), the number of layers to copy is in extent.depth. Use that value, + // then clamp the depth, so we don't try to copy more than Metal will allow. 
+ layer_count = extent.depth; + src_size.depth = 1; + } else { + layer_count = region.src_subresources.layer_count; + } + MTLOrigin dst_origin = MTLOriginFromVector3i(region.dst_offset); + + for (uint32_t layer = 0; layer < layer_count; layer++) { + // We can copy between a 3D and a 2D image easily. Just copy between + // one slice of the 2D image and one plane of the 3D image at a time. + if ((src.textureType == MTLTextureType3D) == (dst.textureType == MTLTextureType3D)) { + [blit copyFromTexture:src + sourceSlice:src_base_layer + layer + sourceLevel:src_level + sourceOrigin:src_origin + sourceSize:src_size + toTexture:dst + destinationSlice:dst_base_layer + layer + destinationLevel:dst_level + destinationOrigin:dst_origin]; + } else if (src.textureType == MTLTextureType3D) { + [blit copyFromTexture:src + sourceSlice:src_base_layer + sourceLevel:src_level + sourceOrigin:MTLOriginMake(src_origin.x, src_origin.y, src_origin.z + layer) + sourceSize:src_size + toTexture:dst + destinationSlice:dst_base_layer + layer + destinationLevel:dst_level + destinationOrigin:dst_origin]; + } else { + DEV_ASSERT(dst.textureType == MTLTextureType3D); + [blit copyFromTexture:src + sourceSlice:src_base_layer + layer + sourceLevel:src_level + sourceOrigin:src_origin + sourceSize:src_size + toTexture:dst + destinationSlice:dst_base_layer + destinationLevel:dst_level + destinationOrigin:MTLOriginMake(dst_origin.x, dst_origin.y, dst_origin.z + layer)]; + } + } + } + } +} + +void RenderingDeviceDriverMetal::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + id<MTLTexture> src_tex = rid::get(p_src_texture); + id<MTLTexture> dst_tex = rid::get(p_dst_texture); + + MTLRenderPassDescriptor *mtlRPD = 
[MTLRenderPassDescriptor renderPassDescriptor]; + MTLRenderPassColorAttachmentDescriptor *mtlColorAttDesc = mtlRPD.colorAttachments[0]; + mtlColorAttDesc.loadAction = MTLLoadActionLoad; + mtlColorAttDesc.storeAction = MTLStoreActionMultisampleResolve; + + mtlColorAttDesc.texture = src_tex; + mtlColorAttDesc.resolveTexture = dst_tex; + mtlColorAttDesc.level = p_src_mipmap; + mtlColorAttDesc.slice = p_src_layer; + mtlColorAttDesc.resolveLevel = p_dst_mipmap; + mtlColorAttDesc.resolveSlice = p_dst_layer; + cb->encodeRenderCommandEncoderWithDescriptor(mtlRPD, @"Resolve Image"); +} + +void RenderingDeviceDriverMetal::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + id<MTLTexture> src_tex = rid::get(p_texture); + + if (src_tex.parentTexture) { + // Clear via the parent texture rather than the view. + src_tex = src_tex.parentTexture; + } + + PixelFormats &pf = *pixel_formats; + + if (pf.isDepthFormat(src_tex.pixelFormat) || pf.isStencilFormat(src_tex.pixelFormat)) { + ERR_FAIL_MSG("invalid: depth or stencil texture format"); + } + + MTLRenderPassDescriptor *desc = MTLRenderPassDescriptor.renderPassDescriptor; + + if (p_subresources.aspect.has_flag(TEXTURE_ASPECT_COLOR_BIT)) { + MTLRenderPassColorAttachmentDescriptor *caDesc = desc.colorAttachments[0]; + caDesc.texture = src_tex; + caDesc.loadAction = MTLLoadActionClear; + caDesc.storeAction = MTLStoreActionStore; + caDesc.clearColor = MTLClearColorMake(p_color.r, p_color.g, p_color.b, p_color.a); + + // Extract the mipmap levels that are to be updated. + uint32_t mipLvlStart = p_subresources.base_mipmap; + uint32_t mipLvlCnt = p_subresources.mipmap_count; + uint32_t mipLvlEnd = mipLvlStart + mipLvlCnt; + + uint32_t levelCount = src_tex.mipmapLevelCount; + + // Extract the cube or array layers (slices) that are to be updated. 
+ bool is3D = src_tex.textureType == MTLTextureType3D; + uint32_t layerStart = is3D ? 0 : p_subresources.base_layer; + uint32_t layerCnt = p_subresources.layer_count; + uint32_t layerEnd = layerStart + layerCnt; + + MetalFeatures const &features = (*metal_device_properties).features; + + // Iterate across mipmap levels and layers, and perform an empty render to clear each. + for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) { + ERR_FAIL_INDEX_MSG(mipLvl, levelCount, "mip level out of range"); + + caDesc.level = mipLvl; + + // If a 3D image, we need to get the depth for each level. + if (is3D) { + layerCnt = mipmapLevelSizeFromTexture(src_tex, mipLvl).depth; + layerEnd = layerStart + layerCnt; + } + + if ((features.layeredRendering && src_tex.sampleCount == 1) || features.multisampleLayeredRendering) { + // We can clear all layers at once. + if (is3D) { + caDesc.depthPlane = layerStart; + } else { + caDesc.slice = layerStart; + } + desc.renderTargetArrayLength = layerCnt; + cb->encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image"); + } else { + for (uint32_t layer = layerStart; layer < layerEnd; layer++) { + if (is3D) { + caDesc.depthPlane = layer; + } else { + caDesc.slice = layer; + } + cb->encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image"); + } + } + } + } +} + +// Reports whether p_type is a 3D, 2D array, 2D multisample array, or 1D array texture type. +API_AVAILABLE(macos(11.0), ios(14.0)) +bool isArrayTexture(MTLTextureType p_type) { + return (p_type == MTLTextureType3D || + p_type == MTLTextureType2DArray || + p_type == MTLTextureType2DMultisampleArray || + p_type == MTLTextureType1DArray); +} + +void RenderingDeviceDriverMetal::_copy_texture_buffer(CommandBufferID p_cmd_buffer, + CopySource p_source, + TextureID p_texture, + BufferID p_buffer, + VectorView<BufferTextureCopyRegion> p_regions) { + MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); + id<MTLBuffer> buffer = rid::get(p_buffer); + id<MTLTexture> texture = rid::get(p_texture); + + id<MTLBlitCommandEncoder> enc = cmd->blit_command_encoder(); + 
+ PixelFormats &pf = *pixel_formats; + MTLPixelFormat mtlPixFmt = texture.pixelFormat; + + MTLBlitOption options = MTLBlitOptionNone; + if (pf.isPVRTCFormat(mtlPixFmt)) { + options |= MTLBlitOptionRowLinearPVRTC; + } + + for (uint32_t i = 0; i < p_regions.size(); i++) { + BufferTextureCopyRegion region = p_regions[i]; + + uint32_t mip_level = region.texture_subresources.mipmap; + MTLOrigin txt_origin = MTLOriginMake(region.texture_offset.x, region.texture_offset.y, region.texture_offset.z); + MTLSize src_extent = mipmapLevelSizeFromTexture(texture, mip_level); + MTLSize txt_size = clampMTLSize(MTLSizeMake(region.texture_region_size.x, region.texture_region_size.y, region.texture_region_size.z), + txt_origin, + src_extent); + + uint32_t buffImgWd = region.texture_region_size.x; + uint32_t buffImgHt = region.texture_region_size.y; + + NSUInteger bytesPerRow = pf.getBytesPerRow(mtlPixFmt, buffImgWd); + NSUInteger bytesPerImg = pf.getBytesPerLayer(mtlPixFmt, bytesPerRow, buffImgHt); + + MTLBlitOption blit_options = options; + + if (pf.isDepthFormat(mtlPixFmt) && pf.isStencilFormat(mtlPixFmt)) { + bool want_depth = flags::all(region.texture_subresources.aspect, TEXTURE_ASPECT_DEPTH_BIT); + bool want_stencil = flags::all(region.texture_subresources.aspect, TEXTURE_ASPECT_STENCIL_BIT); + + // The stencil component is always 1 byte per pixel. + // Don't reduce depths of 32-bit depth/stencil formats. 
+ if (want_depth && !want_stencil) { + if (pf.getBytesPerTexel(mtlPixFmt) != 4) { + bytesPerRow -= buffImgWd; + bytesPerImg -= buffImgWd * buffImgHt; + } + blit_options |= MTLBlitOptionDepthFromDepthStencil; + } else if (want_stencil && !want_depth) { + bytesPerRow = buffImgWd; + bytesPerImg = buffImgWd * buffImgHt; + blit_options |= MTLBlitOptionStencilFromDepthStencil; + } + } + + if (!isArrayTexture(texture.textureType)) { + bytesPerImg = 0; + } + + if (p_source == CopySource::Buffer) { + for (uint32_t lyrIdx = 0; lyrIdx < region.texture_subresources.layer_count; lyrIdx++) { + [enc copyFromBuffer:buffer + sourceOffset:region.buffer_offset + (bytesPerImg * lyrIdx) + sourceBytesPerRow:bytesPerRow + sourceBytesPerImage:bytesPerImg + sourceSize:txt_size + toTexture:texture + destinationSlice:region.texture_subresources.base_layer + lyrIdx + destinationLevel:mip_level + destinationOrigin:txt_origin + options:blit_options]; + } + } else { + for (uint32_t lyrIdx = 0; lyrIdx < region.texture_subresources.layer_count; lyrIdx++) { + [enc copyFromTexture:texture + sourceSlice:region.texture_subresources.base_layer + lyrIdx + sourceLevel:mip_level + sourceOrigin:txt_origin + sourceSize:txt_size + toBuffer:buffer + destinationOffset:region.buffer_offset + (bytesPerImg * lyrIdx) + destinationBytesPerRow:bytesPerRow + destinationBytesPerImage:bytesPerImg + options:blit_options]; + } + } + } +} + +void RenderingDeviceDriverMetal::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<BufferTextureCopyRegion> p_regions) { + _copy_texture_buffer(p_cmd_buffer, CopySource::Buffer, p_dst_texture, p_src_buffer, p_regions); +} + +void RenderingDeviceDriverMetal::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView<BufferTextureCopyRegion> p_regions) { + 
_copy_texture_buffer(p_cmd_buffer, CopySource::Texture, p_src_texture, p_dst_buffer, p_regions); +} + +#pragma mark - Pipeline + +void RenderingDeviceDriverMetal::pipeline_free(PipelineID p_pipeline_id) { + MDPipeline *obj = (MDPipeline *)(p_pipeline_id.id); + delete obj; +} + +// ----- BINDING ----- + +// NOTE(review): p_dst_first_index is not referenced below; only p_data is forwarded to the shader. +void RenderingDeviceDriverMetal::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView<uint32_t> p_data) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + MDShader *shader = (MDShader *)(p_shader.id); + shader->encode_push_constant_data(p_data, cb); +} + +// ----- CACHE ----- + +// Builds the pipeline cache path: the user data dir followed by /metal/pipelines.<device name>[.editor].cache, +// where the device name is filename-validated, has spaces replaced by underscores, and is lower-cased. +String RenderingDeviceDriverMetal::_pipeline_get_cache_path() const { + String path = OS::get_singleton()->get_user_data_dir() + "/metal/pipelines"; + path += "." + context_device.name.validate_filename().replace(" ", "_").to_lower(); + if (Engine::get_singleton()->is_editor_hint()) { + path += ".editor"; + } + path += ".cache"; + + return path; +} + +bool RenderingDeviceDriverMetal::pipeline_cache_create(const Vector<uint8_t> &p_data) { + // NOTE(review): this unconditional return makes the rest of the function unreachable, so no binary archive is created here; confirm this is intentional. + return false; + CharString path = _pipeline_get_cache_path().utf8(); + NSString *nPath = [[NSString alloc] initWithBytesNoCopy:path.ptrw() + length:path.length() + encoding:NSUTF8StringEncoding + freeWhenDone:NO]; + MTLBinaryArchiveDescriptor *desc = [MTLBinaryArchiveDescriptor new]; + if ([[NSFileManager defaultManager] fileExistsAtPath:nPath]) { + desc.url = [NSURL fileURLWithPath:nPath]; + } + NSError *error = nil; + archive = [device newBinaryArchiveWithDescriptor:desc error:&error]; + return true; +} + +void RenderingDeviceDriverMetal::pipeline_cache_free() { + archive = nil; +} + +size_t RenderingDeviceDriverMetal::pipeline_cache_query_size() { + return archive_count * 1024; +} + +Vector<uint8_t> RenderingDeviceDriverMetal::pipeline_cache_serialize() { + if (!archive) { + return Vector<uint8_t>(); + } + + CharString path = _pipeline_get_cache_path().utf8(); + + NSString 
*nPath = [[NSString alloc] initWithBytesNoCopy:path.ptrw() + length:path.length() + encoding:NSUTF8StringEncoding + freeWhenDone:NO]; + NSURL *target = [NSURL fileURLWithPath:nPath]; + NSError *error = nil; + if ([archive serializeToURL:target error:&error]) { + return Vector<uint8_t>(); + } else { + print_line(error.localizedDescription.UTF8String); + return Vector<uint8_t>(); + } +} + +#pragma mark - Rendering + +// ----- SUBPASS ----- + +RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView<Attachment> p_attachments, VectorView<Subpass> p_subpasses, VectorView<SubpassDependency> p_subpass_dependencies, uint32_t p_view_count) { + PixelFormats &pf = *pixel_formats; + + size_t subpass_count = p_subpasses.size(); + + Vector<MDSubpass> subpasses; + subpasses.resize(subpass_count); + for (uint32_t i = 0; i < subpass_count; i++) { + MDSubpass &subpass = subpasses.write[i]; + subpass.subpass_index = i; + subpass.input_references = p_subpasses[i].input_references; + subpass.color_references = p_subpasses[i].color_references; + subpass.depth_stencil_reference = p_subpasses[i].depth_stencil_reference; + subpass.resolve_references = p_subpasses[i].resolve_references; + } + + static const MTLLoadAction LOAD_ACTIONS[] = { + [ATTACHMENT_LOAD_OP_LOAD] = MTLLoadActionLoad, + [ATTACHMENT_LOAD_OP_CLEAR] = MTLLoadActionClear, + [ATTACHMENT_LOAD_OP_DONT_CARE] = MTLLoadActionDontCare, + }; + + static const MTLStoreAction STORE_ACTIONS[] = { + [ATTACHMENT_STORE_OP_STORE] = MTLStoreActionStore, + [ATTACHMENT_STORE_OP_DONT_CARE] = MTLStoreActionDontCare, + }; + + Vector<MDAttachment> attachments; + attachments.resize(p_attachments.size()); + + for (uint32_t i = 0; i < p_attachments.size(); i++) { + Attachment const &a = p_attachments[i]; + MDAttachment &mda = attachments.write[i]; + MTLPixelFormat format = pf.getMTLPixelFormat(a.format); + mda.format = format; + if (a.samples > TEXTURE_SAMPLES_1) { + mda.samples = 
(*metal_device_properties).find_nearest_supported_sample_count(a.samples); + } + mda.loadAction = LOAD_ACTIONS[a.load_op]; + mda.storeAction = STORE_ACTIONS[a.store_op]; + bool is_depth = pf.isDepthFormat(format); + if (is_depth) { + mda.type |= MDAttachmentType::Depth; + } + bool is_stencil = pf.isStencilFormat(format); + if (is_stencil) { + mda.type |= MDAttachmentType::Stencil; + mda.stencilLoadAction = LOAD_ACTIONS[a.stencil_load_op]; + mda.stencilStoreAction = STORE_ACTIONS[a.stencil_store_op]; + } + if (!is_depth && !is_stencil) { + mda.type |= MDAttachmentType::Color; + } + } + MDRenderPass *obj = new MDRenderPass(attachments, subpasses); + return RenderPassID(obj); +} + +void RenderingDeviceDriverMetal::render_pass_free(RenderPassID p_render_pass) { + MDRenderPass *obj = (MDRenderPass *)(p_render_pass.id); + delete obj; +} + +// ----- COMMANDS ----- + +void RenderingDeviceDriverMetal::command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RenderPassClearValue> p_clear_values) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_begin_pass(p_render_pass, p_framebuffer, p_cmd_buffer_type, p_rect, p_clear_values); +} + +void RenderingDeviceDriverMetal::command_end_render_pass(CommandBufferID p_cmd_buffer) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_end_pass(); +} + +void RenderingDeviceDriverMetal::command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_next_subpass(); +} + +void RenderingDeviceDriverMetal::command_render_set_viewport(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_viewports) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_set_viewport(p_viewports); +} + +void 
RenderingDeviceDriverMetal::command_render_set_scissor(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_scissors) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_set_scissor(p_scissors); +} + +void RenderingDeviceDriverMetal::command_render_clear_attachments(CommandBufferID p_cmd_buffer, VectorView<AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_clear_attachments(p_attachment_clears, p_rects); +} + +void RenderingDeviceDriverMetal::command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->bind_pipeline(p_pipeline); +} + +void RenderingDeviceDriverMetal::command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_bind_uniform_set(p_uniform_set, p_shader, p_set_index); +} + +void RenderingDeviceDriverMetal::command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_draw(p_vertex_count, p_instance_count, p_base_vertex, p_first_instance); +} + +void RenderingDeviceDriverMetal::command_render_draw_indexed(CommandBufferID p_cmd_buffer, uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index, int32_t p_vertex_offset, uint32_t p_first_instance) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_draw_indexed(p_index_count, p_instance_count, p_first_index, p_vertex_offset, p_first_instance); +} + +void RenderingDeviceDriverMetal::command_render_draw_indexed_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { + MDCommandBuffer 
*cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_draw_indexed_indirect(p_indirect_buffer, p_offset, p_draw_count, p_stride); +} + +void RenderingDeviceDriverMetal::command_render_draw_indexed_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_draw_indexed_indirect_count(p_indirect_buffer, p_offset, p_count_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); +} + +void RenderingDeviceDriverMetal::command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_draw_indirect(p_indirect_buffer, p_offset, p_draw_count, p_stride); +} + +void RenderingDeviceDriverMetal::command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_draw_indirect_count(p_indirect_buffer, p_offset, p_count_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); +} + +void RenderingDeviceDriverMetal::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_bind_vertex_buffers(p_binding_count, p_buffers, p_offsets); +} + +void RenderingDeviceDriverMetal::command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->render_bind_index_buffer(p_buffer, p_format, p_offset); +} + 
// Forwards the blend constants to the command buffer wrapper stored in p_cmd_buffer.
void RenderingDeviceDriverMetal::command_render_set_blend_constants(CommandBufferID p_cmd_buffer, const Color &p_constants) {
	MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
	cb->render_set_blend_constants(p_constants);
}

// Line width is not configurable in this driver: any width other than 1.0 is
// reported as an error and nothing is recorded into the command buffer.
void RenderingDeviceDriverMetal::command_render_set_line_width(CommandBufferID p_cmd_buffer, float p_width) {
	if (!Math::is_equal_approx(p_width, 1.0f)) {
		ERR_FAIL_MSG("Setting line widths other than 1.0 is not supported by the Metal rendering driver.");
	}
}

// ----- PIPELINE -----

// Looks up the function named p_name in p_library and, when that function
// declares function constants, returns a specialized variant built from
// p_specialization_constants.
//
// p_library                  Wrapper whose `library` property yields the id<MTLLibrary> to search.
// p_name                     Name of the entry point; used both for the initial lookup and for
//                            the specialized function.
// p_specialization_constants Values matched against the function's constants by comparing
//                            `constant_id` with the Metal constant `index`.
//
// Returns the id<MTLFunction> on success, or ERR_CANT_CREATE when the library is missing,
// the function cannot be found, or specialization fails. If a matched constant has a type
// other than bool/float/int/uint, an error is printed and the unspecialized function is
// returned.
RenderingDeviceDriverMetal::Result<id<MTLFunction>> RenderingDeviceDriverMetal::_create_function(MDLibrary *p_library, NSString *p_name, VectorView<PipelineSpecializationConstant> &p_specialization_constants) {
	id<MTLLibrary> library = p_library.library;
	if (!library) {
		ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Failed to compile Metal library");
	}

	id<MTLFunction> function = [library newFunctionWithName:p_name];
	// Report the name that was actually requested rather than a fixed string.
	ERR_FAIL_NULL_V_MSG(function, ERR_CANT_CREATE, String("No function named ") + p_name.UTF8String);

	// Nothing to specialize: the plain function is already usable.
	if (function.functionConstantsDictionary.count == 0) {
		return function;
	}

	// The merge below walks both lists in ascending id order, so make sure the
	// function's constants are sorted by index; skip the sort when they already are.
	NSArray<MTLFunctionConstant *> *constants = function.functionConstantsDictionary.allValues;
	bool is_sorted = true;
	for (uint32_t i = 1; i < constants.count; i++) {
		if (constants[i - 1].index > constants[i].index) {
			is_sorted = false;
			break;
		}
	}

	if (!is_sorted) {
		constants = [constants sortedArrayUsingComparator:^NSComparisonResult(MTLFunctionConstant *a, MTLFunctionConstant *b) {
			if (a.index < b.index) {
				return NSOrderedAscending;
			} else if (a.index > b.index) {
				return NSOrderedDescending;
			} else {
				return NSOrderedSame;
			}
		}];
	}

	// Initialize an array of integers representing the indexes of p_specialization_constants.
	// The view itself is left untouched; only this index permutation is sorted.
	uint32_t *indexes = (uint32_t *)alloca(p_specialization_constants.size() * sizeof(uint32_t));
	for (uint32_t i = 0; i < p_specialization_constants.size(); i++) {
		indexes[i] = i;
	}
	// Sort the array of integers based on the values in p_specialization_constants.
	std::sort(indexes, &indexes[p_specialization_constants.size()], [&](uint32_t a, uint32_t b) {
		return p_specialization_constants[a].constant_id < p_specialization_constants[b].constant_id;
	});

	// Merge-walk the two sorted sequences, assigning a value wherever the ids match.
	MTLFunctionConstantValues *constantValues = [MTLFunctionConstantValues new];
	uint32_t i = 0;
	uint32_t j = 0;
	while (i < constants.count && j < p_specialization_constants.size()) {
		MTLFunctionConstant *curr = constants[i];
		PipelineSpecializationConstant const &sc = p_specialization_constants[indexes[j]];
		if (curr.index == sc.constant_id) {
			switch (curr.type) {
				case MTLDataTypeBool:
				case MTLDataTypeFloat:
				case MTLDataTypeInt:
				case MTLDataTypeUInt: {
					// NOTE(review): all four types are read through `int_value`; this presumably
					// relies on the value members sharing storage — confirm against the struct.
					[constantValues setConstantValue:&sc.int_value
												type:curr.type
											 atIndex:sc.constant_id];
				} break;
				default:
					ERR_FAIL_V_MSG(function, "Invalid specialization constant type");
			}
			i++;
			j++;
		} else if (curr.index < sc.constant_id) {
			// Function constant with no supplied value; leave it unset.
			i++;
		} else {
			// Supplied value that the function does not declare; ignore it.
			j++;
		}
	}

	// If the walk stopped on the driver's alignment constant, supply its value here.
	if (i != constants.count) {
		MTLFunctionConstant *curr = constants[i];
		if (curr.index == R32UI_ALIGNMENT_CONSTANT_ID) {
			uint32_t alignment = 16; // TODO(sgc): is this always correct?
			[constantValues setConstantValue:&alignment
										type:curr.type
									 atIndex:curr.index];
			i++;
		}
	}

	// Specialize the same entry point that was looked up above.
	NSError *err = nil;
	function = [library newFunctionWithName:p_name
							 constantValues:constantValues
									  error:&err];
	ERR_FAIL_NULL_V_MSG(function, ERR_CANT_CREATE, String("specialized function failed: ") + err.localizedDescription.UTF8String);

	return function;
}

// RDD::PolygonCullMode == MTLCullMode.
static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_DISABLED, MTLCullModeNone));
static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_FRONT, MTLCullModeFront));
static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_BACK, MTLCullModeBack));

// RDD::StencilOperation == MTLStencilOperation.
+static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_KEEP, MTLStencilOperationKeep)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_ZERO, MTLStencilOperationZero)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_REPLACE, MTLStencilOperationReplace)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_CLAMP, MTLStencilOperationIncrementClamp)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_CLAMP, MTLStencilOperationDecrementClamp)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INVERT, MTLStencilOperationInvert)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_WRAP, MTLStencilOperationIncrementWrap)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_WRAP, MTLStencilOperationDecrementWrap)); + +// RDD::BlendOperation == MTLBlendOperation. +static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_ADD, MTLBlendOperationAdd)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_SUBTRACT, MTLBlendOperationSubtract)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_REVERSE_SUBTRACT, MTLBlendOperationReverseSubtract)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MINIMUM, MTLBlendOperationMin)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MAXIMUM, MTLBlendOperationMax)); + +RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create( + ShaderID p_shader, + VertexFormatID p_vertex_format, + RenderPrimitive p_render_primitive, + PipelineRasterizationState p_rasterization_state, + PipelineMultisampleState p_multisample_state, + PipelineDepthStencilState p_depth_stencil_state, + PipelineColorBlendState p_blend_state, + VectorView<int32_t> p_color_attachments, + BitField<PipelineDynamicStateFlags> p_dynamic_state, + RenderPassID p_render_pass, + uint32_t p_render_subpass, + VectorView<PipelineSpecializationConstant> p_specialization_constants) { + MDRenderShader *shader = (MDRenderShader *)(p_shader.id); + MTLVertexDescriptor *vert_desc = rid::get(p_vertex_format); + MDRenderPass *pass = (MDRenderPass 
*)(p_render_pass.id); + + os_signpost_id_t reflect_id = os_signpost_id_make_with_pointer(LOG_INTERVALS, shader); + os_signpost_interval_begin(LOG_INTERVALS, reflect_id, "render_pipeline_create", "shader_name=%{public}s", shader->name.get_data()); + DEFER([=]() { + os_signpost_interval_end(LOG_INTERVALS, reflect_id, "render_pipeline_create"); + }); + + os_signpost_event_emit(LOG_DRIVER, OS_SIGNPOST_ID_EXCLUSIVE, "create_pipeline"); + + MTLRenderPipelineDescriptor *desc = [MTLRenderPipelineDescriptor new]; + + { + MDSubpass const &subpass = pass->subpasses[p_render_subpass]; + for (uint32_t i = 0; i < subpass.color_references.size(); i++) { + uint32_t attachment = subpass.color_references[i].attachment; + if (attachment != AttachmentReference::UNUSED) { + MDAttachment const &a = pass->attachments[attachment]; + desc.colorAttachments[i].pixelFormat = a.format; + } + } + + if (subpass.depth_stencil_reference.attachment != AttachmentReference::UNUSED) { + uint32_t attachment = subpass.depth_stencil_reference.attachment; + MDAttachment const &a = pass->attachments[attachment]; + + if (a.type & MDAttachmentType::Depth) { + desc.depthAttachmentPixelFormat = a.format; + } + + if (a.type & MDAttachmentType::Stencil) { + desc.stencilAttachmentPixelFormat = a.format; + } + } + } + + desc.vertexDescriptor = vert_desc; + desc.label = [NSString stringWithUTF8String:shader->name.get_data()]; + + // Input assembly & tessellation. 
+ + MDRenderPipeline *pipeline = new MDRenderPipeline(); + + switch (p_render_primitive) { + case RENDER_PRIMITIVE_POINTS: + desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassPoint; + break; + case RENDER_PRIMITIVE_LINES: + case RENDER_PRIMITIVE_LINES_WITH_ADJACENCY: + case RENDER_PRIMITIVE_LINESTRIPS_WITH_ADJACENCY: + case RENDER_PRIMITIVE_LINESTRIPS: + desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassLine; + break; + case RENDER_PRIMITIVE_TRIANGLES: + case RENDER_PRIMITIVE_TRIANGLE_STRIPS: + case RENDER_PRIMITIVE_TRIANGLES_WITH_ADJACENCY: + case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_AJACENCY: + case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX: + desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassTriangle; + break; + case RENDER_PRIMITIVE_TESSELATION_PATCH: + desc.maxTessellationFactor = p_rasterization_state.patch_control_points; + desc.tessellationPartitionMode = MTLTessellationPartitionModeInteger; + ERR_FAIL_V_MSG(PipelineID(), "tessellation not implemented"); + break; + case RENDER_PRIMITIVE_MAX: + default: + desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassUnspecified; + break; + } + + switch (p_render_primitive) { + case RENDER_PRIMITIVE_POINTS: + pipeline->raster_state.render_primitive = MTLPrimitiveTypePoint; + break; + case RENDER_PRIMITIVE_LINES: + case RENDER_PRIMITIVE_LINES_WITH_ADJACENCY: + pipeline->raster_state.render_primitive = MTLPrimitiveTypeLine; + break; + case RENDER_PRIMITIVE_LINESTRIPS: + case RENDER_PRIMITIVE_LINESTRIPS_WITH_ADJACENCY: + pipeline->raster_state.render_primitive = MTLPrimitiveTypeLineStrip; + break; + case RENDER_PRIMITIVE_TRIANGLES: + case RENDER_PRIMITIVE_TRIANGLES_WITH_ADJACENCY: + pipeline->raster_state.render_primitive = MTLPrimitiveTypeTriangle; + break; + case RENDER_PRIMITIVE_TRIANGLE_STRIPS: + case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_AJACENCY: + case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX: + pipeline->raster_state.render_primitive = MTLPrimitiveTypeTriangleStrip; + break; 
+ default: + break; + } + + // Rasterization. + desc.rasterizationEnabled = !p_rasterization_state.discard_primitives; + pipeline->raster_state.clip_mode = p_rasterization_state.enable_depth_clamp ? MTLDepthClipModeClamp : MTLDepthClipModeClip; + pipeline->raster_state.fill_mode = p_rasterization_state.wireframe ? MTLTriangleFillModeLines : MTLTriangleFillModeFill; + + static const MTLCullMode CULL_MODE[3] = { + MTLCullModeNone, + MTLCullModeFront, + MTLCullModeBack, + }; + pipeline->raster_state.cull_mode = CULL_MODE[p_rasterization_state.cull_mode]; + pipeline->raster_state.winding = (p_rasterization_state.front_face == POLYGON_FRONT_FACE_CLOCKWISE) ? MTLWindingClockwise : MTLWindingCounterClockwise; + pipeline->raster_state.depth_bias.enabled = p_rasterization_state.depth_bias_enabled; + pipeline->raster_state.depth_bias.depth_bias = p_rasterization_state.depth_bias_constant_factor; + pipeline->raster_state.depth_bias.slope_scale = p_rasterization_state.depth_bias_slope_factor; + pipeline->raster_state.depth_bias.clamp = p_rasterization_state.depth_bias_clamp; + // In Metal there is no line width. + if (!Math::is_equal_approx(p_rasterization_state.line_width, 1.0f)) { + WARN_PRINT("unsupported: line width"); + } + + // Multisample. + if (p_multisample_state.enable_sample_shading) { + WARN_PRINT("unsupported: multi-sample shading"); + } + + if (p_multisample_state.sample_count > TEXTURE_SAMPLES_1) { + pipeline->sample_count = (*metal_device_properties).find_nearest_supported_sample_count(p_multisample_state.sample_count); + } + desc.rasterSampleCount = static_cast<NSUInteger>(pipeline->sample_count); + desc.alphaToCoverageEnabled = p_multisample_state.enable_alpha_to_coverage; + desc.alphaToOneEnabled = p_multisample_state.enable_alpha_to_one; + + // Depth stencil. 
+ if (p_depth_stencil_state.enable_depth_test && desc.depthAttachmentPixelFormat != MTLPixelFormatInvalid) { + pipeline->raster_state.depth_test.enabled = true; + MTLDepthStencilDescriptor *ds_desc = [MTLDepthStencilDescriptor new]; + ds_desc.depthWriteEnabled = p_depth_stencil_state.enable_depth_write; + ds_desc.depthCompareFunction = COMPARE_OPERATORS[p_depth_stencil_state.depth_compare_operator]; + if (p_depth_stencil_state.enable_depth_range) { + WARN_PRINT("unsupported: depth range"); + } + + if (p_depth_stencil_state.enable_stencil) { + pipeline->raster_state.stencil.front_reference = p_depth_stencil_state.front_op.reference; + pipeline->raster_state.stencil.back_reference = p_depth_stencil_state.back_op.reference; + + { + // Front. + MTLStencilDescriptor *sd = [MTLStencilDescriptor new]; + sd.stencilFailureOperation = STENCIL_OPERATIONS[p_depth_stencil_state.front_op.fail]; + sd.depthStencilPassOperation = STENCIL_OPERATIONS[p_depth_stencil_state.front_op.pass]; + sd.depthFailureOperation = STENCIL_OPERATIONS[p_depth_stencil_state.front_op.depth_fail]; + sd.stencilCompareFunction = COMPARE_OPERATORS[p_depth_stencil_state.front_op.compare]; + sd.readMask = p_depth_stencil_state.front_op.compare_mask; + sd.writeMask = p_depth_stencil_state.front_op.write_mask; + ds_desc.frontFaceStencil = sd; + } + { + // Back. 
+ MTLStencilDescriptor *sd = [MTLStencilDescriptor new]; + sd.stencilFailureOperation = STENCIL_OPERATIONS[p_depth_stencil_state.back_op.fail]; + sd.depthStencilPassOperation = STENCIL_OPERATIONS[p_depth_stencil_state.back_op.pass]; + sd.depthFailureOperation = STENCIL_OPERATIONS[p_depth_stencil_state.back_op.depth_fail]; + sd.stencilCompareFunction = COMPARE_OPERATORS[p_depth_stencil_state.back_op.compare]; + sd.readMask = p_depth_stencil_state.back_op.compare_mask; + sd.writeMask = p_depth_stencil_state.back_op.write_mask; + ds_desc.backFaceStencil = sd; + } + } + + pipeline->depth_stencil = [device newDepthStencilStateWithDescriptor:ds_desc]; + ERR_FAIL_NULL_V_MSG(pipeline->depth_stencil, PipelineID(), "Failed to create depth stencil state"); + } else { + // TODO(sgc): FB13671991 raised as Apple docs state calling setDepthStencilState:nil is valid, but currently generates an exception + pipeline->depth_stencil = get_resource_cache().get_depth_stencil_state(false, false); + } + + // Blend state. 
+ { + for (uint32_t i = 0; i < p_color_attachments.size(); i++) { + if (p_color_attachments[i] == ATTACHMENT_UNUSED) { + continue; + } + + const PipelineColorBlendState::Attachment &bs = p_blend_state.attachments[i]; + + MTLRenderPipelineColorAttachmentDescriptor *ca_desc = desc.colorAttachments[p_color_attachments[i]]; + ca_desc.blendingEnabled = bs.enable_blend; + + ca_desc.sourceRGBBlendFactor = BLEND_FACTORS[bs.src_color_blend_factor]; + ca_desc.destinationRGBBlendFactor = BLEND_FACTORS[bs.dst_color_blend_factor]; + ca_desc.rgbBlendOperation = BLEND_OPERATIONS[bs.color_blend_op]; + + ca_desc.sourceAlphaBlendFactor = BLEND_FACTORS[bs.src_alpha_blend_factor]; + ca_desc.destinationAlphaBlendFactor = BLEND_FACTORS[bs.dst_alpha_blend_factor]; + ca_desc.alphaBlendOperation = BLEND_OPERATIONS[bs.alpha_blend_op]; + + ca_desc.writeMask = MTLColorWriteMaskNone; + if (bs.write_r) { + ca_desc.writeMask |= MTLColorWriteMaskRed; + } + if (bs.write_g) { + ca_desc.writeMask |= MTLColorWriteMaskGreen; + } + if (bs.write_b) { + ca_desc.writeMask |= MTLColorWriteMaskBlue; + } + if (bs.write_a) { + ca_desc.writeMask |= MTLColorWriteMaskAlpha; + } + } + + pipeline->raster_state.blend.r = p_blend_state.blend_constant.r; + pipeline->raster_state.blend.g = p_blend_state.blend_constant.g; + pipeline->raster_state.blend.b = p_blend_state.blend_constant.b; + pipeline->raster_state.blend.a = p_blend_state.blend_constant.a; + } + + // Dynamic state. + + if (p_dynamic_state.has_flag(DYNAMIC_STATE_DEPTH_BIAS)) { + pipeline->raster_state.depth_bias.enabled = true; + } + + if (p_dynamic_state.has_flag(DYNAMIC_STATE_BLEND_CONSTANTS)) { + pipeline->raster_state.blend.enabled = true; + } + + if (p_dynamic_state.has_flag(DYNAMIC_STATE_DEPTH_BOUNDS)) { + // TODO(sgc): ?? + } + + if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { + // TODO(sgc): ?? + } + + if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_WRITE_MASK)) { + // TODO(sgc): ?? 
+ } + + if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_REFERENCE)) { + pipeline->raster_state.stencil.enabled = true; + } + + if (shader->vert != nil) { + Result<id<MTLFunction>> function_or_err = _create_function(shader->vert, @"main0", p_specialization_constants); + ERR_FAIL_COND_V(std::holds_alternative<Error>(function_or_err), PipelineID()); + desc.vertexFunction = std::get<id<MTLFunction>>(function_or_err); + } + + if (shader->frag != nil) { + Result<id<MTLFunction>> function_or_err = _create_function(shader->frag, @"main0", p_specialization_constants); + ERR_FAIL_COND_V(std::holds_alternative<Error>(function_or_err), PipelineID()); + desc.fragmentFunction = std::get<id<MTLFunction>>(function_or_err); + } + + if (archive) { + desc.binaryArchives = @[ archive ]; + } + + NSError *error = nil; + pipeline->state = [device newRenderPipelineStateWithDescriptor:desc + error:&error]; + pipeline->shader = shader; + + ERR_FAIL_COND_V_MSG(error != nil, PipelineID(), ([NSString stringWithFormat:@"error creating pipeline: %@", error.localizedDescription].UTF8String)); + + if (archive) { + if ([archive addRenderPipelineFunctionsWithDescriptor:desc error:&error]) { + archive_count += 1; + } else { + print_error(error.localizedDescription.UTF8String); + } + } + + return PipelineID(pipeline); +} + +#pragma mark - Compute + +// ----- COMMANDS ----- + +void RenderingDeviceDriverMetal::command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->bind_pipeline(p_pipeline); +} + +void RenderingDeviceDriverMetal::command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->compute_bind_uniform_set(p_uniform_set, p_shader, p_set_index); +} + +void RenderingDeviceDriverMetal::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, 
uint32_t p_y_groups, uint32_t p_z_groups) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->compute_dispatch(p_x_groups, p_y_groups, p_z_groups); +} + +void RenderingDeviceDriverMetal::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + cb->compute_dispatch_indirect(p_indirect_buffer, p_offset); +} + +// ----- PIPELINE ----- + +RDD::PipelineID RenderingDeviceDriverMetal::compute_pipeline_create(ShaderID p_shader, VectorView<PipelineSpecializationConstant> p_specialization_constants) { + MDComputeShader *shader = (MDComputeShader *)(p_shader.id); + + os_signpost_id_t reflect_id = os_signpost_id_make_with_pointer(LOG_INTERVALS, shader); + os_signpost_interval_begin(LOG_INTERVALS, reflect_id, "compute_pipeline_create", "shader_name=%{public}s", shader->name.get_data()); + DEFER([=]() { + os_signpost_interval_end(LOG_INTERVALS, reflect_id, "compute_pipeline_create"); + }); + + os_signpost_event_emit(LOG_DRIVER, OS_SIGNPOST_ID_EXCLUSIVE, "create_pipeline"); + + Result<id<MTLFunction>> function_or_err = _create_function(shader->kernel, @"main0", p_specialization_constants); + ERR_FAIL_COND_V(std::holds_alternative<Error>(function_or_err), PipelineID()); + id<MTLFunction> function = std::get<id<MTLFunction>>(function_or_err); + + MTLComputePipelineDescriptor *desc = [MTLComputePipelineDescriptor new]; + desc.computeFunction = function; + if (archive) { + desc.binaryArchives = @[ archive ]; + } + + NSError *error; + id<MTLComputePipelineState> state = [device newComputePipelineStateWithDescriptor:desc + options:MTLPipelineOptionNone + reflection:nil + error:&error]; + ERR_FAIL_COND_V_MSG(error != nil, PipelineID(), ([NSString stringWithFormat:@"error creating pipeline: %@", error.localizedDescription].UTF8String)); + + MDComputePipeline *pipeline = new MDComputePipeline(state); + pipeline->compute_state.local = shader->local; + 
pipeline->shader = shader; + + if (archive) { + if ([archive addComputePipelineFunctionsWithDescriptor:desc error:&error]) { + archive_count += 1; + } else { + print_error(error.localizedDescription.UTF8String); + } + } + + return PipelineID(pipeline); +} + +#pragma mark - Queries + +// ----- TIMESTAMP ----- + +RDD::QueryPoolID RenderingDeviceDriverMetal::timestamp_query_pool_create(uint32_t p_query_count) { + return QueryPoolID(1); +} + +void RenderingDeviceDriverMetal::timestamp_query_pool_free(QueryPoolID p_pool_id) { +} + +void RenderingDeviceDriverMetal::timestamp_query_pool_get_results(QueryPoolID p_pool_id, uint32_t p_query_count, uint64_t *r_results) { + // Metal doesn't support timestamp queries, so we just clear the buffer. + bzero(r_results, p_query_count * sizeof(uint64_t)); +} + +uint64_t RenderingDeviceDriverMetal::timestamp_query_result_to_time(uint64_t p_result) { + return p_result; +} + +void RenderingDeviceDriverMetal::command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) { +} + +void RenderingDeviceDriverMetal::command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) { +} + +#pragma mark - Labels + +void RenderingDeviceDriverMetal::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)p_label_name length:strlen(p_label_name) encoding:NSUTF8StringEncoding freeWhenDone:NO]; + [cb->get_command_buffer() pushDebugGroup:s]; +} + +void RenderingDeviceDriverMetal::command_end_label(CommandBufferID p_cmd_buffer) { + MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); + [cb->get_command_buffer() popDebugGroup]; +} + +#pragma mark - Debug + +void RenderingDeviceDriverMetal::command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) { + // TODO: Implement. 
+} + +#pragma mark - Submission + +void RenderingDeviceDriverMetal::begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) { +} + +void RenderingDeviceDriverMetal::end_segment() { +} + +#pragma mark - Misc + +void RenderingDeviceDriverMetal::set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) { + switch (p_type) { + case OBJECT_TYPE_TEXTURE: { + id<MTLTexture> tex = rid::get(p_driver_id); + tex.label = [NSString stringWithUTF8String:p_name.utf8().get_data()]; + } break; + case OBJECT_TYPE_SAMPLER: { + // Can't set label after creation. + } break; + case OBJECT_TYPE_BUFFER: { + id<MTLBuffer> buffer = rid::get(p_driver_id); + buffer.label = [NSString stringWithUTF8String:p_name.utf8().get_data()]; + } break; + case OBJECT_TYPE_SHADER: { + NSString *label = [NSString stringWithUTF8String:p_name.utf8().get_data()]; + MDShader *shader = (MDShader *)(p_driver_id.id); + if (MDRenderShader *rs = dynamic_cast<MDRenderShader *>(shader); rs != nullptr) { + [rs->vert setLabel:label]; + [rs->frag setLabel:label]; + } else if (MDComputeShader *cs = dynamic_cast<MDComputeShader *>(shader); cs != nullptr) { + [cs->kernel setLabel:label]; + } else { + DEV_ASSERT(false); + } + } break; + case OBJECT_TYPE_UNIFORM_SET: { + MDUniformSet *set = (MDUniformSet *)(p_driver_id.id); + for (KeyValue<MDShader *, BoundUniformSet> &keyval : set->bound_uniforms) { + keyval.value.buffer.label = [NSString stringWithUTF8String:p_name.utf8().get_data()]; + } + } break; + case OBJECT_TYPE_PIPELINE: { + // Can't set label after creation. 
+ } break; + default: { + DEV_ASSERT(false); + } + } +} + +uint64_t RenderingDeviceDriverMetal::get_resource_native_handle(DriverResource p_type, ID p_driver_id) { + switch (p_type) { + case DRIVER_RESOURCE_LOGICAL_DEVICE: { + return 0; + } + case DRIVER_RESOURCE_PHYSICAL_DEVICE: { + return 0; + } + case DRIVER_RESOURCE_TOPMOST_OBJECT: { + return 0; + } + case DRIVER_RESOURCE_COMMAND_QUEUE: { + return 0; + } + case DRIVER_RESOURCE_QUEUE_FAMILY: { + return 0; + } + case DRIVER_RESOURCE_TEXTURE: { + return p_driver_id.id; + } + case DRIVER_RESOURCE_TEXTURE_VIEW: { + return p_driver_id.id; + } + case DRIVER_RESOURCE_TEXTURE_DATA_FORMAT: { + return 0; + } + case DRIVER_RESOURCE_SAMPLER: { + return p_driver_id.id; + } + case DRIVER_RESOURCE_UNIFORM_SET: + return 0; + case DRIVER_RESOURCE_BUFFER: { + return p_driver_id.id; + } + case DRIVER_RESOURCE_COMPUTE_PIPELINE: + return 0; + case DRIVER_RESOURCE_RENDER_PIPELINE: + return 0; + default: { + return 0; + } + } +} + +uint64_t RenderingDeviceDriverMetal::get_total_memory_used() { + return device.currentAllocatedSize; +} + +uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) { + MetalDeviceProperties const &props = (*metal_device_properties); + MetalLimits const &limits = props.limits; + +#if defined(DEV_ENABLED) +#define UNKNOWN(NAME) \ + case NAME: \ + WARN_PRINT_ONCE("Returning maximum value for unknown limit " #NAME "."); \ + return (uint64_t)1 << 30; +#else +#define UNKNOWN(NAME) \ + case NAME: \ + return (uint64_t)1 << 30 +#endif + + // clang-format off + switch (p_limit) { + case LIMIT_MAX_BOUND_UNIFORM_SETS: + return limits.maxBoundDescriptorSets; + case LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS: + return limits.maxColorAttachments; + case LIMIT_MAX_TEXTURES_PER_UNIFORM_SET: + return limits.maxTexturesPerArgumentBuffer; + case LIMIT_MAX_SAMPLERS_PER_UNIFORM_SET: + return limits.maxSamplersPerArgumentBuffer; + case LIMIT_MAX_STORAGE_BUFFERS_PER_UNIFORM_SET: + return limits.maxBuffersPerArgumentBuffer; + 
case LIMIT_MAX_STORAGE_IMAGES_PER_UNIFORM_SET: + return limits.maxTexturesPerArgumentBuffer; + case LIMIT_MAX_UNIFORM_BUFFERS_PER_UNIFORM_SET: + return limits.maxBuffersPerArgumentBuffer; + case LIMIT_MAX_DRAW_INDEXED_INDEX: + return limits.maxDrawIndexedIndexValue; + case LIMIT_MAX_FRAMEBUFFER_HEIGHT: + return limits.maxFramebufferHeight; + case LIMIT_MAX_FRAMEBUFFER_WIDTH: + return limits.maxFramebufferWidth; + case LIMIT_MAX_TEXTURE_ARRAY_LAYERS: + return limits.maxImageArrayLayers; + case LIMIT_MAX_TEXTURE_SIZE_1D: + return limits.maxImageDimension1D; + case LIMIT_MAX_TEXTURE_SIZE_2D: + return limits.maxImageDimension2D; + case LIMIT_MAX_TEXTURE_SIZE_3D: + return limits.maxImageDimension3D; + case LIMIT_MAX_TEXTURE_SIZE_CUBE: + return limits.maxImageDimensionCube; + case LIMIT_MAX_TEXTURES_PER_SHADER_STAGE: + return limits.maxTexturesPerArgumentBuffer; + case LIMIT_MAX_SAMPLERS_PER_SHADER_STAGE: + return limits.maxSamplersPerArgumentBuffer; + case LIMIT_MAX_STORAGE_BUFFERS_PER_SHADER_STAGE: + return limits.maxBuffersPerArgumentBuffer; + case LIMIT_MAX_STORAGE_IMAGES_PER_SHADER_STAGE: + return limits.maxTexturesPerArgumentBuffer; + case LIMIT_MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE: + return limits.maxBuffersPerArgumentBuffer; + case LIMIT_MAX_PUSH_CONSTANT_SIZE: + return limits.maxBufferLength; + case LIMIT_MAX_UNIFORM_BUFFER_SIZE: + return limits.maxBufferLength; + case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTE_OFFSET: + return limits.maxVertexDescriptorLayoutStride; + case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTES: + return limits.maxVertexInputAttributes; + case LIMIT_MAX_VERTEX_INPUT_BINDINGS: + return limits.maxVertexInputBindings; + case LIMIT_MAX_VERTEX_INPUT_BINDING_STRIDE: + return limits.maxVertexInputBindingStride; + case LIMIT_MIN_UNIFORM_BUFFER_OFFSET_ALIGNMENT: + return limits.minUniformBufferOffsetAlignment; + case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X: + return limits.maxComputeWorkGroupCount.width; + case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Y: + return 
limits.maxComputeWorkGroupCount.height; + case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Z: + return limits.maxComputeWorkGroupCount.depth; + case LIMIT_MAX_COMPUTE_WORKGROUP_INVOCATIONS: + return std::max({ limits.maxThreadsPerThreadGroup.width, limits.maxThreadsPerThreadGroup.height, limits.maxThreadsPerThreadGroup.depth }); + case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X: + return limits.maxThreadsPerThreadGroup.width; + case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y: + return limits.maxThreadsPerThreadGroup.height; + case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z: + return limits.maxThreadsPerThreadGroup.depth; + case LIMIT_MAX_VIEWPORT_DIMENSIONS_X: + return limits.maxViewportDimensionX; + case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y: + return limits.maxViewportDimensionY; + case LIMIT_SUBGROUP_SIZE: + // MoltenVK sets the subgroupSize to the same as the maxSubgroupSize. + return limits.maxSubgroupSize; + case LIMIT_SUBGROUP_MIN_SIZE: + return limits.minSubgroupSize; + case LIMIT_SUBGROUP_MAX_SIZE: + return limits.maxSubgroupSize; + case LIMIT_SUBGROUP_IN_SHADERS: + return (int64_t)limits.subgroupSupportedShaderStages; + case LIMIT_SUBGROUP_OPERATIONS: + return (int64_t)limits.subgroupSupportedOperations; + UNKNOWN(LIMIT_VRS_TEXEL_WIDTH); + UNKNOWN(LIMIT_VRS_TEXEL_HEIGHT); + default: + ERR_FAIL_V(0); + } + // clang-format on + return 0; +} + +uint64_t RenderingDeviceDriverMetal::api_trait_get(ApiTrait p_trait) { + switch (p_trait) { + case API_TRAIT_HONORS_PIPELINE_BARRIERS: + return 0; + default: + return RenderingDeviceDriver::api_trait_get(p_trait); + } +} + +bool RenderingDeviceDriverMetal::has_feature(Features p_feature) { + switch (p_feature) { + case SUPPORTS_MULTIVIEW: + return false; + case SUPPORTS_FSR_HALF_FLOAT: + return true; + case SUPPORTS_ATTACHMENT_VRS: + // TODO(sgc): Maybe supported via https://developer.apple.com/documentation/metal/render_passes/rendering_at_different_rasterization_rates?language=objc + // See also: + // + // * 
https://forum.beyond3d.com/threads/variable-rate-shading-vs-variable-rate-rasterization.62243/post-2191363
+			//
+			return false;
+		case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS:
+			return true;
+		default:
+			return false;
+	}
+}
+
+const RDD::MultiviewCapabilities &RenderingDeviceDriverMetal::get_multiview_capabilities() {
+	return multiview_capabilities;
+}
+
+String RenderingDeviceDriverMetal::get_api_version() const {
+	return vformat("%d.%d", version_major, version_minor);
+}
+
+String RenderingDeviceDriverMetal::get_pipeline_cache_uuid() const {
+	return pipeline_cache_id;
+}
+
+const RDD::Capabilities &RenderingDeviceDriverMetal::get_capabilities() const {
+	return capabilities;
+}
+
+bool RenderingDeviceDriverMetal::is_composite_alpha_supported(CommandQueueID p_queue) const {
+	// The CAMetalLayer.opaque property is configured according to this global setting.
+	return OS::get_singleton()->is_layered_allowed();
+}
+
+size_t RenderingDeviceDriverMetal::get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const {
+	return [device minimumLinearTextureAlignmentForPixelFormat:pixel_formats->getMTLPixelFormat(p_format)];
+}
+
+size_t RenderingDeviceDriverMetal::get_texel_buffer_alignment_for_format(MTLPixelFormat p_format) const {
+	return [device minimumLinearTextureAlignmentForPixelFormat:p_format];
+}
+
+/******************/
+
+RenderingDeviceDriverMetal::RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver) :
+		context_driver(p_context_driver) {
+	DEV_ASSERT(p_context_driver != nullptr);
+
+	if (String res = OS::get_singleton()->get_environment("GODOT_MTL_SHADER_LOAD_STRATEGY"); res == U"lazy") {
+		_shader_load_strategy = ShaderLoadStrategy::LAZY;
+	}
+}
+
+// Releases the objects owned by the driver: the command buffers, the cached shader entries,
+// and the helper objects allocated with memnew() in initialize().
+RenderingDeviceDriverMetal::~RenderingDeviceDriverMetal() {
+	for (MDCommandBuffer *cb : command_buffers) {
+		delete cb;
+	}
+
+	for (KeyValue<SHA256Digest, ShaderCacheEntry *> &kv : _shader_cache) {
+		memdelete(kv.value);
+	}
+
+	// initialize() creates both of these with memnew(); release them here so they do not leak.
+	// NOTE(review): assumes both members default to nullptr in the class declaration, so a
+	// driver that was never initialized skips these branches - confirm against the header.
+	if (pixel_formats != nullptr) {
+		memdelete(pixel_formats);
+		pixel_formats = nullptr;
+	}
+	if (metal_device_properties != nullptr) {
+		memdelete(metal_device_properties);
+		metal_device_properties = nullptr;
+	}
+}
+
+#pragma mark - Initialization
+
+Error
RenderingDeviceDriverMetal::_create_device() { + device = context_driver->get_metal_device(); + + device_queue = [device newCommandQueue]; + ERR_FAIL_NULL_V(device_queue, ERR_CANT_CREATE); + + device_scope = [MTLCaptureManager.sharedCaptureManager newCaptureScopeWithCommandQueue:device_queue]; + device_scope.label = @"Godot Frame"; + [device_scope beginScope]; // Allow Xcode to capture the first frame, if desired. + + resource_cache = std::make_unique<MDResourceCache>(this); + + return OK; +} + +Error RenderingDeviceDriverMetal::_check_capabilities() { + MTLCompileOptions *options = [MTLCompileOptions new]; + version_major = (options.languageVersion >> 0x10) & 0xff; + version_minor = (options.languageVersion >> 0x00) & 0xff; + + capabilities.device_family = DEVICE_METAL; + capabilities.version_major = version_major; + capabilities.version_minor = version_minor; + + return OK; +} + +Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p_frame_count) { + context_device = context_driver->device_get(p_device_index); + Error err = _create_device(); + ERR_FAIL_COND_V(err, ERR_CANT_CREATE); + + err = _check_capabilities(); + ERR_FAIL_COND_V(err, ERR_CANT_CREATE); + + // Set the pipeline cache ID based on the Metal version. + pipeline_cache_id = "metal-driver-" + get_api_version(); + + metal_device_properties = memnew(MetalDeviceProperties(device)); + pixel_formats = memnew(PixelFormats(device)); + + // Check required features and abort if any of them is missing. + if (!metal_device_properties->features.imageCubeArray) { + // NOTE: Apple A11 (Apple4) GPUs support image cube arrays, which are devices from 2017 and newer. 
+ String error_string = vformat("Your Apple GPU does not support the following features which are required to use Metal-based renderers in Godot:\n\n"); + if (!metal_device_properties->features.imageCubeArray) { + error_string += "- No support for image cube arrays.\n"; + } + +#if defined(IOS_ENABLED) + // iOS platform ports currently don't exit themselves when this method returns `ERR_CANT_CREATE`. + OS::get_singleton()->alert(error_string + "\nClick OK to exit (black screen will be visible)."); +#else + OS::get_singleton()->alert(error_string + "\nClick OK to exit."); +#endif + + return ERR_CANT_CREATE; + } + + return OK; +} diff --git a/drivers/unix/file_access_unix.cpp b/drivers/unix/file_access_unix.cpp index 210507c2c6..32f2d7dd79 100644 --- a/drivers/unix/file_access_unix.cpp +++ b/drivers/unix/file_access_unix.cpp @@ -218,67 +218,13 @@ bool FileAccessUnix::eof_reached() const { return last_error == ERR_FILE_EOF; } -uint8_t FileAccessUnix::get_8() const { - ERR_FAIL_NULL_V_MSG(f, 0, "File must be opened before use."); - uint8_t b; - if (fread(&b, 1, 1, f) == 0) { - check_errors(); - b = '\0'; - } - return b; -} - -uint16_t FileAccessUnix::get_16() const { - ERR_FAIL_NULL_V_MSG(f, 0, "File must be opened before use."); - - uint16_t b = 0; - if (fread(&b, 1, 2, f) != 2) { - check_errors(); - } - - if (big_endian) { - b = BSWAP16(b); - } - - return b; -} - -uint32_t FileAccessUnix::get_32() const { - ERR_FAIL_NULL_V_MSG(f, 0, "File must be opened before use."); - - uint32_t b = 0; - if (fread(&b, 1, 4, f) != 4) { - check_errors(); - } - - if (big_endian) { - b = BSWAP32(b); - } - - return b; -} - -uint64_t FileAccessUnix::get_64() const { - ERR_FAIL_NULL_V_MSG(f, 0, "File must be opened before use."); - - uint64_t b = 0; - if (fread(&b, 1, 8, f) != 8) { - check_errors(); - } - - if (big_endian) { - b = BSWAP64(b); - } - - return b; -} - uint64_t FileAccessUnix::get_buffer(uint8_t *p_dst, uint64_t p_length) const { - ERR_FAIL_COND_V(!p_dst && p_length > 0, -1); 
ERR_FAIL_NULL_V_MSG(f, -1, "File must be opened before use."); + ERR_FAIL_COND_V(!p_dst && p_length > 0, -1); uint64_t read = fread(p_dst, 1, p_length, f); check_errors(); + return read; } @@ -308,41 +254,6 @@ void FileAccessUnix::flush() { fflush(f); } -void FileAccessUnix::store_8(uint8_t p_dest) { - ERR_FAIL_NULL_MSG(f, "File must be opened before use."); - ERR_FAIL_COND(fwrite(&p_dest, 1, 1, f) != 1); -} - -void FileAccessUnix::store_16(uint16_t p_dest) { - ERR_FAIL_NULL_MSG(f, "File must be opened before use."); - - if (big_endian) { - p_dest = BSWAP16(p_dest); - } - - ERR_FAIL_COND(fwrite(&p_dest, 1, 2, f) != 2); -} - -void FileAccessUnix::store_32(uint32_t p_dest) { - ERR_FAIL_NULL_MSG(f, "File must be opened before use."); - - if (big_endian) { - p_dest = BSWAP32(p_dest); - } - - ERR_FAIL_COND(fwrite(&p_dest, 1, 4, f) != 4); -} - -void FileAccessUnix::store_64(uint64_t p_dest) { - ERR_FAIL_NULL_MSG(f, "File must be opened before use."); - - if (big_endian) { - p_dest = BSWAP64(p_dest); - } - - ERR_FAIL_COND(fwrite(&p_dest, 1, 8, f) != 8); -} - void FileAccessUnix::store_buffer(const uint8_t *p_src, uint64_t p_length) { ERR_FAIL_NULL_MSG(f, "File must be opened before use."); ERR_FAIL_COND(!p_src && p_length > 0); @@ -383,7 +294,7 @@ uint64_t FileAccessUnix::_get_modified_time(const String &p_file) { if (!err) { return status.st_mtime; } else { - print_verbose("Failed to get modified time for: " + p_file + ""); + WARN_PRINT("Failed to get modified time for: " + p_file); return 0; } } diff --git a/drivers/unix/file_access_unix.h b/drivers/unix/file_access_unix.h index c0286dbff3..76f629f7c2 100644 --- a/drivers/unix/file_access_unix.h +++ b/drivers/unix/file_access_unix.h @@ -67,20 +67,12 @@ public: virtual bool eof_reached() const override; ///< reading passed EOF - virtual uint8_t get_8() const override; ///< get a byte - virtual uint16_t get_16() const override; - virtual uint32_t get_32() const override; - virtual uint64_t get_64() const override; virtual 
uint64_t get_buffer(uint8_t *p_dst, uint64_t p_length) const override; virtual Error get_error() const override; ///< get last error virtual Error resize(int64_t p_length) override; virtual void flush() override; - virtual void store_8(uint8_t p_dest) override; ///< store a byte - virtual void store_16(uint16_t p_dest) override; - virtual void store_32(uint32_t p_dest) override; - virtual void store_64(uint64_t p_dest) override; virtual void store_buffer(const uint8_t *p_src, uint64_t p_length) override; ///< store an array of bytes virtual bool file_exists(const String &p_path) override; ///< return true if a file exists diff --git a/drivers/unix/file_access_unix_pipe.cpp b/drivers/unix/file_access_unix_pipe.cpp index 5d9a27ad05..34758e8c7d 100644 --- a/drivers/unix/file_access_unix_pipe.cpp +++ b/drivers/unix/file_access_unix_pipe.cpp @@ -125,22 +125,9 @@ String FileAccessUnixPipe::get_path_absolute() const { return path_src; } -uint8_t FileAccessUnixPipe::get_8() const { - ERR_FAIL_COND_V_MSG(fd[0] < 0, 0, "Pipe must be opened before use."); - - uint8_t b; - if (::read(fd[0], &b, 1) == 0) { - last_error = ERR_FILE_CANT_READ; - b = '\0'; - } else { - last_error = OK; - } - return b; -} - uint64_t FileAccessUnixPipe::get_buffer(uint8_t *p_dst, uint64_t p_length) const { - ERR_FAIL_COND_V(!p_dst && p_length > 0, -1); ERR_FAIL_COND_V_MSG(fd[0] < 0, -1, "Pipe must be opened before use."); + ERR_FAIL_COND_V(!p_dst && p_length > 0, -1); uint64_t read = ::read(fd[0], p_dst, p_length); if (read == p_length) { @@ -155,18 +142,10 @@ Error FileAccessUnixPipe::get_error() const { return last_error; } -void FileAccessUnixPipe::store_8(uint8_t p_src) { - ERR_FAIL_COND_MSG(fd[1] < 0, "Pipe must be opened before use."); - if (::write(fd[1], &p_src, 1) != 1) { - last_error = ERR_FILE_CANT_WRITE; - } else { - last_error = OK; - } -} - void FileAccessUnixPipe::store_buffer(const uint8_t *p_src, uint64_t p_length) { ERR_FAIL_COND_MSG(fd[1] < 0, "Pipe must be opened before use."); 
ERR_FAIL_COND(!p_src && p_length > 0); + if (::write(fd[1], p_src, p_length) != (ssize_t)p_length) { last_error = ERR_FILE_CANT_WRITE; } else { diff --git a/drivers/unix/file_access_unix_pipe.h b/drivers/unix/file_access_unix_pipe.h index 8e7988791b..19acdb5a37 100644 --- a/drivers/unix/file_access_unix_pipe.h +++ b/drivers/unix/file_access_unix_pipe.h @@ -65,14 +65,12 @@ public: virtual bool eof_reached() const override { return false; } - virtual uint8_t get_8() const override; ///< get a byte virtual uint64_t get_buffer(uint8_t *p_dst, uint64_t p_length) const override; virtual Error get_error() const override; ///< get last error virtual Error resize(int64_t p_length) override { return ERR_UNAVAILABLE; } virtual void flush() override {} - virtual void store_8(uint8_t p_src) override; ///< store a byte virtual void store_buffer(const uint8_t *p_src, uint64_t p_length) override; ///< store an array of bytes virtual bool file_exists(const String &p_path) override { return false; } diff --git a/drivers/vulkan/SCsub b/drivers/vulkan/SCsub index 80d5f35305..1efef5ad77 100644 --- a/drivers/vulkan/SCsub +++ b/drivers/vulkan/SCsub @@ -16,14 +16,14 @@ if env["use_volk"]: if env["platform"] == "android": env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_ANDROID_KHR"]) elif env["platform"] == "ios": - env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_IOS_MVK"]) + env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_IOS_MVK", "VK_USE_PLATFORM_METAL_EXT"]) elif env["platform"] == "linuxbsd": if env["x11"]: env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_XLIB_KHR"]) if env["wayland"]: env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_WAYLAND_KHR"]) elif env["platform"] == "macos": - env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_MACOS_MVK"]) + env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_MACOS_MVK", "VK_USE_PLATFORM_METAL_EXT"]) elif env["platform"] == "windows": env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_WIN32_KHR"]) diff --git a/drivers/vulkan/rendering_context_driver_vulkan.cpp 
b/drivers/vulkan/rendering_context_driver_vulkan.cpp index 7cba820978..df9bd98624 100644 --- a/drivers/vulkan/rendering_context_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_context_driver_vulkan.cpp @@ -40,21 +40,355 @@ #include "rendering_device_driver_vulkan.h" #include "vulkan_hooks.h" +#if defined(VK_TRACK_DRIVER_MEMORY) +/*************************************************/ +// Driver memory tracking +/*************************************************/ +// Total driver memory and allocation amount. +SafeNumeric<size_t> driver_memory_total_memory; +SafeNumeric<size_t> driver_memory_total_alloc_count; +// Amount of driver memory for every object type. +SafeNumeric<size_t> driver_memory_tracker[RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_COUNT][RenderingContextDriverVulkan::VK_TRACKED_SYSTEM_ALLOCATION_SCOPE_COUNT]; +// Amount of allocations for every object type. +SafeNumeric<uint32_t> driver_memory_allocation_count[RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_COUNT][RenderingContextDriverVulkan::VK_TRACKED_SYSTEM_ALLOCATION_SCOPE_COUNT]; +#endif + +#if defined(VK_TRACK_DEVICE_MEMORY) +/*************************************************/ +// Device memory report +/*************************************************/ +// Total device memory and allocation amount. +HashMap<uint64_t, size_t> memory_report_table; +// Total memory and allocation amount. +SafeNumeric<uint64_t> memory_report_total_memory; +SafeNumeric<uint64_t> memory_report_total_alloc_count; +// Amount of device memory for every object type. +SafeNumeric<size_t> memory_report_mem_usage[RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_COUNT]; +// Amount of device memory allocations for every object type. 
+SafeNumeric<size_t> memory_report_allocation_count[RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_COUNT]; +#endif + +const char *RenderingContextDriverVulkan::get_tracked_object_name(uint32_t p_type_index) const { +#if defined(VK_TRACK_DRIVER_MEMORY) || defined(VK_TRACK_DEVICE_MEMORY) + static constexpr const char *vkTrackedObjectTypeNames[] = { "UNKNOWN", + "INSTANCE", + "PHYSICAL_DEVICE", + "DEVICE", + "QUEUE", + "SEMAPHORE", + "COMMAND_BUFFER", + "FENCE", + "DEVICE_MEMORY", + "BUFFER", + "IMAGE", + "EVENT", + "QUERY_POOL", + "BUFFER_VIEW", + "IMAGE_VIEW", + "SHADER_MODULE", + "PIPELINE_CACHE", + "PIPELINE_LAYOUT", + "RENDER_PASS", + "PIPELINE", + "DESCRIPTOR_SET_LAYOUT", + "SAMPLER", + "DESCRIPTOR_POOL", + "DESCRIPTOR_SET", + "FRAMEBUFFER", + "COMMAND_POOL", + "DESCRIPTOR_UPDATE_TEMPLATE_KHR", + "SURFACE_KHR", + "SWAPCHAIN_KHR", + "DEBUG_UTILS_MESSENGER_EXT", + "DEBUG_REPORT_CALLBACK_EXT", + "ACCELERATION_STRUCTURE", + "VMA_BUFFER_OR_IMAGE" }; + + return vkTrackedObjectTypeNames[p_type_index]; +#else + return "VK_TRACK_*_MEMORY disabled at build time"; +#endif +} + +#if defined(VK_TRACK_DRIVER_MEMORY) || defined(VK_TRACK_DEVICE_MEMORY) +uint64_t RenderingContextDriverVulkan::get_tracked_object_type_count() const { + return VK_TRACKED_OBJECT_TYPE_COUNT; +} +#endif + +#if defined(VK_TRACK_DRIVER_MEMORY) || defined(VK_TRACK_DEVICE_MEMORY) +RenderingContextDriverVulkan::VkTrackedObjectType vk_object_to_tracked_object(VkObjectType p_type) { + if (p_type > VK_OBJECT_TYPE_COMMAND_POOL && p_type != (VkObjectType)RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_VMA) { + switch (p_type) { + case VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE: + return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_DESCRIPTOR_UPDATE_TEMPLATE_KHR; + case VK_OBJECT_TYPE_SURFACE_KHR: + return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_SURFACE; + case VK_OBJECT_TYPE_SWAPCHAIN_KHR: + return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_SWAPCHAIN; + case 
VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT: + return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT; + case VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT: + return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT; + case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR: + case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV: + return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_ACCELERATION_STRUCTURE; + default: + _err_print_error(FUNCTION_STR, __FILE__, __LINE__, "Unknown VkObjectType enum value " + itos((uint32_t)p_type) + ".Please add it to VkTrackedObjectType, switch statement in " + "vk_object_to_tracked_object and get_tracked_object_name.", + (int)p_type); + return (RenderingContextDriverVulkan::VkTrackedObjectType)VK_OBJECT_TYPE_UNKNOWN; + } + } + + return (RenderingContextDriverVulkan::VkTrackedObjectType)p_type; +} +#endif + +#if defined(VK_TRACK_DEVICE_MEMORY) +uint64_t RenderingContextDriverVulkan::get_device_total_memory() const { + return memory_report_total_memory.get(); +} + +uint64_t RenderingContextDriverVulkan::get_device_allocation_count() const { + return memory_report_total_alloc_count.get(); +} + +uint64_t RenderingContextDriverVulkan::get_device_memory_by_object_type(uint32_t p_type) const { + return memory_report_mem_usage[p_type].get(); +} + +uint64_t RenderingContextDriverVulkan::get_device_allocs_by_object_type(uint32_t p_type) const { + return memory_report_allocation_count[p_type].get(); +} +#endif + +#if defined(VK_TRACK_DRIVER_MEMORY) +uint64_t RenderingContextDriverVulkan::get_driver_total_memory() const { + return driver_memory_total_memory.get(); +} + +uint64_t RenderingContextDriverVulkan::get_driver_allocation_count() const { + return driver_memory_total_alloc_count.get(); +} + +uint64_t RenderingContextDriverVulkan::get_driver_memory_by_object_type(uint32_t p_type) const { + uint64_t ret = 0; + for (uint32_t i = 0; i < VK_TRACKED_SYSTEM_ALLOCATION_SCOPE_COUNT; i++) { + ret += 
driver_memory_tracker[p_type][i].get(); + } + + return ret; +} + +uint64_t RenderingContextDriverVulkan::get_driver_allocs_by_object_type(uint32_t p_type) const { + uint64_t ret = 0; + for (uint32_t i = 0; i < VK_TRACKED_SYSTEM_ALLOCATION_SCOPE_COUNT; i++) { + ret += driver_memory_allocation_count[p_type][i].get(); + } + + return ret; +} +#endif + +#if defined(VK_TRACK_DEVICE_MEMORY) +void RenderingContextDriverVulkan::memory_report_callback(const VkDeviceMemoryReportCallbackDataEXT *p_callback_data, void *p_user_data) { + if (!p_callback_data) { + return; + } + const RenderingContextDriverVulkan::VkTrackedObjectType obj_type = vk_object_to_tracked_object(p_callback_data->objectType); + uint64_t obj_id = p_callback_data->memoryObjectId; + + if (p_callback_data->type == VK_DEVICE_MEMORY_REPORT_EVENT_TYPE_ALLOCATE_EXT) { + // Realloc, update size + if (memory_report_table.has(obj_id)) { + memory_report_total_memory.sub(memory_report_table[obj_id]); + memory_report_mem_usage[obj_type].sub(memory_report_table[obj_id]); + + memory_report_total_memory.add(p_callback_data->size); + memory_report_mem_usage[obj_type].add(p_callback_data->size); + + memory_report_table[p_callback_data->memoryObjectId] = p_callback_data->size; + } else { + memory_report_table[obj_id] = p_callback_data->size; + + memory_report_total_alloc_count.increment(); + memory_report_allocation_count[obj_type].increment(); + memory_report_mem_usage[obj_type].add(p_callback_data->size); + memory_report_total_memory.add(p_callback_data->size); + } + } else if (p_callback_data->type == VK_DEVICE_MEMORY_REPORT_EVENT_TYPE_FREE_EXT) { + if (memory_report_table.has(obj_id)) { + memory_report_total_alloc_count.decrement(); + memory_report_allocation_count[obj_type].decrement(); + memory_report_mem_usage[obj_type].sub(p_callback_data->size); + memory_report_total_memory.sub(p_callback_data->size); + + memory_report_table.remove(memory_report_table.find(obj_id)); + } + } +} +#endif + +VkAllocationCallbacks 
*RenderingContextDriverVulkan::get_allocation_callbacks(VkObjectType p_type) { +#if !defined(VK_TRACK_DRIVER_MEMORY) + return nullptr; +#else + if (!Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) { + return nullptr; + } + +#ifdef _MSC_VER +#define LAMBDA_VK_CALL_CONV +#else +#define LAMBDA_VK_CALL_CONV VKAPI_PTR +#endif + + struct TrackedMemHeader { + size_t size; + VkSystemAllocationScope allocation_scope; + VkTrackedObjectType type; + }; + VkAllocationCallbacks tracking_callbacks = { + // Allocation function + nullptr, + []( + void *p_user_data, + size_t size, + size_t alignment, + VkSystemAllocationScope allocation_scope) LAMBDA_VK_CALL_CONV -> void * { + static constexpr size_t tracking_data_size = 32; + VkTrackedObjectType type = static_cast<VkTrackedObjectType>(*reinterpret_cast<VkTrackedObjectType *>(p_user_data)); + + driver_memory_total_memory.add(size); + driver_memory_total_alloc_count.increment(); + driver_memory_tracker[type][allocation_scope].add(size); + driver_memory_allocation_count[type][allocation_scope].increment(); + + alignment = MAX(alignment, tracking_data_size); + + uint8_t *ret = reinterpret_cast<uint8_t *>(Memory::alloc_aligned_static(size + alignment, alignment)); + if (ret == nullptr) { + return nullptr; + } + + // Track allocation + TrackedMemHeader *header = reinterpret_cast<TrackedMemHeader *>(ret); + header->size = size; + header->allocation_scope = allocation_scope; + header->type = type; + *reinterpret_cast<size_t *>(ret + alignment - sizeof(size_t)) = alignment; + + // Return first available chunk of memory + return ret + alignment; + }, + + // Reallocation function + []( + void *p_user_data, + void *p_original, + size_t size, + size_t alignment, + VkSystemAllocationScope allocation_scope) LAMBDA_VK_CALL_CONV -> void * { + if (p_original == nullptr) { + VkObjectType type = static_cast<VkObjectType>(*reinterpret_cast<uint32_t *>(p_user_data)); + return get_allocation_callbacks(type)->pfnAllocation(p_user_data, 
size, alignment, allocation_scope); + } + + uint8_t *mem = reinterpret_cast<uint8_t *>(p_original); + // Retrieve alignment + alignment = *reinterpret_cast<size_t *>(mem - sizeof(size_t)); + // Retrieve allocation data + TrackedMemHeader *header = reinterpret_cast<TrackedMemHeader *>(mem - alignment); + + // Update allocation size + driver_memory_total_memory.sub(header->size); + driver_memory_total_memory.add(size); + driver_memory_tracker[header->type][header->allocation_scope].sub(header->size); + driver_memory_tracker[header->type][header->allocation_scope].add(size); + + uint8_t *ret = reinterpret_cast<uint8_t *>(Memory::realloc_aligned_static(header, size + alignment, header->size + alignment, alignment)); + if (ret == nullptr) { + return nullptr; + } + // Update tracker + header = reinterpret_cast<TrackedMemHeader *>(ret); + header->size = size; + return ret + alignment; + }, + + // Free function + []( + void *p_user_data, + void *p_memory) LAMBDA_VK_CALL_CONV { + if (!p_memory) { + return; + } + + uint8_t *mem = reinterpret_cast<uint8_t *>(p_memory); + size_t alignment = *reinterpret_cast<size_t *>(mem - sizeof(size_t)); + TrackedMemHeader *header = reinterpret_cast<TrackedMemHeader *>(mem - alignment); + + driver_memory_total_alloc_count.decrement(); + driver_memory_total_memory.sub(header->size); + driver_memory_tracker[header->type][header->allocation_scope].sub(header->size); + driver_memory_allocation_count[header->type][header->allocation_scope].decrement(); + + Memory::free_aligned_static(header); + }, + // Internal allocation / deallocation. We don't track them as they cannot really be controlled or optimized by the programmer. 
+ []( + void *p_user_data, + size_t size, + VkInternalAllocationType allocation_type, + VkSystemAllocationScope allocation_scope) LAMBDA_VK_CALL_CONV { + }, + []( + void *p_user_data, + size_t size, + VkInternalAllocationType allocation_type, + VkSystemAllocationScope allocation_scope) LAMBDA_VK_CALL_CONV { + }, + }; + + // Create a callback per object type + static VkAllocationCallbacks object_callbacks[VK_TRACKED_OBJECT_TYPE_COUNT] = {}; + static uint32_t object_user_data[VK_TRACKED_OBJECT_TYPE_COUNT] = {}; + + // Only build the first time + if (!object_callbacks[0].pfnAllocation) { + for (uint32_t c = 0; c < VK_TRACKED_OBJECT_TYPE_COUNT; ++c) { + object_callbacks[c] = tracking_callbacks; + object_user_data[c] = c; + object_callbacks[c].pUserData = &object_user_data[c]; + + for (uint32_t i = 0; i < VK_TRACKED_SYSTEM_ALLOCATION_SCOPE_COUNT; i++) { + driver_memory_tracker[c][i].set(0); + driver_memory_allocation_count[c][i].set(0); + } + } + } + + uint32_t type_index = vk_object_to_tracked_object(p_type); + return &object_callbacks[type_index]; +#endif +} + RenderingContextDriverVulkan::RenderingContextDriverVulkan() { // Empty constructor. 
} RenderingContextDriverVulkan::~RenderingContextDriverVulkan() { if (debug_messenger != VK_NULL_HANDLE && functions.DestroyDebugUtilsMessengerEXT != nullptr) { - functions.DestroyDebugUtilsMessengerEXT(instance, debug_messenger, nullptr); + functions.DestroyDebugUtilsMessengerEXT(instance, debug_messenger, get_allocation_callbacks(VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT)); } if (debug_report != VK_NULL_HANDLE && functions.DestroyDebugReportCallbackEXT != nullptr) { - functions.DestroyDebugReportCallbackEXT(instance, debug_report, nullptr); + functions.DestroyDebugReportCallbackEXT(instance, debug_report, get_allocation_callbacks(VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT)); } if (instance != VK_NULL_HANDLE) { - vkDestroyInstance(instance, nullptr); + vkDestroyInstance(instance, get_allocation_callbacks(VK_OBJECT_TYPE_INSTANCE)); } } @@ -102,6 +436,10 @@ Error RenderingContextDriverVulkan::_initialize_instance_extensions() { // This extension allows us to use the properties2 features to query additional device capabilities. _register_requested_instance_extension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, false); +#if defined(USE_VOLK) && (defined(MACOS_ENABLED) || defined(IOS_ENABLED)) + _register_requested_instance_extension(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, true); +#endif + // Only enable debug utils in verbose mode or DEV_ENABLED. 
// End users would get spammed with messages of varying verbosity due to the // mess that thirdparty layers/extensions and drivers seem to leave in their @@ -360,6 +698,11 @@ Error RenderingContextDriverVulkan::_initialize_instance() { VkInstanceCreateInfo instance_info = {}; instance_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + +#if defined(USE_VOLK) && (defined(MACOS_ENABLED) || defined(IOS_ENABLED)) + instance_info.flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR; +#endif + instance_info.pApplicationInfo = &app_info; instance_info.enabledExtensionCount = enabled_extension_names.size(); instance_info.ppEnabledExtensionNames = enabled_extension_names.ptr(); @@ -432,7 +775,7 @@ Error RenderingContextDriverVulkan::_initialize_instance() { ERR_FAIL_V_MSG(ERR_CANT_CREATE, "GetProcAddr: Failed to init VK_EXT_debug_utils\nGetProcAddr: Failure"); } - VkResult res = functions.CreateDebugUtilsMessengerEXT(instance, &debug_messenger_create_info, nullptr, &debug_messenger); + VkResult res = functions.CreateDebugUtilsMessengerEXT(instance, &debug_messenger_create_info, get_allocation_callbacks(VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT), &debug_messenger); switch (res) { case VK_SUCCESS: break; @@ -452,7 +795,7 @@ Error RenderingContextDriverVulkan::_initialize_instance() { ERR_FAIL_V_MSG(ERR_CANT_CREATE, "GetProcAddr: Failed to init VK_EXT_debug_report\nGetProcAddr: Failure"); } - VkResult res = functions.CreateDebugReportCallbackEXT(instance, &debug_report_callback_create_info, nullptr, &debug_report); + VkResult res = functions.CreateDebugReportCallbackEXT(instance, &debug_report_callback_create_info, get_allocation_callbacks(VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT), &debug_report); switch (res) { case VK_SUCCESS: break; @@ -551,7 +894,7 @@ Error RenderingContextDriverVulkan::_create_vulkan_instance(const VkInstanceCrea if (VulkanHooks::get_singleton() != nullptr) { return VulkanHooks::get_singleton()->create_vulkan_instance(p_create_info, r_instance) ? 
OK : ERR_CANT_CREATE; } else { - VkResult err = vkCreateInstance(p_create_info, nullptr, r_instance); + VkResult err = vkCreateInstance(p_create_info, get_allocation_callbacks(VK_OBJECT_TYPE_INSTANCE), r_instance); ERR_FAIL_COND_V_MSG(err == VK_ERROR_INCOMPATIBLE_DRIVER, ERR_CANT_CREATE, "Cannot find a compatible Vulkan installable client driver (ICD).\n\n" "vkCreateInstance Failure"); @@ -670,7 +1013,7 @@ bool RenderingContextDriverVulkan::surface_get_needs_resize(SurfaceID p_surface) void RenderingContextDriverVulkan::surface_destroy(SurfaceID p_surface) { Surface *surface = (Surface *)(p_surface); - vkDestroySurfaceKHR(instance, surface->vk_surface, nullptr); + vkDestroySurfaceKHR(instance, surface->vk_surface, get_allocation_callbacks(VK_OBJECT_TYPE_SURFACE_KHR)); memdelete(surface); } diff --git a/drivers/vulkan/rendering_context_driver_vulkan.h b/drivers/vulkan/rendering_context_driver_vulkan.h index f1d4021e32..4fbca012c6 100644 --- a/drivers/vulkan/rendering_context_driver_vulkan.h +++ b/drivers/vulkan/rendering_context_driver_vulkan.h @@ -35,6 +35,11 @@ #include "servers/rendering/rendering_context_driver.h" +#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) +#define VK_TRACK_DRIVER_MEMORY +#define VK_TRACK_DEVICE_MEMORY +#endif + #ifdef USE_VOLK #include <volk.h> #else @@ -77,6 +82,12 @@ public: PFN_vkDebugReportMessageEXT DebugReportMessageEXT = nullptr; PFN_vkDestroyDebugReportCallbackEXT DestroyDebugReportCallbackEXT = nullptr; + // Debug marker extensions. + PFN_vkCmdDebugMarkerBeginEXT CmdDebugMarkerBeginEXT = nullptr; + PFN_vkCmdDebugMarkerEndEXT CmdDebugMarkerEndEXT = nullptr; + PFN_vkCmdDebugMarkerInsertEXT CmdDebugMarkerInsertEXT = nullptr; + PFN_vkDebugMarkerSetObjectNameEXT DebugMarkerSetObjectNameEXT = nullptr; + bool debug_report_functions_available() const { return CreateDebugReportCallbackEXT != nullptr && DebugReportMessageEXT != nullptr && @@ -110,6 +121,8 @@ private: // Static callbacks. 
static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_messenger_callback(VkDebugUtilsMessageSeverityFlagBitsEXT p_message_severity, VkDebugUtilsMessageTypeFlagsEXT p_message_type, const VkDebugUtilsMessengerCallbackDataEXT *p_callback_data, void *p_user_data); static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_report_callback(VkDebugReportFlagsEXT p_flags, VkDebugReportObjectTypeEXT p_object_type, uint64_t p_object, size_t p_location, int32_t p_message_code, const char *p_layer_prefix, const char *p_message, void *p_user_data); + // Debug marker extensions. + VkDebugReportObjectTypeEXT _convert_to_debug_report_objectType(VkObjectType p_object_type); protected: Error _find_validation_layers(TightLocalVector<const char *> &r_layer_names) const; @@ -153,6 +166,45 @@ public: bool queue_family_supports_present(VkPhysicalDevice p_physical_device, uint32_t p_queue_family_index, SurfaceID p_surface) const; const Functions &functions_get() const; + static VkAllocationCallbacks *get_allocation_callbacks(VkObjectType p_type); + +#if defined(VK_TRACK_DRIVER_MEMORY) || defined(VK_TRACK_DEVICE_MEMORY) + enum VkTrackedObjectType{ + VK_TRACKED_OBJECT_DESCRIPTOR_UPDATE_TEMPLATE_KHR = VK_OBJECT_TYPE_COMMAND_POOL + 1, + VK_TRACKED_OBJECT_TYPE_SURFACE, + VK_TRACKED_OBJECT_TYPE_SWAPCHAIN, + VK_TRACKED_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT, + VK_TRACKED_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT, + VK_TRACKED_OBJECT_TYPE_ACCELERATION_STRUCTURE, + VK_TRACKED_OBJECT_TYPE_VMA, + VK_TRACKED_OBJECT_TYPE_COUNT + }; + + enum VkTrackedSystemAllocationScope{ + VK_TRACKED_SYSTEM_ALLOCATION_SCOPE_COUNT = VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE + 1 + }; +#endif + + const char *get_tracked_object_name(uint32_t p_type_index) const override; +#if defined(VK_TRACK_DRIVER_MEMORY) || defined(VK_TRACK_DEVICE_MEMORY) + uint64_t get_tracked_object_type_count() const override; +#endif + +#if defined(VK_TRACK_DRIVER_MEMORY) + uint64_t get_driver_total_memory() const override; + uint64_t get_driver_allocation_count() const override; 
+ uint64_t get_driver_memory_by_object_type(uint32_t p_type) const override; + uint64_t get_driver_allocs_by_object_type(uint32_t p_type) const override; +#endif + +#if defined(VK_TRACK_DEVICE_MEMORY) + uint64_t get_device_total_memory() const override; + uint64_t get_device_allocation_count() const override; + uint64_t get_device_memory_by_object_type(uint32_t p_type) const override; + uint64_t get_device_allocs_by_object_type(uint32_t p_type) const override; + static VKAPI_ATTR void VKAPI_CALL memory_report_callback(const VkDeviceMemoryReportCallbackDataEXT *p_callback_data, void *p_user_data); +#endif + RenderingContextDriverVulkan(); virtual ~RenderingContextDriverVulkan() override; }; diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index 97fd156584..4ea46e8214 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -497,11 +497,32 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { _register_requested_device_extension(VK_KHR_MAINTENANCE_2_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + _register_requested_device_extension(VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME, false); if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) { _register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true); } +#if defined(VK_TRACK_DEVICE_MEMORY) + if (Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) { + _register_requested_device_extension(VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME, false); + } +#endif + _register_requested_device_extension(VK_EXT_DEVICE_FAULT_EXTENSION_NAME, false); + + { + // Debug marker extensions. + // Should be last element in the array. 
+#ifdef DEV_ENABLED + bool want_debug_markers = true; +#else + bool want_debug_markers = OS::get_singleton()->is_stdout_verbose(); +#endif + if (want_debug_markers) { + _register_requested_device_extension(VK_EXT_DEBUG_MARKER_EXTENSION_NAME, false); + } + } + uint32_t device_extension_count = 0; VkResult err = vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &device_extension_count, nullptr); ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); @@ -745,6 +766,15 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { if (enabled_device_extension_names.has(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME)) { pipeline_cache_control_support = pipeline_cache_control_features.pipelineCreationCacheControl; } + + if (enabled_device_extension_names.has(VK_EXT_DEVICE_FAULT_EXTENSION_NAME)) { + device_fault_support = true; + } +#if defined(VK_TRACK_DEVICE_MEMORY) + if (enabled_device_extension_names.has(VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME)) { + device_memory_report_support = true; + } +#endif } if (functions.GetPhysicalDeviceProperties2 != nullptr) { @@ -913,6 +943,26 @@ Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVector<VkDevice create_info_next = &pipeline_cache_control_features; } + VkPhysicalDeviceFaultFeaturesEXT device_fault_features = {}; + if (device_fault_support) { + device_fault_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FAULT_FEATURES_EXT; + device_fault_features.pNext = create_info_next; + create_info_next = &device_fault_features; + } + +#if defined(VK_TRACK_DEVICE_MEMORY) + VkDeviceDeviceMemoryReportCreateInfoEXT memory_report_info = {}; + if (device_memory_report_support) { + memory_report_info.sType = VK_STRUCTURE_TYPE_DEVICE_DEVICE_MEMORY_REPORT_CREATE_INFO_EXT; + memory_report_info.pfnUserCallback = RenderingContextDriverVulkan::memory_report_callback; + memory_report_info.pNext = create_info_next; + memory_report_info.flags = 0; + memory_report_info.pUserData = this; + + create_info_next 
= &memory_report_info; + } +#endif + VkPhysicalDeviceVulkan11Features vulkan_1_1_features = {}; VkPhysicalDevice16BitStorageFeaturesKHR storage_features = {}; VkPhysicalDeviceMultiviewFeatures multiview_features = {}; @@ -968,7 +1018,7 @@ Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVector<VkDevice bool device_created = VulkanHooks::get_singleton()->create_vulkan_device(&create_info, &vk_device); ERR_FAIL_COND_V(!device_created, ERR_CANT_CREATE); } else { - VkResult err = vkCreateDevice(physical_device, &create_info, nullptr, &vk_device); + VkResult err = vkCreateDevice(physical_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DEVICE), &vk_device); ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); } @@ -989,6 +1039,19 @@ Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVector<VkDevice if (enabled_device_extension_names.has(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME)) { device_functions.CreateRenderPass2KHR = PFN_vkCreateRenderPass2KHR(functions.GetDeviceProcAddr(vk_device, "vkCreateRenderPass2KHR")); } + + // Debug marker extensions. + if (enabled_device_extension_names.has(VK_EXT_DEBUG_MARKER_EXTENSION_NAME)) { + device_functions.CmdDebugMarkerBeginEXT = (PFN_vkCmdDebugMarkerBeginEXT)functions.GetDeviceProcAddr(vk_device, "vkCmdDebugMarkerBeginEXT"); + device_functions.CmdDebugMarkerEndEXT = (PFN_vkCmdDebugMarkerEndEXT)functions.GetDeviceProcAddr(vk_device, "vkCmdDebugMarkerEndEXT"); + device_functions.CmdDebugMarkerInsertEXT = (PFN_vkCmdDebugMarkerInsertEXT)functions.GetDeviceProcAddr(vk_device, "vkCmdDebugMarkerInsertEXT"); + device_functions.DebugMarkerSetObjectNameEXT = (PFN_vkDebugMarkerSetObjectNameEXT)functions.GetDeviceProcAddr(vk_device, "vkDebugMarkerSetObjectNameEXT"); + } + + // Debug device fault extension. 
+ if (device_fault_support) { + device_functions.GetDeviceFaultInfoEXT = (PFN_vkGetDeviceFaultInfoEXT)functions.GetDeviceProcAddr(vk_device, "vkGetDeviceFaultInfoEXT"); + } } return OK; @@ -1148,17 +1211,102 @@ bool RenderingDeviceDriverVulkan::_recreate_image_semaphore(CommandQueue *p_comm VkSemaphore semaphore; VkSemaphoreCreateInfo create_info = {}; create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - VkResult err = vkCreateSemaphore(vk_device, &create_info, nullptr, &semaphore); + VkResult err = vkCreateSemaphore(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE), &semaphore); ERR_FAIL_COND_V(err != VK_SUCCESS, false); // Indicate the semaphore is free again and destroy the previous one before storing the new one. - vkDestroySemaphore(vk_device, p_command_queue->image_semaphores[p_semaphore_index], nullptr); + vkDestroySemaphore(vk_device, p_command_queue->image_semaphores[p_semaphore_index], VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE)); p_command_queue->image_semaphores[p_semaphore_index] = semaphore; p_command_queue->free_image_semaphores.push_back(p_semaphore_index); return true; } +// Debug marker extensions. 
+VkDebugReportObjectTypeEXT RenderingDeviceDriverVulkan::_convert_to_debug_report_objectType(VkObjectType p_object_type) { + switch (p_object_type) { + case VK_OBJECT_TYPE_UNKNOWN: + return VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT; + case VK_OBJECT_TYPE_INSTANCE: + return VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT; + case VK_OBJECT_TYPE_PHYSICAL_DEVICE: + return VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT; + case VK_OBJECT_TYPE_DEVICE: + return VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT; + case VK_OBJECT_TYPE_QUEUE: + return VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT; + case VK_OBJECT_TYPE_SEMAPHORE: + return VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT; + case VK_OBJECT_TYPE_COMMAND_BUFFER: + return VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT; + case VK_OBJECT_TYPE_FENCE: + return VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT; + case VK_OBJECT_TYPE_DEVICE_MEMORY: + return VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT; + case VK_OBJECT_TYPE_BUFFER: + return VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT; + case VK_OBJECT_TYPE_IMAGE: + return VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT; + case VK_OBJECT_TYPE_EVENT: + return VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT; + case VK_OBJECT_TYPE_QUERY_POOL: + return VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT; + case VK_OBJECT_TYPE_BUFFER_VIEW: + return VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT; + case VK_OBJECT_TYPE_IMAGE_VIEW: + return VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT; + case VK_OBJECT_TYPE_SHADER_MODULE: + return VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT; + case VK_OBJECT_TYPE_PIPELINE_CACHE: + return VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT; + case VK_OBJECT_TYPE_PIPELINE_LAYOUT: + return VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT; + case VK_OBJECT_TYPE_RENDER_PASS: + return VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT; + case VK_OBJECT_TYPE_PIPELINE: + return VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT; + case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT: + return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT; + case 
VK_OBJECT_TYPE_SAMPLER: + return VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT; + case VK_OBJECT_TYPE_DESCRIPTOR_POOL: + return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT; + case VK_OBJECT_TYPE_DESCRIPTOR_SET: + return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT; + case VK_OBJECT_TYPE_FRAMEBUFFER: + return VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT; + case VK_OBJECT_TYPE_COMMAND_POOL: + return VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT; + case VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION: + return VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_EXT; + case VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE: + return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT; + case VK_OBJECT_TYPE_SURFACE_KHR: + return VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT; + case VK_OBJECT_TYPE_SWAPCHAIN_KHR: + return VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT; + case VK_OBJECT_TYPE_DISPLAY_KHR: + return VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_KHR_EXT; + case VK_OBJECT_TYPE_DISPLAY_MODE_KHR: + return VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_MODE_KHR_EXT; + case VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT: + return VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT; + case VK_OBJECT_TYPE_CU_MODULE_NVX: + return VK_DEBUG_REPORT_OBJECT_TYPE_CU_MODULE_NVX_EXT; + case VK_OBJECT_TYPE_CU_FUNCTION_NVX: + return VK_DEBUG_REPORT_OBJECT_TYPE_CU_FUNCTION_NVX_EXT; + case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR: + return VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR_EXT; + case VK_OBJECT_TYPE_VALIDATION_CACHE_EXT: + return VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT; + case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV: + return VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV_EXT; + default: + break; + } + + return VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT; +} void RenderingDeviceDriverVulkan::_set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name) { const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); 
@@ -1171,6 +1319,16 @@ void RenderingDeviceDriverVulkan::_set_object_name(VkObjectType p_object_type, u name_info.objectHandle = p_object_handle; name_info.pObjectName = obj_data.get_data(); functions.SetDebugUtilsObjectNameEXT(vk_device, &name_info); + } else if (functions.DebugMarkerSetObjectNameEXT != nullptr) { + // Debug marker extensions. + CharString obj_data = p_object_name.utf8(); + VkDebugMarkerObjectNameInfoEXT name_info; + name_info.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_OBJECT_NAME_INFO_EXT; + name_info.pNext = nullptr; + name_info.objectType = _convert_to_debug_report_objectType(p_object_type); + name_info.object = p_object_handle; + name_info.pObjectName = obj_data.get_data(); + functions.DebugMarkerSetObjectNameEXT(vk_device, &name_info); } } @@ -1211,6 +1369,7 @@ Error RenderingDeviceDriverVulkan::initialize(uint32_t p_device_index, uint32_t ERR_FAIL_COND_V(err != OK, err); max_descriptor_sets_per_pool = GLOBAL_GET("rendering/rendering_device/vulkan/max_descriptors_per_pool"); + breadcrumb_buffer = buffer_create(sizeof(uint32_t), BufferUsageBits::BUFFER_USAGE_TRANSFER_TO_BIT, MemoryAllocationType::MEMORY_ALLOCATION_TYPE_CPU); return OK; } @@ -1279,11 +1438,10 @@ RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitFie // Looks like a readback buffer: GPU copies from VRAM, then CPU maps and reads. 
alloc_create_info.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; } - alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST; alloc_create_info.requiredFlags = (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); } break; case MEMORY_ALLOCATION_TYPE_GPU: { - alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; if (p_size <= SMALL_ALLOCATION_MAX_SIZE) { uint32_t mem_type_index = 0; vmaFindMemoryTypeIndexForBufferInfo(allocator, &create_info, &alloc_create_info, &mem_type_index); @@ -1295,11 +1453,15 @@ RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitFie VkBuffer vk_buffer = VK_NULL_HANDLE; VmaAllocation allocation = nullptr; VmaAllocationInfo alloc_info = {}; - VkResult err = vmaCreateBuffer(allocator, &create_info, &alloc_create_info, &vk_buffer, &allocation, &alloc_info); + + VkResult err = vkCreateBuffer(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER), &vk_buffer); ERR_FAIL_COND_V_MSG(err, BufferID(), "Can't create buffer of size: " + itos(p_size) + ", error " + itos(err) + "."); + err = vmaAllocateMemoryForBuffer(allocator, vk_buffer, &alloc_create_info, &allocation, &alloc_info); + ERR_FAIL_COND_V_MSG(err, BufferID(), "Can't allocate memory for buffer of size: " + itos(p_size) + ", error " + itos(err) + "."); + err = vmaBindBufferMemory2(allocator, allocation, 0, vk_buffer, NULL); + ERR_FAIL_COND_V_MSG(err, BufferID(), "Can't bind memory to buffer of size: " + itos(p_size) + ", error " + itos(err) + "."); // Bookkeep. 
- BufferInfo *buf_info = VersatileResource::allocate<BufferInfo>(resources_allocator); buf_info->vk_buffer = vk_buffer; buf_info->allocation.handle = allocation; @@ -1320,7 +1482,7 @@ bool RenderingDeviceDriverVulkan::buffer_set_texel_format(BufferID p_buffer, Dat view_create_info.format = RD_TO_VK_FORMAT[p_format]; view_create_info.range = buf_info->allocation.size; - VkResult res = vkCreateBufferView(vk_device, &view_create_info, nullptr, &buf_info->vk_view); + VkResult res = vkCreateBufferView(vk_device, &view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER_VIEW), &buf_info->vk_view); ERR_FAIL_COND_V_MSG(res, false, "Unable to create buffer view, error " + itos(res) + "."); return true; @@ -1329,9 +1491,12 @@ bool RenderingDeviceDriverVulkan::buffer_set_texel_format(BufferID p_buffer, Dat void RenderingDeviceDriverVulkan::buffer_free(BufferID p_buffer) { BufferInfo *buf_info = (BufferInfo *)p_buffer.id; if (buf_info->vk_view) { - vkDestroyBufferView(vk_device, buf_info->vk_view, nullptr); + vkDestroyBufferView(vk_device, buf_info->vk_view, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER_VIEW)); } - vmaDestroyBuffer(allocator, buf_info->vk_buffer, buf_info->allocation.handle); + + vkDestroyBuffer(vk_device, buf_info->vk_buffer, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER)); + vmaFreeMemory(allocator, buf_info->allocation.handle); + VersatileResource::free(resources_allocator, buf_info); } @@ -1502,7 +1667,7 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create(const TextureFormat & VmaAllocationCreateInfo alloc_create_info = {}; alloc_create_info.flags = (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) ? 
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT : 0; - alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; if (image_size <= SMALL_ALLOCATION_MAX_SIZE) { uint32_t mem_type_index = 0; vmaFindMemoryTypeIndexForImageInfo(allocator, &create_info, &alloc_create_info, &mem_type_index); @@ -1514,8 +1679,13 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create(const TextureFormat & VkImage vk_image = VK_NULL_HANDLE; VmaAllocation allocation = nullptr; VmaAllocationInfo alloc_info = {}; - VkResult err = vmaCreateImage(allocator, &create_info, &alloc_create_info, &vk_image, &allocation, &alloc_info); - ERR_FAIL_COND_V_MSG(err, TextureID(), "vmaCreateImage failed with error " + itos(err) + "."); + + VkResult err = vkCreateImage(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE), &vk_image); + ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImage failed with error " + itos(err) + "."); + err = vmaAllocateMemoryForImage(allocator, vk_image, &alloc_create_info, &allocation, &alloc_info); + ERR_FAIL_COND_V_MSG(err, TextureID(), "Can't allocate memory for image, error: " + itos(err) + "."); + err = vmaBindImageMemory2(allocator, allocation, 0, vk_image, NULL); + ERR_FAIL_COND_V_MSG(err, TextureID(), "Can't bind memory to image, error: " + itos(err) + "."); // Create view. 
@@ -1536,16 +1706,28 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create(const TextureFormat & image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; } + VkImageViewASTCDecodeModeEXT decode_mode; + if (enabled_device_extension_names.has(VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME)) { + if (image_view_create_info.format >= VK_FORMAT_ASTC_4x4_UNORM_BLOCK && image_view_create_info.format <= VK_FORMAT_ASTC_12x12_SRGB_BLOCK) { + decode_mode.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_ASTC_DECODE_MODE_EXT; + decode_mode.pNext = nullptr; + decode_mode.decodeMode = VK_FORMAT_R8G8B8A8_UNORM; + image_view_create_info.pNext = &decode_mode; + } + } + VkImageView vk_image_view = VK_NULL_HANDLE; - err = vkCreateImageView(vk_device, &image_view_create_info, nullptr, &vk_image_view); + err = vkCreateImageView(vk_device, &image_view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &vk_image_view); if (err) { - vmaDestroyImage(allocator, vk_image, allocation); + vkDestroyImage(vk_device, vk_image, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE)); + vmaFreeMemory(allocator, allocation); ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImageView failed with error " + itos(err) + "."); } // Bookkeep. TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator); + tex_info->vk_image = vk_image; tex_info->vk_view = vk_image_view; tex_info->rd_format = p_format.format; tex_info->vk_create_info = create_info; @@ -1579,7 +1761,7 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_from_extension(uint64 image_view_create_info.subresourceRange.aspectMask = p_depth_stencil ? 
VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; VkImageView vk_image_view = VK_NULL_HANDLE; - VkResult err = vkCreateImageView(vk_device, &image_view_create_info, nullptr, &vk_image_view); + VkResult err = vkCreateImageView(vk_device, &image_view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &vk_image_view); if (err) { ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImageView failed with error " + itos(err) + "."); } @@ -1634,7 +1816,7 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared(TextureID p_or } VkImageView new_vk_image_view = VK_NULL_HANDLE; - VkResult err = vkCreateImageView(vk_device, &image_view_create_info, nullptr, &new_vk_image_view); + VkResult err = vkCreateImageView(vk_device, &image_view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &new_vk_image_view); ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImageView failed with error " + itos(err) + "."); // Bookkeep. @@ -1687,7 +1869,7 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared_from_slice(Tex image_view_create_info.subresourceRange.layerCount = p_layers; VkImageView new_vk_image_view = VK_NULL_HANDLE; - VkResult err = vkCreateImageView(vk_device, &image_view_create_info, nullptr, &new_vk_image_view); + VkResult err = vkCreateImageView(vk_device, &image_view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &new_vk_image_view); ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImageView failed with error " + itos(err) + "."); // Bookkeep. 
@@ -1707,9 +1889,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared_from_slice(Tex void RenderingDeviceDriverVulkan::texture_free(TextureID p_texture) { TextureInfo *tex_info = (TextureInfo *)p_texture.id; - vkDestroyImageView(vk_device, tex_info->vk_view, nullptr); + vkDestroyImageView(vk_device, tex_info->vk_view, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW)); if (tex_info->allocation.handle) { - vmaDestroyImage(allocator, tex_info->vk_view_create_info.image, tex_info->allocation.handle); + vkDestroyImage(vk_device, tex_info->vk_image, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE)); + vmaFreeMemory(allocator, tex_info->allocation.handle); } VersatileResource::free(resources_allocator, tex_info); } @@ -1788,7 +1971,7 @@ uint8_t *RenderingDeviceDriverVulkan::texture_map(TextureID p_texture, const Tex void RenderingDeviceDriverVulkan::texture_unmap(TextureID p_texture) { const TextureInfo *tex_info = (const TextureInfo *)p_texture.id; - vkUnmapMemory(vk_device, tex_info->allocation.info.deviceMemory); + vmaUnmapMemory(allocator, tex_info->allocation.handle); } BitField<RDD::TextureUsageBits> RenderingDeviceDriverVulkan::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) { @@ -1869,14 +2052,14 @@ RDD::SamplerID RenderingDeviceDriverVulkan::sampler_create(const SamplerState &p sampler_create_info.unnormalizedCoordinates = p_state.unnormalized_uvw; VkSampler vk_sampler = VK_NULL_HANDLE; - VkResult res = vkCreateSampler(vk_device, &sampler_create_info, nullptr, &vk_sampler); + VkResult res = vkCreateSampler(vk_device, &sampler_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SAMPLER), &vk_sampler); ERR_FAIL_COND_V_MSG(res, SamplerID(), "vkCreateSampler failed with error " + itos(res) + "."); return SamplerID(vk_sampler); } void RenderingDeviceDriverVulkan::sampler_free(SamplerID p_sampler) { - vkDestroySampler(vk_device, (VkSampler)p_sampler.id, nullptr); + vkDestroySampler(vk_device, 
(VkSampler)p_sampler.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SAMPLER)); } bool RenderingDeviceDriverVulkan::sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_filter) { @@ -2051,7 +2234,7 @@ RDD::FenceID RenderingDeviceDriverVulkan::fence_create() { VkFence vk_fence = VK_NULL_HANDLE; VkFenceCreateInfo create_info = {}; create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - VkResult err = vkCreateFence(vk_device, &create_info, nullptr, &vk_fence); + VkResult err = vkCreateFence(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FENCE), &vk_fence); ERR_FAIL_COND_V(err != VK_SUCCESS, FenceID()); Fence *fence = memnew(Fence); @@ -2062,10 +2245,13 @@ RDD::FenceID RenderingDeviceDriverVulkan::fence_create() { Error RenderingDeviceDriverVulkan::fence_wait(FenceID p_fence) { Fence *fence = (Fence *)(p_fence.id); - VkResult err = vkWaitForFences(vk_device, 1, &fence->vk_fence, VK_TRUE, UINT64_MAX); - ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED); + VkResult fence_status = vkGetFenceStatus(vk_device, fence->vk_fence); + if (fence_status == VK_NOT_READY) { + VkResult err = vkWaitForFences(vk_device, 1, &fence->vk_fence, VK_TRUE, UINT64_MAX); + ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED); + } - err = vkResetFences(vk_device, 1, &fence->vk_fence); + VkResult err = vkResetFences(vk_device, 1, &fence->vk_fence); ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED); if (fence->queue_signaled_from != nullptr) { @@ -2090,7 +2276,7 @@ Error RenderingDeviceDriverVulkan::fence_wait(FenceID p_fence) { void RenderingDeviceDriverVulkan::fence_free(FenceID p_fence) { Fence *fence = (Fence *)(p_fence.id); - vkDestroyFence(vk_device, fence->vk_fence, nullptr); + vkDestroyFence(vk_device, fence->vk_fence, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FENCE)); memdelete(fence); } @@ -2102,14 +2288,14 @@ RDD::SemaphoreID RenderingDeviceDriverVulkan::semaphore_create() { VkSemaphore semaphore = VK_NULL_HANDLE; VkSemaphoreCreateInfo create_info = {}; 
create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - VkResult err = vkCreateSemaphore(vk_device, &create_info, nullptr, &semaphore); + VkResult err = vkCreateSemaphore(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE), &semaphore); ERR_FAIL_COND_V(err != VK_SUCCESS, SemaphoreID()); return SemaphoreID(semaphore); } void RenderingDeviceDriverVulkan::semaphore_free(SemaphoreID p_semaphore) { - vkDestroySemaphore(vk_device, VkSemaphore(p_semaphore.id), nullptr); + vkDestroySemaphore(vk_device, VkSemaphore(p_semaphore.id), VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE)); } /******************/ @@ -2236,7 +2422,7 @@ Error RenderingDeviceDriverVulkan::command_queue_execute_and_present(CommandQueu create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; for (uint32_t i = 0; i < frame_count; i++) { - err = vkCreateSemaphore(vk_device, &create_info, nullptr, &semaphore); + err = vkCreateSemaphore(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE), &semaphore); ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED); command_queue->present_semaphores.push_back(semaphore); } @@ -2263,6 +2449,11 @@ Error RenderingDeviceDriverVulkan::command_queue_execute_and_present(CommandQueu device_queue.submit_mutex.lock(); err = vkQueueSubmit(device_queue.queue, 1, &submit_info, vk_fence); device_queue.submit_mutex.unlock(); + + if (err == VK_ERROR_DEVICE_LOST) { + print_lost_device_info(); + CRASH_NOW_MSG("Vulkan device was lost."); + } ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED); if (fence != nullptr && !command_queue->pending_semaphores_for_fence.is_empty()) { @@ -2354,12 +2545,12 @@ void RenderingDeviceDriverVulkan::command_queue_free(CommandQueueID p_cmd_queue) // Erase all the semaphores used for presentation. 
for (VkSemaphore semaphore : command_queue->present_semaphores) { - vkDestroySemaphore(vk_device, semaphore, nullptr); + vkDestroySemaphore(vk_device, semaphore, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE)); } // Erase all the semaphores used for image acquisition. for (VkSemaphore semaphore : command_queue->image_semaphores) { - vkDestroySemaphore(vk_device, semaphore, nullptr); + vkDestroySemaphore(vk_device, semaphore, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE)); } // Retrieve the queue family corresponding to the virtual queue. @@ -2387,7 +2578,7 @@ RDD::CommandPoolID RenderingDeviceDriverVulkan::command_pool_create(CommandQueue cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; VkCommandPool vk_command_pool = VK_NULL_HANDLE; - VkResult res = vkCreateCommandPool(vk_device, &cmd_pool_info, nullptr, &vk_command_pool); + VkResult res = vkCreateCommandPool(vk_device, &cmd_pool_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_COMMAND_POOL), &vk_command_pool); ERR_FAIL_COND_V_MSG(res, CommandPoolID(), "vkCreateCommandPool failed with error " + itos(res) + "."); CommandPool *command_pool = memnew(CommandPool); @@ -2400,7 +2591,7 @@ void RenderingDeviceDriverVulkan::command_pool_free(CommandPoolID p_cmd_pool) { DEV_ASSERT(p_cmd_pool); CommandPool *command_pool = (CommandPool *)(p_cmd_pool.id); - vkDestroyCommandPool(vk_device, command_pool->vk_command_pool, nullptr); + vkDestroyCommandPool(vk_device, command_pool->vk_command_pool, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_COMMAND_POOL)); memdelete(command_pool); } @@ -2480,7 +2671,7 @@ void RenderingDeviceDriverVulkan::_swap_chain_release(SwapChain *swap_chain) { } for (VkImageView view : swap_chain->image_views) { - vkDestroyImageView(vk_device, view, nullptr); + vkDestroyImageView(vk_device, view, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW)); } swap_chain->image_index = UINT_MAX; @@ -2489,7 +2680,7 @@ void 
RenderingDeviceDriverVulkan::_swap_chain_release(SwapChain *swap_chain) { swap_chain->framebuffers.clear(); if (swap_chain->vk_swapchain != VK_NULL_HANDLE) { - device_functions.DestroySwapchainKHR(vk_device, swap_chain->vk_swapchain, nullptr); + device_functions.DestroySwapchainKHR(vk_device, swap_chain->vk_swapchain, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SWAPCHAIN_KHR)); swap_chain->vk_swapchain = VK_NULL_HANDLE; } @@ -2571,7 +2762,7 @@ RenderingDeviceDriver::SwapChainID RenderingDeviceDriverVulkan::swap_chain_creat pass_info.pSubpasses = &subpass; VkRenderPass render_pass = VK_NULL_HANDLE; - err = _create_render_pass(vk_device, &pass_info, nullptr, &render_pass); + err = _create_render_pass(vk_device, &pass_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS), &render_pass); ERR_FAIL_COND_V(err != VK_SUCCESS, SwapChainID()); SwapChain *swap_chain = memnew(SwapChain); @@ -2714,7 +2905,7 @@ Error RenderingDeviceDriverVulkan::swap_chain_resize(CommandQueueID p_cmd_queue, swap_create_info.compositeAlpha = composite_alpha; swap_create_info.presentMode = present_mode; swap_create_info.clipped = true; - err = device_functions.CreateSwapchainKHR(vk_device, &swap_create_info, nullptr, &swap_chain->vk_swapchain); + err = device_functions.CreateSwapchainKHR(vk_device, &swap_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SWAPCHAIN_KHR), &swap_chain->vk_swapchain); ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); uint32_t image_count = 0; @@ -2742,7 +2933,7 @@ Error RenderingDeviceDriverVulkan::swap_chain_resize(CommandQueueID p_cmd_queue, VkImageView image_view; for (uint32_t i = 0; i < image_count; i++) { view_create_info.image = swap_chain->images[i]; - err = vkCreateImageView(vk_device, &view_create_info, nullptr, &image_view); + err = vkCreateImageView(vk_device, &view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &image_view); ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); 
swap_chain->image_views.push_back(image_view); @@ -2761,7 +2952,7 @@ Error RenderingDeviceDriverVulkan::swap_chain_resize(CommandQueueID p_cmd_queue, VkFramebuffer framebuffer; for (uint32_t i = 0; i < image_count; i++) { fb_create_info.pAttachments = &swap_chain->image_views[i]; - err = vkCreateFramebuffer(vk_device, &fb_create_info, nullptr, &framebuffer); + err = vkCreateFramebuffer(vk_device, &fb_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER), &framebuffer); ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); swap_chain->framebuffers.push_back(RDD::FramebufferID(framebuffer)); @@ -2792,7 +2983,7 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::swap_chain_acquire_framebuffer(C // Add a new semaphore if none are free. VkSemaphoreCreateInfo create_info = {}; create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - err = vkCreateSemaphore(vk_device, &create_info, nullptr, &semaphore); + err = vkCreateSemaphore(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE), &semaphore); ERR_FAIL_COND_V(err != VK_SUCCESS, FramebufferID()); semaphore_index = command_queue->image_semaphores.size(); @@ -2864,7 +3055,7 @@ void RenderingDeviceDriverVulkan::swap_chain_free(SwapChainID p_swap_chain) { _swap_chain_release(swap_chain); if (swap_chain->render_pass.id != 0) { - vkDestroyRenderPass(vk_device, VkRenderPass(swap_chain->render_pass.id), nullptr); + vkDestroyRenderPass(vk_device, VkRenderPass(swap_chain->render_pass.id), VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS)); } memdelete(swap_chain); @@ -2890,7 +3081,7 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID framebuffer_create_info.layers = 1; VkFramebuffer vk_framebuffer = VK_NULL_HANDLE; - VkResult err = vkCreateFramebuffer(vk_device, &framebuffer_create_info, nullptr, &vk_framebuffer); + VkResult err = vkCreateFramebuffer(vk_device, &framebuffer_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER), 
&vk_framebuffer); ERR_FAIL_COND_V_MSG(err, FramebufferID(), "vkCreateFramebuffer failed with error " + itos(err) + "."); #if PRINT_NATIVE_COMMANDS @@ -2905,7 +3096,7 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID } void RenderingDeviceDriverVulkan::framebuffer_free(FramebufferID p_framebuffer) { - vkDestroyFramebuffer(vk_device, (VkFramebuffer)p_framebuffer.id, nullptr); + vkDestroyFramebuffer(vk_device, (VkFramebuffer)p_framebuffer.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER)); } /****************/ @@ -3282,7 +3473,7 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec shader_module_create_info.pCode = (const uint32_t *)stages_spirv[i].ptr(); VkShaderModule vk_module = VK_NULL_HANDLE; - VkResult res = vkCreateShaderModule(vk_device, &shader_module_create_info, nullptr, &vk_module); + VkResult res = vkCreateShaderModule(vk_device, &shader_module_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE), &vk_module); if (res) { error_text = "Error (" + itos(res) + ") creating shader module for stage: " + String(SHADER_STAGE_NAMES[r_shader_desc.stages[i]]); break; @@ -3309,7 +3500,7 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec layout_create_info.pBindings = vk_set_bindings[i].ptr(); VkDescriptorSetLayout layout = VK_NULL_HANDLE; - VkResult res = vkCreateDescriptorSetLayout(vk_device, &layout_create_info, nullptr, &layout); + VkResult res = vkCreateDescriptorSetLayout(vk_device, &layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT), &layout); if (res) { error_text = "Error (" + itos(res) + ") creating descriptor set layout for set " + itos(i); break; @@ -3336,7 +3527,7 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec pipeline_layout_create_info.pPushConstantRanges = push_constant_range; } - VkResult err = vkCreatePipelineLayout(vk_device, 
&pipeline_layout_create_info, nullptr, &shader_info.vk_pipeline_layout); + VkResult err = vkCreatePipelineLayout(vk_device, &pipeline_layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_LAYOUT), &shader_info.vk_pipeline_layout); if (err) { error_text = "Error (" + itos(err) + ") creating pipeline layout."; } @@ -3345,10 +3536,10 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec if (!error_text.is_empty()) { // Clean up if failed. for (uint32_t i = 0; i < shader_info.vk_stages_create_info.size(); i++) { - vkDestroyShaderModule(vk_device, shader_info.vk_stages_create_info[i].module, nullptr); + vkDestroyShaderModule(vk_device, shader_info.vk_stages_create_info[i].module, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE)); } for (uint32_t i = 0; i < binary_data.set_count; i++) { - vkDestroyDescriptorSetLayout(vk_device, shader_info.vk_descriptor_set_layouts[i], nullptr); + vkDestroyDescriptorSetLayout(vk_device, shader_info.vk_descriptor_set_layouts[i], VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)); } ERR_FAIL_V_MSG(ShaderID(), error_text); @@ -3365,18 +3556,29 @@ void RenderingDeviceDriverVulkan::shader_free(ShaderID p_shader) { ShaderInfo *shader_info = (ShaderInfo *)p_shader.id; for (uint32_t i = 0; i < shader_info->vk_descriptor_set_layouts.size(); i++) { - vkDestroyDescriptorSetLayout(vk_device, shader_info->vk_descriptor_set_layouts[i], nullptr); + vkDestroyDescriptorSetLayout(vk_device, shader_info->vk_descriptor_set_layouts[i], VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)); } - vkDestroyPipelineLayout(vk_device, shader_info->vk_pipeline_layout, nullptr); + vkDestroyPipelineLayout(vk_device, shader_info->vk_pipeline_layout, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_LAYOUT)); - for (uint32_t i = 0; i < shader_info->vk_stages_create_info.size(); i++) { - vkDestroyShaderModule(vk_device, shader_info->vk_stages_create_info[i].module, nullptr); 
- } + shader_destroy_modules(p_shader); VersatileResource::free(resources_allocator, shader_info); } +void RenderingDeviceDriverVulkan::shader_destroy_modules(ShaderID p_shader) { + ShaderInfo *si = (ShaderInfo *)p_shader.id; + + for (uint32_t i = 0; i < si->vk_stages_create_info.size(); i++) { + if (si->vk_stages_create_info[i].module) { + vkDestroyShaderModule(vk_device, si->vk_stages_create_info[i].module, + VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE)); + si->vk_stages_create_info[i].module = VK_NULL_HANDLE; + } + } + si->vk_stages_create_info.clear(); +} + /*********************/ /**** UNIFORM SET ****/ /*********************/ @@ -3474,7 +3676,7 @@ VkDescriptorPool RenderingDeviceDriverVulkan::_descriptor_set_pool_find_or_creat descriptor_set_pool_create_info.pPoolSizes = vk_sizes; VkDescriptorPool vk_pool = VK_NULL_HANDLE; - VkResult res = vkCreateDescriptorPool(vk_device, &descriptor_set_pool_create_info, nullptr, &vk_pool); + VkResult res = vkCreateDescriptorPool(vk_device, &descriptor_set_pool_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_POOL), &vk_pool); if (res) { ERR_FAIL_COND_V_MSG(res, VK_NULL_HANDLE, "vkCreateDescriptorPool failed with error " + itos(res) + "."); } @@ -3494,7 +3696,7 @@ void RenderingDeviceDriverVulkan::_descriptor_set_pool_unreference(DescriptorSet HashMap<VkDescriptorPool, uint32_t>::Iterator pool_rcs_it = p_pool_sets_it->value.find(p_vk_descriptor_pool); pool_rcs_it->value--; if (pool_rcs_it->value == 0) { - vkDestroyDescriptorPool(vk_device, p_vk_descriptor_pool, nullptr); + vkDestroyDescriptorPool(vk_device, p_vk_descriptor_pool, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_POOL)); p_pool_sets_it->value.erase(p_vk_descriptor_pool); if (p_pool_sets_it->value.is_empty()) { descriptor_set_pools.remove(p_pool_sets_it); @@ -3839,7 +4041,7 @@ void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID /******************/ void 
RenderingDeviceDriverVulkan::pipeline_free(PipelineID p_pipeline) { - vkDestroyPipeline(vk_device, (VkPipeline)p_pipeline.id, nullptr); + vkDestroyPipeline(vk_device, (VkPipeline)p_pipeline.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE)); } // ----- BINDING ----- @@ -3904,7 +4106,7 @@ bool RenderingDeviceDriverVulkan::pipeline_cache_create(const Vector<uint8_t> &p cache_info.flags = VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; } - VkResult err = vkCreatePipelineCache(vk_device, &cache_info, nullptr, &pipelines_cache.vk_cache); + VkResult err = vkCreatePipelineCache(vk_device, &cache_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_CACHE), &pipelines_cache.vk_cache); if (err != VK_SUCCESS) { WARN_PRINT("vkCreatePipelinecache failed with error " + itos(err) + "."); return false; @@ -3917,7 +4119,7 @@ bool RenderingDeviceDriverVulkan::pipeline_cache_create(const Vector<uint8_t> &p void RenderingDeviceDriverVulkan::pipeline_cache_free() { DEV_ASSERT(pipelines_cache.vk_cache); - vkDestroyPipelineCache(vk_device, pipelines_cache.vk_cache, nullptr); + vkDestroyPipelineCache(vk_device, pipelines_cache.vk_cache, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_CACHE)); pipelines_cache.vk_cache = VK_NULL_HANDLE; DEV_ASSERT(caching_instance_count > 0); @@ -4101,14 +4303,14 @@ RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView<Att } VkRenderPass vk_render_pass = VK_NULL_HANDLE; - VkResult res = _create_render_pass(vk_device, &create_info, nullptr, &vk_render_pass); + VkResult res = _create_render_pass(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS), &vk_render_pass); ERR_FAIL_COND_V_MSG(res, RenderPassID(), "vkCreateRenderPass2KHR failed with error " + itos(res) + "."); return RenderPassID(vk_render_pass); } void RenderingDeviceDriverVulkan::render_pass_free(RenderPassID p_render_pass) { - vkDestroyRenderPass(vk_device, (VkRenderPass)p_render_pass.id, nullptr); + 
vkDestroyRenderPass(vk_device, (VkRenderPass)p_render_pass.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS)); } // ----- COMMANDS ----- @@ -4550,6 +4752,8 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( pipeline_create_info.pNext = graphics_pipeline_nextptr; pipeline_create_info.stageCount = shader_info->vk_stages_create_info.size(); + ERR_FAIL_COND_V_MSG(pipeline_create_info.stageCount == 0, PipelineID(), + "Cannot create pipeline without shader module, please make sure shader modules are destroyed only after all associated pipelines are created."); VkPipelineShaderStageCreateInfo *vk_pipeline_stages = ALLOCA_ARRAY(VkPipelineShaderStageCreateInfo, shader_info->vk_stages_create_info.size()); for (uint32_t i = 0; i < shader_info->vk_stages_create_info.size(); i++) { @@ -4592,7 +4796,7 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( // --- VkPipeline vk_pipeline = VK_NULL_HANDLE; - VkResult err = vkCreateGraphicsPipelines(vk_device, pipelines_cache.vk_cache, 1, &pipeline_create_info, nullptr, &vk_pipeline); + VkResult err = vkCreateGraphicsPipelines(vk_device, pipelines_cache.vk_cache, 1, &pipeline_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE), &vk_pipeline); ERR_FAIL_COND_V_MSG(err, PipelineID(), "vkCreateGraphicsPipelines failed with error " + itos(err) + "."); return PipelineID(vk_pipeline); @@ -4653,7 +4857,7 @@ RDD::PipelineID RenderingDeviceDriverVulkan::compute_pipeline_create(ShaderID p_ } VkPipeline vk_pipeline = VK_NULL_HANDLE; - VkResult err = vkCreateComputePipelines(vk_device, pipelines_cache.vk_cache, 1, &pipeline_create_info, nullptr, &vk_pipeline); + VkResult err = vkCreateComputePipelines(vk_device, pipelines_cache.vk_cache, 1, &pipeline_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE), &vk_pipeline); ERR_FAIL_COND_V_MSG(err, PipelineID(), "vkCreateComputePipelines failed with error " + itos(err) + "."); return PipelineID(vk_pipeline); @@ -4672,12 
+4876,12 @@ RDD::QueryPoolID RenderingDeviceDriverVulkan::timestamp_query_pool_create(uint32 query_pool_create_info.queryCount = p_query_count; VkQueryPool vk_query_pool = VK_NULL_HANDLE; - vkCreateQueryPool(vk_device, &query_pool_create_info, nullptr, &vk_query_pool); + vkCreateQueryPool(vk_device, &query_pool_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_QUERY_POOL), &vk_query_pool); return RDD::QueryPoolID(vk_query_pool); } void RenderingDeviceDriverVulkan::timestamp_query_pool_free(QueryPoolID p_pool_id) { - vkDestroyQueryPool(vk_device, (VkQueryPool)p_pool_id.id, nullptr); + vkDestroyQueryPool(vk_device, (VkQueryPool)p_pool_id.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_QUERY_POOL)); } void RenderingDeviceDriverVulkan::timestamp_query_pool_get_results(QueryPoolID p_pool_id, uint32_t p_query_count, uint64_t *r_results) { @@ -4732,6 +4936,21 @@ void RenderingDeviceDriverVulkan::command_timestamp_write(CommandBufferID p_cmd_ void RenderingDeviceDriverVulkan::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) { const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); + if (!functions.CmdBeginDebugUtilsLabelEXT) { + if (functions.CmdDebugMarkerBeginEXT) { + // Debug marker extensions. 
+ VkDebugMarkerMarkerInfoEXT marker; + marker.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; + marker.pNext = nullptr; + marker.pMarkerName = p_label_name; + marker.color[0] = p_color[0]; + marker.color[1] = p_color[1]; + marker.color[2] = p_color[2]; + marker.color[3] = p_color[3]; + functions.CmdDebugMarkerBeginEXT((VkCommandBuffer)p_cmd_buffer.id, &marker); + } + return; + } VkDebugUtilsLabelEXT label; label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; label.pNext = nullptr; @@ -4745,9 +4964,167 @@ void RenderingDeviceDriverVulkan::command_begin_label(CommandBufferID p_cmd_buff void RenderingDeviceDriverVulkan::command_end_label(CommandBufferID p_cmd_buffer) { const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); + if (!functions.CmdEndDebugUtilsLabelEXT) { + if (functions.CmdDebugMarkerEndEXT) { + // Debug marker extensions. + functions.CmdDebugMarkerEndEXT((VkCommandBuffer)p_cmd_buffer.id); + } + return; + } functions.CmdEndDebugUtilsLabelEXT((VkCommandBuffer)p_cmd_buffer.id); } +/****************/ +/**** DEBUG *****/ +/****************/ +void RenderingDeviceDriverVulkan::command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) { + if (p_data == BreadcrumbMarker::NONE) { + return; + } + vkCmdFillBuffer((VkCommandBuffer)p_cmd_buffer.id, ((BufferInfo *)breadcrumb_buffer.id)->vk_buffer, 0, sizeof(uint32_t), p_data); +} + +void RenderingDeviceDriverVulkan::on_device_lost() const { + if (device_functions.GetDeviceFaultInfoEXT == nullptr) { + _err_print_error(FUNCTION_STR, __FILE__, __LINE__, "VK_EXT_device_fault not available."); + return; + } + + VkDeviceFaultCountsEXT fault_counts = {}; + fault_counts.sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_COUNTS_EXT; + VkResult vkres = device_functions.GetDeviceFaultInfoEXT(vk_device, &fault_counts, nullptr); + + if (vkres != VK_SUCCESS) { + _err_print_error(FUNCTION_STR, __FILE__, __LINE__, "vkGetDeviceFaultInfoEXT returned " + itos(vkres) + " when getting fault count, 
skipping VK_EXT_device_fault report..."); + return; + } + + String err_msg; + VkDeviceFaultInfoEXT fault_info = {}; + fault_info.sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_EXT; + fault_info.pVendorInfos = fault_counts.vendorInfoCount + ? (VkDeviceFaultVendorInfoEXT *)memalloc(fault_counts.vendorInfoCount * sizeof(VkDeviceFaultVendorInfoEXT)) + : nullptr; + fault_info.pAddressInfos = + fault_counts.addressInfoCount + ? (VkDeviceFaultAddressInfoEXT *)memalloc(fault_counts.addressInfoCount * sizeof(VkDeviceFaultAddressInfoEXT)) + : nullptr; + fault_counts.vendorBinarySize = 0; + vkres = device_functions.GetDeviceFaultInfoEXT(vk_device, &fault_counts, &fault_info); + if (vkres != VK_SUCCESS) { + _err_print_error(FUNCTION_STR, __FILE__, __LINE__, "vkGetDeviceFaultInfoEXT returned " + itos(vkres) + " when getting fault info, skipping VK_EXT_device_fault report..."); + } else { + err_msg += "** Report from VK_EXT_device_fault **"; + err_msg += "\nDescription: " + String(fault_info.description); + err_msg += "\nVendor infos:"; + for (uint32_t vd = 0; vd < fault_counts.vendorInfoCount; ++vd) { + const VkDeviceFaultVendorInfoEXT *vendor_info = &fault_info.pVendorInfos[vd]; + err_msg += "\nInfo " + itos(vd); + err_msg += "\n Description: " + String(vendor_info->description); + err_msg += "\n Fault code : " + itos(vendor_info->vendorFaultCode); + err_msg += "\n Fault data : " + itos(vendor_info->vendorFaultData); + } + + static constexpr const char *addressTypeNames[] = { + "NONE", + "READ_INVALID", + "WRITE_INVALID", + "EXECUTE_INVALID", + "INSTRUCTION_POINTER_UNKNOWN", + "INSTRUCTION_POINTER_INVALID", + "INSTRUCTION_POINTER_FAULT", + }; + err_msg += "\nAddresses info:"; + for (uint32_t ad = 0; ad < fault_counts.addressInfoCount; ++ad) { + const VkDeviceFaultAddressInfoEXT *addr_info = &fault_info.pAddressInfos[ad]; + // From https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkDeviceFaultAddressInfoEXT.html + const VkDeviceAddress lower = 
(addr_info->reportedAddress & ~(addr_info->addressPrecision - 1)); + const VkDeviceAddress upper = (addr_info->reportedAddress | (addr_info->addressPrecision - 1)); + err_msg += "\nInfo " + itos(ad); + err_msg += "\n Type : " + String(addressTypeNames[addr_info->addressType]); + err_msg += "\n Reported address: " + itos(addr_info->reportedAddress); + err_msg += "\n Lower address : " + itos(lower); + err_msg += "\n Upper address : " + itos(upper); + err_msg += "\n Precision : " + itos(addr_info->addressPrecision); + } + } + + _err_print_error(FUNCTION_STR, __FILE__, __LINE__, err_msg); + + if (fault_info.pVendorInfos) { + memfree(fault_info.pVendorInfos); + } + if (fault_info.pAddressInfos) { + memfree(fault_info.pAddressInfos); + } + + _err_print_error(FUNCTION_STR, __FILE__, __LINE__, context_driver->get_driver_and_device_memory_report()); +} + +void RenderingDeviceDriverVulkan::print_lost_device_info() { +#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) + void *breadcrumb_ptr; + vmaFlushAllocation(allocator, ((BufferInfo *)breadcrumb_buffer.id)->allocation.handle, 0, sizeof(uint32_t)); + vmaInvalidateAllocation(allocator, ((BufferInfo *)breadcrumb_buffer.id)->allocation.handle, 0, sizeof(uint32_t)); + + vmaMapMemory(allocator, ((BufferInfo *)breadcrumb_buffer.id)->allocation.handle, &breadcrumb_ptr); + uint32_t last_breadcrumb = *(uint32_t *)breadcrumb_ptr; + vmaUnmapMemory(allocator, ((BufferInfo *)breadcrumb_buffer.id)->allocation.handle); + uint32_t phase = last_breadcrumb & uint32_t(~((1 << 16) - 1)); + uint32_t user_data = last_breadcrumb & ((1 << 16) - 1); + String error_msg = "Last known breadcrumb: "; + + switch (phase) { + case BreadcrumbMarker::ALPHA_PASS: + error_msg += "ALPHA_PASS"; + break; + case BreadcrumbMarker::BLIT_PASS: + error_msg += "BLIT_PASS"; + break; + case BreadcrumbMarker::DEBUG_PASS: + error_msg += "DEBUG_PASS"; + break; + case BreadcrumbMarker::LIGHTMAPPER_PASS: + error_msg += "LIGHTMAPPER_PASS"; + break; + case 
BreadcrumbMarker::OPAQUE_PASS: + error_msg += "OPAQUE_PASS"; + break; + case BreadcrumbMarker::POST_PROCESSING_PASS: + error_msg += "POST_PROCESSING_PASS"; + break; + case BreadcrumbMarker::REFLECTION_PROBES: + error_msg += "REFLECTION_PROBES"; + break; + case BreadcrumbMarker::SHADOW_PASS_CUBE: + error_msg += "SHADOW_PASS_CUBE"; + break; + case BreadcrumbMarker::SHADOW_PASS_DIRECTIONAL: + error_msg += "SHADOW_PASS_DIRECTIONAL"; + break; + case BreadcrumbMarker::SKY_PASS: + error_msg += "SKY_PASS"; + break; + case BreadcrumbMarker::TRANSPARENT_PASS: + error_msg += "TRANSPARENT_PASS"; + break; + case BreadcrumbMarker::UI_PASS: + error_msg += "UI_PASS"; + break; + default: + error_msg += "UNKNOWN_BREADCRUMB(" + itos((uint32_t)phase) + ')'; + break; + } + + if (user_data != 0) { + error_msg += " | User data: " + itos(user_data); + } + + _err_print_error(FUNCTION_STR, __FILE__, __LINE__, error_msg); +#endif + on_device_lost(); +} + /********************/ /**** SUBMISSION ****/ /********************/ @@ -5010,9 +5387,12 @@ RenderingDeviceDriverVulkan::RenderingDeviceDriverVulkan(RenderingContextDriverV DEV_ASSERT(p_context_driver != nullptr); context_driver = p_context_driver; + max_descriptor_sets_per_pool = GLOBAL_GET("rendering/rendering_device/vulkan/max_descriptors_per_pool"); } RenderingDeviceDriverVulkan::~RenderingDeviceDriverVulkan() { + buffer_free(breadcrumb_buffer); + while (small_allocs_pools.size()) { HashMap<uint32_t, VmaPool>::Iterator E = small_allocs_pools.begin(); vmaDestroyPool(allocator, E->value); @@ -5021,6 +5401,6 @@ RenderingDeviceDriverVulkan::~RenderingDeviceDriverVulkan() { vmaDestroyAllocator(allocator); if (vk_device != VK_NULL_HANDLE) { - vkDestroyDevice(vk_device, nullptr); + vkDestroyDevice(vk_device, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DEVICE)); } } diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 6847ae00be..2615d9824d 100644 --- 
a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -111,7 +111,18 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { PFN_vkAcquireNextImageKHR AcquireNextImageKHR = nullptr; PFN_vkQueuePresentKHR QueuePresentKHR = nullptr; PFN_vkCreateRenderPass2KHR CreateRenderPass2KHR = nullptr; + + // Debug marker extensions. + PFN_vkCmdDebugMarkerBeginEXT CmdDebugMarkerBeginEXT = nullptr; + PFN_vkCmdDebugMarkerEndEXT CmdDebugMarkerEndEXT = nullptr; + PFN_vkCmdDebugMarkerInsertEXT CmdDebugMarkerInsertEXT = nullptr; + PFN_vkDebugMarkerSetObjectNameEXT DebugMarkerSetObjectNameEXT = nullptr; + + // Debug device fault. + PFN_vkGetDeviceFaultInfoEXT GetDeviceFaultInfoEXT = nullptr; }; + // Debug marker extensions. + VkDebugReportObjectTypeEXT _convert_to_debug_report_objectType(VkObjectType p_object_type); VkDevice vk_device = VK_NULL_HANDLE; RenderingContextDriverVulkan *context_driver = nullptr; @@ -132,6 +143,10 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { ShaderCapabilities shader_capabilities; StorageBufferCapabilities storage_buffer_capabilities; bool pipeline_cache_control_support = false; + bool device_fault_support = false; +#if defined(VK_TRACK_DEVICE_MEMORY) + bool device_memory_report_support = false; +#endif DeviceFunctions device_functions; void _register_requested_device_extension(const CharString &p_extension_name, bool p_required); @@ -160,10 +175,13 @@ private: VmaPool _find_or_create_small_allocs_pool(uint32_t p_mem_type_index); +private: + BufferID breadcrumb_buffer; + +public: /*****************/ /**** BUFFERS ****/ /*****************/ -private: struct BufferInfo { VkBuffer vk_buffer = VK_NULL_HANDLE; struct { @@ -174,7 +192,6 @@ private: VkBufferView vk_view = VK_NULL_HANDLE; // For texel buffers. 
}; -public: virtual BufferID buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) override final; virtual bool buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) override final; virtual void buffer_free(BufferID p_buffer) override final; @@ -187,6 +204,7 @@ public: /*****************/ struct TextureInfo { + VkImage vk_image = VK_NULL_HANDLE; VkImageView vk_view = VK_NULL_HANDLE; DataFormat rd_format = DATA_FORMAT_MAX; VkImageCreateInfo vk_create_info = {}; @@ -405,6 +423,7 @@ public: virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name) override final; virtual void shader_free(ShaderID p_shader) override final; + virtual void shader_destroy_modules(ShaderID p_shader) override final; /*********************/ /**** UNIFORM SET ****/ /*********************/ @@ -606,6 +625,13 @@ public: virtual void command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) override final; virtual void command_end_label(CommandBufferID p_cmd_buffer) override final; + /****************/ + /**** DEBUG *****/ + /****************/ + virtual void command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) override final; + void print_lost_device_info(); + void on_device_lost() const; + /********************/ /**** SUBMISSION ****/ /********************/ @@ -620,6 +646,7 @@ public: virtual void set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) override final; virtual uint64_t get_resource_native_handle(DriverResource p_type, ID p_driver_id) override final; virtual uint64_t get_total_memory_used() override final; + virtual uint64_t limit_get(Limit p_limit) override final; virtual uint64_t api_trait_get(ApiTrait p_trait) override final; virtual bool has_feature(Features p_feature) override final; @@ -651,4 +678,6 @@ public: virtual ~RenderingDeviceDriverVulkan(); }; +using VKC 
= RenderingContextDriverVulkan; + #endif // RENDERING_DEVICE_DRIVER_VULKAN_H diff --git a/drivers/windows/dir_access_windows.cpp b/drivers/windows/dir_access_windows.cpp index 63ba6a6c96..f7632842ed 100644 --- a/drivers/windows/dir_access_windows.cpp +++ b/drivers/windows/dir_access_windows.cpp @@ -35,6 +35,7 @@ #include "core/config/project_settings.h" #include "core/os/memory.h" +#include "core/os/os.h" #include "core/string/print_string.h" #include <stdio.h> @@ -69,9 +70,19 @@ struct DirAccessWindowsPrivate { }; String DirAccessWindows::fix_path(const String &p_path) const { - String r_path = DirAccess::fix_path(p_path); - if (r_path.is_absolute_path() && !r_path.is_network_share_path() && r_path.length() > MAX_PATH) { - r_path = "\\\\?\\" + r_path.replace("/", "\\"); + String r_path = DirAccess::fix_path(p_path.trim_prefix(R"(\\?\)").replace("\\", "/")); + if (r_path.ends_with(":")) { + r_path += "/"; + } + if (r_path.is_relative_path()) { + r_path = current_dir.trim_prefix(R"(\\?\)").replace("\\", "/").path_join(r_path); + } else if (r_path == ".") { + r_path = current_dir.trim_prefix(R"(\\?\)").replace("\\", "/"); + } + r_path = r_path.simplify_path(); + r_path = r_path.replace("/", "\\"); + if (!r_path.is_network_share_path() && !r_path.begins_with(R"(\\?\)")) { + r_path = R"(\\?\)" + r_path; } return r_path; } @@ -140,28 +151,33 @@ String DirAccessWindows::get_drive(int p_drive) { Error DirAccessWindows::change_dir(String p_dir) { GLOBAL_LOCK_FUNCTION - p_dir = fix_path(p_dir); + String dir = fix_path(p_dir); - WCHAR real_current_dir_name[2048]; - GetCurrentDirectoryW(2048, real_current_dir_name); - String prev_dir = String::utf16((const char16_t *)real_current_dir_name); + Char16String real_current_dir_name; + size_t str_len = GetCurrentDirectoryW(0, nullptr); + real_current_dir_name.resize(str_len + 1); + GetCurrentDirectoryW(real_current_dir_name.size(), (LPWSTR)real_current_dir_name.ptrw()); + String prev_dir = String::utf16((const char16_t 
*)real_current_dir_name.get_data()); SetCurrentDirectoryW((LPCWSTR)(current_dir.utf16().get_data())); - bool worked = (SetCurrentDirectoryW((LPCWSTR)(p_dir.utf16().get_data())) != 0); + bool worked = (SetCurrentDirectoryW((LPCWSTR)(dir.utf16().get_data())) != 0); String base = _get_root_path(); if (!base.is_empty()) { - GetCurrentDirectoryW(2048, real_current_dir_name); - String new_dir = String::utf16((const char16_t *)real_current_dir_name).replace("\\", "/"); + str_len = GetCurrentDirectoryW(0, nullptr); + real_current_dir_name.resize(str_len + 1); + GetCurrentDirectoryW(real_current_dir_name.size(), (LPWSTR)real_current_dir_name.ptrw()); + String new_dir = String::utf16((const char16_t *)real_current_dir_name.get_data()).trim_prefix(R"(\\?\)").replace("\\", "/"); if (!new_dir.begins_with(base)) { worked = false; } } if (worked) { - GetCurrentDirectoryW(2048, real_current_dir_name); - current_dir = String::utf16((const char16_t *)real_current_dir_name); - current_dir = current_dir.replace("\\", "/"); + str_len = GetCurrentDirectoryW(0, nullptr); + real_current_dir_name.resize(str_len + 1); + GetCurrentDirectoryW(real_current_dir_name.size(), (LPWSTR)real_current_dir_name.ptrw()); + current_dir = String::utf16((const char16_t *)real_current_dir_name.get_data()); } SetCurrentDirectoryW((LPCWSTR)(prev_dir.utf16().get_data())); @@ -172,12 +188,6 @@ Error DirAccessWindows::change_dir(String p_dir) { Error DirAccessWindows::make_dir(String p_dir) { GLOBAL_LOCK_FUNCTION - p_dir = fix_path(p_dir); - if (p_dir.is_relative_path()) { - p_dir = current_dir.path_join(p_dir); - p_dir = fix_path(p_dir); - } - if (FileAccessWindows::is_path_invalid(p_dir)) { #ifdef DEBUG_ENABLED WARN_PRINT("The path :" + p_dir + " is a reserved Windows system pipe, so it can't be used for creating directories."); @@ -185,12 +195,12 @@ Error DirAccessWindows::make_dir(String p_dir) { return ERR_INVALID_PARAMETER; } - p_dir = p_dir.simplify_path().replace("/", "\\"); + String dir = 
fix_path(p_dir); bool success; int err; - success = CreateDirectoryW((LPCWSTR)(p_dir.utf16().get_data()), nullptr); + success = CreateDirectoryW((LPCWSTR)(dir.utf16().get_data()), nullptr); err = GetLastError(); if (success) { @@ -205,9 +215,10 @@ Error DirAccessWindows::make_dir(String p_dir) { } String DirAccessWindows::get_current_dir(bool p_include_drive) const { + String cdir = current_dir.trim_prefix(R"(\\?\)").replace("\\", "/"); String base = _get_root_path(); if (!base.is_empty()) { - String bd = current_dir.replace("\\", "/").replace_first(base, ""); + String bd = cdir.replace_first(base, ""); if (bd.begins_with("/")) { return _get_root_string() + bd.substr(1, bd.length()); } else { @@ -216,30 +227,25 @@ String DirAccessWindows::get_current_dir(bool p_include_drive) const { } if (p_include_drive) { - return current_dir; + return cdir; } else { if (_get_root_string().is_empty()) { - int pos = current_dir.find(":"); + int pos = cdir.find(":"); if (pos != -1) { - return current_dir.substr(pos + 1); + return cdir.substr(pos + 1); } } - return current_dir; + return cdir; } } bool DirAccessWindows::file_exists(String p_file) { GLOBAL_LOCK_FUNCTION - if (!p_file.is_absolute_path()) { - p_file = get_current_dir().path_join(p_file); - } - - p_file = fix_path(p_file); + String file = fix_path(p_file); DWORD fileAttr; - - fileAttr = GetFileAttributesW((LPCWSTR)(p_file.utf16().get_data())); + fileAttr = GetFileAttributesW((LPCWSTR)(file.utf16().get_data())); if (INVALID_FILE_ATTRIBUTES == fileAttr) { return false; } @@ -250,14 +256,10 @@ bool DirAccessWindows::file_exists(String p_file) { bool DirAccessWindows::dir_exists(String p_dir) { GLOBAL_LOCK_FUNCTION - if (p_dir.is_relative_path()) { - p_dir = get_current_dir().path_join(p_dir); - } - - p_dir = fix_path(p_dir); + String dir = fix_path(p_dir); DWORD fileAttr; - fileAttr = GetFileAttributesW((LPCWSTR)(p_dir.utf16().get_data())); + fileAttr = GetFileAttributesW((LPCWSTR)(dir.utf16().get_data())); if 
(INVALID_FILE_ATTRIBUTES == fileAttr) { return false; } @@ -265,66 +267,63 @@ bool DirAccessWindows::dir_exists(String p_dir) { } Error DirAccessWindows::rename(String p_path, String p_new_path) { - if (p_path.is_relative_path()) { - p_path = get_current_dir().path_join(p_path); - } - - p_path = fix_path(p_path); - - if (p_new_path.is_relative_path()) { - p_new_path = get_current_dir().path_join(p_new_path); - } - - p_new_path = fix_path(p_new_path); + String path = fix_path(p_path); + String new_path = fix_path(p_new_path); // If we're only changing file name case we need to do a little juggling - if (p_path.to_lower() == p_new_path.to_lower()) { - if (dir_exists(p_path)) { + if (path.to_lower() == new_path.to_lower()) { + if (dir_exists(path)) { // The path is a dir; just rename - return ::_wrename((LPCWSTR)(p_path.utf16().get_data()), (LPCWSTR)(p_new_path.utf16().get_data())) == 0 ? OK : FAILED; + return MoveFileW((LPCWSTR)(path.utf16().get_data()), (LPCWSTR)(new_path.utf16().get_data())) != 0 ? OK : FAILED; } // The path is a file; juggle - WCHAR tmpfile[MAX_PATH]; - - if (!GetTempFileNameW((LPCWSTR)(fix_path(get_current_dir()).utf16().get_data()), nullptr, 0, tmpfile)) { - return FAILED; + // Note: do not use GetTempFileNameW, it's not long path aware! 
+ Char16String tmpfile_utf16; + uint64_t id = OS::get_singleton()->get_ticks_usec(); + while (true) { + tmpfile_utf16 = (path + itos(id++) + ".tmp").utf16(); + HANDLE handle = CreateFileW((LPCWSTR)tmpfile_utf16.get_data(), GENERIC_WRITE, 0, NULL, CREATE_NEW, FILE_ATTRIBUTE_NORMAL, 0); + if (handle != INVALID_HANDLE_VALUE) { + CloseHandle(handle); + break; + } + if (GetLastError() != ERROR_FILE_EXISTS && GetLastError() != ERROR_SHARING_VIOLATION) { + return FAILED; + } } - if (!::ReplaceFileW(tmpfile, (LPCWSTR)(p_path.utf16().get_data()), nullptr, 0, nullptr, nullptr)) { - DeleteFileW(tmpfile); + if (!::ReplaceFileW((LPCWSTR)tmpfile_utf16.get_data(), (LPCWSTR)(path.utf16().get_data()), nullptr, 0, nullptr, nullptr)) { + DeleteFileW((LPCWSTR)tmpfile_utf16.get_data()); return FAILED; } - return ::_wrename(tmpfile, (LPCWSTR)(p_new_path.utf16().get_data())) == 0 ? OK : FAILED; + return MoveFileW((LPCWSTR)tmpfile_utf16.get_data(), (LPCWSTR)(new_path.utf16().get_data())) != 0 ? OK : FAILED; } else { - if (file_exists(p_new_path)) { - if (remove(p_new_path) != OK) { + if (file_exists(new_path)) { + if (remove(new_path) != OK) { return FAILED; } } - return ::_wrename((LPCWSTR)(p_path.utf16().get_data()), (LPCWSTR)(p_new_path.utf16().get_data())) == 0 ? OK : FAILED; + return MoveFileW((LPCWSTR)(path.utf16().get_data()), (LPCWSTR)(new_path.utf16().get_data())) != 0 ? OK : FAILED; } } Error DirAccessWindows::remove(String p_path) { - if (p_path.is_relative_path()) { - p_path = get_current_dir().path_join(p_path); - } - - p_path = fix_path(p_path); + String path = fix_path(p_path); + const Char16String &path_utf16 = path.utf16(); DWORD fileAttr; - fileAttr = GetFileAttributesW((LPCWSTR)(p_path.utf16().get_data())); + fileAttr = GetFileAttributesW((LPCWSTR)(path_utf16.get_data())); if (INVALID_FILE_ATTRIBUTES == fileAttr) { return FAILED; } if ((fileAttr & FILE_ATTRIBUTE_DIRECTORY)) { - return ::_wrmdir((LPCWSTR)(p_path.utf16().get_data())) == 0 ? 
OK : FAILED; + return RemoveDirectoryW((LPCWSTR)(path_utf16.get_data())) != 0 ? OK : FAILED; } else { - return ::_wunlink((LPCWSTR)(p_path.utf16().get_data())) == 0 ? OK : FAILED; + return DeleteFileW((LPCWSTR)(path_utf16.get_data())) != 0 ? OK : FAILED; } } @@ -339,16 +338,16 @@ uint64_t DirAccessWindows::get_space_left() { } String DirAccessWindows::get_filesystem_type() const { - String path = fix_path(const_cast<DirAccessWindows *>(this)->get_current_dir()); - - int unit_end = path.find(":"); - ERR_FAIL_COND_V(unit_end == -1, String()); - String unit = path.substr(0, unit_end + 1) + "\\"; + String path = current_dir.trim_prefix(R"(\\?\)"); if (path.is_network_share_path()) { return "Network Share"; } + int unit_end = path.find(":"); + ERR_FAIL_COND_V(unit_end == -1, String()); + String unit = path.substr(0, unit_end + 1) + "\\"; + WCHAR szVolumeName[100]; WCHAR szFileSystemName[10]; DWORD dwSerialNumber = 0; @@ -370,11 +369,7 @@ String DirAccessWindows::get_filesystem_type() const { } bool DirAccessWindows::is_case_sensitive(const String &p_path) const { - String f = p_path; - if (!f.is_absolute_path()) { - f = get_current_dir().path_join(f); - } - f = fix_path(f); + String f = fix_path(p_path); HANDLE h_file = ::CreateFileW((LPCWSTR)(f.utf16().get_data()), 0, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, @@ -397,12 +392,7 @@ bool DirAccessWindows::is_case_sensitive(const String &p_path) const { } bool DirAccessWindows::is_link(String p_file) { - String f = p_file; - - if (!f.is_absolute_path()) { - f = get_current_dir().path_join(f); - } - f = fix_path(f); + String f = fix_path(p_file); DWORD attr = GetFileAttributesW((LPCWSTR)(f.utf16().get_data())); if (attr == INVALID_FILE_ATTRIBUTES) { @@ -413,12 +403,7 @@ bool DirAccessWindows::is_link(String p_file) { } String DirAccessWindows::read_link(String p_file) { - String f = p_file; - - if (!f.is_absolute_path()) { - f = get_current_dir().path_join(f); - } - f = fix_path(f); + String f = 
fix_path(p_file); HANDLE hfile = CreateFileW((LPCWSTR)(f.utf16().get_data()), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, nullptr); if (hfile == INVALID_HANDLE_VALUE) { @@ -434,22 +419,18 @@ String DirAccessWindows::read_link(String p_file) { GetFinalPathNameByHandleW(hfile, (LPWSTR)cs.ptrw(), ret, VOLUME_NAME_DOS | FILE_NAME_NORMALIZED); CloseHandle(hfile); - return String::utf16((const char16_t *)cs.ptr(), ret).trim_prefix(R"(\\?\)"); + return String::utf16((const char16_t *)cs.ptr(), ret).trim_prefix(R"(\\?\)").replace("\\", "/"); } Error DirAccessWindows::create_link(String p_source, String p_target) { - if (p_target.is_relative_path()) { - p_target = get_current_dir().path_join(p_target); - } + String source = fix_path(p_source); + String target = fix_path(p_target); - p_source = fix_path(p_source); - p_target = fix_path(p_target); - - DWORD file_attr = GetFileAttributesW((LPCWSTR)(p_source.utf16().get_data())); + DWORD file_attr = GetFileAttributesW((LPCWSTR)(source.utf16().get_data())); bool is_dir = (file_attr & FILE_ATTRIBUTE_DIRECTORY); DWORD flags = ((is_dir) ? 
SYMBOLIC_LINK_FLAG_DIRECTORY : 0) | SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE; - if (CreateSymbolicLinkW((LPCWSTR)p_target.utf16().get_data(), (LPCWSTR)p_source.utf16().get_data(), flags) != 0) { + if (CreateSymbolicLinkW((LPCWSTR)target.utf16().get_data(), (LPCWSTR)source.utf16().get_data(), flags) != 0) { return OK; } else { return FAILED; @@ -459,7 +440,12 @@ Error DirAccessWindows::create_link(String p_source, String p_target) { DirAccessWindows::DirAccessWindows() { p = memnew(DirAccessWindowsPrivate); p->h = INVALID_HANDLE_VALUE; - current_dir = "."; + + Char16String real_current_dir_name; + size_t str_len = GetCurrentDirectoryW(0, nullptr); + real_current_dir_name.resize(str_len + 1); + GetCurrentDirectoryW(real_current_dir_name.size(), (LPWSTR)real_current_dir_name.ptrw()); + current_dir = String::utf16((const char16_t *)real_current_dir_name.get_data()); DWORD mask = GetLogicalDrives(); diff --git a/drivers/windows/file_access_windows.cpp b/drivers/windows/file_access_windows.cpp index 9885d9d7ee..0243d863f8 100644 --- a/drivers/windows/file_access_windows.cpp +++ b/drivers/windows/file_access_windows.cpp @@ -73,8 +73,18 @@ bool FileAccessWindows::is_path_invalid(const String &p_path) { String FileAccessWindows::fix_path(const String &p_path) const { String r_path = FileAccess::fix_path(p_path); - if (r_path.is_absolute_path() && !r_path.is_network_share_path() && r_path.length() > MAX_PATH) { - r_path = "\\\\?\\" + r_path.replace("/", "\\"); + + if (r_path.is_relative_path()) { + Char16String current_dir_name; + size_t str_len = GetCurrentDirectoryW(0, nullptr); + current_dir_name.resize(str_len + 1); + GetCurrentDirectoryW(current_dir_name.size(), (LPWSTR)current_dir_name.ptrw()); + r_path = String::utf16((const char16_t *)current_dir_name.get_data()).trim_prefix(R"(\\?\)").replace("\\", "/").path_join(r_path); + } + r_path = r_path.simplify_path(); + r_path = r_path.replace("/", "\\"); + if (!r_path.is_network_share_path() && 
!r_path.begins_with(R"(\\?\)")) { + r_path = R"(\\?\)" + r_path; } return r_path; } @@ -108,9 +118,6 @@ Error FileAccessWindows::open_internal(const String &p_path, int p_mode_flags) { return ERR_INVALID_PARAMETER; } - /* Pretty much every implementation that uses fopen as primary - backend supports utf8 encoding. */ - struct _stat st; if (_wstat((LPCWSTR)(path.utf16().get_data()), &st) == 0) { if (!S_ISREG(st.st_mode)) { @@ -125,7 +132,7 @@ Error FileAccessWindows::open_internal(const String &p_path, int p_mode_flags) { // platforms), we only check for relative paths, or paths in res:// or user://, // other paths aren't likely to be portable anyway. if (p_mode_flags == READ && (p_path.is_relative_path() || get_access_type() != ACCESS_FILESYSTEM)) { - String base_path = path; + String base_path = p_path; String working_path; String proper_path; @@ -144,23 +151,17 @@ Error FileAccessWindows::open_internal(const String &p_path, int p_mode_flags) { } proper_path = "user://"; } + working_path = fix_path(working_path); WIN32_FIND_DATAW d; - Vector<String> parts = base_path.split("/"); + Vector<String> parts = base_path.simplify_path().split("/"); bool mismatch = false; for (const String &part : parts) { - working_path = working_path.path_join(part); - - // Skip if relative. - if (part == "." || part == "..") { - proper_path = proper_path.path_join(part); - continue; - } + working_path = working_path + "\\" + part; HANDLE fnd = FindFirstFileW((LPCWSTR)(working_path.utf16().get_data()), &d); - if (fnd == INVALID_HANDLE_VALUE) { mismatch = false; break; @@ -186,12 +187,22 @@ Error FileAccessWindows::open_internal(const String &p_path, int p_mode_flags) { if (is_backup_save_enabled() && p_mode_flags == WRITE) { save_path = path; // Create a temporary file in the same directory as the target file. 
- WCHAR tmpFileName[MAX_PATH]; - if (GetTempFileNameW((LPCWSTR)(path.get_base_dir().utf16().get_data()), (LPCWSTR)(path.get_file().utf16().get_data()), 0, tmpFileName) == 0) { - last_error = ERR_FILE_CANT_OPEN; - return last_error; + // Note: do not use GetTempFileNameW, it's not long path aware! + String tmpfile; + uint64_t id = OS::get_singleton()->get_ticks_usec(); + while (true) { + tmpfile = path + itos(id++) + ".tmp"; + HANDLE handle = CreateFileW((LPCWSTR)tmpfile.utf16().get_data(), GENERIC_WRITE, 0, NULL, CREATE_NEW, FILE_ATTRIBUTE_NORMAL, 0); + if (handle != INVALID_HANDLE_VALUE) { + CloseHandle(handle); + break; + } + if (GetLastError() != ERROR_FILE_EXISTS && GetLastError() != ERROR_SHARING_VIOLATION) { + last_error = ERR_FILE_CANT_WRITE; + return FAILED; + } } - path = tmpFileName; + path = tmpfile; } f = _wfsopen((LPCWSTR)(path.utf16().get_data()), mode_string, is_backup_save_enabled() ? _SH_SECURE : _SH_DENYNO); @@ -235,7 +246,7 @@ void FileAccessWindows::_close() { } else { // Either the target exists and is locked (temporarily, hopefully) // or it doesn't exist; let's assume the latter before re-trying. 
- rename_error = _wrename((LPCWSTR)(path_utf16.get_data()), (LPCWSTR)(save_path_utf16.get_data())) != 0; + rename_error = MoveFileW((LPCWSTR)(path_utf16.get_data()), (LPCWSTR)(save_path_utf16.get_data())) == 0; } if (!rename_error) { @@ -262,7 +273,7 @@ String FileAccessWindows::get_path() const { } String FileAccessWindows::get_path_absolute() const { - return path; + return path.trim_prefix(R"(\\?\)").replace("\\", "/"); } bool FileAccessWindows::is_open() const { @@ -312,93 +323,9 @@ bool FileAccessWindows::eof_reached() const { return last_error == ERR_FILE_EOF; } -uint8_t FileAccessWindows::get_8() const { - ERR_FAIL_NULL_V(f, 0); - - if (flags == READ_WRITE || flags == WRITE_READ) { - if (prev_op == WRITE) { - fflush(f); - } - prev_op = READ; - } - uint8_t b; - if (fread(&b, 1, 1, f) == 0) { - check_errors(); - b = '\0'; - } - - return b; -} - -uint16_t FileAccessWindows::get_16() const { - ERR_FAIL_NULL_V(f, 0); - - if (flags == READ_WRITE || flags == WRITE_READ) { - if (prev_op == WRITE) { - fflush(f); - } - prev_op = READ; - } - - uint16_t b = 0; - if (fread(&b, 1, 2, f) != 2) { - check_errors(); - } - - if (big_endian) { - b = BSWAP16(b); - } - - return b; -} - -uint32_t FileAccessWindows::get_32() const { - ERR_FAIL_NULL_V(f, 0); - - if (flags == READ_WRITE || flags == WRITE_READ) { - if (prev_op == WRITE) { - fflush(f); - } - prev_op = READ; - } - - uint32_t b = 0; - if (fread(&b, 1, 4, f) != 4) { - check_errors(); - } - - if (big_endian) { - b = BSWAP32(b); - } - - return b; -} - -uint64_t FileAccessWindows::get_64() const { - ERR_FAIL_NULL_V(f, 0); - - if (flags == READ_WRITE || flags == WRITE_READ) { - if (prev_op == WRITE) { - fflush(f); - } - prev_op = READ; - } - - uint64_t b = 0; - if (fread(&b, 1, 8, f) != 8) { - check_errors(); - } - - if (big_endian) { - b = BSWAP64(b); - } - - return b; -} - uint64_t FileAccessWindows::get_buffer(uint8_t *p_dst, uint64_t p_length) const { - ERR_FAIL_COND_V(!p_dst && p_length > 0, -1); ERR_FAIL_NULL_V(f, -1); 
+ ERR_FAIL_COND_V(!p_dst && p_length > 0, -1); if (flags == READ_WRITE || flags == WRITE_READ) { if (prev_op == WRITE) { @@ -406,8 +333,10 @@ uint64_t FileAccessWindows::get_buffer(uint8_t *p_dst, uint64_t p_length) const } prev_op = READ; } + uint64_t read = fread(p_dst, 1, p_length, f); check_errors(); + return read; } @@ -442,77 +371,6 @@ void FileAccessWindows::flush() { } } -void FileAccessWindows::store_8(uint8_t p_dest) { - ERR_FAIL_NULL(f); - - if (flags == READ_WRITE || flags == WRITE_READ) { - if (prev_op == READ) { - if (last_error != ERR_FILE_EOF) { - fseek(f, 0, SEEK_CUR); - } - } - prev_op = WRITE; - } - fwrite(&p_dest, 1, 1, f); -} - -void FileAccessWindows::store_16(uint16_t p_dest) { - ERR_FAIL_NULL(f); - - if (flags == READ_WRITE || flags == WRITE_READ) { - if (prev_op == READ) { - if (last_error != ERR_FILE_EOF) { - fseek(f, 0, SEEK_CUR); - } - } - prev_op = WRITE; - } - - if (big_endian) { - p_dest = BSWAP16(p_dest); - } - - fwrite(&p_dest, 1, 2, f); -} - -void FileAccessWindows::store_32(uint32_t p_dest) { - ERR_FAIL_NULL(f); - - if (flags == READ_WRITE || flags == WRITE_READ) { - if (prev_op == READ) { - if (last_error != ERR_FILE_EOF) { - fseek(f, 0, SEEK_CUR); - } - } - prev_op = WRITE; - } - - if (big_endian) { - p_dest = BSWAP32(p_dest); - } - - fwrite(&p_dest, 1, 4, f); -} - -void FileAccessWindows::store_64(uint64_t p_dest) { - ERR_FAIL_NULL(f); - - if (flags == READ_WRITE || flags == WRITE_READ) { - if (prev_op == READ) { - if (last_error != ERR_FILE_EOF) { - fseek(f, 0, SEEK_CUR); - } - } - prev_op = WRITE; - } - - if (big_endian) { - p_dest = BSWAP64(p_dest); - } - - fwrite(&p_dest, 1, 8, f); -} - void FileAccessWindows::store_buffer(const uint8_t *p_src, uint64_t p_length) { ERR_FAIL_NULL(f); ERR_FAIL_COND(!p_src && p_length > 0); @@ -525,6 +383,7 @@ void FileAccessWindows::store_buffer(const uint8_t *p_src, uint64_t p_length) { } prev_op = WRITE; } + ERR_FAIL_COND(fwrite(p_src, 1, p_length, f) != (size_t)p_length); } @@ -549,7 
+408,7 @@ uint64_t FileAccessWindows::_get_modified_time(const String &p_file) { } String file = fix_path(p_file); - if (file.ends_with("/") && file != "/") { + if (file.ends_with("\\") && file != "\\") { file = file.substr(0, file.length() - 1); } @@ -582,14 +441,15 @@ bool FileAccessWindows::_get_hidden_attribute(const String &p_file) { Error FileAccessWindows::_set_hidden_attribute(const String &p_file, bool p_hidden) { String file = fix_path(p_file); + const Char16String &file_utf16 = file.utf16(); - DWORD attrib = GetFileAttributesW((LPCWSTR)file.utf16().get_data()); + DWORD attrib = GetFileAttributesW((LPCWSTR)file_utf16.get_data()); ERR_FAIL_COND_V_MSG(attrib == INVALID_FILE_ATTRIBUTES, FAILED, "Failed to get attributes for: " + p_file); BOOL ok; if (p_hidden) { - ok = SetFileAttributesW((LPCWSTR)file.utf16().get_data(), attrib | FILE_ATTRIBUTE_HIDDEN); + ok = SetFileAttributesW((LPCWSTR)file_utf16.get_data(), attrib | FILE_ATTRIBUTE_HIDDEN); } else { - ok = SetFileAttributesW((LPCWSTR)file.utf16().get_data(), attrib & ~FILE_ATTRIBUTE_HIDDEN); + ok = SetFileAttributesW((LPCWSTR)file_utf16.get_data(), attrib & ~FILE_ATTRIBUTE_HIDDEN); } ERR_FAIL_COND_V_MSG(!ok, FAILED, "Failed to set attributes for: " + p_file); @@ -606,14 +466,15 @@ bool FileAccessWindows::_get_read_only_attribute(const String &p_file) { Error FileAccessWindows::_set_read_only_attribute(const String &p_file, bool p_ro) { String file = fix_path(p_file); + const Char16String &file_utf16 = file.utf16(); - DWORD attrib = GetFileAttributesW((LPCWSTR)file.utf16().get_data()); + DWORD attrib = GetFileAttributesW((LPCWSTR)file_utf16.get_data()); ERR_FAIL_COND_V_MSG(attrib == INVALID_FILE_ATTRIBUTES, FAILED, "Failed to get attributes for: " + p_file); BOOL ok; if (p_ro) { - ok = SetFileAttributesW((LPCWSTR)file.utf16().get_data(), attrib | FILE_ATTRIBUTE_READONLY); + ok = SetFileAttributesW((LPCWSTR)file_utf16.get_data(), attrib | FILE_ATTRIBUTE_READONLY); } else { - ok = 
SetFileAttributesW((LPCWSTR)file.utf16().get_data(), attrib & ~FILE_ATTRIBUTE_READONLY); + ok = SetFileAttributesW((LPCWSTR)file_utf16.get_data(), attrib & ~FILE_ATTRIBUTE_READONLY); } ERR_FAIL_COND_V_MSG(!ok, FAILED, "Failed to set attributes for: " + p_file); diff --git a/drivers/windows/file_access_windows.h b/drivers/windows/file_access_windows.h index a25bbcfb3a..f458ff9c6c 100644 --- a/drivers/windows/file_access_windows.h +++ b/drivers/windows/file_access_windows.h @@ -69,20 +69,12 @@ public: virtual bool eof_reached() const override; ///< reading passed EOF - virtual uint8_t get_8() const override; ///< get a byte - virtual uint16_t get_16() const override; - virtual uint32_t get_32() const override; - virtual uint64_t get_64() const override; virtual uint64_t get_buffer(uint8_t *p_dst, uint64_t p_length) const override; virtual Error get_error() const override; ///< get last error virtual Error resize(int64_t p_length) override; virtual void flush() override; - virtual void store_8(uint8_t p_dest) override; ///< store a byte - virtual void store_16(uint16_t p_dest) override; - virtual void store_32(uint32_t p_dest) override; - virtual void store_64(uint64_t p_dest) override; virtual void store_buffer(const uint8_t *p_src, uint64_t p_length) override; ///< store an array of bytes virtual bool file_exists(const String &p_name) override; ///< return true if a file exists diff --git a/drivers/windows/file_access_windows_pipe.cpp b/drivers/windows/file_access_windows_pipe.cpp index 7902c8e1d8..0c953b14aa 100644 --- a/drivers/windows/file_access_windows_pipe.cpp +++ b/drivers/windows/file_access_windows_pipe.cpp @@ -96,22 +96,9 @@ String FileAccessWindowsPipe::get_path_absolute() const { return path_src; } -uint8_t FileAccessWindowsPipe::get_8() const { - ERR_FAIL_COND_V_MSG(fd[0] == 0, 0, "Pipe must be opened before use."); - - uint8_t b; - if (!ReadFile(fd[0], &b, 1, nullptr, nullptr)) { - last_error = ERR_FILE_CANT_READ; - b = '\0'; - } else { - last_error = 
OK; - } - return b; -} - uint64_t FileAccessWindowsPipe::get_buffer(uint8_t *p_dst, uint64_t p_length) const { - ERR_FAIL_COND_V(!p_dst && p_length > 0, -1); ERR_FAIL_COND_V_MSG(fd[0] == 0, -1, "Pipe must be opened before use."); + ERR_FAIL_COND_V(!p_dst && p_length > 0, -1); DWORD read = -1; if (!ReadFile(fd[0], p_dst, p_length, &read, nullptr) || read != p_length) { @@ -126,15 +113,6 @@ Error FileAccessWindowsPipe::get_error() const { return last_error; } -void FileAccessWindowsPipe::store_8(uint8_t p_src) { - ERR_FAIL_COND_MSG(fd[1] == 0, "Pipe must be opened before use."); - if (!WriteFile(fd[1], &p_src, 1, nullptr, nullptr)) { - last_error = ERR_FILE_CANT_WRITE; - } else { - last_error = OK; - } -} - void FileAccessWindowsPipe::store_buffer(const uint8_t *p_src, uint64_t p_length) { ERR_FAIL_COND_MSG(fd[1] == 0, "Pipe must be opened before use."); ERR_FAIL_COND(!p_src && p_length > 0); diff --git a/drivers/windows/file_access_windows_pipe.h b/drivers/windows/file_access_windows_pipe.h index b885ef78e6..4e9bd036ae 100644 --- a/drivers/windows/file_access_windows_pipe.h +++ b/drivers/windows/file_access_windows_pipe.h @@ -64,14 +64,12 @@ public: virtual bool eof_reached() const override { return false; } - virtual uint8_t get_8() const override; ///< get a byte virtual uint64_t get_buffer(uint8_t *p_dst, uint64_t p_length) const override; virtual Error get_error() const override; ///< get last error virtual Error resize(int64_t p_length) override { return ERR_UNAVAILABLE; } virtual void flush() override {} - virtual void store_8(uint8_t p_src) override; ///< store a byte virtual void store_buffer(const uint8_t *p_src, uint64_t p_length) override; ///< store an array of bytes virtual bool file_exists(const String &p_name) override { return false; } diff --git a/drivers/winmidi/midi_driver_winmidi.cpp b/drivers/winmidi/midi_driver_winmidi.cpp index 07f0226c5d..0f37f63ccd 100644 --- a/drivers/winmidi/midi_driver_winmidi.cpp +++ 
b/drivers/winmidi/midi_driver_winmidi.cpp @@ -36,26 +36,42 @@ void MIDIDriverWinMidi::read(HMIDIIN hMidiIn, UINT wMsg, DWORD_PTR dwInstance, DWORD_PTR dwParam1, DWORD_PTR dwParam2) { if (wMsg == MIM_DATA) { - receive_input_packet((int)dwInstance, (uint64_t)dwParam2, (uint8_t *)&dwParam1, 3); + // For MIM_DATA: dwParam1 = wMidiMessage, dwParam2 = dwTimestamp. + // Windows implementation has already unpacked running status and dropped any SysEx, + // so we can just forward straight to the event. + const uint8_t *midi_msg = (uint8_t *)&dwParam1; + send_event((int)dwInstance, midi_msg[0], &midi_msg[1], 2); } } Error MIDIDriverWinMidi::open() { + int device_index = 0; for (UINT i = 0; i < midiInGetNumDevs(); i++) { HMIDIIN midi_in; + MIDIINCAPS caps; - MMRESULT res = midiInOpen(&midi_in, i, (DWORD_PTR)read, (DWORD_PTR)i, CALLBACK_FUNCTION); - if (res == MMSYSERR_NOERROR) { + MMRESULT open_res = midiInOpen(&midi_in, i, (DWORD_PTR)read, + (DWORD_PTR)device_index, CALLBACK_FUNCTION); + MMRESULT caps_res = midiInGetDevCaps(i, &caps, sizeof(MIDIINCAPS)); + + if (open_res == MMSYSERR_NOERROR) { midiInStart(midi_in); - connected_sources.insert(i, midi_in); + connected_sources.push_back(midi_in); + if (caps_res == MMSYSERR_NOERROR) { + connected_input_names.push_back(caps.szPname); + } else { + // Should push something even if we don't get a name, + // so that the IDs line up correctly on the script side. + connected_input_names.push_back("ERROR"); + } + // Only increment device index for successfully connected devices. 
+ device_index++; } else { char err[256]; - midiInGetErrorText(res, err, 256); + midiInGetErrorText(open_res, err, 256); ERR_PRINT("midiInOpen error: " + String(err)); - MIDIINCAPS caps; - res = midiInGetDevCaps(i, &caps, sizeof(MIDIINCAPS)); - if (res == MMSYSERR_NOERROR) { + if (caps_res == MMSYSERR_NOERROR) { ERR_PRINT("Can't open MIDI device \"" + String(caps.szPname) + "\", is it being used by another application?"); } } @@ -64,25 +80,6 @@ Error MIDIDriverWinMidi::open() { return OK; } -PackedStringArray MIDIDriverWinMidi::get_connected_inputs() { - PackedStringArray list; - - for (int i = 0; i < connected_sources.size(); i++) { - HMIDIIN midi_in = connected_sources[i]; - UINT id = 0; - MMRESULT res = midiInGetID(midi_in, &id); - if (res == MMSYSERR_NOERROR) { - MIDIINCAPS caps; - res = midiInGetDevCaps(i, &caps, sizeof(MIDIINCAPS)); - if (res == MMSYSERR_NOERROR) { - list.push_back(caps.szPname); - } - } - } - - return list; -} - void MIDIDriverWinMidi::close() { for (int i = 0; i < connected_sources.size(); i++) { HMIDIIN midi_in = connected_sources[i]; @@ -90,9 +87,7 @@ void MIDIDriverWinMidi::close() { midiInClose(midi_in); } connected_sources.clear(); -} - -MIDIDriverWinMidi::MIDIDriverWinMidi() { + connected_input_names.clear(); } MIDIDriverWinMidi::~MIDIDriverWinMidi() { diff --git a/drivers/winmidi/midi_driver_winmidi.h b/drivers/winmidi/midi_driver_winmidi.h index f3e016f378..7a75252233 100644 --- a/drivers/winmidi/midi_driver_winmidi.h +++ b/drivers/winmidi/midi_driver_winmidi.h @@ -48,12 +48,10 @@ class MIDIDriverWinMidi : public MIDIDriver { static void CALLBACK read(HMIDIIN hMidiIn, UINT wMsg, DWORD_PTR dwInstance, DWORD_PTR dwParam1, DWORD_PTR dwParam2); public: - virtual Error open(); - virtual void close(); + virtual Error open() override; + virtual void close() override; - virtual PackedStringArray get_connected_inputs(); - - MIDIDriverWinMidi(); + MIDIDriverWinMidi() = default; virtual ~MIDIDriverWinMidi(); }; |