summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/SCsub 12
-rw-r--r-- drivers/alsamidi/midi_driver_alsamidi.cpp 177
-rw-r--r-- drivers/alsamidi/midi_driver_alsamidi.h 41
-rw-r--r-- drivers/coreaudio/audio_driver_coreaudio.cpp 21
-rw-r--r-- drivers/coremidi/midi_driver_coremidi.cpp 79
-rw-r--r-- drivers/coremidi/midi_driver_coremidi.h 21
-rw-r--r-- drivers/d3d12/SCsub 1
-rw-r--r-- drivers/d3d12/d3d12ma.cpp 12
-rw-r--r-- drivers/d3d12/dxil_hash.cpp 209
-rw-r--r-- drivers/d3d12/dxil_hash.h 39
-rw-r--r-- drivers/d3d12/rendering_context_driver_d3d12.cpp 22
-rw-r--r-- drivers/d3d12/rendering_context_driver_d3d12.h 9
-rw-r--r-- drivers/d3d12/rendering_device_driver_d3d12.cpp 199
-rw-r--r-- drivers/d3d12/rendering_device_driver_d3d12.h 39
-rw-r--r-- drivers/egl/egl_manager.cpp 4
-rw-r--r-- drivers/gles3/rasterizer_canvas_gles3.cpp 35
-rw-r--r-- drivers/gles3/rasterizer_canvas_gles3.h 3
-rw-r--r-- drivers/gles3/rasterizer_gles3.cpp 13
-rw-r--r-- drivers/gles3/rasterizer_gles3.h 2
-rw-r--r-- drivers/gles3/rasterizer_scene_gles3.cpp 35
-rw-r--r-- drivers/gles3/rasterizer_scene_gles3.h 3
-rw-r--r-- drivers/gles3/shader_gles3.cpp 5
-rw-r--r-- drivers/gles3/shaders/canvas.glsl 13
-rw-r--r-- drivers/gles3/shaders/canvas_uniforms_inc.glsl 1
-rw-r--r-- drivers/gles3/shaders/scene.glsl 171
-rw-r--r-- drivers/gles3/shaders/skeleton.glsl 2
-rw-r--r-- drivers/gles3/shaders/sky.glsl 37
-rw-r--r-- drivers/gles3/shaders/stdlib_inc.glsl 18
-rw-r--r-- drivers/gles3/storage/config.cpp 24
-rw-r--r-- drivers/gles3/storage/config.h 3
-rw-r--r-- drivers/gles3/storage/light_storage.cpp 3
-rw-r--r-- drivers/gles3/storage/light_storage.h 3
-rw-r--r-- drivers/gles3/storage/material_storage.cpp 28
-rw-r--r-- drivers/gles3/storage/mesh_storage.cpp 55
-rw-r--r-- drivers/gles3/storage/mesh_storage.h 56
-rw-r--r-- drivers/gles3/storage/render_scene_buffers_gles3.cpp 6
-rw-r--r-- drivers/gles3/storage/render_scene_buffers_gles3.h 4
-rw-r--r-- drivers/gles3/storage/texture_storage.cpp 42
-rw-r--r-- drivers/gles3/storage/texture_storage.h 3
-rw-r--r-- drivers/metal/README.md 39
-rw-r--r-- drivers/metal/SCsub 49
-rw-r--r-- drivers/metal/metal_device_properties.h 141
-rw-r--r-- drivers/metal/metal_device_properties.mm 327
-rw-r--r-- drivers/metal/metal_objects.h 909
-rw-r--r-- drivers/metal/metal_objects.mm 1581
-rw-r--r-- drivers/metal/metal_utils.h 101
-rw-r--r-- drivers/metal/pixel_formats.h 416
-rw-r--r-- drivers/metal/pixel_formats.mm 1298
-rw-r--r-- drivers/metal/rendering_context_driver_metal.h 206
-rw-r--r-- drivers/metal/rendering_context_driver_metal.mm 134
-rw-r--r-- drivers/metal/rendering_device_driver_metal.h 437
-rw-r--r-- drivers/metal/rendering_device_driver_metal.mm 3965
-rw-r--r-- drivers/unix/file_access_unix.cpp 95
-rw-r--r-- drivers/unix/file_access_unix.h 8
-rw-r--r-- drivers/unix/file_access_unix_pipe.cpp 25
-rw-r--r-- drivers/unix/file_access_unix_pipe.h 2
-rw-r--r-- drivers/vulkan/SCsub 4
-rw-r--r-- drivers/vulkan/rendering_context_driver_vulkan.cpp 357
-rw-r--r-- drivers/vulkan/rendering_context_driver_vulkan.h 52
-rw-r--r-- drivers/vulkan/rendering_device_driver_vulkan.cpp 514
-rw-r--r-- drivers/vulkan/rendering_device_driver_vulkan.h 33
-rw-r--r-- drivers/windows/dir_access_windows.cpp 192
-rw-r--r-- drivers/windows/file_access_windows.cpp 231
-rw-r--r-- drivers/windows/file_access_windows.h 8
-rw-r--r-- drivers/windows/file_access_windows_pipe.cpp 24
-rw-r--r-- drivers/windows/file_access_windows_pipe.h 2
-rw-r--r-- drivers/winmidi/midi_driver_winmidi.cpp 55
-rw-r--r-- drivers/winmidi/midi_driver_winmidi.h 8
68 files changed, 11588 insertions, 1075 deletions
diff --git a/drivers/SCsub b/drivers/SCsub
index e77b96cc87..44d29fb7c1 100644
--- a/drivers/SCsub
+++ b/drivers/SCsub
@@ -3,6 +3,7 @@
Import("env")
env.drivers_sources = []
+supported = env.get("supported", [])
# OS drivers
SConscript("unix/SCsub")
@@ -17,6 +18,9 @@ if env["platform"] == "windows":
if not env.msvc:
SConscript("backtrace/SCsub")
if env["xaudio2"]:
+ if "xaudio2" not in supported:
+ print("Target platform '{}' does not support the XAudio2 audio driver. Aborting.".format(env["platform"]))
+ Exit(255)
SConscript("xaudio2/SCsub")
# Midi drivers
@@ -28,11 +32,19 @@ SConscript("winmidi/SCsub")
if env["vulkan"]:
SConscript("vulkan/SCsub")
if env["d3d12"]:
+ if "d3d12" not in supported:
+ print("Target platform '{}' does not support the D3D12 rendering driver. Aborting.".format(env["platform"]))
+ Exit(255)
SConscript("d3d12/SCsub")
if env["opengl3"]:
SConscript("gl_context/SCsub")
SConscript("gles3/SCsub")
SConscript("egl/SCsub")
+if env["metal"]:
+ if "metal" not in supported:
+ print("Target platform '{}' does not support the Metal rendering driver. Aborting.".format(env["platform"]))
+ Exit(255)
+ SConscript("metal/SCsub")
# Core dependencies
SConscript("png/SCsub")
diff --git a/drivers/alsamidi/midi_driver_alsamidi.cpp b/drivers/alsamidi/midi_driver_alsamidi.cpp
index b87be69cc5..445fc4a993 100644
--- a/drivers/alsamidi/midi_driver_alsamidi.cpp
+++ b/drivers/alsamidi/midi_driver_alsamidi.cpp
@@ -37,137 +37,36 @@
#include <errno.h>
-MIDIDriverALSAMidi::MessageCategory MIDIDriverALSAMidi::msg_category(uint8_t msg_part) {
- if (msg_part >= 0xf8) {
- return MessageCategory::RealTime;
- } else if (msg_part >= 0xf0) {
- // System Exclusive begin/end are specified as System Common Category messages,
- // but we separate them here and give them their own categories as their
- // behavior is significantly different.
- if (msg_part == 0xf0) {
- return MessageCategory::SysExBegin;
- } else if (msg_part == 0xf7) {
- return MessageCategory::SysExEnd;
- }
- return MessageCategory::SystemCommon;
- } else if (msg_part >= 0x80) {
- return MessageCategory::Voice;
- }
- return MessageCategory::Data;
-}
-
-size_t MIDIDriverALSAMidi::msg_expected_data(uint8_t status_byte) {
- if (msg_category(status_byte) == MessageCategory::Voice) {
- // Voice messages have a channel number in the status byte, mask it out.
- status_byte &= 0xf0;
- }
-
- switch (status_byte) {
- case 0x80: // Note Off
- case 0x90: // Note On
- case 0xA0: // Polyphonic Key Pressure (Aftertouch)
- case 0xB0: // Control Change (CC)
- case 0xE0: // Pitch Bend Change
- case 0xF2: // Song Position Pointer
- return 2;
-
- case 0xC0: // Program Change
- case 0xD0: // Channel Pressure (Aftertouch)
- case 0xF1: // MIDI Time Code Quarter Frame
- case 0xF3: // Song Select
- return 1;
- }
+MIDIDriverALSAMidi::InputConnection::InputConnection(int p_device_index,
+ snd_rawmidi_t *p_rawmidi) :
+ parser(p_device_index), rawmidi_ptr(p_rawmidi) {}
- return 0;
-}
-
-void MIDIDriverALSAMidi::InputConnection::parse_byte(uint8_t byte, MIDIDriverALSAMidi &driver,
- uint64_t timestamp, int device_index) {
- switch (msg_category(byte)) {
- case MessageCategory::RealTime:
- // Real-Time messages are single byte messages that can
- // occur at any point.
- // We pass them straight through.
- driver.receive_input_packet(device_index, timestamp, &byte, 1);
- break;
-
- case MessageCategory::Data:
- // We don't currently forward System Exclusive messages so skip their data.
- // Collect any expected data for other message types.
- if (!skipping_sys_ex && expected_data > received_data) {
- buffer[received_data + 1] = byte;
- received_data++;
-
- // Forward a complete message and reset relevant state.
- if (received_data == expected_data) {
- driver.receive_input_packet(device_index, timestamp, buffer, received_data + 1);
- received_data = 0;
-
- if (msg_category(buffer[0]) != MessageCategory::Voice) {
- // Voice Category messages can be sent with "running status".
- // This means they don't resend the status byte until it changes.
- // For other categories, we reset expected data, to require a new status byte.
- expected_data = 0;
- }
- }
- }
- break;
-
- case MessageCategory::SysExBegin:
- buffer[0] = byte;
- skipping_sys_ex = true;
- break;
-
- case MessageCategory::SysExEnd:
- expected_data = 0;
- skipping_sys_ex = false;
- break;
-
- case MessageCategory::Voice:
- case MessageCategory::SystemCommon:
- buffer[0] = byte;
- received_data = 0;
- expected_data = msg_expected_data(byte);
- skipping_sys_ex = false;
- if (expected_data == 0) {
- driver.receive_input_packet(device_index, timestamp, &byte, 1);
- }
- break;
- }
-}
-
-int MIDIDriverALSAMidi::InputConnection::read_in(MIDIDriverALSAMidi &driver, uint64_t timestamp, int device_index) {
- int ret;
+void MIDIDriverALSAMidi::InputConnection::read() {
+ int read_count;
do {
- uint8_t byte = 0;
- ret = snd_rawmidi_read(rawmidi_ptr, &byte, 1);
+ uint8_t buffer[32];
+ read_count = snd_rawmidi_read(rawmidi_ptr, buffer, sizeof(buffer));
- if (ret < 0) {
- if (ret != -EAGAIN) {
- ERR_PRINT("snd_rawmidi_read error: " + String(snd_strerror(ret)));
+ if (read_count < 0) {
+ if (read_count != -EAGAIN) {
+ ERR_PRINT("snd_rawmidi_read error: " + String(snd_strerror(read_count)));
}
} else {
- parse_byte(byte, driver, timestamp, device_index);
+ for (int i = 0; i < read_count; i++) {
+ parser.parse_fragment(buffer[i]);
+ }
}
- } while (ret > 0);
-
- return ret;
+ } while (read_count > 0);
}
void MIDIDriverALSAMidi::thread_func(void *p_udata) {
MIDIDriverALSAMidi *md = static_cast<MIDIDriverALSAMidi *>(p_udata);
- uint64_t timestamp = 0;
while (!md->exit_thread.is_set()) {
md->lock();
-
- InputConnection *connections = md->connected_inputs.ptrw();
- size_t connection_count = md->connected_inputs.size();
-
- for (size_t i = 0; i < connection_count; i++) {
- connections[i].read_in(*md, timestamp, (int)i);
+ for (InputConnection &conn : md->connected_inputs) {
+ conn.read();
}
-
md->unlock();
OS::get_singleton()->delay_usec(1000);
@@ -181,15 +80,25 @@ Error MIDIDriverALSAMidi::open() {
return ERR_CANT_OPEN;
}
- int i = 0;
- for (void **n = hints; *n != nullptr; n++) {
- char *name = snd_device_name_get_hint(*n, "NAME");
+ lock();
+ int device_index = 0;
+ for (void **h = hints; *h != nullptr; h++) {
+ char *name = snd_device_name_get_hint(*h, "NAME");
if (name != nullptr) {
snd_rawmidi_t *midi_in;
int ret = snd_rawmidi_open(&midi_in, nullptr, name, SND_RAWMIDI_NONBLOCK);
if (ret >= 0) {
- connected_inputs.insert(i++, InputConnection(midi_in));
+ // Get display name.
+ snd_rawmidi_info_t *info;
+ snd_rawmidi_info_malloc(&info);
+ snd_rawmidi_info(midi_in, info);
+ connected_input_names.push_back(snd_rawmidi_info_get_name(info));
+ snd_rawmidi_info_free(info);
+
+ connected_inputs.push_back(InputConnection(device_index, midi_in));
+ // Only increment device_index for successfully connected devices.
+ device_index++;
}
}
@@ -198,6 +107,7 @@ Error MIDIDriverALSAMidi::open() {
}
}
snd_device_name_free_hint(hints);
+ unlock();
exit_thread.clear();
thread.start(MIDIDriverALSAMidi::thread_func, this);
@@ -211,11 +121,12 @@ void MIDIDriverALSAMidi::close() {
thread.wait_to_finish();
}
- for (int i = 0; i < connected_inputs.size(); i++) {
- snd_rawmidi_t *midi_in = connected_inputs[i].rawmidi_ptr;
- snd_rawmidi_close(midi_in);
+ for (const InputConnection &conn : connected_inputs) {
+ snd_rawmidi_close(conn.rawmidi_ptr);
}
+
connected_inputs.clear();
+ connected_input_names.clear();
}
void MIDIDriverALSAMidi::lock() const {
@@ -226,24 +137,6 @@ void MIDIDriverALSAMidi::unlock() const {
mutex.unlock();
}
-PackedStringArray MIDIDriverALSAMidi::get_connected_inputs() {
- PackedStringArray list;
-
- lock();
- for (int i = 0; i < connected_inputs.size(); i++) {
- snd_rawmidi_t *midi_in = connected_inputs[i].rawmidi_ptr;
- snd_rawmidi_info_t *info;
-
- snd_rawmidi_info_malloc(&info);
- snd_rawmidi_info(midi_in, info);
- list.push_back(snd_rawmidi_info_get_name(info));
- snd_rawmidi_info_free(info);
- }
- unlock();
-
- return list;
-}
-
MIDIDriverALSAMidi::MIDIDriverALSAMidi() {
exit_thread.clear();
}
diff --git a/drivers/alsamidi/midi_driver_alsamidi.h b/drivers/alsamidi/midi_driver_alsamidi.h
index 95ded3b1c9..45811bec47 100644
--- a/drivers/alsamidi/midi_driver_alsamidi.h
+++ b/drivers/alsamidi/midi_driver_alsamidi.h
@@ -51,24 +51,15 @@ class MIDIDriverALSAMidi : public MIDIDriver {
Thread thread;
Mutex mutex;
- class InputConnection {
- public:
+ struct InputConnection {
InputConnection() = default;
- InputConnection(snd_rawmidi_t *midi_in) :
- rawmidi_ptr{ midi_in } {}
-
- // Read in and parse available data, forwarding any complete messages through the driver.
- int read_in(MIDIDriverALSAMidi &driver, uint64_t timestamp, int device_index);
+ InputConnection(int p_device_index, snd_rawmidi_t *p_rawmidi);
+ Parser parser;
snd_rawmidi_t *rawmidi_ptr = nullptr;
- private:
- static const size_t MSG_BUFFER_SIZE = 3;
- uint8_t buffer[MSG_BUFFER_SIZE] = { 0 };
- size_t expected_data = 0;
- size_t received_data = 0;
- bool skipping_sys_ex = false;
- void parse_byte(uint8_t byte, MIDIDriverALSAMidi &driver, uint64_t timestamp, int device_index);
+ // Read in and parse available data, forwarding complete events to Input.
+ void read();
};
Vector<InputConnection> connected_inputs;
@@ -77,30 +68,12 @@ class MIDIDriverALSAMidi : public MIDIDriver {
static void thread_func(void *p_udata);
- enum class MessageCategory {
- Data,
- Voice,
- SysExBegin,
- SystemCommon, // excluding System Exclusive Begin/End
- SysExEnd,
- RealTime,
- };
-
- // If the passed byte is a status byte, return the associated message category,
- // else return MessageCategory::Data.
- static MessageCategory msg_category(uint8_t msg_part);
-
- // Return the number of data bytes expected for the provided status byte.
- static size_t msg_expected_data(uint8_t status_byte);
-
void lock() const;
void unlock() const;
public:
- virtual Error open();
- virtual void close();
-
- virtual PackedStringArray get_connected_inputs();
+ virtual Error open() override;
+ virtual void close() override;
MIDIDriverALSAMidi();
virtual ~MIDIDriverALSAMidi();
diff --git a/drivers/coreaudio/audio_driver_coreaudio.cpp b/drivers/coreaudio/audio_driver_coreaudio.cpp
index 98a8d4b2ef..fd0adb1fd1 100644
--- a/drivers/coreaudio/audio_driver_coreaudio.cpp
+++ b/drivers/coreaudio/audio_driver_coreaudio.cpp
@@ -66,6 +66,11 @@ OSStatus AudioDriverCoreAudio::output_device_address_cb(AudioObjectID inObjectID
return noErr;
}
+
+// Switch to kAudioObjectPropertyElementMain everywhere to remove deprecated warnings.
+#if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 120000) || (TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED < 150000)
+#define kAudioObjectPropertyElementMain kAudioObjectPropertyElementMaster
+#endif
#endif
Error AudioDriverCoreAudio::init() {
@@ -89,7 +94,7 @@ Error AudioDriverCoreAudio::init() {
AudioObjectPropertyAddress prop;
prop.mSelector = kAudioHardwarePropertyDefaultOutputDevice;
prop.mScope = kAudioObjectPropertyScopeGlobal;
- prop.mElement = kAudioObjectPropertyElementMaster;
+ prop.mElement = kAudioObjectPropertyElementMain;
result = AudioObjectAddPropertyListener(kAudioObjectSystemObject, &prop, &output_device_address_cb, this);
ERR_FAIL_COND_V(result != noErr, FAILED);
@@ -319,7 +324,7 @@ void AudioDriverCoreAudio::finish() {
AudioObjectPropertyAddress prop;
prop.mSelector = kAudioHardwarePropertyDefaultOutputDevice;
prop.mScope = kAudioObjectPropertyScopeGlobal;
- prop.mElement = kAudioObjectPropertyElementMaster;
+ prop.mElement = kAudioObjectPropertyElementMain;
result = AudioObjectRemovePropertyListener(kAudioObjectSystemObject, &prop, &output_device_address_cb, this);
if (result != noErr) {
@@ -358,7 +363,7 @@ Error AudioDriverCoreAudio::init_input_device() {
AudioObjectPropertyAddress prop;
prop.mSelector = kAudioHardwarePropertyDefaultInputDevice;
prop.mScope = kAudioObjectPropertyScopeGlobal;
- prop.mElement = kAudioObjectPropertyElementMaster;
+ prop.mElement = kAudioObjectPropertyElementMain;
result = AudioObjectAddPropertyListener(kAudioObjectSystemObject, &prop, &input_device_address_cb, this);
ERR_FAIL_COND_V(result != noErr, FAILED);
@@ -375,7 +380,7 @@ Error AudioDriverCoreAudio::init_input_device() {
#ifdef MACOS_ENABLED
AudioDeviceID deviceId;
size = sizeof(AudioDeviceID);
- AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultInputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
+ AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultInputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMain };
result = AudioObjectGetPropertyData(kAudioObjectSystemObject, &property, 0, nullptr, &size, &deviceId);
ERR_FAIL_COND_V(result != noErr, FAILED);
@@ -453,7 +458,7 @@ void AudioDriverCoreAudio::finish_input_device() {
AudioObjectPropertyAddress prop;
prop.mSelector = kAudioHardwarePropertyDefaultInputDevice;
prop.mScope = kAudioObjectPropertyScopeGlobal;
- prop.mElement = kAudioObjectPropertyElementMaster;
+ prop.mElement = kAudioObjectPropertyElementMain;
result = AudioObjectRemovePropertyListener(kAudioObjectSystemObject, &prop, &input_device_address_cb, this);
if (result != noErr) {
@@ -504,7 +509,7 @@ PackedStringArray AudioDriverCoreAudio::_get_device_list(bool input) {
prop.mSelector = kAudioHardwarePropertyDevices;
prop.mScope = kAudioObjectPropertyScopeGlobal;
- prop.mElement = kAudioObjectPropertyElementMaster;
+ prop.mElement = kAudioObjectPropertyElementMain;
UInt32 size = 0;
AudioObjectGetPropertyDataSize(kAudioObjectSystemObject, &prop, 0, nullptr, &size);
@@ -563,7 +568,7 @@ void AudioDriverCoreAudio::_set_device(const String &output_device, bool input)
prop.mSelector = kAudioHardwarePropertyDevices;
prop.mScope = kAudioObjectPropertyScopeGlobal;
- prop.mElement = kAudioObjectPropertyElementMaster;
+ prop.mElement = kAudioObjectPropertyElementMain;
UInt32 size = 0;
AudioObjectGetPropertyDataSize(kAudioObjectSystemObject, &prop, 0, nullptr, &size);
@@ -619,7 +624,7 @@ void AudioDriverCoreAudio::_set_device(const String &output_device, bool input)
// If we haven't found the desired device get the system default one
UInt32 size = sizeof(AudioDeviceID);
UInt32 elem = input ? kAudioHardwarePropertyDefaultInputDevice : kAudioHardwarePropertyDefaultOutputDevice;
- AudioObjectPropertyAddress property = { elem, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
+ AudioObjectPropertyAddress property = { elem, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMain };
OSStatus result = AudioObjectGetPropertyData(kAudioObjectSystemObject, &property, 0, nullptr, &size, &deviceId);
ERR_FAIL_COND(result != noErr);
diff --git a/drivers/coremidi/midi_driver_coremidi.cpp b/drivers/coremidi/midi_driver_coremidi.cpp
index 87fc7612f7..f6cc59471e 100644
--- a/drivers/coremidi/midi_driver_coremidi.cpp
+++ b/drivers/coremidi/midi_driver_coremidi.cpp
@@ -37,16 +37,30 @@
#import <CoreAudio/HostTime.h>
#import <CoreServices/CoreServices.h>
+Mutex MIDIDriverCoreMidi::mutex;
+bool MIDIDriverCoreMidi::core_midi_closed = false;
+
+MIDIDriverCoreMidi::InputConnection::InputConnection(int p_device_index, MIDIEndpointRef p_source) :
+ parser(p_device_index), source(p_source) {}
+
void MIDIDriverCoreMidi::read(const MIDIPacketList *packet_list, void *read_proc_ref_con, void *src_conn_ref_con) {
- MIDIPacket *packet = const_cast<MIDIPacket *>(packet_list->packet);
- int *device_index = static_cast<int *>(src_conn_ref_con);
- for (UInt32 i = 0; i < packet_list->numPackets; i++) {
- receive_input_packet(*device_index, packet->timeStamp, packet->data, packet->length);
- packet = MIDIPacketNext(packet);
+ MutexLock lock(mutex);
+ if (!core_midi_closed) {
+ InputConnection *source = static_cast<InputConnection *>(src_conn_ref_con);
+ const MIDIPacket *packet = packet_list->packet;
+ for (UInt32 packet_index = 0; packet_index < packet_list->numPackets; packet_index++) {
+ for (UInt16 data_index = 0; data_index < packet->length; data_index++) {
+ source->parser.parse_fragment(packet->data[data_index]);
+ }
+ packet = MIDIPacketNext(packet);
+ }
}
}
Error MIDIDriverCoreMidi::open() {
+ ERR_FAIL_COND_V_MSG(client || core_midi_closed, FAILED,
+ "MIDIDriverCoreMidi cannot be reopened.");
+
CFStringRef name = CFStringCreateWithCString(nullptr, "Godot", kCFStringEncodingASCII);
OSStatus result = MIDIClientCreate(name, nullptr, nullptr, &client);
CFRelease(name);
@@ -61,12 +75,27 @@ Error MIDIDriverCoreMidi::open() {
return ERR_CANT_OPEN;
}
- int sources = MIDIGetNumberOfSources();
- for (int i = 0; i < sources; i++) {
+ int source_count = MIDIGetNumberOfSources();
+ int connection_index = 0;
+ for (int i = 0; i < source_count; i++) {
MIDIEndpointRef source = MIDIGetSource(i);
if (source) {
- MIDIPortConnectSource(port_in, source, static_cast<void *>(&i));
- connected_sources.insert(i, source);
+ InputConnection *conn = memnew(InputConnection(connection_index, source));
+ const OSStatus res = MIDIPortConnectSource(port_in, source, static_cast<void *>(conn));
+ if (res != noErr) {
+ memdelete(conn);
+ } else {
+ connected_sources.push_back(conn);
+
+ CFStringRef nameRef = nullptr;
+ char name[256];
+ MIDIObjectGetStringProperty(source, kMIDIPropertyDisplayName, &nameRef);
+ CFStringGetCString(nameRef, name, sizeof(name), kCFStringEncodingUTF8);
+ CFRelease(nameRef);
+ connected_input_names.push_back(name);
+
+ connection_index++; // Contiguous index for successfully connected inputs.
+ }
}
}
@@ -74,11 +103,17 @@ Error MIDIDriverCoreMidi::open() {
}
void MIDIDriverCoreMidi::close() {
- for (int i = 0; i < connected_sources.size(); i++) {
- MIDIEndpointRef source = connected_sources[i];
- MIDIPortDisconnectSource(port_in, source);
+ mutex.lock();
+ core_midi_closed = true;
+ mutex.unlock();
+
+ for (InputConnection *conn : connected_sources) {
+ MIDIPortDisconnectSource(port_in, conn->source);
+ memdelete(conn);
}
+
connected_sources.clear();
+ connected_input_names.clear();
if (port_in != 0) {
MIDIPortDispose(port_in);
@@ -91,26 +126,6 @@ void MIDIDriverCoreMidi::close() {
}
}
-PackedStringArray MIDIDriverCoreMidi::get_connected_inputs() {
- PackedStringArray list;
-
- for (int i = 0; i < connected_sources.size(); i++) {
- MIDIEndpointRef source = connected_sources[i];
- CFStringRef ref = nullptr;
- char name[256];
-
- MIDIObjectGetStringProperty(source, kMIDIPropertyDisplayName, &ref);
- CFStringGetCString(ref, name, sizeof(name), kCFStringEncodingUTF8);
- CFRelease(ref);
-
- list.push_back(name);
- }
-
- return list;
-}
-
-MIDIDriverCoreMidi::MIDIDriverCoreMidi() {}
-
MIDIDriverCoreMidi::~MIDIDriverCoreMidi() {
close();
}
diff --git a/drivers/coremidi/midi_driver_coremidi.h b/drivers/coremidi/midi_driver_coremidi.h
index 38fb515664..02cbc6234c 100644
--- a/drivers/coremidi/midi_driver_coremidi.h
+++ b/drivers/coremidi/midi_driver_coremidi.h
@@ -34,6 +34,7 @@
#ifdef COREMIDI_ENABLED
#include "core/os/midi_driver.h"
+#include "core/os/mutex.h"
#include "core/templates/vector.h"
#import <CoreMIDI/CoreMIDI.h>
@@ -43,17 +44,25 @@ class MIDIDriverCoreMidi : public MIDIDriver {
MIDIClientRef client = 0;
MIDIPortRef port_in;
- Vector<MIDIEndpointRef> connected_sources;
+ struct InputConnection {
+ InputConnection() = default;
+ InputConnection(int p_device_index, MIDIEndpointRef p_source);
+ Parser parser;
+ MIDIEndpointRef source;
+ };
+
+ Vector<InputConnection *> connected_sources;
+
+ static Mutex mutex;
+ static bool core_midi_closed;
static void read(const MIDIPacketList *packet_list, void *read_proc_ref_con, void *src_conn_ref_con);
public:
- virtual Error open();
- virtual void close();
-
- PackedStringArray get_connected_inputs();
+ virtual Error open() override;
+ virtual void close() override;
- MIDIDriverCoreMidi();
+ MIDIDriverCoreMidi() = default;
virtual ~MIDIDriverCoreMidi();
};
diff --git a/drivers/d3d12/SCsub b/drivers/d3d12/SCsub
index 35227ebe08..482a549189 100644
--- a/drivers/d3d12/SCsub
+++ b/drivers/d3d12/SCsub
@@ -136,7 +136,6 @@ if env.msvc:
]
else:
extra_defines += [
- ("__REQUIRED_RPCNDR_H_VERSION__", 475),
"HAVE_STRUCT_TIMESPEC",
]
diff --git a/drivers/d3d12/d3d12ma.cpp b/drivers/d3d12/d3d12ma.cpp
index 51171141de..b7c9eb7ec0 100644
--- a/drivers/d3d12/d3d12ma.cpp
+++ b/drivers/d3d12/d3d12ma.cpp
@@ -43,6 +43,18 @@
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wnonnull-compare"
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#elif defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+#pragma clang diagnostic ignored "-Wstring-plus-int"
+#pragma clang diagnostic ignored "-Wswitch"
+#pragma clang diagnostic ignored "-Wmissing-field-initializers"
+#pragma clang diagnostic ignored "-Wtautological-undefined-compare"
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-but-set-variable"
+#pragma clang diagnostic ignored "-Wunused-function"
+#pragma clang diagnostic ignored "-Wunused-private-field"
+#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
#endif
#if defined(_MSC_VER)
diff --git a/drivers/d3d12/dxil_hash.cpp b/drivers/d3d12/dxil_hash.cpp
new file mode 100644
index 0000000000..f94a4a30df
--- /dev/null
+++ b/drivers/d3d12/dxil_hash.cpp
@@ -0,0 +1,209 @@
+/**************************************************************************/
+/* dxil_hash.cpp */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+// Based on the patched public domain implementation released by Microsoft here:
+// https://github.com/microsoft/hlsl-specs/blob/main/proposals/infra/INF-0004-validator-hashing.md
+
+#include "dxil_hash.h"
+
+#include <memory.h>
+
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+
+static const BYTE padding[64] = {
+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+static void FF(UINT &a, UINT b, UINT c, UINT d, UINT x, UINT8 s, UINT ac) { // Round 1 step; updates a in place from b, c, d, the message word x and the constant ac.
+ a += ((b & c) | (~b & d)) + x + ac; // Bitwise select: takes c where b is set, d where b is clear.
+ a = ((a << s) | (a >> (32 - s))) + b; // Rotate a left by s bits, then add b.
+}
+
+static void GG(UINT &a, UINT b, UINT c, UINT d, UINT x, UINT8 s, UINT ac) { // Round 2 step; updates a in place from b, c, d, the message word x and the constant ac.
+ a += ((b & d) | (c & ~d)) + x + ac; // Bitwise select: takes b where d is set, c where d is clear.
+ a = ((a << s) | (a >> (32 - s))) + b; // Rotate a left by s bits, then add b.
+}
+
+static void HH(UINT &a, UINT b, UINT c, UINT d, UINT x, UINT8 s, UINT ac) { // Round 3 step; updates a in place from b, c, d, the message word x and the constant ac.
+ a += (b ^ c ^ d) + x + ac; // Mixes the three other state words with XOR.
+ a = ((a << s) | (a >> (32 - s))) + b; // Rotate a left by s bits, then add b.
+}
+
+static void II(UINT &a, UINT b, UINT c, UINT d, UINT x, UINT8 s, UINT ac) { // Round 4 step; updates a in place from b, c, d, the message word x and the constant ac.
+ a += (c ^ (b | ~d)) + x + ac; // Mixes c with (b OR NOT d) using XOR.
+ a = ((a << s) | (a >> (32 - s))) + b; // Rotate a left by s bits, then add b.
+}
+
+void compute_dxil_hash(const BYTE *pData, UINT byteCount, BYTE *pOutHash) { // Hashes byteCount bytes at pData in 64-byte blocks and writes the final 16-byte state to pOutHash.
+ UINT leftOver = byteCount & 0x3f; // Number of bytes past the last full 64-byte block.
+ UINT padAmount;
+ bool bTwoRowsPadding = false;
+ if (leftOver < 56) { // Tail data, padding and the two 4-byte length words fit in one final block.
+ padAmount = 56 - leftOver;
+ } else { // Otherwise the padding spills over into a second final block.
+ padAmount = 120 - leftOver;
+ bTwoRowsPadding = true;
+ }
+ UINT padAmountPlusSize = padAmount + 8; // Padding plus the two 4-byte length words (x[0] and x[15] below).
+ UINT state[4] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 }; // Initial running state (same constants as the MD5 initial state).
+ UINT N = (byteCount + padAmountPlusSize) >> 6; // Total number of 64-byte blocks to process, padding included.
+ UINT offset = 0;
+ UINT NextEndState = bTwoRowsPadding ? N - 2 : N - 1; // Index of the next block that must be assembled in x[] instead of read from pData.
+ const BYTE *pCurrData = pData;
+ for (UINT i = 0; i < N; i++, offset += 64, pCurrData += 64) {
+ UINT x[16]; // Scratch block used only for the padded trailing block(s).
+ const UINT *pX;
+ if (i == NextEndState) {
+ if (!bTwoRowsPadding && i == N - 1) { // Single padded block: length word, tail data, padding, length word.
+ UINT remainder = byteCount - offset;
+ x[0] = byteCount << 3; // Input length in bits is stored in the first word.
+ memcpy((BYTE *)x + 4, pCurrData, remainder);
+ memcpy((BYTE *)x + 4 + remainder, padding, padAmount); // remainder + padAmount == 56, so this fills up to x[15].
+ x[15] = 1 | (byteCount << 1); // Last word: byteCount shifted left by one with the low bit set.
+ } else if (bTwoRowsPadding) {
+ if (i == N - 2) { // First of two padded blocks: tail data followed by the start of the padding.
+ UINT remainder = byteCount - offset;
+ memcpy(x, pCurrData, remainder);
+ memcpy((BYTE *)x + remainder, padding, padAmount - 56);
+ NextEndState = N - 1; // The following (last) block is assembled in x[] as well.
+ } else if (i == N - 1) { // Second padded block: length word, 56 padding bytes, length word.
+ x[0] = byteCount << 3;
+ memcpy((BYTE *)x + 4, padding + padAmount - 56, 56); // Continues in the padding table where the previous block stopped.
+ x[15] = 1 | (byteCount << 1);
+ }
+ }
+ pX = x;
+ } else {
+ pX = (const UINT *)pCurrData; // Full block is read in place as 16 UINTs. NOTE(review): assumes pData is suitably aligned for UINT access - confirm with callers.
+ }
+
+ UINT a = state[0]; // Work on copies of the running state for this block.
+ UINT b = state[1];
+ UINT c = state[2];
+ UINT d = state[3];
+
+ /* Round 1 */
+ FF(a, b, c, d, pX[0], S11, 0xd76aa478); /* 1 */
+ FF(d, a, b, c, pX[1], S12, 0xe8c7b756); /* 2 */
+ FF(c, d, a, b, pX[2], S13, 0x242070db); /* 3 */
+ FF(b, c, d, a, pX[3], S14, 0xc1bdceee); /* 4 */
+ FF(a, b, c, d, pX[4], S11, 0xf57c0faf); /* 5 */
+ FF(d, a, b, c, pX[5], S12, 0x4787c62a); /* 6 */
+ FF(c, d, a, b, pX[6], S13, 0xa8304613); /* 7 */
+ FF(b, c, d, a, pX[7], S14, 0xfd469501); /* 8 */
+ FF(a, b, c, d, pX[8], S11, 0x698098d8); /* 9 */
+ FF(d, a, b, c, pX[9], S12, 0x8b44f7af); /* 10 */
+ FF(c, d, a, b, pX[10], S13, 0xffff5bb1); /* 11 */
+ FF(b, c, d, a, pX[11], S14, 0x895cd7be); /* 12 */
+ FF(a, b, c, d, pX[12], S11, 0x6b901122); /* 13 */
+ FF(d, a, b, c, pX[13], S12, 0xfd987193); /* 14 */
+ FF(c, d, a, b, pX[14], S13, 0xa679438e); /* 15 */
+ FF(b, c, d, a, pX[15], S14, 0x49b40821); /* 16 */
+
+ /* Round 2 */
+ GG(a, b, c, d, pX[1], S21, 0xf61e2562); /* 17 */
+ GG(d, a, b, c, pX[6], S22, 0xc040b340); /* 18 */
+ GG(c, d, a, b, pX[11], S23, 0x265e5a51); /* 19 */
+ GG(b, c, d, a, pX[0], S24, 0xe9b6c7aa); /* 20 */
+ GG(a, b, c, d, pX[5], S21, 0xd62f105d); /* 21 */
+ GG(d, a, b, c, pX[10], S22, 0x2441453); /* 22 */
+ GG(c, d, a, b, pX[15], S23, 0xd8a1e681); /* 23 */
+ GG(b, c, d, a, pX[4], S24, 0xe7d3fbc8); /* 24 */
+ GG(a, b, c, d, pX[9], S21, 0x21e1cde6); /* 25 */
+ GG(d, a, b, c, pX[14], S22, 0xc33707d6); /* 26 */
+ GG(c, d, a, b, pX[3], S23, 0xf4d50d87); /* 27 */
+ GG(b, c, d, a, pX[8], S24, 0x455a14ed); /* 28 */
+ GG(a, b, c, d, pX[13], S21, 0xa9e3e905); /* 29 */
+ GG(d, a, b, c, pX[2], S22, 0xfcefa3f8); /* 30 */
+ GG(c, d, a, b, pX[7], S23, 0x676f02d9); /* 31 */
+ GG(b, c, d, a, pX[12], S24, 0x8d2a4c8a); /* 32 */
+
+ /* Round 3 */
+ HH(a, b, c, d, pX[5], S31, 0xfffa3942); /* 33 */
+ HH(d, a, b, c, pX[8], S32, 0x8771f681); /* 34 */
+ HH(c, d, a, b, pX[11], S33, 0x6d9d6122); /* 35 */
+ HH(b, c, d, a, pX[14], S34, 0xfde5380c); /* 36 */
+ HH(a, b, c, d, pX[1], S31, 0xa4beea44); /* 37 */
+ HH(d, a, b, c, pX[4], S32, 0x4bdecfa9); /* 38 */
+ HH(c, d, a, b, pX[7], S33, 0xf6bb4b60); /* 39 */
+ HH(b, c, d, a, pX[10], S34, 0xbebfbc70); /* 40 */
+ HH(a, b, c, d, pX[13], S31, 0x289b7ec6); /* 41 */
+ HH(d, a, b, c, pX[0], S32, 0xeaa127fa); /* 42 */
+ HH(c, d, a, b, pX[3], S33, 0xd4ef3085); /* 43 */
+ HH(b, c, d, a, pX[6], S34, 0x4881d05); /* 44 */
+ HH(a, b, c, d, pX[9], S31, 0xd9d4d039); /* 45 */
+ HH(d, a, b, c, pX[12], S32, 0xe6db99e5); /* 46 */
+ HH(c, d, a, b, pX[15], S33, 0x1fa27cf8); /* 47 */
+ HH(b, c, d, a, pX[2], S34, 0xc4ac5665); /* 48 */
+
+ /* Round 4 */
+ II(a, b, c, d, pX[0], S41, 0xf4292244); /* 49 */
+ II(d, a, b, c, pX[7], S42, 0x432aff97); /* 50 */
+ II(c, d, a, b, pX[14], S43, 0xab9423a7); /* 51 */
+ II(b, c, d, a, pX[5], S44, 0xfc93a039); /* 52 */
+ II(a, b, c, d, pX[12], S41, 0x655b59c3); /* 53 */
+ II(d, a, b, c, pX[3], S42, 0x8f0ccc92); /* 54 */
+ II(c, d, a, b, pX[10], S43, 0xffeff47d); /* 55 */
+ II(b, c, d, a, pX[1], S44, 0x85845dd1); /* 56 */
+ II(a, b, c, d, pX[8], S41, 0x6fa87e4f); /* 57 */
+ II(d, a, b, c, pX[15], S42, 0xfe2ce6e0); /* 58 */
+ II(c, d, a, b, pX[6], S43, 0xa3014314); /* 59 */
+ II(b, c, d, a, pX[13], S44, 0x4e0811a1); /* 60 */
+ II(a, b, c, d, pX[4], S41, 0xf7537e82); /* 61 */
+ II(d, a, b, c, pX[11], S42, 0xbd3af235); /* 62 */
+ II(c, d, a, b, pX[2], S43, 0x2ad7d2bb); /* 63 */
+ II(b, c, d, a, pX[9], S44, 0xeb86d391); /* 64 */
+
+ state[0] += a; // Fold this block's result back into the running state.
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+ }
+
+ memcpy(pOutHash, state, 16); // Caller must provide at least 16 writable bytes at pOutHash.
+}
diff --git a/drivers/d3d12/dxil_hash.h b/drivers/d3d12/dxil_hash.h
new file mode 100644
index 0000000000..db8ee85a0d
--- /dev/null
+++ b/drivers/d3d12/dxil_hash.h
@@ -0,0 +1,39 @@
+/**************************************************************************/
+/* dxil_hash.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#ifndef DXIL_HASH_H
+#define DXIL_HASH_H
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+// Writes a 16-byte digest to `pOutHash` (the implementation ends with a 16-byte memcpy),
+// so the destination must have room for at least 16 bytes.
+// NOTE(review): presumably hashes `byteCount` bytes starting at `pData` -- confirm against dxil_hash.cpp.
+void compute_dxil_hash(const BYTE *pData, UINT byteCount, BYTE *pOutHash);
+
+#endif // DXIL_HASH_H
diff --git a/drivers/d3d12/rendering_context_driver_d3d12.cpp b/drivers/d3d12/rendering_context_driver_d3d12.cpp
index 128b8bcd03..8fa495f5c4 100644
--- a/drivers/d3d12/rendering_context_driver_d3d12.cpp
+++ b/drivers/d3d12/rendering_context_driver_d3d12.cpp
@@ -43,12 +43,20 @@
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wswitch"
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#elif defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+#pragma clang diagnostic ignored "-Wstring-plus-int"
+#pragma clang diagnostic ignored "-Wswitch"
+#pragma clang diagnostic ignored "-Wmissing-field-initializers"
#endif
#include "dxcapi.h"
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
+#elif defined(__clang__)
+#pragma clang diagnostic pop
#endif
#if !defined(_MSC_VER)
@@ -63,10 +71,6 @@ const GUID CLSID_D3D12DeviceFactoryGodot = { 0x114863bf, 0xc386, 0x4aee, { 0xb3,
const GUID CLSID_D3D12DebugGodot = { 0xf2352aeb, 0xdd84, 0x49fe, { 0xb9, 0x7b, 0xa9, 0xdc, 0xfd, 0xcc, 0x1b, 0x4f } };
const GUID CLSID_D3D12SDKConfigurationGodot = { 0x7cda6aca, 0xa03e, 0x49c8, { 0x94, 0x58, 0x03, 0x34, 0xd2, 0x0e, 0x07, 0xce } };
-extern "C" {
-char godot_nir_arch_name[32];
-}
-
#ifdef PIX_ENABLED
#if defined(__GNUC__)
#define _MSC_VER 1800
@@ -78,12 +82,14 @@ char godot_nir_arch_name[32];
#endif
#endif
-RenderingContextDriverD3D12::RenderingContextDriverD3D12() {
- CharString cs = Engine::get_singleton()->get_architecture_name().ascii();
- memcpy(godot_nir_arch_name, (const char *)cs.get_data(), cs.size());
-}
+RenderingContextDriverD3D12::RenderingContextDriverD3D12() {}
RenderingContextDriverD3D12::~RenderingContextDriverD3D12() {
+ // Let's release manually everything that may still be holding
+ // onto the DLLs before freeing them.
+ device_factory.Reset();
+ dxgi_factory.Reset();
+
if (lib_d3d12) {
FreeLibrary(lib_d3d12);
}
diff --git a/drivers/d3d12/rendering_context_driver_d3d12.h b/drivers/d3d12/rendering_context_driver_d3d12.h
index 2e286b6927..a2d828ded1 100644
--- a/drivers/d3d12/rendering_context_driver_d3d12.h
+++ b/drivers/d3d12/rendering_context_driver_d3d12.h
@@ -46,6 +46,13 @@
#pragma GCC diagnostic ignored "-Wswitch"
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+#elif defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+#pragma clang diagnostic ignored "-Wstring-plus-int"
+#pragma clang diagnostic ignored "-Wswitch"
+#pragma clang diagnostic ignored "-Wmissing-field-initializers"
+#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
#endif
#if defined(AS)
@@ -59,6 +66,8 @@
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
+#elif defined(__clang__)
+#pragma clang diagnostic pop
#endif
using Microsoft::WRL::ComPtr;
diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp
index fb278a4d56..a445006058 100644
--- a/drivers/d3d12/rendering_device_driver_d3d12.cpp
+++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp
@@ -36,6 +36,7 @@
#include "thirdparty/zlib/zlib.h"
#include "d3d12_godot_nir_bridge.h"
+#include "dxil_hash.h"
#include "rendering_context_driver_d3d12.h"
// No point in fighting warnings in Mesa.
@@ -51,9 +52,14 @@
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wswitch"
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#elif defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+#pragma clang diagnostic ignored "-Wstring-plus-int"
+#pragma clang diagnostic ignored "-Wswitch"
+#pragma clang diagnostic ignored "-Wmissing-field-initializers"
#endif
-#include "dxil_validator.h"
#include "nir_spirv.h"
#include "nir_to_dxil.h"
#include "spirv_to_dxil.h"
@@ -63,6 +69,8 @@ extern "C" {
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
+#elif defined(__clang__)
+#pragma clang diagnostic pop
#endif
#if defined(_MSC_VER)
@@ -96,11 +104,6 @@ static const D3D12_RANGE VOID_RANGE = {};
static const uint32_t ROOT_CONSTANT_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RDD::MAX_UNIFORM_SETS + 1);
static const uint32_t RUNTIME_DATA_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RDD::MAX_UNIFORM_SETS + 2);
-#ifdef DEV_ENABLED
-//#define DEBUG_COUNT_BARRIERS
-#define CUSTOM_INFO_QUEUE_ENABLED 0
-#endif
-
/*****************/
/**** GENERIC ****/
/*****************/
@@ -869,6 +872,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
D3D12MA::ALLOCATION_DESC allocation_desc = {};
allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
+ D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COMMON;
switch (p_allocation_type) {
case MEMORY_ALLOCATION_TYPE_CPU: {
bool is_src = p_usage.has_flag(BUFFER_USAGE_TRANSFER_FROM_BIT);
@@ -876,10 +880,12 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
if (is_src && !is_dst) {
// Looks like a staging buffer: CPU maps, writes sequentially, then GPU copies to VRAM.
allocation_desc.HeapType = D3D12_HEAP_TYPE_UPLOAD;
+ initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;
}
if (is_dst && !is_src) {
// Looks like a readback buffer: GPU copies from VRAM, then CPU maps and reads.
allocation_desc.HeapType = D3D12_HEAP_TYPE_READBACK;
+ initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
}
} break;
case MEMORY_ALLOCATION_TYPE_GPU: {
@@ -908,7 +914,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
res = allocator->CreateResource(
&allocation_desc,
reinterpret_cast<const D3D12_RESOURCE_DESC *>(&resource_desc),
- D3D12_RESOURCE_STATE_COMMON,
+ initial_state,
nullptr,
allocation.GetAddressOf(),
IID_PPV_ARGS(buffer.GetAddressOf()));
@@ -922,7 +928,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel
buf_info->resource = buffer.Get();
buf_info->owner_info.resource = buffer;
buf_info->owner_info.allocation = allocation;
- buf_info->owner_info.states.subresource_states.push_back(D3D12_RESOURCE_STATE_COMMON);
+ buf_info->owner_info.states.subresource_states.push_back(initial_state);
buf_info->states_ptr = &buf_info->owner_info.states;
buf_info->size = p_size;
buf_info->flags.usable_as_uav = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
@@ -1462,7 +1468,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex
uav_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format;
}
- if (p_slice_type != -1) {
+ if (p_slice_type != (TextureSliceType)-1) {
// Complete description with slicing.
switch (p_slice_type) {
@@ -1560,7 +1566,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(Tex
tex_info->states_ptr = owner_tex_info->states_ptr;
tex_info->format = p_view.format;
tex_info->desc = new_tex_resource_desc;
- if (p_slice_type == -1) {
+ if (p_slice_type == (TextureSliceType)-1) {
tex_info->base_layer = owner_tex_info->base_layer;
tex_info->layers = owner_tex_info->layers;
tex_info->base_mip = owner_tex_info->base_mip;
@@ -1741,7 +1747,7 @@ RDD::SamplerID RenderingDeviceDriverD3D12::sampler_create(const SamplerState &p_
slot = 1;
} else {
for (uint32_t i = 1; i < samplers.size(); i++) {
- if (samplers[i].Filter == INT_MAX) {
+ if ((int)samplers[i].Filter == INT_MAX) {
slot = i;
break;
}
@@ -2137,33 +2143,59 @@ void RenderingDeviceDriverD3D12::command_pipeline_barrier(CommandBufferID p_cmd_
for (uint32_t i = 0; i < p_texture_barriers.size(); i++) {
const TextureBarrier &texture_barrier_rd = p_texture_barriers[i];
const TextureInfo *texture_info = (const TextureInfo *)(texture_barrier_rd.texture.id);
+ if (texture_info->main_texture) {
+ texture_info = texture_info->main_texture;
+ }
_rd_stages_and_access_to_d3d12(p_src_stages, texture_barrier_rd.prev_layout, texture_barrier_rd.src_access, texture_barrier_d3d12.SyncBefore, texture_barrier_d3d12.AccessBefore);
_rd_stages_and_access_to_d3d12(p_dst_stages, texture_barrier_rd.next_layout, texture_barrier_rd.dst_access, texture_barrier_d3d12.SyncAfter, texture_barrier_d3d12.AccessAfter);
texture_barrier_d3d12.LayoutBefore = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.prev_layout);
texture_barrier_d3d12.LayoutAfter = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.next_layout);
texture_barrier_d3d12.pResource = texture_info->resource;
- texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = texture_barrier_rd.subresources.base_mipmap;
- texture_barrier_d3d12.Subresources.NumMipLevels = texture_barrier_rd.subresources.mipmap_count;
- texture_barrier_d3d12.Subresources.FirstArraySlice = texture_barrier_rd.subresources.base_layer;
- texture_barrier_d3d12.Subresources.NumArraySlices = texture_barrier_rd.subresources.layer_count;
- texture_barrier_d3d12.Subresources.FirstPlane = _compute_plane_slice(texture_info->format, texture_barrier_rd.subresources.aspect);
- texture_barrier_d3d12.Subresources.NumPlanes = format_get_plane_count(texture_info->format);
+ if (texture_barrier_rd.subresources.mipmap_count == texture_info->mipmaps && texture_barrier_rd.subresources.layer_count == texture_info->layers) {
+ // So, all resources. Then, let's be explicit about it so D3D12 doesn't think
+ // we are dealing with a subset of subresources.
+ texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = 0xffffffff;
+ texture_barrier_d3d12.Subresources.NumMipLevels = 0;
+ // Because NumMipLevels == 0, all the other fields are ignored by D3D12.
+ } else {
+ texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = texture_barrier_rd.subresources.base_mipmap;
+ texture_barrier_d3d12.Subresources.NumMipLevels = texture_barrier_rd.subresources.mipmap_count;
+ texture_barrier_d3d12.Subresources.FirstArraySlice = texture_barrier_rd.subresources.base_layer;
+ texture_barrier_d3d12.Subresources.NumArraySlices = texture_barrier_rd.subresources.layer_count;
+ texture_barrier_d3d12.Subresources.FirstPlane = _compute_plane_slice(texture_info->format, texture_barrier_rd.subresources.aspect);
+ texture_barrier_d3d12.Subresources.NumPlanes = format_get_plane_count(texture_info->format);
+ }
texture_barrier_d3d12.Flags = (texture_barrier_rd.prev_layout == RDD::TEXTURE_LAYOUT_UNDEFINED) ? D3D12_TEXTURE_BARRIER_FLAG_DISCARD : D3D12_TEXTURE_BARRIER_FLAG_NONE;
texture_barriers.push_back(texture_barrier_d3d12);
}
// Define the barrier groups and execute.
+
D3D12_BARRIER_GROUP barrier_groups[3] = {};
- barrier_groups[0].Type = D3D12_BARRIER_TYPE_GLOBAL;
- barrier_groups[1].Type = D3D12_BARRIER_TYPE_BUFFER;
- barrier_groups[2].Type = D3D12_BARRIER_TYPE_TEXTURE;
- barrier_groups[0].NumBarriers = global_barriers.size();
- barrier_groups[1].NumBarriers = buffer_barriers.size();
- barrier_groups[2].NumBarriers = texture_barriers.size();
- barrier_groups[0].pGlobalBarriers = global_barriers.ptr();
- barrier_groups[1].pBufferBarriers = buffer_barriers.ptr();
- barrier_groups[2].pTextureBarriers = texture_barriers.ptr();
- cmd_list_7->Barrier(ARRAY_SIZE(barrier_groups), barrier_groups);
+ uint32_t barrier_groups_count = 0;
+
+ if (!global_barriers.is_empty()) {
+ D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_groups_count++];
+ barrier_group.Type = D3D12_BARRIER_TYPE_GLOBAL;
+ barrier_group.NumBarriers = global_barriers.size();
+ barrier_group.pGlobalBarriers = global_barriers.ptr();
+ }
+
+ if (!buffer_barriers.is_empty()) {
+ D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_groups_count++];
+ barrier_group.Type = D3D12_BARRIER_TYPE_BUFFER;
+ barrier_group.NumBarriers = buffer_barriers.size();
+ barrier_group.pBufferBarriers = buffer_barriers.ptr();
+ }
+
+ if (!texture_barriers.is_empty()) {
+ D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_groups_count++];
+ barrier_group.Type = D3D12_BARRIER_TYPE_TEXTURE;
+ barrier_group.NumBarriers = texture_barriers.size();
+ barrier_group.pTextureBarriers = texture_barriers.ptr();
+ }
+
+ cmd_list_7->Barrier(barrier_groups_count, barrier_groups);
}
/****************/
@@ -2703,6 +2735,8 @@ D3D12_UNORDERED_ACCESS_VIEW_DESC RenderingDeviceDriverD3D12::_make_ranged_uav_fo
uav_desc.Texture3D.MipSlice = mip;
uav_desc.Texture3D.WSize >>= p_mipmap_offset;
} break;
+ default:
+ break;
}
return uav_desc;
@@ -2859,23 +2893,6 @@ static uint32_t SHADER_STAGES_BIT_OFFSET_INDICES[RenderingDevice::SHADER_STAGE_M
/* SHADER_STAGE_COMPUTE */ 2,
};
-dxil_validator *RenderingDeviceDriverD3D12::_get_dxil_validator_for_current_thread() {
- MutexLock lock(dxil_mutex);
-
- int thread_idx = WorkerThreadPool::get_singleton()->get_thread_index();
- if (dxil_validators.has(thread_idx)) {
- return dxil_validators[thread_idx];
- }
-
-#ifdef DEV_ENABLED
- print_verbose("Creating DXIL validator for worker thread index " + itos(thread_idx));
-#endif
-
- dxil_validator *dxil_validator = dxil_create_validator(nullptr);
- dxil_validators.insert(thread_idx, dxil_validator);
- return dxil_validator;
-}
-
uint32_t RenderingDeviceDriverD3D12::_shader_patch_dxil_specialization_constant(
PipelineSpecializationConstantType p_type,
const void *p_value,
@@ -2998,40 +3015,20 @@ bool RenderingDeviceDriverD3D12::_shader_apply_specialization_constants(
ShaderStage stage = E.key;
if ((stages_re_sign_mask & (1 << stage))) {
Vector<uint8_t> &bytecode = E.value;
- bool sign_ok = _shader_sign_dxil_bytecode(stage, bytecode);
- ERR_FAIL_COND_V(!sign_ok, false);
+ _shader_sign_dxil_bytecode(stage, bytecode);
}
}
return true;
}
-bool RenderingDeviceDriverD3D12::_shader_sign_dxil_bytecode(ShaderStage p_stage, Vector<uint8_t> &r_dxil_blob) {
- dxil_validator *validator = _get_dxil_validator_for_current_thread();
- if (!validator) {
- if (is_in_developer_mode()) {
- return true;
- } else {
- OS::get_singleton()->alert("Shader validation failed: DXIL.dll was not found, and developer mode is disabled.\n\nClick OK to exit.");
- CRASH_NOW();
- }
- }
-
- char *err = nullptr;
- bool res = dxil_validate_module(validator, r_dxil_blob.ptrw(), r_dxil_blob.size(), &err);
- if (!res) {
- if (err) {
- ERR_FAIL_COND_V_MSG(!res, false, "Shader signing invocation at stage " + String(SHADER_STAGE_NAMES[p_stage]) + " failed:\n" + String(err));
- } else {
- ERR_FAIL_COND_V_MSG(!res, false, "Shader signing invocation at stage " + String(SHADER_STAGE_NAMES[p_stage]) + " failed.");
- }
- }
-
- return true;
+// Signs a DXIL blob in place by hashing its payload and storing the digest in its header.
+// `p_stage` is currently unused; it is kept so callers do not need to change.
+void RenderingDeviceDriverD3D12::_shader_sign_dxil_bytecode(ShaderStage p_stage, Vector<uint8_t> &r_dxil_blob) {
+ // Layout relied upon here: 4 leading bytes, then the 16-byte digest
+ // (compute_dxil_hash() writes exactly 16 bytes), then the data covered by the hash.
+ const int64_t digest_offset = 4;
+ const int64_t hashed_data_offset = digest_offset + 16;
+
+ // A blob shorter than that header would make the length passed below negative,
+ // which wraps to a huge UINT and sends the hash routine out of bounds.
+ ERR_FAIL_COND_MSG((int64_t)r_dxil_blob.size() < hashed_data_offset, "DXIL blob is too small to be signed.");
+
+ uint8_t *w = r_dxil_blob.ptrw();
+ compute_dxil_hash(w + hashed_data_offset, (UINT)(r_dxil_blob.size() - hashed_data_offset), w + digest_offset);
}
String RenderingDeviceDriverD3D12::shader_get_binary_cache_key() {
- return "D3D12-SV" + uitos(ShaderBinary::VERSION) + "-" + itos(shader_capabilities.shader_model) + (is_in_developer_mode() ? "dev" : "");
+ return "D3D12-SV" + uitos(ShaderBinary::VERSION) + "-" + itos(shader_capabilities.shader_model);
}
Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) {
@@ -3299,10 +3296,7 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec
nir_to_dxil_options nir_to_dxil_options = {};
nir_to_dxil_options.environment = DXIL_ENVIRONMENT_VULKAN;
nir_to_dxil_options.shader_model_max = shader_model_d3d_to_dxil(shader_capabilities.shader_model);
- dxil_validator *validator = _get_dxil_validator_for_current_thread();
- if (validator) {
- nir_to_dxil_options.validator_version_max = dxil_get_validator_version(validator);
- }
+ nir_to_dxil_options.validator_version_max = NO_DXIL_VALIDATION;
nir_to_dxil_options.godot_nir_callbacks = &godot_nir_callbacks;
dxil_logger logger = {};
@@ -3353,8 +3347,7 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec
for (KeyValue<ShaderStage, Vector<uint8_t>> &E : dxil_blobs) {
ShaderStage stage = E.key;
Vector<uint8_t> &dxil_blob = E.value;
- bool sign_ok = _shader_sign_dxil_bytecode(stage, dxil_blob);
- ERR_FAIL_COND_V(!sign_ok, Vector<uint8_t>());
+ _shader_sign_dxil_bytecode(stage, dxil_blob);
}
// Build the root signature.
@@ -3822,6 +3815,11 @@ void RenderingDeviceDriverD3D12::shader_free(ShaderID p_shader) {
VersatileResource::free(resources_allocator, shader_info_in);
}
+void RenderingDeviceDriverD3D12::shader_destroy_modules(ShaderID p_shader) {
+ // Only the stored stage bytecode is dropped; nothing else on the shader is touched here.
+ ((ShaderInfo *)p_shader.id)->stages_bytecode.clear();
+}
+
/*********************/
/**** UNIFORM SET ****/
/*********************/
@@ -4094,7 +4092,6 @@ RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<Boun
{
uniform_set_info->resource_states.reserve(resource_states.size());
- uint32_t i = 0;
for (const KeyValue<ResourceInfo *, NeededState> &E : resource_states) {
UniformSetInfo::StateRequirement sr;
sr.resource = E.key;
@@ -4102,7 +4099,6 @@ RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<Boun
sr.states = E.value.states;
sr.shader_uniform_idx_mask = E.value.shader_uniform_idx_mask;
uniform_set_info->resource_states.push_back(sr);
- i++;
}
}
@@ -5092,6 +5088,7 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd
if (pass_info->attachments[i].load_op == ATTACHMENT_LOAD_OP_CLEAR) {
clear.aspect.set_flag(TEXTURE_ASPECT_COLOR_BIT);
clear.color_attachment = i;
+ tex_info->pending_clear.remove_from_list();
}
} else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {
if (pass_info->attachments[i].stencil_load_op == ATTACHMENT_LOAD_OP_CLEAR) {
@@ -5370,10 +5367,12 @@ void RenderingDeviceDriverD3D12::command_bind_render_pipeline(CommandBufferID p_
cmd_buf_info->cmd_list->OMSetBlendFactor(pso_extra_info.dyn_params.blend_constant.components);
cmd_buf_info->cmd_list->OMSetStencilRef(pso_extra_info.dyn_params.stencil_reference);
- ComPtr<ID3D12GraphicsCommandList1> command_list_1;
- cmd_buf_info->cmd_list->QueryInterface(command_list_1.GetAddressOf());
- if (command_list_1) {
- command_list_1->OMSetDepthBounds(pso_extra_info.dyn_params.depth_bounds_min, pso_extra_info.dyn_params.depth_bounds_max);
+ if (misc_features_support.depth_bounds_supported) {
+ ComPtr<ID3D12GraphicsCommandList1> command_list_1;
+ cmd_buf_info->cmd_list->QueryInterface(command_list_1.GetAddressOf());
+ if (command_list_1) {
+ command_list_1->OMSetDepthBounds(pso_extra_info.dyn_params.depth_bounds_min, pso_extra_info.dyn_params.depth_bounds_max);
+ }
}
cmd_buf_info->render_pass_state.vf_info = pso_extra_info.vf_info;
@@ -5763,8 +5762,15 @@ RDD::PipelineID RenderingDeviceDriverD3D12::render_pipeline_create(
(&pipeline_desc.DepthStencilState)->BackFace.StencilDepthFailOp = RD_TO_D3D12_STENCIL_OP[p_depth_stencil_state.back_op.depth_fail];
(&pipeline_desc.DepthStencilState)->BackFace.StencilFunc = RD_TO_D3D12_COMPARE_OP[p_depth_stencil_state.back_op.compare];
- pso_extra_info.dyn_params.depth_bounds_min = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_min : 0.0f;
- pso_extra_info.dyn_params.depth_bounds_max = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_max : 1.0f;
+ if (misc_features_support.depth_bounds_supported) {
+ pso_extra_info.dyn_params.depth_bounds_min = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_min : 0.0f;
+ pso_extra_info.dyn_params.depth_bounds_max = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_max : 1.0f;
+ } else {
+ if (p_depth_stencil_state.enable_depth_range) {
+ WARN_PRINT_ONCE("Depth bounds test is not supported by the GPU driver.");
+ }
+ }
+
pso_extra_info.dyn_params.stencil_reference = p_depth_stencil_state.front_op.reference;
}
@@ -6035,6 +6041,10 @@ void RenderingDeviceDriverD3D12::command_end_label(CommandBufferID p_cmd_buffer)
#endif
}
+// Breadcrumb insertion is currently a no-op in this driver: both arguments are ignored
+// and nothing is recorded into the command buffer.
+void RenderingDeviceDriverD3D12::command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) {
+ // TODO: Implement via DRED.
+}
+
/********************/
/**** SUBMISSION ****/
/********************/
@@ -6281,15 +6291,6 @@ RenderingDeviceDriverD3D12::RenderingDeviceDriverD3D12(RenderingContextDriverD3D
}
RenderingDeviceDriverD3D12::~RenderingDeviceDriverD3D12() {
- {
- MutexLock lock(dxil_mutex);
- for (const KeyValue<int, dxil_validator *> &E : dxil_validators) {
- if (E.value) {
- dxil_destroy_validator(E.value);
- }
- }
- }
-
glsl_type_singleton_decref();
}
@@ -6485,6 +6486,12 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() {
subgroup_capabilities.wave_ops_supported = options1.WaveOps;
}
+ D3D12_FEATURE_DATA_D3D12_OPTIONS2 options2 = {};
+ res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2, &options2, sizeof(options2));
+ if (SUCCEEDED(res)) {
+ misc_features_support.depth_bounds_supported = options2.DepthBoundsTestSupported;
+ }
+
D3D12_FEATURE_DATA_D3D12_OPTIONS3 options3 = {};
res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &options3, sizeof(options3));
if (SUCCEEDED(res)) {
@@ -6570,6 +6577,12 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() {
print_verbose(String("- D3D12 16-bit ops supported: ") + (shader_capabilities.native_16bit_ops ? "yes" : "no"));
+ if (misc_features_support.depth_bounds_supported) {
+ print_verbose("- Depth bounds test supported");
+ } else {
+ print_verbose("- Depth bounds test not supported");
+ }
+
return OK;
}
@@ -6635,7 +6648,7 @@ Error RenderingDeviceDriverD3D12::_initialize_frames(uint32_t p_frame_count) {
D3D12MA::ALLOCATION_DESC allocation_desc = {};
allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
- CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
+ //CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
uint32_t resource_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_resource_descriptors_per_frame");
uint32_t sampler_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame");
uint32_t misc_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_misc_descriptors_per_frame");
diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h
index 1782819238..d8381279ec 100644
--- a/drivers/d3d12/rendering_device_driver_d3d12.h
+++ b/drivers/d3d12/rendering_device_driver_d3d12.h
@@ -36,6 +36,11 @@
#include "core/templates/self_list.h"
#include "servers/rendering/rendering_device_driver.h"
+#ifndef _MSC_VER
+// Match the version currently used by MinGW; MSVC and the Direct3D 12 headers use 500.
+#define __REQUIRED_RPCNDR_H_VERSION__ 475
+#endif
+
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
@@ -43,6 +48,13 @@
#pragma GCC diagnostic ignored "-Wswitch"
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+#elif defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+#pragma clang diagnostic ignored "-Wstring-plus-int"
+#pragma clang diagnostic ignored "-Wswitch"
+#pragma clang diagnostic ignored "-Wmissing-field-initializers"
+#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
#endif
#include "d3dx12.h"
@@ -59,13 +71,19 @@
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
+#elif defined(__clang__)
+#pragma clang diagnostic pop
#endif
using Microsoft::WRL::ComPtr;
#define D3D12_BITCODE_OFFSETS_NUM_STAGES 3
-struct dxil_validator;
+#ifdef DEV_ENABLED
+//#define DEBUG_COUNT_BARRIERS
+#define CUSTOM_INFO_QUEUE_ENABLED 0
+#endif
+
class RenderingContextDriverD3D12;
// Design principles:
@@ -126,6 +144,10 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
bool enhanced_barriers_supported = false;
};
+ // Miscellaneous optional device features detected at startup.
+ struct MiscFeaturesSupport {
+ // Set in _check_capabilities() from D3D12_FEATURE_DATA_D3D12_OPTIONS2::DepthBoundsTestSupported;
+ // stays false if that feature query fails. Gates the OMSetDepthBounds() call and the
+ // depth-range parameters of render pipelines.
+ bool depth_bounds_supported = false;
+ };
+
RenderingContextDriverD3D12 *context_driver = nullptr;
RenderingContextDriver::Device context_device;
ComPtr<IDXGIAdapter> adapter;
@@ -141,6 +163,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
StorageBufferCapabilities storage_buffer_capabilities;
FormatCapabilities format_capabilities;
BarrierCapabilities barrier_capabilities;
+ MiscFeaturesSupport misc_features_support;
String pipeline_cache_id;
class DescriptorsHeap {
@@ -257,7 +280,7 @@ private:
LocalVector<D3D12_RESOURCE_BARRIER> res_barriers;
uint32_t res_barriers_count = 0;
uint32_t res_barriers_batch = 0;
-#ifdef DEV_ENABLED
+#ifdef DEBUG_COUNT_BARRIERS
int frame_barriers_count = 0;
int frame_barriers_batches_count = 0;
uint64_t frame_barriers_cpu_time = 0;
@@ -678,10 +701,6 @@ private:
uint32_t root_signature_crc = 0;
};
- Mutex dxil_mutex;
- HashMap<int, dxil_validator *> dxil_validators; // One per WorkerThreadPool thread used for shader compilation, plus one (-1) for all the other.
-
- dxil_validator *_get_dxil_validator_for_current_thread();
uint32_t _shader_patch_dxil_specialization_constant(
PipelineSpecializationConstantType p_type,
const void *p_value,
@@ -692,7 +711,7 @@ private:
const ShaderInfo *p_shader_info,
VectorView<PipelineSpecializationConstant> p_specialization_constants,
HashMap<ShaderStage, Vector<uint8_t>> &r_final_stages_bytecode);
- bool _shader_sign_dxil_bytecode(ShaderStage p_stage, Vector<uint8_t> &r_dxil_blob);
+ void _shader_sign_dxil_bytecode(ShaderStage p_stage, Vector<uint8_t> &r_dxil_blob);
public:
virtual String shader_get_binary_cache_key() override final;
@@ -700,6 +719,7 @@ public:
virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name) override final;
virtual uint32_t shader_get_layout_hash(ShaderID p_shader) override final;
virtual void shader_free(ShaderID p_shader) override final;
+ virtual void shader_destroy_modules(ShaderID p_shader) override final;
/*********************/
/**** UNIFORM SET ****/
@@ -931,6 +951,11 @@ public:
virtual void command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) override final;
virtual void command_end_label(CommandBufferID p_cmd_buffer) override final;
+ /****************/
+ /**** DEBUG *****/
+ /****************/
+ virtual void command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) override final;
+
/********************/
/**** SUBMISSION ****/
/********************/
diff --git a/drivers/egl/egl_manager.cpp b/drivers/egl/egl_manager.cpp
index 9c1d08331d..4477ba7752 100644
--- a/drivers/egl/egl_manager.cpp
+++ b/drivers/egl/egl_manager.cpp
@@ -357,7 +357,7 @@ Error EGLManager::initialize(void *p_native_display) {
// have to temporarily get a proper display and reload EGL once again to
// initialize everything else.
if (!gladLoaderLoadEGL(EGL_NO_DISPLAY)) {
- ERR_FAIL_V_MSG(ERR_UNAVAILABLE, "Can't load EGL.");
+ ERR_FAIL_V_MSG(ERR_UNAVAILABLE, "Can't load EGL dynamic library.");
}
EGLDisplay tmp_display = EGL_NO_DISPLAY;
@@ -387,7 +387,7 @@ Error EGLManager::initialize(void *p_native_display) {
int version = gladLoaderLoadEGL(tmp_display);
if (!version) {
eglTerminate(tmp_display);
- ERR_FAIL_V_MSG(ERR_UNAVAILABLE, "Can't load EGL.");
+ ERR_FAIL_V_MSG(ERR_UNAVAILABLE, "Can't load EGL dynamic library.");
}
int major = GLAD_VERSION_MAJOR(version);
diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp
index 941b1a1b28..b9206f310e 100644
--- a/drivers/gles3/rasterizer_canvas_gles3.cpp
+++ b/drivers/gles3/rasterizer_canvas_gles3.cpp
@@ -647,18 +647,17 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou
_record_item_commands(ci, p_to_render_target, p_canvas_transform_inverse, current_clip, blend_mode, p_lights, index, batch_broken, r_sdf_used, Point2());
} else {
Point2 start_pos = ci->repeat_size * -(ci->repeat_times / 2);
- Point2 end_pos = ci->repeat_size * ci->repeat_times + ci->repeat_size + start_pos;
- Point2 pos = start_pos;
-
- do {
- do {
- _record_item_commands(ci, p_to_render_target, p_canvas_transform_inverse, current_clip, blend_mode, p_lights, index, batch_broken, r_sdf_used, pos);
- pos.y += ci->repeat_size.y;
- } while (pos.y < end_pos.y);
-
- pos.x += ci->repeat_size.x;
- pos.y = start_pos.y;
- } while (pos.x < end_pos.x);
+ Point2 offset;
+
+ int repeat_times_x = ci->repeat_size.x ? ci->repeat_times : 0;
+ int repeat_times_y = ci->repeat_size.y ? ci->repeat_times : 0;
+ for (int ry = 0; ry <= repeat_times_y; ry++) {
+ offset.y = start_pos.y + ry * ci->repeat_size.y;
+ for (int rx = 0; rx <= repeat_times_x; rx++) {
+ offset.x = start_pos.x + rx * ci->repeat_size.x;
+ _record_item_commands(ci, p_to_render_target, p_canvas_transform_inverse, current_clip, blend_mode, p_lights, index, batch_broken, r_sdf_used, offset);
+ }
+ }
}
}
@@ -809,7 +808,7 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou
state.last_item_index += index;
}
-void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, const Point2 &p_offset) {
+void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, const Point2 &p_repeat_offset) {
RenderingServer::CanvasItemTextureFilter texture_filter = p_item->texture_filter == RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT ? state.default_filter : p_item->texture_filter;
if (texture_filter != state.canvas_instance_batches[state.current_batch_index].filter) {
@@ -826,11 +825,11 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, RID p_rend
state.canvas_instance_batches[state.current_batch_index].repeat = texture_repeat;
}
- Transform2D base_transform = p_canvas_transform_inverse * p_item->final_transform;
-
- if (p_offset.x || p_offset.y) {
- base_transform *= Transform2D(0, p_offset / p_item->xform_curr.get_scale()); // TODO: Interpolate or explain why not needed.
+ Transform2D base_transform = p_item->final_transform;
+ if (p_item->repeat_source_item && (p_repeat_offset.x || p_repeat_offset.y)) {
+ base_transform.columns[2] += p_item->repeat_source_item->final_transform.basis_xform(p_repeat_offset);
}
+ base_transform = p_canvas_transform_inverse * base_transform;
Transform2D draw_transform; // Used by transform command
@@ -1735,7 +1734,7 @@ void RasterizerCanvasGLES3::light_update_directional_shadow(RID p_rid, int p_sha
Vector2 center = p_clip_rect.get_center();
- float to_edge_distance = ABS(light_dir.dot(p_clip_rect.get_support(light_dir)) - light_dir.dot(center));
+ float to_edge_distance = ABS(light_dir.dot(p_clip_rect.get_support(-light_dir)) - light_dir.dot(center));
Vector2 from_pos = center - light_dir * (to_edge_distance + p_cull_distance);
float distance = to_edge_distance * 2.0 + p_cull_distance;
diff --git a/drivers/gles3/rasterizer_canvas_gles3.h b/drivers/gles3/rasterizer_canvas_gles3.h
index 7fc9992c3d..027f717eb7 100644
--- a/drivers/gles3/rasterizer_canvas_gles3.h
+++ b/drivers/gles3/rasterizer_canvas_gles3.h
@@ -63,7 +63,6 @@ class RasterizerCanvasGLES3 : public RendererCanvasRender {
FLAGS_TRANSPOSE_RECT = (1 << 10),
FLAGS_NINEPACH_DRAW_CENTER = (1 << 12),
- FLAGS_USING_PARTICLES = (1 << 13),
FLAGS_USE_SKELETON = (1 << 15),
FLAGS_NINEPATCH_H_MODE_SHIFT = 16,
@@ -363,7 +362,7 @@ public:
void canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used, RenderingMethod::RenderInfo *r_render_info = nullptr) override;
void _render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer = false, RenderingMethod::RenderInfo *r_render_info = nullptr);
- void _record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_break_batch, bool &r_sdf_used, const Point2 &p_offset);
+ void _record_item_commands(const Item *p_item, RID p_render_target, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_break_batch, bool &r_sdf_used, const Point2 &p_repeat_offset);
void _render_batch(Light *p_lights, uint32_t p_index, RenderingMethod::RenderInfo *r_render_info = nullptr);
bool _bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant, uint64_t p_specialization);
void _new_batch(bool &r_batch_broken);
diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp
index ae39c86d44..19ef3d416c 100644
--- a/drivers/gles3/rasterizer_gles3.cpp
+++ b/drivers/gles3/rasterizer_gles3.cpp
@@ -62,6 +62,10 @@
#define _EXT_DEBUG_SEVERITY_LOW_ARB 0x9148
#define _EXT_DEBUG_OUTPUT 0x92E0
+#ifndef GL_FRAMEBUFFER_SRGB
+#define GL_FRAMEBUFFER_SRGB 0x8DB9
+#endif
+
#ifndef GLAPIENTRY
#if defined(WINDOWS_ENABLED)
#define GLAPIENTRY APIENTRY
@@ -72,7 +76,7 @@
#if !defined(IOS_ENABLED) && !defined(WEB_ENABLED)
// We include EGL below to get debug callback on GLES2 platforms,
-// but EGL is not available on iOS.
+// but EGL is not available on iOS or the web.
#define CAN_DEBUG
#endif
@@ -107,7 +111,7 @@ void RasterizerGLES3::end_frame(bool p_swap_buffers) {
utils->capture_timestamps_end();
}
-void RasterizerGLES3::end_viewport(bool p_swap_buffers) {
+void RasterizerGLES3::gl_end_frame(bool p_swap_buffers) {
if (p_swap_buffers) {
DisplayServer::get_singleton()->swap_buffers();
} else {
@@ -345,6 +349,9 @@ RasterizerGLES3::RasterizerGLES3() {
}
}
+ // Disable OpenGL linear to sRGB conversion, because Godot will always do this conversion itself.
+ glDisable(GL_FRAMEBUFFER_SRGB);
+
// OpenGL needs to be initialized before initializing the Rasterizers
config = memnew(GLES3::Config);
utilities = memnew(GLES3::Utilities);
@@ -491,7 +498,7 @@ void RasterizerGLES3::set_boot_image(const Ref<Image> &p_image, const Color &p_c
copy_effects->copy_to_rect(screenrect);
glBindTexture(GL_TEXTURE_2D, 0);
- end_viewport(true);
+ gl_end_frame(true);
texture_storage->texture_free(texture);
}
diff --git a/drivers/gles3/rasterizer_gles3.h b/drivers/gles3/rasterizer_gles3.h
index 0d0c26016d..80a4a792bb 100644
--- a/drivers/gles3/rasterizer_gles3.h
+++ b/drivers/gles3/rasterizer_gles3.h
@@ -99,7 +99,7 @@ public:
void blit_render_targets_to_screen(DisplayServer::WindowID p_screen, const BlitToScreen *p_render_targets, int p_amount);
- void end_viewport(bool p_swap_buffers);
+ void gl_end_frame(bool p_swap_buffers);
void end_frame(bool p_swap_buffers);
void finalize();
diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp
index 9ea030bbd4..3ed8042f3f 100644
--- a/drivers/gles3/rasterizer_scene_gles3.cpp
+++ b/drivers/gles3/rasterizer_scene_gles3.cpp
@@ -777,7 +777,6 @@ void RasterizerSceneGLES3::_draw_sky(RID p_env, const Projection &p_projection,
ERR_FAIL_COND(p_env.is_null());
Sky *sky = sky_owner.get_or_null(environment_get_sky(p_env));
- ERR_FAIL_NULL(sky);
GLES3::SkyMaterialData *material_data = nullptr;
RID sky_material;
@@ -851,6 +850,15 @@ void RasterizerSceneGLES3::_draw_sky(RID p_env, const Projection &p_projection,
material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::SKY_ENERGY_MULTIPLIER, p_sky_energy_multiplier, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants);
material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::LUMINANCE_MULTIPLIER, p_luminance_multiplier, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants);
+ Color fog_color = environment_get_fog_light_color(p_env).srgb_to_linear() * environment_get_fog_light_energy(p_env);
+ material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::FOG_ENABLED, environment_get_fog_enabled(p_env), shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants);
+ material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::FOG_AERIAL_PERSPECTIVE, environment_get_fog_aerial_perspective(p_env), shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants);
+ material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::FOG_LIGHT_COLOR, fog_color, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants);
+ material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::FOG_SUN_SCATTER, environment_get_fog_sun_scatter(p_env), shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants);
+ material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::FOG_DENSITY, environment_get_fog_density(p_env), shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants);
+ material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::FOG_SKY_AFFECT, environment_get_fog_sky_affect(p_env), shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants);
+ material_storage->shaders.sky_shader.version_set_uniform(SkyShaderGLES3::DIRECTIONAL_LIGHT_COUNT, sky_globals.directional_light_count, shader_data->version, SkyShaderGLES3::MODE_BACKGROUND, spec_constants);
+
if (p_use_multiview) {
glBindBufferBase(GL_UNIFORM_BUFFER, SKY_MULTIVIEW_UNIFORM_LOCATION, scene_state.multiview_buffer);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
@@ -1420,7 +1428,7 @@ void RasterizerSceneGLES3::_fill_render_list(RenderListType p_render_list, const
#else
bool force_alpha = false;
#endif
- if (!force_alpha && (surf->flags & GeometryInstanceSurface::FLAG_PASS_OPAQUE)) {
+ if (!force_alpha && (surf->flags & (GeometryInstanceSurface::FLAG_PASS_DEPTH | GeometryInstanceSurface::FLAG_PASS_OPAQUE))) {
rl->add_element(surf);
}
if (force_alpha || (surf->flags & GeometryInstanceSurface::FLAG_PASS_ALPHA)) {
@@ -2247,7 +2255,6 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
bool glow_enabled = false;
if (p_environment.is_valid()) {
glow_enabled = environment_get_glow_enabled(p_environment);
- rb->ensure_internal_buffers(); // Ensure our intermediate buffer is available if glow is enabled
if (glow_enabled) {
// If glow is enabled, we apply tonemapping etc. in post, so disable it during rendering
apply_color_adjustments_in_post = true;
@@ -2339,7 +2346,6 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
if (render_data.environment.is_valid()) {
bool use_bcs = environment_get_adjustments_enabled(render_data.environment);
if (use_bcs) {
- rb->ensure_internal_buffers();
apply_color_adjustments_in_post = true;
}
@@ -2473,6 +2479,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
if (is_reflection_probe) {
fbo = GLES3::LightStorage::get_singleton()->reflection_probe_instance_get_framebuffer(render_data.reflection_probe, render_data.reflection_probe_pass);
} else {
+ rb->set_apply_color_adjustments_in_post(apply_color_adjustments_in_post);
fbo = rb->get_render_fbo();
}
@@ -2500,7 +2507,9 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
glColorMask(0, 0, 0, 0);
RasterizerGLES3::clear_depth(0.0);
glClear(GL_DEPTH_BUFFER_BIT);
- glDrawBuffers(0, nullptr);
+ // Some desktop GL implementations fall apart when using Multiview with GL_NONE.
+ GLuint db = p_camera_data->view_count > 1 ? GL_COLOR_ATTACHMENT0 : GL_NONE;
+ glDrawBuffers(1, &db);
uint64_t spec_constant = SceneShaderGLES3::DISABLE_FOG | SceneShaderGLES3::DISABLE_LIGHT_DIRECTIONAL |
SceneShaderGLES3::DISABLE_LIGHTMAP | SceneShaderGLES3::DISABLE_LIGHT_OMNI |
@@ -2586,7 +2595,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
scene_state.enable_gl_depth_draw(false);
- if (draw_sky) {
+ if (draw_sky || draw_sky_fog_only) {
RENDER_TIMESTAMP("Render Sky");
scene_state.enable_gl_depth_test(true);
@@ -3201,6 +3210,10 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params,
if (lm->uses_spherical_harmonics) {
spec_constants |= SceneShaderGLES3::USE_SH_LIGHTMAP;
}
+
+ if (lightmap_bicubic_upscale) {
+ spec_constants |= SceneShaderGLES3::LIGHTMAP_BICUBIC_FILTER;
+ }
} else if (inst->lightmap_sh) {
spec_constants |= SceneShaderGLES3::USE_LIGHTMAP_CAPTURE;
} else {
@@ -3343,6 +3356,11 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params,
Vector4 uv_scale(inst->lightmap_uv_scale.position.x, inst->lightmap_uv_scale.position.y, inst->lightmap_uv_scale.size.x, inst->lightmap_uv_scale.size.y);
material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::LIGHTMAP_UV_SCALE, uv_scale, shader->version, instance_variant, spec_constants);
+ if (lightmap_bicubic_upscale) {
+ Vector2 light_texture_size(lm->light_texture_size.x, lm->light_texture_size.y);
+ material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::LIGHTMAP_TEXTURE_SIZE, light_texture_size, shader->version, instance_variant, spec_constants);
+ }
+
float exposure_normalization = 1.0;
if (p_render_data->camera_attributes.is_valid()) {
float enf = RSG::camera_attributes->camera_attributes_get_exposure_normalization_factor(p_render_data->camera_attributes);
@@ -4038,6 +4056,10 @@ void RasterizerSceneGLES3::decals_set_filter(RS::DecalFilter p_filter) {
void RasterizerSceneGLES3::light_projectors_set_filter(RS::LightProjectorFilter p_filter) {
}
+void RasterizerSceneGLES3::lightmaps_set_bicubic_filter(bool p_enable) {
+ lightmap_bicubic_upscale = p_enable;
+}
+
RasterizerSceneGLES3::RasterizerSceneGLES3() {
singleton = this;
@@ -4051,6 +4073,7 @@ RasterizerSceneGLES3::RasterizerSceneGLES3() {
positional_soft_shadow_filter_set_quality((RS::ShadowQuality)(int)GLOBAL_GET("rendering/lights_and_shadows/positional_shadow/soft_shadow_filter_quality"));
directional_soft_shadow_filter_set_quality((RS::ShadowQuality)(int)GLOBAL_GET("rendering/lights_and_shadows/directional_shadow/soft_shadow_filter_quality"));
+ lightmaps_set_bicubic_filter(GLOBAL_GET("rendering/lightmapping/lightmap_gi/use_bicubic_filter"));
{
// Setup Lights
diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h
index 4c70c43244..e4af8f99e9 100644
--- a/drivers/gles3/rasterizer_scene_gles3.h
+++ b/drivers/gles3/rasterizer_scene_gles3.h
@@ -680,6 +680,8 @@ protected:
bool glow_bicubic_upscale = false;
RS::EnvironmentSSRRoughnessQuality ssr_roughness_quality = RS::ENV_SSR_ROUGHNESS_QUALITY_LOW;
+ bool lightmap_bicubic_upscale = false;
+
/* Sky */
struct SkyGlobals {
@@ -863,6 +865,7 @@ public:
void decals_set_filter(RS::DecalFilter p_filter) override;
void light_projectors_set_filter(RS::LightProjectorFilter p_filter) override;
+ virtual void lightmaps_set_bicubic_filter(bool p_enable) override;
RasterizerSceneGLES3();
~RasterizerSceneGLES3();
diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp
index 4a15ed827a..5a0f394db0 100644
--- a/drivers/gles3/shader_gles3.cpp
+++ b/drivers/gles3/shader_gles3.cpp
@@ -698,7 +698,8 @@ void ShaderGLES3::_clear_version(Version *p_version) {
void ShaderGLES3::_initialize_version(Version *p_version) {
ERR_FAIL_COND(p_version->variants.size() > 0);
- if (shader_cache_dir_valid && _load_from_cache(p_version)) {
+ bool use_cache = shader_cache_dir_valid && !(feedback_count > 0 && GLES3::Config::get_singleton()->disable_transform_feedback_shader_cache);
+ if (use_cache && _load_from_cache(p_version)) {
return;
}
p_version->variants.reserve(variant_count);
@@ -709,7 +710,7 @@ void ShaderGLES3::_initialize_version(Version *p_version) {
_compile_specialization(spec, i, p_version, specialization_default_mask);
p_version->variants[i].insert(specialization_default_mask, spec);
}
- if (shader_cache_dir_valid) {
+ if (use_cache) {
_save_to_cache(p_version);
}
}
diff --git a/drivers/gles3/shaders/canvas.glsl b/drivers/gles3/shaders/canvas.glsl
index 65332c06be..e358230747 100644
--- a/drivers/gles3/shaders/canvas.glsl
+++ b/drivers/gles3/shaders/canvas.glsl
@@ -239,13 +239,6 @@ void main() {
model_matrix = model_matrix * transpose(mat4(instance_xform0, instance_xform1, vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)));
#endif // USE_INSTANCING
-#if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE)
- if (bool(read_draw_data_flags & FLAGS_USING_PARTICLES)) {
- //scale by texture size
- vertex /= read_draw_data_color_texture_pixel_size;
- }
-#endif
-
vec2 color_texture_pixel_size = read_draw_data_color_texture_pixel_size;
#ifdef USE_POINT_SIZE
@@ -346,14 +339,16 @@ uniform sampler2D color_texture; //texunit:0
layout(location = 0) out vec4 frag_color;
+/* clang-format off */
+// This needs to be outside clang-format so the ubo comment is in the right place
#ifdef MATERIAL_UNIFORMS_USED
-layout(std140) uniform MaterialUniforms{
-//ubo:4
+layout(std140) uniform MaterialUniforms{ //ubo:4
#MATERIAL_UNIFORMS
};
#endif
+/* clang-format on */
#GLOBALS
diff --git a/drivers/gles3/shaders/canvas_uniforms_inc.glsl b/drivers/gles3/shaders/canvas_uniforms_inc.glsl
index 21fd4d3d9d..f6ad2b730a 100644
--- a/drivers/gles3/shaders/canvas_uniforms_inc.glsl
+++ b/drivers/gles3/shaders/canvas_uniforms_inc.glsl
@@ -14,7 +14,6 @@
#define FLAGS_TRANSPOSE_RECT uint(1 << 10)
// (1 << 11) is for FLAGS_CONVERT_ATTRIBUTES_TO_LINEAR in RD backends, unused here.
#define FLAGS_NINEPACH_DRAW_CENTER uint(1 << 12)
-#define FLAGS_USING_PARTICLES uint(1 << 13)
#define FLAGS_NINEPATCH_H_MODE_SHIFT 16
#define FLAGS_NINEPATCH_V_MODE_SHIFT 18
diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl
index be7a6aba57..6143ce2167 100644
--- a/drivers/gles3/shaders/scene.glsl
+++ b/drivers/gles3/shaders/scene.glsl
@@ -36,6 +36,7 @@ ADDITIVE_OMNI = false
ADDITIVE_SPOT = false
RENDER_MATERIAL = false
SECOND_REFLECTION_PROBE = false
+LIGHTMAP_BICUBIC_FILTER = false
#[vertex]
@@ -43,6 +44,7 @@ SECOND_REFLECTION_PROBE = false
#define M_PI 3.14159265359
#define SHADER_IS_SRGB true
+#define SHADER_SPACE_FAR -1.0
#include "stdlib_inc.glsl"
@@ -582,6 +584,9 @@ void main() {
/* clang-format on */
#define SHADER_IS_SRGB true
+#define SHADER_SPACE_FAR -1.0
+
+#define FLAGS_NON_UNIFORM_SCALE (1 << 4)
/* Varyings */
@@ -869,13 +874,15 @@ uniform lowp uint directional_shadow_index;
#if !defined(ADDITIVE_OMNI)
float sample_shadow(highp sampler2DShadow shadow, float shadow_pixel_size, vec4 pos) {
- float avg = textureProj(shadow, pos);
+ // Use textureProjLod with LOD set to 0.0 instead of textureProj, as textureProj does not work correctly on ANGLE with the Metal backend.
+ // https://github.com/godotengine/godot/issues/93537
+ float avg = textureProjLod(shadow, pos, 0.0);
#ifdef SHADOW_MODE_PCF_13
pos /= pos.w;
- avg += textureProj(shadow, vec4(pos.xy + vec2(shadow_pixel_size * 2.0, 0.0), pos.zw));
- avg += textureProj(shadow, vec4(pos.xy + vec2(-shadow_pixel_size * 2.0, 0.0), pos.zw));
- avg += textureProj(shadow, vec4(pos.xy + vec2(0.0, shadow_pixel_size * 2.0), pos.zw));
- avg += textureProj(shadow, vec4(pos.xy + vec2(0.0, -shadow_pixel_size * 2.0), pos.zw));
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(shadow_pixel_size * 2.0, 0.0), pos.zw), 0.0);
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(-shadow_pixel_size * 2.0, 0.0), pos.zw), 0.0);
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(0.0, shadow_pixel_size * 2.0), pos.zw), 0.0);
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(0.0, -shadow_pixel_size * 2.0), pos.zw), 0.0);
// Early bail if distant samples are fully shaded (or none are shaded) to improve performance.
if (avg <= 0.000001) {
@@ -886,23 +893,23 @@ float sample_shadow(highp sampler2DShadow shadow, float shadow_pixel_size, vec4
return 1.0;
}
- avg += textureProj(shadow, vec4(pos.xy + vec2(shadow_pixel_size, 0.0), pos.zw));
- avg += textureProj(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, 0.0), pos.zw));
- avg += textureProj(shadow, vec4(pos.xy + vec2(0.0, shadow_pixel_size), pos.zw));
- avg += textureProj(shadow, vec4(pos.xy + vec2(0.0, -shadow_pixel_size), pos.zw));
- avg += textureProj(shadow, vec4(pos.xy + vec2(shadow_pixel_size, shadow_pixel_size), pos.zw));
- avg += textureProj(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, shadow_pixel_size), pos.zw));
- avg += textureProj(shadow, vec4(pos.xy + vec2(shadow_pixel_size, -shadow_pixel_size), pos.zw));
- avg += textureProj(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, -shadow_pixel_size), pos.zw));
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(shadow_pixel_size, 0.0), pos.zw), 0.0);
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, 0.0), pos.zw), 0.0);
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(0.0, shadow_pixel_size), pos.zw), 0.0);
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(0.0, -shadow_pixel_size), pos.zw), 0.0);
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(shadow_pixel_size, shadow_pixel_size), pos.zw), 0.0);
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, shadow_pixel_size), pos.zw), 0.0);
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(shadow_pixel_size, -shadow_pixel_size), pos.zw), 0.0);
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, -shadow_pixel_size), pos.zw), 0.0);
return avg * (1.0 / 13.0);
#endif
#ifdef SHADOW_MODE_PCF_5
pos /= pos.w;
- avg += textureProj(shadow, vec4(pos.xy + vec2(shadow_pixel_size, 0.0), pos.zw));
- avg += textureProj(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, 0.0), pos.zw));
- avg += textureProj(shadow, vec4(pos.xy + vec2(0.0, shadow_pixel_size), pos.zw));
- avg += textureProj(shadow, vec4(pos.xy + vec2(0.0, -shadow_pixel_size), pos.zw));
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(shadow_pixel_size, 0.0), pos.zw), 0.0);
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(-shadow_pixel_size, 0.0), pos.zw), 0.0);
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(0.0, shadow_pixel_size), pos.zw), 0.0);
+ avg += textureProjLod(shadow, vec4(pos.xy + vec2(0.0, -shadow_pixel_size), pos.zw), 0.0);
return avg * (1.0 / 5.0);
#endif
@@ -921,6 +928,10 @@ uniform lowp uint lightmap_slice;
uniform highp vec4 lightmap_uv_scale;
uniform float lightmap_exposure_normalization;
+#ifdef LIGHTMAP_BICUBIC_FILTER
+uniform highp vec2 lightmap_texture_size;
+#endif
+
#ifdef USE_SH_LIGHTMAP
uniform mediump mat3 lightmap_normal_xform;
#endif // USE_SH_LIGHTMAP
@@ -953,6 +964,7 @@ ivec2 multiview_uv(ivec2 uv) {
uniform highp mat4 world_transform;
uniform mediump float opaque_prepass_threshold;
+uniform highp uint model_flags;
#if defined(RENDER_MATERIAL)
layout(location = 0) out vec4 albedo_output_buffer;
@@ -1412,6 +1424,67 @@ void reflection_process(samplerCube reflection_map,
#endif // !MODE_RENDER_DEPTH
+#ifdef LIGHTMAP_BICUBIC_FILTER
+// w0, w1, w2, and w3 are the four cubic B-spline basis functions
+float w0(float a) {
+ return (1.0 / 6.0) * (a * (a * (-a + 3.0) - 3.0) + 1.0);
+}
+
+float w1(float a) {
+ return (1.0 / 6.0) * (a * a * (3.0 * a - 6.0) + 4.0);
+}
+
+float w2(float a) {
+ return (1.0 / 6.0) * (a * (a * (-3.0 * a + 3.0) + 3.0) + 1.0);
+}
+
+float w3(float a) {
+ return (1.0 / 6.0) * (a * a * a);
+}
+
+// g0 and g1 are the two amplitude functions
+float g0(float a) {
+ return w0(a) + w1(a);
+}
+
+float g1(float a) {
+ return w2(a) + w3(a);
+}
+
+// h0 and h1 are the two offset functions
+float h0(float a) {
+ return -1.0 + w1(a) / (w0(a) + w1(a));
+}
+
+float h1(float a) {
+ return 1.0 + w3(a) / (w2(a) + w3(a));
+}
+
+vec4 textureArray_bicubic(sampler2DArray tex, vec3 uv, vec2 texture_size) {
+ vec2 texel_size = vec2(1.0) / texture_size;
+
+ uv.xy = uv.xy * texture_size + vec2(0.5);
+
+ vec2 iuv = floor(uv.xy);
+ vec2 fuv = fract(uv.xy);
+
+ float g0x = g0(fuv.x);
+ float g1x = g1(fuv.x);
+ float h0x = h0(fuv.x);
+ float h1x = h1(fuv.x);
+ float h0y = h0(fuv.y);
+ float h1y = h1(fuv.y);
+
+ vec2 p0 = (vec2(iuv.x + h0x, iuv.y + h0y) - vec2(0.5)) * texel_size;
+ vec2 p1 = (vec2(iuv.x + h1x, iuv.y + h0y) - vec2(0.5)) * texel_size;
+ vec2 p2 = (vec2(iuv.x + h0x, iuv.y + h1y) - vec2(0.5)) * texel_size;
+ vec2 p3 = (vec2(iuv.x + h1x, iuv.y + h1y) - vec2(0.5)) * texel_size;
+
+ return (g0(fuv.y) * (g0x * texture(tex, vec3(p0, uv.z)) + g1x * texture(tex, vec3(p1, uv.z)))) +
+ (g1(fuv.y) * (g0x * texture(tex, vec3(p2, uv.z)) + g1x * texture(tex, vec3(p3, uv.z))));
+}
+#endif //LIGHTMAP_BICUBIC_FILTER
+
void main() {
//lay out everything, whatever is unused is optimized away anyway
vec3 vertex = vertex_interp;
@@ -1519,6 +1592,13 @@ void main() {
vec3 light_vertex = vertex;
#endif //LIGHT_VERTEX_USED
+ highp mat3 model_normal_matrix;
+ if (bool(model_flags & uint(FLAGS_NON_UNIFORM_SCALE))) {
+ model_normal_matrix = transpose(inverse(mat3(model_matrix)));
+ } else {
+ model_normal_matrix = mat3(model_matrix);
+ }
+
{
#CODE : FRAGMENT
}
@@ -1607,6 +1687,7 @@ void main() {
#ifdef BASE_PASS
/////////////////////// LIGHTING //////////////////////////////
+#ifndef AMBIENT_LIGHT_DISABLED
// IBL precalculations
float ndotv = clamp(dot(normal, view), 0.0, 1.0);
vec3 F = f0 + (max(vec3(1.0 - roughness), f0) - f0) * pow(1.0 - ndotv, 5.0);
@@ -1719,43 +1800,45 @@ void main() {
#ifdef USE_SH_LIGHTMAP
uvw.z *= 4.0; // SH textures use 4 times more data.
+
+#ifdef LIGHTMAP_BICUBIC_FILTER
+ vec3 lm_light_l0 = textureArray_bicubic(lightmap_textures, uvw + vec3(0.0, 0.0, 0.0), lightmap_texture_size).rgb;
+ vec3 lm_light_l1n1 = textureArray_bicubic(lightmap_textures, uvw + vec3(0.0, 0.0, 1.0), lightmap_texture_size).rgb;
+ vec3 lm_light_l1_0 = textureArray_bicubic(lightmap_textures, uvw + vec3(0.0, 0.0, 2.0), lightmap_texture_size).rgb;
+ vec3 lm_light_l1p1 = textureArray_bicubic(lightmap_textures, uvw + vec3(0.0, 0.0, 3.0), lightmap_texture_size).rgb;
+#else
vec3 lm_light_l0 = textureLod(lightmap_textures, uvw + vec3(0.0, 0.0, 0.0), 0.0).rgb;
vec3 lm_light_l1n1 = textureLod(lightmap_textures, uvw + vec3(0.0, 0.0, 1.0), 0.0).rgb;
vec3 lm_light_l1_0 = textureLod(lightmap_textures, uvw + vec3(0.0, 0.0, 2.0), 0.0).rgb;
vec3 lm_light_l1p1 = textureLod(lightmap_textures, uvw + vec3(0.0, 0.0, 3.0), 0.0).rgb;
+#endif
vec3 n = normalize(lightmap_normal_xform * normal);
- ambient_light += lm_light_l0 * 0.282095f;
- ambient_light += lm_light_l1n1 * 0.32573 * n.y * lightmap_exposure_normalization;
- ambient_light += lm_light_l1_0 * 0.32573 * n.z * lightmap_exposure_normalization;
- ambient_light += lm_light_l1p1 * 0.32573 * n.x * lightmap_exposure_normalization;
- if (metallic > 0.01) { // Since the more direct bounced light is lost, we can kind of fake it with this trick.
- vec3 r = reflect(normalize(-vertex), normal);
- specular_light += lm_light_l1n1 * 0.32573 * r.y * lightmap_exposure_normalization;
- specular_light += lm_light_l1_0 * 0.32573 * r.z * lightmap_exposure_normalization;
- specular_light += lm_light_l1p1 * 0.32573 * r.x * lightmap_exposure_normalization;
- }
+ ambient_light += lm_light_l0 * lightmap_exposure_normalization;
+ ambient_light += lm_light_l1n1 * n.y * lightmap_exposure_normalization;
+ ambient_light += lm_light_l1_0 * n.z * lightmap_exposure_normalization;
+ ambient_light += lm_light_l1p1 * n.x * lightmap_exposure_normalization;
+#else
+#ifdef LIGHTMAP_BICUBIC_FILTER
+ ambient_light += textureArray_bicubic(lightmap_textures, uvw, lightmap_texture_size).rgb * lightmap_exposure_normalization;
#else
ambient_light += textureLod(lightmap_textures, uvw, 0.0).rgb * lightmap_exposure_normalization;
#endif
+#endif
}
#endif // USE_LIGHTMAP
#endif // USE_LIGHTMAP_CAPTURE
#endif // !DISABLE_LIGHTMAP
- {
-#if defined(AMBIENT_LIGHT_DISABLED)
- ambient_light = vec3(0.0, 0.0, 0.0);
-#else
- ambient_light *= albedo.rgb;
- ambient_light *= ao;
-#endif // AMBIENT_LIGHT_DISABLED
- }
+ ambient_light *= albedo.rgb;
+ ambient_light *= ao;
+
+#endif // !AMBIENT_LIGHT_DISABLED
// convert ao to direct light ao
ao = mix(1.0, ao, ao_light_affect);
-
+#ifndef AMBIENT_LIGHT_DISABLED
{
#if defined(DIFFUSE_TOON)
//simplify for toon, as
@@ -1777,6 +1860,8 @@ void main() {
#endif
}
+#endif // !AMBIENT_LIGHT_DISABLED
+
#ifndef DISABLE_LIGHT_DIRECTIONAL
for (uint i = uint(0); i < scene_data.directional_light_count; i++) {
#if defined(USE_LIGHTMAP) && !defined(DISABLE_LIGHTMAP)
@@ -1869,7 +1954,7 @@ void main() {
alpha = min(alpha, clamp(length(ambient_light), 0.0, 1.0));
#if defined(ALPHA_SCISSOR_USED)
- if (alpha < alpha_scissor) {
+ if (alpha < alpha_scissor_threshold) {
discard;
}
#endif // !ALPHA_SCISSOR_USED
@@ -1920,11 +2005,7 @@ void main() {
fog.xy = unpackHalf2x16(fog_rg);
fog.zw = unpackHalf2x16(fog_ba);
-#ifndef DISABLE_FOG
- if (scene_data.fog_enabled) {
- frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a);
- }
-#endif // !DISABLE_FOG
+ frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a);
#endif // !FOG_DISABLED
// Tonemap before writing as we are writing to an sRGB framebuffer
@@ -2131,11 +2212,7 @@ void main() {
fog.xy = unpackHalf2x16(fog_rg);
fog.zw = unpackHalf2x16(fog_ba);
-#ifndef DISABLE_FOG
- if (scene_data.fog_enabled) {
- additive_light_color *= (1.0 - fog.a);
- }
-#endif // !DISABLE_FOG
+ additive_light_color *= (1.0 - fog.a);
#endif // !FOG_DISABLED
// Tonemap before writing as we are writing to an sRGB framebuffer
diff --git a/drivers/gles3/shaders/skeleton.glsl b/drivers/gles3/shaders/skeleton.glsl
index aad856a5a2..66befbc3b2 100644
--- a/drivers/gles3/shaders/skeleton.glsl
+++ b/drivers/gles3/shaders/skeleton.glsl
@@ -59,7 +59,7 @@ layout(location = 10) in highp uvec4 in_bone_attrib;
layout(location = 11) in mediump vec4 in_weight_attrib;
#endif
-uniform mediump sampler2D skeleton_texture; // texunit:0
+uniform highp sampler2D skeleton_texture; // texunit:0
#endif
/* clang-format on */
diff --git a/drivers/gles3/shaders/sky.glsl b/drivers/gles3/shaders/sky.glsl
index 9de65ba960..f734e4b355 100644
--- a/drivers/gles3/shaders/sky.glsl
+++ b/drivers/gles3/shaders/sky.glsl
@@ -108,11 +108,11 @@ uniform float sky_energy_multiplier;
uniform float luminance_multiplier;
uniform float fog_aerial_perspective;
-uniform vec3 fog_light_color;
+uniform vec4 fog_light_color;
uniform float fog_sun_scatter;
uniform bool fog_enabled;
uniform float fog_density;
-uniform float z_far;
+uniform float fog_sky_affect;
uniform uint directional_light_count;
#ifdef USE_MULTIVIEW
@@ -135,6 +135,24 @@ vec3 interleaved_gradient_noise(vec2 pos) {
}
#endif
+#if !defined(DISABLE_FOG)
+vec4 fog_process(vec3 view, vec3 sky_color) {
+ vec3 fog_color = mix(fog_light_color.rgb, sky_color, fog_aerial_perspective);
+
+ if (fog_sun_scatter > 0.001) {
+ vec4 sun_scatter = vec4(0.0);
+ float sun_total = 0.0;
+ for (uint i = 0u; i < directional_light_count; i++) {
+ vec3 light_color = directional_lights.data[i].color_size.xyz * directional_lights.data[i].direction_energy.w;
+ float light_amount = pow(max(dot(view, directional_lights.data[i].direction_energy.xyz), 0.0), 8.0);
+ fog_color += light_color * light_amount * fog_sun_scatter;
+ }
+ }
+
+ return vec4(fog_color, 1.0);
+}
+#endif // !DISABLE_FOG
+
void main() {
vec3 cube_normal;
#ifdef USE_MULTIVIEW
@@ -203,6 +221,21 @@ void main() {
// Convert to Linear for tonemapping so color matches scene shader better
color = srgb_to_linear(color);
+
+#if !defined(DISABLE_FOG) && !defined(USE_CUBEMAP_PASS)
+
+ // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky.
+ if (fog_enabled) {
+ vec4 fog = fog_process(cube_normal, color.rgb);
+ color.rgb = mix(color.rgb, fog.rgb, fog.a * fog_sky_affect);
+ }
+
+ if (custom_fog.a > 0.0) {
+ color.rgb = mix(color.rgb, custom_fog.rgb, custom_fog.a);
+ }
+
+#endif // !DISABLE_FOG && !USE_CUBEMAP_PASS
+
color *= exposure;
#ifdef APPLY_TONEMAPPING
color = apply_tonemapping(color, white);
diff --git a/drivers/gles3/shaders/stdlib_inc.glsl b/drivers/gles3/shaders/stdlib_inc.glsl
index 029084c34c..f88c218506 100644
--- a/drivers/gles3/shaders/stdlib_inc.glsl
+++ b/drivers/gles3/shaders/stdlib_inc.glsl
@@ -9,19 +9,17 @@
// Floating point pack/unpack functions are part of the GLSL ES 300 specification used by web and mobile.
uint float2half(uint f) {
- uint e = f & uint(0x7f800000);
- if (e <= uint(0x38000000)) {
- return uint(0);
- } else {
- return ((f >> uint(16)) & uint(0x8000)) |
- (((e - uint(0x38000000)) >> uint(13)) & uint(0x7c00)) |
- ((f >> uint(13)) & uint(0x03ff));
- }
+ uint b = f + uint(0x00001000);
+ uint e = (b & uint(0x7F800000)) >> 23;
+ uint m = b & uint(0x007FFFFF);
+ return (b & uint(0x80000000)) >> uint(16) | uint(e > uint(112)) * ((((e - uint(112)) << uint(10)) & uint(0x7C00)) | m >> uint(13)) | (uint(e < uint(113)) & uint(e > uint(101))) * ((((uint(0x007FF000) + m) >> (uint(125) - e)) + uint(1)) >> uint(1)) | uint(e > uint(143)) * uint(0x7FFF);
}
uint half2float(uint h) {
- uint h_e = h & uint(0x7c00);
- return ((h & uint(0x8000)) << uint(16)) | uint((h_e >> uint(10)) != uint(0)) * (((h_e + uint(0x1c000)) << uint(13)) | ((h & uint(0x03ff)) << uint(13)));
+ uint e = (h & uint(0x7C00)) >> uint(10);
+ uint m = (h & uint(0x03FF)) << uint(13);
+ uint v = m >> uint(23);
+ return (h & uint(0x8000)) << uint(16) | uint(e != uint(0)) * ((e + uint(112)) << uint(23) | m) | (uint(e == uint(0)) & uint(m != uint(0))) * ((v - uint(37)) << uint(23) | ((m << (uint(150) - v)) & uint(0x007FE000)));
}
uint godot_packHalf2x16(vec2 v) {
diff --git a/drivers/gles3/storage/config.cpp b/drivers/gles3/storage/config.cpp
index 1a14902c7c..2b3c19dbb8 100644
--- a/drivers/gles3/storage/config.cpp
+++ b/drivers/gles3/storage/config.cpp
@@ -35,6 +35,10 @@
#include "../rasterizer_gles3.h"
#include "texture_storage.h"
+#ifdef WEB_ENABLED
+#include <emscripten/html5_webgl.h>
+#endif
+
using namespace GLES3;
#define _GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT 0x84FF
@@ -44,6 +48,23 @@ Config *Config::singleton = nullptr;
Config::Config() {
singleton = this;
+#ifdef WEB_ENABLED
+ // Starting with Emscripten 3.1.51, glGetStringi(GL_EXTENSIONS, i) will only ever return
+ // a fixed list of extensions, regardless of what additional extensions are enabled. This
+ // isn't very useful for us in determining which extensions we can rely on here. So, instead
+ // we use emscripten_webgl_get_supported_extensions() to get all supported extensions, which
+ // is what Emscripten 3.1.50 and earlier do.
+ {
+ char *extension_array_string = emscripten_webgl_get_supported_extensions();
+ PackedStringArray extension_array = String((const char *)extension_array_string).split(" ");
+ extensions.reserve(extension_array.size() * 2);
+ for (const String &s : extension_array) {
+ extensions.insert(s);
+ extensions.insert("GL_" + s);
+ }
+ free(extension_array_string);
+ }
+#else
{
GLint max_extensions = 0;
glGetIntegerv(GL_NUM_EXTENSIONS, &max_extensions);
@@ -55,6 +76,7 @@ Config::Config() {
extensions.insert((const char *)s);
}
}
+#endif
bptc_supported = extensions.has("GL_ARB_texture_compression_bptc") || extensions.has("EXT_texture_compression_bptc");
astc_supported = extensions.has("GL_KHR_texture_compression_astc") || extensions.has("GL_OES_texture_compression_astc") || extensions.has("GL_KHR_texture_compression_astc_ldr") || extensions.has("GL_KHR_texture_compression_astc_hdr");
@@ -196,6 +218,8 @@ Config::Config() {
//https://github.com/godotengine/godot/issues/92662#issuecomment-2161199477
//disable_particles_workaround = false;
}
+ } else if (rendering_device_name == "PowerVR Rogue GE8320") {
+ disable_transform_feedback_shader_cache = true;
}
}
diff --git a/drivers/gles3/storage/config.h b/drivers/gles3/storage/config.h
index 0c9f9bc275..ff72fc5b58 100644
--- a/drivers/gles3/storage/config.h
+++ b/drivers/gles3/storage/config.h
@@ -96,6 +96,9 @@ public:
bool disable_particles_workaround = false; // set to 'true' to disable 'GPUParticles'
bool flip_xy_workaround = false;
+ // PowerVR GE 8320 workaround
+ bool disable_transform_feedback_shader_cache = false;
+
#ifdef ANDROID_ENABLED
PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC eglFramebufferTextureMultiviewOVR = nullptr;
PFNGLTEXSTORAGE3DMULTISAMPLEPROC eglTexStorage3DMultisample = nullptr;
diff --git a/drivers/gles3/storage/light_storage.cpp b/drivers/gles3/storage/light_storage.cpp
index f9547502f4..aab1aadf02 100644
--- a/drivers/gles3/storage/light_storage.cpp
+++ b/drivers/gles3/storage/light_storage.cpp
@@ -1046,6 +1046,9 @@ void LightStorage::lightmap_set_textures(RID p_lightmap, RID p_light, bool p_use
lightmap->light_texture = p_light;
lightmap->uses_spherical_harmonics = p_uses_spherical_haromics;
+ Vector3i light_texture_size = GLES3::TextureStorage::get_singleton()->texture_get_size(lightmap->light_texture);
+ lightmap->light_texture_size = Vector2i(light_texture_size.x, light_texture_size.y);
+
GLuint tex = GLES3::TextureStorage::get_singleton()->texture_get_texid(lightmap->light_texture);
glBindTexture(GL_TEXTURE_2D_ARRAY, tex);
glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
diff --git a/drivers/gles3/storage/light_storage.h b/drivers/gles3/storage/light_storage.h
index b6e64c9492..ed00dd235f 100644
--- a/drivers/gles3/storage/light_storage.h
+++ b/drivers/gles3/storage/light_storage.h
@@ -180,6 +180,7 @@ struct Lightmap {
bool interior = false;
AABB bounds = AABB(Vector3(), Vector3(1, 1, 1));
float baked_exposure = 1.0;
+ Vector2i light_texture_size;
int32_t array_index = -1; //unassigned
PackedVector3Array points;
PackedColorArray point_sh;
@@ -202,7 +203,7 @@ struct LightmapInstance {
class LightStorage : public RendererLightStorage {
public:
- enum ShadowAtlastQuadrant {
+ enum ShadowAtlastQuadrant : uint32_t {
QUADRANT_SHIFT = 27,
OMNI_LIGHT_FLAG = 1 << 26,
SHADOW_INDEX_MASK = OMNI_LIGHT_FLAG - 1,
diff --git a/drivers/gles3/storage/material_storage.cpp b/drivers/gles3/storage/material_storage.cpp
index bacf607c66..a37eba3b15 100644
--- a/drivers/gles3/storage/material_storage.cpp
+++ b/drivers/gles3/storage/material_storage.cpp
@@ -586,11 +586,7 @@ void ShaderData::get_shader_uniform_list(List<PropertyInfo> *p_param_list) const
if (E.value.scope != ShaderLanguage::ShaderNode::Uniform::SCOPE_LOCAL) {
continue;
}
- if (E.value.texture_order >= 0) {
- filtered_uniforms.push_back(Pair<StringName, int>(E.key, E.value.texture_order + 100000));
- } else {
- filtered_uniforms.push_back(Pair<StringName, int>(E.key, E.value.order));
- }
+ filtered_uniforms.push_back(Pair<StringName, int>(E.key, E.value.prop_order));
}
int uniform_count = filtered_uniforms.size();
sorter.sort(filtered_uniforms.ptr(), uniform_count);
@@ -640,7 +636,7 @@ bool ShaderData::is_parameter_texture(const StringName &p_param) const {
return false;
}
- return uniforms[p_param].texture_order >= 0;
+ return uniforms[p_param].is_texture();
}
///////////////////////////////////////////////////////////////////////////
@@ -719,7 +715,7 @@ void MaterialData::update_uniform_buffer(const HashMap<StringName, ShaderLanguag
bool uses_global_buffer = false;
for (const KeyValue<StringName, ShaderLanguage::ShaderNode::Uniform> &E : p_uniforms) {
- if (E.value.order < 0) {
+ if (E.value.is_texture()) {
continue; // texture, does not go here
}
@@ -874,7 +870,8 @@ void MaterialData::update_textures(const HashMap<StringName, Variant> &p_paramet
if (V->value.is_array()) {
Array array = (Array)V->value;
if (uniform_array_size > 0) {
- for (int j = 0; j < array.size(); j++) {
+ int size = MIN(uniform_array_size, array.size());
+ for (int j = 0; j < size; j++) {
textures.push_back(array[j]);
}
} else {
@@ -1111,15 +1108,15 @@ MaterialStorage::MaterialStorage() {
global_shader_uniforms.buffer_size = MAX(16, (int)GLOBAL_GET("rendering/limits/global_shader_variables/buffer_size"));
if (global_shader_uniforms.buffer_size * sizeof(GlobalShaderUniforms::Value) > uint32_t(Config::get_singleton()->max_uniform_buffer_size)) {
+ // Limit to the maximum supported UBO size.
global_shader_uniforms.buffer_size = uint32_t(Config::get_singleton()->max_uniform_buffer_size) / sizeof(GlobalShaderUniforms::Value);
- WARN_PRINT("Project setting \"rendering/limits/global_shader_variables/buffer_size\" exceeds maximum uniform buffer size of: " + itos(Config::get_singleton()->max_uniform_buffer_size / sizeof(GlobalShaderUniforms::Value)) + ". Falling back on maximum buffer size.");
}
global_shader_uniforms.buffer_values = memnew_arr(GlobalShaderUniforms::Value, global_shader_uniforms.buffer_size);
memset(global_shader_uniforms.buffer_values, 0, sizeof(GlobalShaderUniforms::Value) * global_shader_uniforms.buffer_size);
global_shader_uniforms.buffer_usage = memnew_arr(GlobalShaderUniforms::ValueUsage, global_shader_uniforms.buffer_size);
- global_shader_uniforms.buffer_dirty_regions = memnew_arr(bool, global_shader_uniforms.buffer_size / GlobalShaderUniforms::BUFFER_DIRTY_REGION_SIZE);
- memset(global_shader_uniforms.buffer_dirty_regions, 0, sizeof(bool) * global_shader_uniforms.buffer_size / GlobalShaderUniforms::BUFFER_DIRTY_REGION_SIZE);
+ global_shader_uniforms.buffer_dirty_regions = memnew_arr(bool, 1 + (global_shader_uniforms.buffer_size / GlobalShaderUniforms::BUFFER_DIRTY_REGION_SIZE));
+ memset(global_shader_uniforms.buffer_dirty_regions, 0, sizeof(bool) * (1 + (global_shader_uniforms.buffer_size / GlobalShaderUniforms::BUFFER_DIRTY_REGION_SIZE)));
glGenBuffers(1, &global_shader_uniforms.buffer);
glBindBuffer(GL_UNIFORM_BUFFER, global_shader_uniforms.buffer);
glBufferData(GL_UNIFORM_BUFFER, sizeof(GlobalShaderUniforms::Value) * global_shader_uniforms.buffer_size, nullptr, GL_DYNAMIC_DRAW);
@@ -1276,11 +1273,12 @@ MaterialStorage::MaterialStorage() {
actions.renames["CUSTOM2"] = "custom2_attrib";
actions.renames["CUSTOM3"] = "custom3_attrib";
actions.renames["OUTPUT_IS_SRGB"] = "SHADER_IS_SRGB";
+ actions.renames["CLIP_SPACE_FAR"] = "SHADER_SPACE_FAR";
actions.renames["LIGHT_VERTEX"] = "light_vertex";
actions.renames["NODE_POSITION_WORLD"] = "model_matrix[3].xyz";
actions.renames["CAMERA_POSITION_WORLD"] = "scene_data.inv_view_matrix[3].xyz";
- actions.renames["CAMERA_DIRECTION_WORLD"] = "scene_data.view_matrix[3].xyz";
+ actions.renames["CAMERA_DIRECTION_WORLD"] = "scene_data.inv_view_matrix[2].xyz";
actions.renames["CAMERA_VISIBLE_LAYERS"] = "scene_data.camera_visible_layers";
actions.renames["NODE_POSITION_VIEW"] = "(scene_data.view_matrix * model_matrix)[3].xyz";
@@ -1788,7 +1786,7 @@ void MaterialStorage::global_shader_parameter_add(const StringName &p_name, RS::
//is vector, allocate in buffer and update index
gv.buffer_index = _global_shader_uniform_allocate(gv.buffer_elements);
- ERR_FAIL_COND_MSG(gv.buffer_index < 0, vformat("Failed allocating global variable '%s' out of buffer memory. Consider increasing it in the Project Settings.", String(p_name)));
+ ERR_FAIL_COND_MSG(gv.buffer_index < 0, vformat("Failed allocating global variable '%s' out of buffer memory. Consider increasing rendering/limits/global_shader_variables/buffer_size in the Project Settings. Maximum items supported by this hardware is: %d.", String(p_name), Config::get_singleton()->max_uniform_buffer_size / sizeof(GlobalShaderUniforms::Value)));
global_shader_uniforms.buffer_usage[gv.buffer_index].elements = gv.buffer_elements;
_global_shader_uniform_store_in_buffer(gv.buffer_index, gv.type, gv.value);
_global_shader_uniform_mark_buffer_dirty(gv.buffer_index, gv.buffer_elements);
@@ -1999,7 +1997,7 @@ int32_t MaterialStorage::global_shader_parameters_instance_allocate(RID p_instan
ERR_FAIL_COND_V(global_shader_uniforms.instance_buffer_pos.has(p_instance), -1);
int32_t pos = _global_shader_uniform_allocate(ShaderLanguage::MAX_INSTANCE_UNIFORM_INDICES);
global_shader_uniforms.instance_buffer_pos[p_instance] = pos; //save anyway
- ERR_FAIL_COND_V_MSG(pos < 0, -1, "Too many instances using shader instance variables. Increase buffer size in Project Settings.");
+ ERR_FAIL_COND_V_MSG(pos < 0, -1, vformat("Too many instances using shader instance variables. Consider increasing rendering/limits/global_shader_variables/buffer_size in the Project Settings. Maximum items supported by this hardware is: %d.", Config::get_singleton()->max_uniform_buffer_size / sizeof(GlobalShaderUniforms::Value)));
global_shader_uniforms.buffer_usage[pos].elements = ShaderLanguage::MAX_INSTANCE_UNIFORM_INDICES;
return pos;
}
@@ -2079,7 +2077,7 @@ void MaterialStorage::global_shader_parameters_instance_update(RID p_instance, i
void MaterialStorage::_update_global_shader_uniforms() {
MaterialStorage *material_storage = MaterialStorage::get_singleton();
if (global_shader_uniforms.buffer_dirty_region_count > 0) {
- uint32_t total_regions = global_shader_uniforms.buffer_size / GlobalShaderUniforms::BUFFER_DIRTY_REGION_SIZE;
+ uint32_t total_regions = 1 + (global_shader_uniforms.buffer_size / GlobalShaderUniforms::BUFFER_DIRTY_REGION_SIZE);
if (total_regions / global_shader_uniforms.buffer_dirty_region_count <= 4) {
// 25% of regions dirty, just update all buffer
glBindBuffer(GL_UNIFORM_BUFFER, global_shader_uniforms.buffer);
diff --git a/drivers/gles3/storage/mesh_storage.cpp b/drivers/gles3/storage/mesh_storage.cpp
index d8a5b960b8..b55a2e0a8a 100644
--- a/drivers/gles3/storage/mesh_storage.cpp
+++ b/drivers/gles3/storage/mesh_storage.cpp
@@ -301,7 +301,7 @@ void MeshStorage::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface)
Vector<uint8_t> ir = new_surface.index_data;
wr = wf_indices.ptrw();
- if (new_surface.vertex_count < (1 << 16)) {
+ if (new_surface.vertex_count <= 65536) {
// Read 16 bit indices.
const uint16_t *src_idx = (const uint16_t *)ir.ptr();
for (uint32_t i = 0; i + 5 < wf_index_count; i += 6) {
@@ -1432,15 +1432,17 @@ void MeshStorage::update_mesh_instances() {
/* MULTIMESH API */
-RID MeshStorage::multimesh_allocate() {
+RID MeshStorage::_multimesh_allocate() {
return multimesh_owner.allocate_rid();
}
-void MeshStorage::multimesh_initialize(RID p_rid) {
+void MeshStorage::_multimesh_initialize(RID p_rid) {
multimesh_owner.initialize_rid(p_rid, MultiMesh());
}
-void MeshStorage::multimesh_free(RID p_rid) {
+void MeshStorage::_multimesh_free(RID p_rid) {
+ // Remove from interpolator.
+ _interpolation_data.notify_free_multimesh(p_rid);
_update_dirty_multimeshes();
multimesh_allocate_data(p_rid, 0, RS::MULTIMESH_TRANSFORM_2D);
MultiMesh *multimesh = multimesh_owner.get_or_null(p_rid);
@@ -1448,7 +1450,7 @@ void MeshStorage::multimesh_free(RID p_rid) {
multimesh_owner.free(p_rid);
}
-void MeshStorage::multimesh_allocate_data(RID p_multimesh, int p_instances, RS::MultimeshTransformFormat p_transform_format, bool p_use_colors, bool p_use_custom_data) {
+void MeshStorage::_multimesh_allocate_data(RID p_multimesh, int p_instances, RS::MultimeshTransformFormat p_transform_format, bool p_use_colors, bool p_use_custom_data) {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL(multimesh);
@@ -1495,13 +1497,13 @@ void MeshStorage::multimesh_allocate_data(RID p_multimesh, int p_instances, RS::
multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_MULTIMESH);
}
-int MeshStorage::multimesh_get_instance_count(RID p_multimesh) const {
+int MeshStorage::_multimesh_get_instance_count(RID p_multimesh) const {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL_V(multimesh, 0);
return multimesh->instances;
}
-void MeshStorage::multimesh_set_mesh(RID p_multimesh, RID p_mesh) {
+void MeshStorage::_multimesh_set_mesh(RID p_multimesh, RID p_mesh) {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL(multimesh);
if (multimesh->mesh == p_mesh || p_mesh.is_null()) {
@@ -1651,7 +1653,7 @@ void MeshStorage::_multimesh_re_create_aabb(MultiMesh *multimesh, const float *p
multimesh->aabb = aabb;
}
-void MeshStorage::multimesh_instance_set_transform(RID p_multimesh, int p_index, const Transform3D &p_transform) {
+void MeshStorage::_multimesh_instance_set_transform(RID p_multimesh, int p_index, const Transform3D &p_transform) {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL(multimesh);
ERR_FAIL_INDEX(p_index, multimesh->instances);
@@ -1681,7 +1683,7 @@ void MeshStorage::multimesh_instance_set_transform(RID p_multimesh, int p_index,
_multimesh_mark_dirty(multimesh, p_index, true);
}
-void MeshStorage::multimesh_instance_set_transform_2d(RID p_multimesh, int p_index, const Transform2D &p_transform) {
+void MeshStorage::_multimesh_instance_set_transform_2d(RID p_multimesh, int p_index, const Transform2D &p_transform) {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL(multimesh);
ERR_FAIL_INDEX(p_index, multimesh->instances);
@@ -1707,7 +1709,7 @@ void MeshStorage::multimesh_instance_set_transform_2d(RID p_multimesh, int p_ind
_multimesh_mark_dirty(multimesh, p_index, true);
}
-void MeshStorage::multimesh_instance_set_color(RID p_multimesh, int p_index, const Color &p_color) {
+void MeshStorage::_multimesh_instance_set_color(RID p_multimesh, int p_index, const Color &p_color) {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL(multimesh);
ERR_FAIL_INDEX(p_index, multimesh->instances);
@@ -1727,7 +1729,7 @@ void MeshStorage::multimesh_instance_set_color(RID p_multimesh, int p_index, con
_multimesh_mark_dirty(multimesh, p_index, false);
}
-void MeshStorage::multimesh_instance_set_custom_data(RID p_multimesh, int p_index, const Color &p_color) {
+void MeshStorage::_multimesh_instance_set_custom_data(RID p_multimesh, int p_index, const Color &p_color) {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL(multimesh);
ERR_FAIL_INDEX(p_index, multimesh->instances);
@@ -1746,27 +1748,27 @@ void MeshStorage::multimesh_instance_set_custom_data(RID p_multimesh, int p_inde
_multimesh_mark_dirty(multimesh, p_index, false);
}
-RID MeshStorage::multimesh_get_mesh(RID p_multimesh) const {
+RID MeshStorage::_multimesh_get_mesh(RID p_multimesh) const {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL_V(multimesh, RID());
return multimesh->mesh;
}
-void MeshStorage::multimesh_set_custom_aabb(RID p_multimesh, const AABB &p_aabb) {
+void MeshStorage::_multimesh_set_custom_aabb(RID p_multimesh, const AABB &p_aabb) {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL(multimesh);
multimesh->custom_aabb = p_aabb;
multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB);
}
-AABB MeshStorage::multimesh_get_custom_aabb(RID p_multimesh) const {
+AABB MeshStorage::_multimesh_get_custom_aabb(RID p_multimesh) const {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL_V(multimesh, AABB());
return multimesh->custom_aabb;
}
-AABB MeshStorage::multimesh_get_aabb(RID p_multimesh) const {
+AABB MeshStorage::_multimesh_get_aabb(RID p_multimesh) const {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL_V(multimesh, AABB());
if (multimesh->custom_aabb != AABB()) {
@@ -1778,7 +1780,7 @@ AABB MeshStorage::multimesh_get_aabb(RID p_multimesh) const {
return multimesh->aabb;
}
-Transform3D MeshStorage::multimesh_instance_get_transform(RID p_multimesh, int p_index) const {
+Transform3D MeshStorage::_multimesh_instance_get_transform(RID p_multimesh, int p_index) const {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL_V(multimesh, Transform3D());
ERR_FAIL_INDEX_V(p_index, multimesh->instances, Transform3D());
@@ -1809,7 +1811,7 @@ Transform3D MeshStorage::multimesh_instance_get_transform(RID p_multimesh, int p
return t;
}
-Transform2D MeshStorage::multimesh_instance_get_transform_2d(RID p_multimesh, int p_index) const {
+Transform2D MeshStorage::_multimesh_instance_get_transform_2d(RID p_multimesh, int p_index) const {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL_V(multimesh, Transform2D());
ERR_FAIL_INDEX_V(p_index, multimesh->instances, Transform2D());
@@ -1834,7 +1836,7 @@ Transform2D MeshStorage::multimesh_instance_get_transform_2d(RID p_multimesh, in
return t;
}
-Color MeshStorage::multimesh_instance_get_color(RID p_multimesh, int p_index) const {
+Color MeshStorage::_multimesh_instance_get_color(RID p_multimesh, int p_index) const {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL_V(multimesh, Color());
ERR_FAIL_INDEX_V(p_index, multimesh->instances, Color());
@@ -1858,7 +1860,7 @@ Color MeshStorage::multimesh_instance_get_color(RID p_multimesh, int p_index) co
return c;
}
-Color MeshStorage::multimesh_instance_get_custom_data(RID p_multimesh, int p_index) const {
+Color MeshStorage::_multimesh_instance_get_custom_data(RID p_multimesh, int p_index) const {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL_V(multimesh, Color());
ERR_FAIL_INDEX_V(p_index, multimesh->instances, Color());
@@ -1882,7 +1884,7 @@ Color MeshStorage::multimesh_instance_get_custom_data(RID p_multimesh, int p_ind
return c;
}
-void MeshStorage::multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_buffer) {
+void MeshStorage::_multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_buffer) {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL(multimesh);
@@ -1971,7 +1973,7 @@ void MeshStorage::multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_b
}
}
-Vector<float> MeshStorage::multimesh_get_buffer(RID p_multimesh) const {
+Vector<float> MeshStorage::_multimesh_get_buffer(RID p_multimesh) const {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL_V(multimesh, Vector<float>());
Vector<float> ret;
@@ -2043,7 +2045,7 @@ Vector<float> MeshStorage::multimesh_get_buffer(RID p_multimesh) const {
}
}
-void MeshStorage::multimesh_set_visible_instances(RID p_multimesh, int p_visible) {
+void MeshStorage::_multimesh_set_visible_instances(RID p_multimesh, int p_visible) {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL(multimesh);
ERR_FAIL_COND(p_visible < -1 || p_visible > multimesh->instances);
@@ -2065,12 +2067,19 @@ void MeshStorage::multimesh_set_visible_instances(RID p_multimesh, int p_visible
multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_MULTIMESH_VISIBLE_INSTANCES);
}
-int MeshStorage::multimesh_get_visible_instances(RID p_multimesh) const {
+int MeshStorage::_multimesh_get_visible_instances(RID p_multimesh) const {
MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
ERR_FAIL_NULL_V(multimesh, 0);
return multimesh->visible_instances;
}
+MeshStorage::MultiMeshInterpolator *MeshStorage::_multimesh_get_interpolator(RID p_multimesh) const {
+ MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
+ ERR_FAIL_NULL_V_MSG(multimesh, nullptr, "Multimesh not found: " + itos(p_multimesh.get_id()));
+
+ return &multimesh->interpolator;
+}
+
void MeshStorage::_update_dirty_multimeshes() {
while (multimesh_dirty_list) {
MultiMesh *multimesh = multimesh_dirty_list;
diff --git a/drivers/gles3/storage/mesh_storage.h b/drivers/gles3/storage/mesh_storage.h
index d246e7725c..a2edbb9c48 100644
--- a/drivers/gles3/storage/mesh_storage.h
+++ b/drivers/gles3/storage/mesh_storage.h
@@ -205,6 +205,8 @@ struct MultiMesh {
bool dirty = false;
MultiMesh *dirty_list = nullptr;
+ RendererMeshStorage::MultiMeshInterpolator interpolator;
+
Dependency dependency;
};
@@ -493,32 +495,34 @@ public:
MultiMesh *get_multimesh(RID p_rid) { return multimesh_owner.get_or_null(p_rid); };
bool owns_multimesh(RID p_rid) { return multimesh_owner.owns(p_rid); };
- virtual RID multimesh_allocate() override;
- virtual void multimesh_initialize(RID p_rid) override;
- virtual void multimesh_free(RID p_rid) override;
- virtual void multimesh_allocate_data(RID p_multimesh, int p_instances, RS::MultimeshTransformFormat p_transform_format, bool p_use_colors = false, bool p_use_custom_data = false) override;
- virtual int multimesh_get_instance_count(RID p_multimesh) const override;
-
- virtual void multimesh_set_mesh(RID p_multimesh, RID p_mesh) override;
- virtual void multimesh_instance_set_transform(RID p_multimesh, int p_index, const Transform3D &p_transform) override;
- virtual void multimesh_instance_set_transform_2d(RID p_multimesh, int p_index, const Transform2D &p_transform) override;
- virtual void multimesh_instance_set_color(RID p_multimesh, int p_index, const Color &p_color) override;
- virtual void multimesh_instance_set_custom_data(RID p_multimesh, int p_index, const Color &p_color) override;
-
- virtual RID multimesh_get_mesh(RID p_multimesh) const override;
- virtual void multimesh_set_custom_aabb(RID p_multimesh, const AABB &p_aabb) override;
- virtual AABB multimesh_get_custom_aabb(RID p_multimesh) const override;
- virtual AABB multimesh_get_aabb(RID p_multimesh) const override;
-
- virtual Transform3D multimesh_instance_get_transform(RID p_multimesh, int p_index) const override;
- virtual Transform2D multimesh_instance_get_transform_2d(RID p_multimesh, int p_index) const override;
- virtual Color multimesh_instance_get_color(RID p_multimesh, int p_index) const override;
- virtual Color multimesh_instance_get_custom_data(RID p_multimesh, int p_index) const override;
- virtual void multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_buffer) override;
- virtual Vector<float> multimesh_get_buffer(RID p_multimesh) const override;
-
- virtual void multimesh_set_visible_instances(RID p_multimesh, int p_visible) override;
- virtual int multimesh_get_visible_instances(RID p_multimesh) const override;
+ virtual RID _multimesh_allocate() override;
+ virtual void _multimesh_initialize(RID p_rid) override;
+ virtual void _multimesh_free(RID p_rid) override;
+ virtual void _multimesh_allocate_data(RID p_multimesh, int p_instances, RS::MultimeshTransformFormat p_transform_format, bool p_use_colors = false, bool p_use_custom_data = false) override;
+ virtual int _multimesh_get_instance_count(RID p_multimesh) const override;
+
+ virtual void _multimesh_set_mesh(RID p_multimesh, RID p_mesh) override;
+ virtual void _multimesh_instance_set_transform(RID p_multimesh, int p_index, const Transform3D &p_transform) override;
+ virtual void _multimesh_instance_set_transform_2d(RID p_multimesh, int p_index, const Transform2D &p_transform) override;
+ virtual void _multimesh_instance_set_color(RID p_multimesh, int p_index, const Color &p_color) override;
+ virtual void _multimesh_instance_set_custom_data(RID p_multimesh, int p_index, const Color &p_color) override;
+
+ virtual RID _multimesh_get_mesh(RID p_multimesh) const override;
+ virtual void _multimesh_set_custom_aabb(RID p_multimesh, const AABB &p_aabb) override;
+ virtual AABB _multimesh_get_custom_aabb(RID p_multimesh) const override;
+ virtual AABB _multimesh_get_aabb(RID p_multimesh) const override;
+
+ virtual Transform3D _multimesh_instance_get_transform(RID p_multimesh, int p_index) const override;
+ virtual Transform2D _multimesh_instance_get_transform_2d(RID p_multimesh, int p_index) const override;
+ virtual Color _multimesh_instance_get_color(RID p_multimesh, int p_index) const override;
+ virtual Color _multimesh_instance_get_custom_data(RID p_multimesh, int p_index) const override;
+ virtual void _multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_buffer) override;
+ virtual Vector<float> _multimesh_get_buffer(RID p_multimesh) const override;
+
+ virtual void _multimesh_set_visible_instances(RID p_multimesh, int p_visible) override;
+ virtual int _multimesh_get_visible_instances(RID p_multimesh) const override;
+
+ virtual MultiMeshInterpolator *_multimesh_get_interpolator(RID p_multimesh) const override;
void _update_dirty_multimeshes();
diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.cpp b/drivers/gles3/storage/render_scene_buffers_gles3.cpp
index e4f1a01f68..c91547d2b1 100644
--- a/drivers/gles3/storage/render_scene_buffers_gles3.cpp
+++ b/drivers/gles3/storage/render_scene_buffers_gles3.cpp
@@ -194,7 +194,7 @@ void RenderSceneBuffersGLES3::_check_render_buffers() {
ERR_FAIL_COND(view_count == 0);
- bool use_internal_buffer = scaling_3d_mode != RS::VIEWPORT_SCALING_3D_MODE_OFF || needs_internal_buffers;
+ bool use_internal_buffer = scaling_3d_mode != RS::VIEWPORT_SCALING_3D_MODE_OFF || apply_color_adjustments_in_post;
uint32_t depth_format_size = 3;
bool use_multiview = view_count > 1;
@@ -558,8 +558,8 @@ void RenderSceneBuffersGLES3::_clear_back_buffers() {
}
}
-void RenderSceneBuffersGLES3::ensure_internal_buffers() {
- needs_internal_buffers = true;
+void RenderSceneBuffersGLES3::set_apply_color_adjustments_in_post(bool p_apply_in_post) {
+ apply_color_adjustments_in_post = p_apply_in_post;
}
void RenderSceneBuffersGLES3::check_glow_buffers() {
diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.h b/drivers/gles3/storage/render_scene_buffers_gles3.h
index 8273c18b8e..a7a676ad33 100644
--- a/drivers/gles3/storage/render_scene_buffers_gles3.h
+++ b/drivers/gles3/storage/render_scene_buffers_gles3.h
@@ -50,7 +50,7 @@ public:
//bool use_taa = false;
//bool use_debanding = false;
uint32_t view_count = 1;
- bool needs_internal_buffers = false;
+ bool apply_color_adjustments_in_post = false;
RID render_target;
@@ -106,12 +106,12 @@ public:
virtual void set_fsr_sharpness(float p_fsr_sharpness) override{};
virtual void set_texture_mipmap_bias(float p_texture_mipmap_bias) override{};
virtual void set_use_debanding(bool p_use_debanding) override{};
+ void set_apply_color_adjustments_in_post(bool p_apply_in_post);
void free_render_buffer_data();
void check_backbuffer(bool p_need_color, bool p_need_depth); // Check if we need to initialize our backbuffer.
void check_glow_buffers(); // Check if we need to initialize our glow buffers.
- void ensure_internal_buffers();
GLuint get_render_fbo();
GLuint get_msaa3d_fbo() {
diff --git a/drivers/gles3/storage/texture_storage.cpp b/drivers/gles3/storage/texture_storage.cpp
index 2dcf623995..36393dde86 100644
--- a/drivers/gles3/storage/texture_storage.cpp
+++ b/drivers/gles3/storage/texture_storage.cpp
@@ -1030,10 +1030,8 @@ Ref<Image> TextureStorage::texture_2d_get(RID p_texture) const {
if (texture->compressed) {
glPixelStorei(GL_PACK_ALIGNMENT, 4);
glGetCompressedTexImage(texture->target, i, &w[ofs]);
-
} else {
glPixelStorei(GL_PACK_ALIGNMENT, 1);
-
glGetTexImage(texture->target, i, texture->gl_format_cache, texture->gl_type_cache, &w[ofs]);
}
}
@@ -1391,8 +1389,22 @@ void TextureStorage::texture_debug_usage(List<RS::TextureInfo> *r_info) {
tinfo.format = t->format;
tinfo.width = t->alloc_width;
tinfo.height = t->alloc_height;
- tinfo.depth = t->depth;
tinfo.bytes = t->total_data_size;
+
+ switch (t->type) {
+ case Texture::TYPE_3D:
+ tinfo.depth = t->depth;
+ break;
+
+ case Texture::TYPE_LAYERED:
+ tinfo.depth = t->layers;
+ break;
+
+ default:
+ tinfo.depth = 0;
+ break;
+ }
+
r_info->push_back(tinfo);
}
}
@@ -1493,17 +1505,15 @@ void TextureStorage::_texture_set_data(RID p_texture, const Ref<Image> &p_image,
int tsize = 0;
for (int i = 0; i < mipmaps; i++) {
- int size, ofs;
+ int64_t size, ofs;
img->get_mipmap_offset_and_size(i, ofs, size);
if (compressed) {
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
if (texture->target == GL_TEXTURE_2D_ARRAY) {
if (p_initialize) {
- glCompressedTexImage3D(GL_TEXTURE_2D_ARRAY, i, internal_format, w, h, texture->layers, 0,
- size * texture->layers, &read[ofs]);
- } else {
- glCompressedTexSubImage3D(GL_TEXTURE_2D_ARRAY, i, 0, 0, p_layer, w, h, 1, internal_format, size, &read[ofs]);
+ glCompressedTexImage3D(GL_TEXTURE_2D_ARRAY, i, internal_format, w, h, texture->layers, 0, size * texture->layers, nullptr);
}
+ glCompressedTexSubImage3D(GL_TEXTURE_2D_ARRAY, i, 0, 0, p_layer, w, h, 1, internal_format, size, &read[ofs]);
} else {
glCompressedTexImage2D(blit_target, i, internal_format, w, h, 0, size, &read[ofs]);
}
@@ -1525,7 +1535,11 @@ void TextureStorage::_texture_set_data(RID p_texture, const Ref<Image> &p_image,
h = MAX(1, h >> 1);
}
- texture->total_data_size = tsize;
+ if (texture->target == GL_TEXTURE_CUBE_MAP || texture->target == GL_TEXTURE_2D_ARRAY) {
+ texture->total_data_size = tsize * texture->layers;
+ } else {
+ texture->total_data_size = tsize;
+ }
texture->stored_cube_sides |= (1 << p_layer);
@@ -1682,6 +1696,14 @@ uint32_t TextureStorage::texture_get_texid(RID p_texture) const {
return texture->tex_id;
}
+Vector3i TextureStorage::texture_get_size(RID p_texture) const {
+ Texture *texture = texture_owner.get_or_null(p_texture);
+
+ ERR_FAIL_NULL_V(texture, Vector3i(0, 0, 0));
+
+ return Vector3i(texture->width, texture->height, texture->depth);
+}
+
uint32_t TextureStorage::texture_get_width(RID p_texture) const {
Texture *texture = texture_owner.get_or_null(p_texture);
@@ -2123,7 +2145,7 @@ void TextureStorage::_update_render_target(RenderTarget *rt) {
texture->layers = 1;
}
texture->gl_format_cache = rt->color_format;
- texture->gl_type_cache = GL_UNSIGNED_BYTE;
+ texture->gl_type_cache = !rt->hdr ? GL_UNSIGNED_BYTE : GL_FLOAT; // to set HDR format size to 8 and keep 4 for LDR format
texture->gl_internal_format_cache = rt->color_internal_format;
texture->tex_id = rt->color;
texture->width = rt->size.x;
diff --git a/drivers/gles3/storage/texture_storage.h b/drivers/gles3/storage/texture_storage.h
index 8a03d72b9b..5569abcc73 100644
--- a/drivers/gles3/storage/texture_storage.h
+++ b/drivers/gles3/storage/texture_storage.h
@@ -169,7 +169,7 @@ struct Texture {
TYPE_3D
};
- Type type;
+ Type type = TYPE_2D;
RS::TextureLayeredType layered_type = RS::TEXTURE_LAYERED_2D_ARRAY;
GLenum target = GL_TEXTURE_2D;
@@ -553,6 +553,7 @@ public:
void texture_set_data(RID p_texture, const Ref<Image> &p_image, int p_layer = 0);
virtual Image::Format texture_get_format(RID p_texture) const override;
uint32_t texture_get_texid(RID p_texture) const;
+ Vector3i texture_get_size(RID p_texture) const;
uint32_t texture_get_width(RID p_texture) const;
uint32_t texture_get_height(RID p_texture) const;
uint32_t texture_get_depth(RID p_texture) const;
diff --git a/drivers/metal/README.md b/drivers/metal/README.md
new file mode 100644
index 0000000000..30cfa52360
--- /dev/null
+++ b/drivers/metal/README.md
@@ -0,0 +1,39 @@
+# Metal Rendering Device
+
+This document aims to describe the Metal rendering device implementation in Godot.
+
+## Future work / ideas
+
+* Use placement heaps
+* Explicit hazard tracking
+* [MetalFX] upscaling support?
+
+## Acknowledgments
+
+The Metal rendering device owes a lot to the work of the [MoltenVK] project, which is a Vulkan implementation on top of Metal.
+In accordance with the Apache 2.0 license, the following copyright notices have been included where applicable:
+
+```
+/**************************************************************************/
+/* */
+/* Portions of this code were derived from MoltenVK. */
+/* */
+/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
+/* (http://www.brenwill.com) */
+/* */
+/* Licensed under the Apache License, Version 2.0 (the "License"); */
+/* you may not use this file except in compliance with the License. */
+/* You may obtain a copy of the License at */
+/* */
+/* http://www.apache.org/licenses/LICENSE-2.0 */
+/* */
+/* Unless required by applicable law or agreed to in writing, software */
+/* distributed under the License is distributed on an "AS IS" BASIS, */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
+/* implied. See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/**************************************************************************/
+```
+
+[MoltenVK]: https://github.com/KhronosGroup/MoltenVK
+[MetalFX]: https://developer.apple.com/documentation/metalfx?language=objc
diff --git a/drivers/metal/SCsub b/drivers/metal/SCsub
new file mode 100644
index 0000000000..30129b7806
--- /dev/null
+++ b/drivers/metal/SCsub
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+Import("env")
+
+# All Metal-specific flag changes are made on a clone of the shared environment.
+env_metal = env.Clone()
+
+# Thirdparty source files
+
+thirdparty_obj = []
+
+thirdparty_dir = "#thirdparty/spirv-cross/"
+thirdparty_sources = [
+ "spirv_cfg.cpp",
+ "spirv_cross_util.cpp",
+ "spirv_cross.cpp",
+ "spirv_parser.cpp",
+ "spirv_msl.cpp",
+ "spirv_reflect.cpp",
+ "spirv_glsl.cpp",
+ "spirv_cross_parsed_ir.cpp",
+]
+thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
+
+env_metal.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "/include"])
+
+# Must enable exceptions for SPIRV-Cross; otherwise, it will abort the process on errors.
+if "-fno-exceptions" in env_metal["CXXFLAGS"]:
+ env_metal["CXXFLAGS"].remove("-fno-exceptions")
+env_metal.Append(CXXFLAGS=["-fexceptions"])
+
+# The thirdparty environment is cloned here, i.e. after exceptions were enabled but
+# before the C++20 switch below, so the SPIRV-Cross sources keep the inherited
+# language standard flag.
+env_thirdparty = env_metal.Clone()
+env_thirdparty.disable_warnings()
+env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
+env_metal.drivers_sources += thirdparty_obj
+
+# Enable C++20 for the Objective-C++ Metal code, which uses C++20 concepts.
+if "-std=gnu++17" in env_metal["CXXFLAGS"]:
+ env_metal["CXXFLAGS"].remove("-std=gnu++17")
+env_metal.Append(CXXFLAGS=["-std=c++20"])
+
+# Driver source files
+
+driver_obj = []
+
+# Every Objective-C++ file in this directory is built with the Metal environment.
+env_metal.add_source_files(driver_obj, "*.mm")
+env.drivers_sources += driver_obj
+
+# Needed to force rebuilding the driver files when the thirdparty library is updated.
+env.Depends(driver_obj, thirdparty_obj)
diff --git a/drivers/metal/metal_device_properties.h b/drivers/metal/metal_device_properties.h
new file mode 100644
index 0000000000..7467e8ceb4
--- /dev/null
+++ b/drivers/metal/metal_device_properties.h
@@ -0,0 +1,141 @@
+/**************************************************************************/
+/* metal_device_properties.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+/**************************************************************************/
+/* */
+/* Portions of this code were derived from MoltenVK. */
+/* */
+/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
+/* (http://www.brenwill.com) */
+/* */
+/* Licensed under the Apache License, Version 2.0 (the "License"); */
+/* you may not use this file except in compliance with the License. */
+/* You may obtain a copy of the License at */
+/* */
+/* http://www.apache.org/licenses/LICENSE-2.0 */
+/* */
+/* Unless required by applicable law or agreed to in writing, software */
+/* distributed under the License is distributed on an "AS IS" BASIS, */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
+/* implied. See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/**************************************************************************/
+
+#ifndef METAL_DEVICE_PROPERTIES_H
+#define METAL_DEVICE_PROPERTIES_H
+
+#import "servers/rendering/rendering_device.h"
+
+#import <Foundation/Foundation.h>
+#import <Metal/Metal.h>
+
+/** The buffer index to use for vertex content. */
+const static uint32_t VERT_CONTENT_BUFFER_INDEX = 0;
+/** The number of color attachment slots reserved by fixed-size tables such as ClearAttKey::pixel_formats. */
+const static uint32_t MAX_COLOR_ATTACHMENT_COUNT = 8;
+
+/**
+ * Bit flags for texture sample counts.
+ *
+ * Each flag's numeric value equals the sample count it names (SampleCount4 == 4),
+ * which lets a single flag be passed directly to -supportsTextureSampleCount: and
+ * lets several flags be OR-ed together into a mask of supported counts.
+ */
+typedef NS_OPTIONS(NSUInteger, SampleCount) {
+ SampleCount1 = (1UL << 0),
+ SampleCount2 = (1UL << 1),
+ SampleCount4 = (1UL << 2),
+ SampleCount8 = (1UL << 3),
+ SampleCount16 = (1UL << 4),
+ SampleCount32 = (1UL << 5),
+ SampleCount64 = (1UL << 6),
+};
+
+/** Capabilities of a Metal device, filled in by MetalDeviceProperties::init_features(). */
+struct API_AVAILABLE(macos(11.0), ios(14.0)) MetalFeatures {
+ uint32_t mslVersion; /**< MSL version encoded with SPIRV-Cross' CompilerMSL::Options::make_msl_version(major, minor). */
+ MTLGPUFamily highestFamily; /**< Highest MTLGPUFamilyApple* family the device reports; MTLGPUFamilyApple1 when none matched. */
+ MTLLanguageVersion mslVersionEnum; /**< languageVersion of a freshly created MTLCompileOptions. */
+ SampleCount supportedSampleCounts; /**< Mask of the SampleCount flags the device accepts. */
+ long hostMemoryPageSize; /**< Value of sysconf(_SC_PAGESIZE). */
+ bool layeredRendering; /**< True when the device supports MTLGPUFamilyApple5. */
+ bool multisampleLayeredRendering; /**< True when the device supports MTLGPUFamilyApple7. */
+ bool quadPermute; /**< If true, quadgroup permutation functions (vote, ballot, shuffle) are supported in shaders. */
+ bool simdPermute; /**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */
+ bool simdReduction; /**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */
+ bool tessellationShader; /**< If true, tessellation shaders are supported. */
+ bool imageCubeArray; /**< If true, image cube arrays are supported. */
+};
+
+/**
+ * Numeric limits of a Metal device, filled in by MetalDeviceProperties::init_limits().
+ *
+ * The scalar members have no default initializers, so they only hold meaningful
+ * values once init_limits() has assigned them.
+ */
+struct MetalLimits {
+ uint64_t maxImageArrayLayers;
+ uint64_t maxFramebufferHeight;
+ uint64_t maxFramebufferWidth;
+ uint64_t maxImageDimension1D;
+ uint64_t maxImageDimension2D;
+ uint64_t maxImageDimension3D;
+ uint64_t maxImageDimensionCube;
+ uint64_t maxViewportDimensionX;
+ uint64_t maxViewportDimensionY;
+ MTLSize maxThreadsPerThreadGroup; /**< Copied from MTLDevice.maxThreadsPerThreadgroup. */
+ MTLSize maxComputeWorkGroupCount;
+ uint64_t maxBoundDescriptorSets;
+ uint64_t maxColorAttachments;
+ uint64_t maxTexturesPerArgumentBuffer;
+ uint64_t maxSamplersPerArgumentBuffer;
+ uint64_t maxBuffersPerArgumentBuffer;
+ uint64_t maxBufferLength; /**< Copied from MTLDevice.maxBufferLength. */
+ uint64_t minUniformBufferOffsetAlignment; /**< Only assigned by init_limits() on iOS (non-Catalyst) and macOS builds. */
+ uint64_t maxVertexDescriptorLayoutStride;
+ uint16_t maxViewports;
+ uint32_t maxPerStageBufferCount; /**< The total number of per-stage Metal buffers available for shader uniform content and attributes. */
+ uint32_t maxPerStageTextureCount; /**< The total number of per-stage Metal textures available for shader uniform content. */
+ uint32_t maxPerStageSamplerCount; /**< The total number of per-stage Metal samplers available for shader uniform content. */
+ uint32_t maxVertexInputAttributes;
+ uint32_t maxVertexInputBindings;
+ uint32_t maxVertexInputBindingStride;
+ uint32_t maxDrawIndexedIndexValue;
+
+ uint32_t minSubgroupSize; /**< The minimum number of threads in a SIMD-group. */
+ uint32_t maxSubgroupSize; /**< The maximum number of threads in a SIMD-group. */
+ BitField<RDD::ShaderStage> subgroupSupportedShaderStages; /**< The shader stages flagged as supporting subgroup operations. */
+ BitField<RD::SubgroupOperations> subgroupSupportedOperations; /**< The subgroup operations supported by the device. */
+};
+
+/**
+ * Snapshot of the features and limits of one MTLDevice.
+ *
+ * Both tables are computed in the constructor from the device passed in and are
+ * exposed as public members.
+ */
+class API_AVAILABLE(macos(11.0), ios(14.0)) MetalDeviceProperties {
+private:
+ // Fill `features` / `limits` from the device; called by the constructor.
+ void init_features(id<MTLDevice> p_device);
+ void init_limits(id<MTLDevice> p_device);
+
+public:
+ MetalFeatures features;
+ MetalLimits limits;
+
+ // Returns the requested sample count when the device supports it, otherwise the
+ // next lower supported count, falling back to SampleCount1.
+ SampleCount find_nearest_supported_sample_count(RenderingDevice::TextureSamples p_samples) const;
+
+ MetalDeviceProperties(id<MTLDevice> p_device);
+ ~MetalDeviceProperties();
+
+private:
+ // Lookup table from RenderingDevice::TextureSamples to the matching SampleCount flag.
+ static const SampleCount sample_count[RenderingDevice::TextureSamples::TEXTURE_SAMPLES_MAX];
+};
+
+#endif // METAL_DEVICE_PROPERTIES_H
diff --git a/drivers/metal/metal_device_properties.mm b/drivers/metal/metal_device_properties.mm
new file mode 100644
index 0000000000..857fa8c66e
--- /dev/null
+++ b/drivers/metal/metal_device_properties.mm
@@ -0,0 +1,327 @@
+/**************************************************************************/
+/* metal_device_properties.mm */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+/**************************************************************************/
+/* */
+/* Portions of this code were derived from MoltenVK. */
+/* */
+/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
+/* (http://www.brenwill.com) */
+/* */
+/* Licensed under the Apache License, Version 2.0 (the "License"); */
+/* you may not use this file except in compliance with the License. */
+/* You may obtain a copy of the License at */
+/* */
+/* http://www.apache.org/licenses/LICENSE-2.0 */
+/* */
+/* Unless required by applicable law or agreed to in writing, software */
+/* distributed under the License is distributed on an "AS IS" BASIS, */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
+/* implied. See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/**************************************************************************/
+
+#import "metal_device_properties.h"
+
+#import <Metal/Metal.h>
+#import <spirv_cross.hpp>
+#import <spirv_msl.hpp>
+
+// Common scaling multipliers.
+#define KIBI (1024)
+#define MEBI (KIBI * KIBI)
+
+#if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 140000) || (TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED < 170000)
+#define MTLGPUFamilyApple9 (MTLGPUFamily)1009
+#endif
+
+// Pre-decrement for MTLGPUFamily that steps to the previous family value.
+// Stepping below MTLGPUFamilyApple1 wraps around to MTLGPUFamilyApple9, so the
+// result is never smaller than MTLGPUFamilyApple1; callers cannot use a
+// `>= MTLGPUFamilyApple1` comparison on the result as a loop exit condition.
+API_AVAILABLE(macos(11.0), ios(14.0))
+MTLGPUFamily &operator--(MTLGPUFamily &p_family) {
+	const int previous = static_cast<int>(p_family) - 1;
+	p_family = (previous < MTLGPUFamilyApple1) ? MTLGPUFamilyApple9 : static_cast<MTLGPUFamily>(previous);
+	return p_family;
+}
+
+/**
+ * Resets `features` and fills it from `p_device`: the highest supported Apple GPU
+ * family, the host page size, the mask of supported texture sample counts, the
+ * family-gated capability flags, and the default MSL version in both its Metal enum
+ * form and its SPIRV-Cross numeric form.
+ *
+ * `mslVersion` stays 0 when the default language version is not one of the cases
+ * handled below.
+ */
+void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
+	features = {};
+
+	// Probe GPU families from newest to oldest. The counter is a plain integer on
+	// purpose: operator--(MTLGPUFamily &) wraps back to MTLGPUFamilyApple9 once it
+	// passes MTLGPUFamilyApple1, so a `family >= MTLGPUFamilyApple1` loop driven by
+	// it never terminates on a device that supports none of the Apple families.
+	features.highestFamily = MTLGPUFamilyApple1;
+	for (NSInteger raw_family = MTLGPUFamilyApple9; raw_family >= MTLGPUFamilyApple1; raw_family--) {
+		const MTLGPUFamily family = static_cast<MTLGPUFamily>(raw_family);
+		if ([p_device supportsFamily:family]) {
+			features.highestFamily = family;
+			break;
+		}
+	}
+
+	features.hostMemoryPageSize = sysconf(_SC_PAGESIZE);
+
+	// Each SampleCount flag has the numeric value of the sample count it names, so
+	// it can be handed to the device query as-is.
+	for (SampleCount sc = SampleCount1; sc <= SampleCount64; sc <<= 1) {
+		if ([p_device supportsTextureSampleCount:sc]) {
+			features.supportedSampleCounts |= sc;
+		}
+	}
+
+	features.layeredRendering = [p_device supportsFamily:MTLGPUFamilyApple5];
+	features.multisampleLayeredRendering = [p_device supportsFamily:MTLGPUFamilyApple7];
+	features.tessellationShader = [p_device supportsFamily:MTLGPUFamilyApple3];
+	features.imageCubeArray = [p_device supportsFamily:MTLGPUFamilyApple3];
+	features.quadPermute = [p_device supportsFamily:MTLGPUFamilyApple4];
+	features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6];
+	features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7];
+
+	MTLCompileOptions *opts = [MTLCompileOptions new];
+	features.mslVersionEnum = opts.languageVersion; // By default, Metal uses the most recent language version.
+
+#define setMSLVersion(m_maj, m_min) \
+	features.mslVersion = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::make_msl_version(m_maj, m_min)
+
+	switch (features.mslVersionEnum) {
+#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 180000
+		case MTLLanguageVersion3_2:
+			setMSLVersion(3, 2);
+			break;
+#endif
+#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 140000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 170000
+		case MTLLanguageVersion3_1:
+			setMSLVersion(3, 1);
+			break;
+#endif
+		case MTLLanguageVersion3_0:
+			setMSLVersion(3, 0);
+			break;
+		case MTLLanguageVersion2_4:
+			setMSLVersion(2, 4);
+			break;
+		case MTLLanguageVersion2_3:
+			setMSLVersion(2, 3);
+			break;
+		case MTLLanguageVersion2_2:
+			setMSLVersion(2, 2);
+			break;
+		case MTLLanguageVersion2_1:
+			setMSLVersion(2, 1);
+			break;
+		case MTLLanguageVersion2_0:
+			setMSLVersion(2, 0);
+			break;
+		case MTLLanguageVersion1_2:
+			setMSLVersion(1, 2);
+			break;
+		case MTLLanguageVersion1_1:
+			setMSLVersion(1, 1);
+			break;
+#if TARGET_OS_IPHONE && !TARGET_OS_MACCATALYST
+		case MTLLanguageVersion1_0:
+			setMSLVersion(1, 0);
+			break;
+#endif
+	}
+
+	// The helper macro is only meaningful inside this function.
+#undef setMSLVersion
+}
+
+/**
+ * Resets `limits` and fills it for `p_device`.
+ *
+ * Most values are constants chosen by GPU family (see the "FST" comments, which
+ * refer to Apple's Metal Feature Set Tables); a few are read from the device
+ * itself. The subgroup limits depend on `features`, so init_features() must have
+ * run before this function.
+ */
+void MetalDeviceProperties::init_limits(id<MTLDevice> p_device) {
+	// Start from a zeroed table, mirroring init_features(). Without this, members
+	// that are only assigned on some targets (minUniformBufferOffsetAlignment)
+	// would hold indeterminate values elsewhere.
+	limits = {};
+
+	// FST: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
+
+	// FST: Maximum number of layers per 1D texture array, 2D texture array, or 3D texture.
+	limits.maxImageArrayLayers = 2048;
+	if ([p_device supportsFamily:MTLGPUFamilyApple3]) {
+		// FST: Maximum 2D texture width and height.
+		limits.maxFramebufferWidth = 16384;
+		limits.maxFramebufferHeight = 16384;
+		limits.maxViewportDimensionX = 16384;
+		limits.maxViewportDimensionY = 16384;
+		// FST: Maximum 1D texture width.
+		limits.maxImageDimension1D = 16384;
+		// FST: Maximum 2D texture width and height.
+		limits.maxImageDimension2D = 16384;
+		// FST: Maximum cube map texture width and height.
+		limits.maxImageDimensionCube = 16384;
+	} else {
+		// FST: Maximum 2D texture width and height.
+		limits.maxFramebufferWidth = 8192;
+		limits.maxFramebufferHeight = 8192;
+		limits.maxViewportDimensionX = 8192;
+		limits.maxViewportDimensionY = 8192;
+		// FST: Maximum 1D texture width.
+		limits.maxImageDimension1D = 8192;
+		// FST: Maximum 2D texture width and height.
+		limits.maxImageDimension2D = 8192;
+		// FST: Maximum cube map texture width and height.
+		limits.maxImageDimensionCube = 8192;
+	}
+	// FST: Maximum 3D texture width, height, and depth.
+	limits.maxImageDimension3D = 2048;
+
+	limits.maxThreadsPerThreadGroup = p_device.maxThreadsPerThreadgroup;
+	// No effective limits.
+	limits.maxComputeWorkGroupCount = { std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint32_t>::max() };
+	// https://github.com/KhronosGroup/MoltenVK/blob/568cc3acc0e2299931fdaecaaa1fc3ec5b4af281/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h#L85
+	limits.maxBoundDescriptorSets = SPIRV_CROSS_NAMESPACE::kMaxArgumentBuffers;
+	// FST: Maximum number of color render targets per render pass descriptor.
+	limits.maxColorAttachments = 8;
+
+	// Maximum number of textures the device can access, per stage, from an argument buffer.
+	if ([p_device supportsFamily:MTLGPUFamilyApple6]) {
+		limits.maxTexturesPerArgumentBuffer = 1'000'000;
+	} else if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
+		limits.maxTexturesPerArgumentBuffer = 96;
+	} else {
+		limits.maxTexturesPerArgumentBuffer = 31;
+	}
+
+	// Maximum number of samplers the device can access, per stage, from an argument buffer.
+	if ([p_device supportsFamily:MTLGPUFamilyApple6]) {
+		limits.maxSamplersPerArgumentBuffer = 1024;
+	} else {
+		limits.maxSamplersPerArgumentBuffer = 16;
+	}
+
+	// Maximum number of buffers the device can access, per stage, from an argument buffer.
+	if ([p_device supportsFamily:MTLGPUFamilyApple6]) {
+		limits.maxBuffersPerArgumentBuffer = std::numeric_limits<uint64_t>::max();
+	} else if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
+		limits.maxBuffersPerArgumentBuffer = 96;
+	} else {
+		limits.maxBuffersPerArgumentBuffer = 31;
+	}
+
+	limits.minSubgroupSize = limits.maxSubgroupSize = 1;
+	// These values were taken from MoltenVK.
+	if (features.simdPermute) {
+		limits.minSubgroupSize = 4;
+		limits.maxSubgroupSize = 32;
+	} else if (features.quadPermute) {
+		limits.minSubgroupSize = limits.maxSubgroupSize = 4;
+	}
+
+	limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_COMPUTE_BIT);
+	if (features.tessellationShader) {
+		limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_TESSELATION_CONTROL_BIT);
+	}
+	limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_FRAGMENT_BIT);
+
+	limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_BASIC_BIT);
+	if (features.simdPermute || features.quadPermute) {
+		limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_VOTE_BIT);
+		limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_BALLOT_BIT);
+		limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_SHUFFLE_BIT);
+		limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_SHUFFLE_RELATIVE_BIT);
+	}
+
+	if (features.simdReduction) {
+		limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_ARITHMETIC_BIT);
+	}
+
+	if (features.quadPermute) {
+		limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_QUAD_BIT);
+	}
+
+	limits.maxBufferLength = p_device.maxBufferLength;
+
+	// FST: Maximum size of vertex descriptor layout stride.
+	limits.maxVertexDescriptorLayoutStride = std::numeric_limits<uint64_t>::max();
+
+	// Maximum number of viewports.
+	if ([p_device supportsFamily:MTLGPUFamilyApple5]) {
+		limits.maxViewports = 16;
+	} else {
+		limits.maxViewports = 1;
+	}
+
+	limits.maxPerStageBufferCount = 31;
+	limits.maxPerStageSamplerCount = 16;
+	if ([p_device supportsFamily:MTLGPUFamilyApple6]) {
+		limits.maxPerStageTextureCount = 128;
+	} else if ([p_device supportsFamily:MTLGPUFamilyApple4]) {
+		limits.maxPerStageTextureCount = 96;
+	} else {
+		limits.maxPerStageTextureCount = 31;
+	}
+
+	limits.maxVertexInputAttributes = 31;
+	limits.maxVertexInputBindings = 31;
+	limits.maxVertexInputBindingStride = (2 * KIBI);
+
+	// On targets matched by neither block below, the alignment keeps the zero it
+	// received from the reset at the top of this function.
+#if TARGET_OS_IOS && !TARGET_OS_MACCATALYST
+	limits.minUniformBufferOffsetAlignment = 64;
+#endif
+
+#if TARGET_OS_OSX
+	// This is Apple Silicon specific.
+	limits.minUniformBufferOffsetAlignment = 16;
+#endif
+
+	limits.maxDrawIndexedIndexValue = std::numeric_limits<uint32_t>::max() - 1;
+}
+
+// Queries `p_device` once at construction time. init_features() has to run first:
+// init_limits() reads features.simdPermute, features.quadPermute,
+// features.tessellationShader and features.simdReduction.
+MetalDeviceProperties::MetalDeviceProperties(id<MTLDevice> p_device) {
+ init_features(p_device);
+ init_limits(p_device);
+}
+
+// Nothing to release explicitly.
+MetalDeviceProperties::~MetalDeviceProperties() {
+}
+
+// Maps `p_samples` to a SampleCount flag the device supports: the requested count if
+// it is available, otherwise the closest lower count that is, and SampleCount1 when
+// nothing above a single sample matches.
+SampleCount MetalDeviceProperties::find_nearest_supported_sample_count(RenderingDevice::TextureSamples p_samples) const {
+	const SampleCount available = features.supportedSampleCounts;
+
+	// Walk down from the requested flag by halving; every flag is a single bit.
+	for (SampleCount candidate = sample_count[p_samples]; candidate > SampleCount1; candidate = static_cast<SampleCount>(candidate >> 1)) {
+		if (available & candidate) {
+			return candidate;
+		}
+	}
+
+	return SampleCount1;
+}
+
+// region static members
+
+// Indexed by RenderingDevice::TextureSamples; each entry is the SampleCount flag for
+// that enumerator (find_nearest_supported_sample_count() reads it as sample_count[p_samples]).
+const SampleCount MetalDeviceProperties::sample_count[RenderingDevice::TextureSamples::TEXTURE_SAMPLES_MAX] = {
+ SampleCount1,
+ SampleCount2,
+ SampleCount4,
+ SampleCount8,
+ SampleCount16,
+ SampleCount32,
+ SampleCount64,
+};
+
+// endregion
diff --git a/drivers/metal/metal_objects.h b/drivers/metal/metal_objects.h
new file mode 100644
index 0000000000..97f33bb1e8
--- /dev/null
+++ b/drivers/metal/metal_objects.h
@@ -0,0 +1,909 @@
+/**************************************************************************/
+/* metal_objects.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+/**************************************************************************/
+/* */
+/* Portions of this code were derived from MoltenVK. */
+/* */
+/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
+/* (http://www.brenwill.com) */
+/* */
+/* Licensed under the Apache License, Version 2.0 (the "License"); */
+/* you may not use this file except in compliance with the License. */
+/* You may obtain a copy of the License at */
+/* */
+/* http://www.apache.org/licenses/LICENSE-2.0 */
+/* */
+/* Unless required by applicable law or agreed to in writing, software */
+/* distributed under the License is distributed on an "AS IS" BASIS, */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
+/* implied. See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/**************************************************************************/
+
+#ifndef METAL_OBJECTS_H
+#define METAL_OBJECTS_H
+
+#import "metal_device_properties.h"
+#import "metal_utils.h"
+#import "pixel_formats.h"
+
+#import "servers/rendering/rendering_device_driver.h"
+
+#import <CommonCrypto/CommonDigest.h>
+#import <Foundation/Foundation.h>
+#import <Metal/Metal.h>
+#import <QuartzCore/CAMetalLayer.h>
+#import <simd/simd.h>
+#import <zlib.h>
+#import <initializer_list>
+#import <optional>
+#import <spirv.hpp>
+
+// These types can be used in Vector and other containers that use
+// pointer operations not supported by ARC.
+namespace MTL {
+// MTL_CLASS(name) declares a class `name` that holds a single id<MTL##name> in
+// `m_obj`. It converts implicitly in both directions: it can be constructed from
+// an id<MTL##name> (defaulting to nil) and converts back to one.
+#define MTL_CLASS(name) \
+ class name { \
+ public: \
+ name(id<MTL##name> obj = nil) : m_obj(obj) {} \
+ operator id<MTL##name>() const { return m_obj; } \
+ id<MTL##name> m_obj; \
+ };
+
+// MTL::Texture wraps id<MTLTexture>.
+MTL_CLASS(Texture)
+
+} //namespace MTL
+
+// Bit mask of shader stages. Each enumerator reuses the value of the matching
+// RDD::SHADER_STAGE_*_BIT constant, so the two can be combined bitwise.
+enum ShaderStageUsage : uint32_t {
+ None = 0,
+ Vertex = RDD::SHADER_STAGE_VERTEX_BIT,
+ Fragment = RDD::SHADER_STAGE_FRAGMENT_BIT,
+ TesselationControl = RDD::SHADER_STAGE_TESSELATION_CONTROL_BIT,
+ TesselationEvaluation = RDD::SHADER_STAGE_TESSELATION_EVALUATION_BIT,
+ Compute = RDD::SHADER_STAGE_COMPUTE_BIT,
+};
+
+// ORs the bits of `p_b` into `p_a` and returns `p_a`. Both operands are combined as
+// uint32_t, matching the enum's underlying type.
+_FORCE_INLINE_ ShaderStageUsage &operator|=(ShaderStageUsage &p_a, int p_b) {
+	const uint32_t merged = static_cast<uint32_t>(p_a) | static_cast<uint32_t>(p_b);
+	p_a = static_cast<ShaderStageUsage>(merged);
+	return p_a;
+}
+
+// Value stored in MDCommandBuffer::type, which starts out as None.
+// NOTE(review): presumably identifies the kind of encoder currently open on the
+// command buffer — confirm against the MDCommandBuffer implementation.
+enum class MDCommandBufferStateType {
+ None,
+ Render,
+ Compute,
+ Blit,
+};
+
+// Distinguishes render pipelines from compute pipelines.
+// NOTE(review): presumably the tag carried by MDPipeline, which is only
+// forward-declared here — confirm at its definition.
+enum class MDPipelineType {
+ None,
+ Render,
+ Compute,
+};
+
+class MDRenderPass;
+class MDPipeline;
+class MDRenderPipeline;
+class MDComputePipeline;
+class MDFrameBuffer;
+class RenderingDeviceDriverMetal;
+class MDUniformSet;
+class MDShader;
+
+#pragma mark - Resource Factory
+
+// Hashable key made of a sample count and one pixel format per attachment slot.
+// MDResourceCache uses it to index its map of clear render pipeline states.
+//
+// Slot layout of `pixel_formats`: [0, COLOR_COUNT) are the color attachments,
+// followed by one depth slot and one stencil slot. A stored format of 0 marks the
+// slot as disabled.
+struct ClearAttKey {
+	const static uint32_t COLOR_COUNT = MAX_COLOR_ATTACHMENT_COUNT;
+	const static uint32_t DEPTH_INDEX = COLOR_COUNT;
+	const static uint32_t STENCIL_INDEX = DEPTH_INDEX + 1;
+	const static uint32_t ATTACHMENT_COUNT = STENCIL_INDEX + 1;
+
+	uint16_t sample_count = 0;
+	uint16_t pixel_formats[ATTACHMENT_COUNT] = { 0 };
+
+	// Setters: formats are stored narrowed to 16 bits.
+	_FORCE_INLINE_ void set_color_format(uint32_t p_idx, MTLPixelFormat p_fmt) { pixel_formats[p_idx] = p_fmt; }
+	_FORCE_INLINE_ void set_depth_format(MTLPixelFormat p_fmt) { set_color_format(DEPTH_INDEX, p_fmt); }
+	_FORCE_INLINE_ void set_stencil_format(MTLPixelFormat p_fmt) { set_color_format(STENCIL_INDEX, p_fmt); }
+
+	// Getters for the two fixed slots.
+	_FORCE_INLINE_ MTLPixelFormat depth_format() const { return static_cast<MTLPixelFormat>(pixel_formats[DEPTH_INDEX]); }
+	_FORCE_INLINE_ MTLPixelFormat stencil_format() const { return static_cast<MTLPixelFormat>(pixel_formats[STENCIL_INDEX]); }
+
+	// A slot counts as enabled when its stored format is non-zero.
+	_FORCE_INLINE_ bool is_enabled(uint32_t p_idx) const { return pixel_formats[p_idx] != 0; }
+	_FORCE_INLINE_ bool is_depth_enabled() const { return is_enabled(DEPTH_INDEX); }
+	_FORCE_INLINE_ bool is_stencil_enabled() const { return is_enabled(STENCIL_INDEX); }
+
+	// Byte-wise comparison of the whole key.
+	_FORCE_INLINE_ bool operator==(const ClearAttKey &p_rhs) const {
+		return 0 == memcmp(this, &p_rhs, sizeof(*this));
+	}
+
+	// Hashes the sample count first, then folds in the full format table.
+	uint32_t hash() const {
+		const uint32_t seed = hash_murmur3_one_32(sample_count);
+		return hash_murmur3_buffer(pixel_formats, sizeof(pixel_formats), seed);
+	}
+};
+
+// Creates the Metal objects declared below (clear pipeline states and depth/stencil
+// states) on behalf of a RenderingDeviceDriverMetal. Only declarations are visible in
+// this header; the driver pointer is stored as-is and is not owned by this class.
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDResourceFactory {
+private:
+ RenderingDeviceDriverMetal *device_driver;
+
+ // Private helpers used to build the clear pipeline.
+ // NOTE(review): new_func presumably compiles `p_source` and returns the function
+ // named `p_name` — confirm in the implementation file.
+ id<MTLFunction> new_func(NSString *p_source, NSString *p_name, NSError **p_error);
+ id<MTLFunction> new_clear_vert_func(ClearAttKey &p_key);
+ id<MTLFunction> new_clear_frag_func(ClearAttKey &p_key);
+ NSString *get_format_type_string(MTLPixelFormat p_fmt);
+
+public:
+ // Factory entry points; errors for the pipeline state are reported through `p_error`.
+ id<MTLRenderPipelineState> new_clear_pipeline_state(ClearAttKey &p_key, NSError **p_error);
+ id<MTLDepthStencilState> new_depth_stencil_state(bool p_use_depth, bool p_use_stencil);
+
+ MDResourceFactory(RenderingDeviceDriverMetal *p_device_driver) :
+ device_driver(p_device_driver) {}
+ ~MDResourceFactory() = default;
+};
+
+// Holds clear render pipeline states keyed by ClearAttKey plus four depth/stencil
+// state slots, and owns the MDResourceFactory it was constructed with.
+// NOTE(review): the get_* methods presumably create missing entries through the
+// factory on first use — confirm in the implementation file.
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDResourceCache {
+private:
+ // Inside this class, `HashMap` names this specific instantiation.
+ typedef HashMap<ClearAttKey, id<MTLRenderPipelineState>, HashableHasher<ClearAttKey>> HashMap;
+ std::unique_ptr<MDResourceFactory> resource_factory;
+ HashMap clear_states;
+
+ // One slot per combination of the (use depth, use stencil) flags.
+ struct {
+ id<MTLDepthStencilState> all;
+ id<MTLDepthStencilState> depth_only;
+ id<MTLDepthStencilState> stencil_only;
+ id<MTLDepthStencilState> none;
+ } clear_depth_stencil_state;
+
+public:
+ id<MTLRenderPipelineState> get_clear_render_pipeline_state(ClearAttKey &p_key, NSError **p_error);
+ id<MTLDepthStencilState> get_depth_stencil_state(bool p_use_depth, bool p_use_stencil);
+
+ explicit MDResourceCache(RenderingDeviceDriverMetal *p_device_driver) :
+ resource_factory(new MDResourceFactory(p_device_driver)) {}
+ ~MDResourceCache() = default;
+};
+
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDCommandBuffer {
+private:
+ RenderingDeviceDriverMetal *device_driver = nullptr;
+ id<MTLCommandQueue> queue = nil;
+ id<MTLCommandBuffer> commandBuffer = nil;
+
+ void _end_compute_dispatch();
+ void _end_blit();
+
+#pragma mark - Render
+
+ void _render_set_dirty_state();
+ void _render_bind_uniform_sets();
+
+ static void _populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView<Rect2i> p_rects);
+ static uint32_t _populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size);
+ void _end_render_pass();
+ void _render_clear_render_area();
+
+public:
+ MDCommandBufferStateType type = MDCommandBufferStateType::None;
+
+ struct RenderState {
+ MDRenderPass *pass = nullptr;
+ MDFrameBuffer *frameBuffer = nullptr;
+ MDRenderPipeline *pipeline = nullptr;
+ LocalVector<RDD::RenderPassClearValue> clear_values;
+ LocalVector<MTLViewport> viewports;
+ LocalVector<MTLScissorRect> scissors;
+ std::optional<Color> blend_constants;
+ uint32_t current_subpass = UINT32_MAX;
+ Rect2i render_area = {};
+ bool is_rendering_entire_area = false;
+ MTLRenderPassDescriptor *desc = nil;
+ id<MTLRenderCommandEncoder> encoder = nil;
+ id<MTLBuffer> __unsafe_unretained index_buffer = nil; // Buffer is owned by RDD.
+ MTLIndexType index_type = MTLIndexTypeUInt16;
+ uint32_t index_offset = 0;
+ LocalVector<id<MTLBuffer> __unsafe_unretained> vertex_buffers;
+ LocalVector<NSUInteger> vertex_offsets;
+ // clang-format off
+ // One bit per category of render state tracked by the `dirty` bit field.
+ enum DirtyFlag: uint8_t {
+ DIRTY_NONE = 0b0000'0000,
+ DIRTY_PIPELINE = 0b0000'0001, //! pipeline state
+ DIRTY_UNIFORMS = 0b0000'0010, //! uniform sets
+ DIRTY_DEPTH = 0b0000'0100, //! depth / stencil state
+ DIRTY_VERTEX = 0b0000'1000, //! vertex buffers
+ DIRTY_VIEWPORT = 0b0001'0000, //! viewport rectangles
+ DIRTY_SCISSOR = 0b0010'0000, //! scissor rectangles
+ DIRTY_BLEND = 0b0100'0000, //! blend state
+ DIRTY_RASTER = 0b1000'0000, //! encoder state like cull mode
+
+ DIRTY_ALL = 0xff,
+ };
+ // clang-format on
+ BitField<DirtyFlag> dirty = DIRTY_NONE;
+
+ LocalVector<MDUniformSet *> uniform_sets;
+ // Bit mask of the uniform sets that are dirty, to prevent redundant binding.
+ uint64_t uniform_set_mask = 0;
+
+ _FORCE_INLINE_ void reset() {
+ pass = nil;
+ frameBuffer = nil;
+ pipeline = nil;
+ current_subpass = UINT32_MAX;
+ render_area = {};
+ is_rendering_entire_area = false;
+ desc = nil;
+ encoder = nil;
+ index_buffer = nil;
+ index_type = MTLIndexTypeUInt16;
+ dirty = DIRTY_NONE;
+ uniform_sets.clear();
+ uniform_set_mask = 0;
+ clear_values.clear();
+ viewports.clear();
+ scissors.clear();
+ blend_constants.reset();
+ vertex_buffers.clear();
+ vertex_offsets.clear();
+ }
+
+ _FORCE_INLINE_ void mark_viewport_dirty() {
+ if (viewports.is_empty()) {
+ return;
+ }
+ dirty.set_flag(DirtyFlag::DIRTY_VIEWPORT);
+ }
+
+ _FORCE_INLINE_ void mark_scissors_dirty() {
+ if (scissors.is_empty()) {
+ return;
+ }
+ dirty.set_flag(DirtyFlag::DIRTY_SCISSOR);
+ }
+
+ _FORCE_INLINE_ void mark_vertex_dirty() {
+ if (vertex_buffers.is_empty()) {
+ return;
+ }
+ dirty.set_flag(DirtyFlag::DIRTY_VERTEX);
+ }
+
+ _FORCE_INLINE_ void mark_uniforms_dirty(std::initializer_list<uint32_t> l) {
+ if (uniform_sets.is_empty()) {
+ return;
+ }
+ for (uint32_t i : l) {
+ if (i < uniform_sets.size() && uniform_sets[i] != nullptr) {
+ uniform_set_mask |= 1 << i;
+ }
+ }
+ dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
+ }
+
+ _FORCE_INLINE_ void mark_uniforms_dirty(void) {
+ if (uniform_sets.is_empty()) {
+ return;
+ }
+ for (uint32_t i = 0; i < uniform_sets.size(); i++) {
+ if (uniform_sets[i] != nullptr) {
+ uniform_set_mask |= 1 << i;
+ }
+ }
+ dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
+ }
+
+ MTLScissorRect clip_to_render_area(MTLScissorRect p_rect) const {
+ uint32_t raLeft = render_area.position.x;
+ uint32_t raRight = raLeft + render_area.size.width;
+ uint32_t raBottom = render_area.position.y;
+ uint32_t raTop = raBottom + render_area.size.height;
+
+ p_rect.x = CLAMP(p_rect.x, raLeft, MAX(raRight - 1, raLeft));
+ p_rect.y = CLAMP(p_rect.y, raBottom, MAX(raTop - 1, raBottom));
+ p_rect.width = MIN(p_rect.width, raRight - p_rect.x);
+ p_rect.height = MIN(p_rect.height, raTop - p_rect.y);
+
+ return p_rect;
+ }
+
+ Rect2i clip_to_render_area(Rect2i p_rect) const {
+ int32_t raLeft = render_area.position.x;
+ int32_t raRight = raLeft + render_area.size.width;
+ int32_t raBottom = render_area.position.y;
+ int32_t raTop = raBottom + render_area.size.height;
+
+ p_rect.position.x = CLAMP(p_rect.position.x, raLeft, MAX(raRight - 1, raLeft));
+ p_rect.position.y = CLAMP(p_rect.position.y, raBottom, MAX(raTop - 1, raBottom));
+ p_rect.size.width = MIN(p_rect.size.width, raRight - p_rect.position.x);
+ p_rect.size.height = MIN(p_rect.size.height, raTop - p_rect.position.y);
+
+ return p_rect;
+ }
+
+ } render;
+
+ // State specific for a compute pass.
+ struct {
+ MDComputePipeline *pipeline = nullptr;
+ id<MTLComputeCommandEncoder> encoder = nil;
+ _FORCE_INLINE_ void reset() {
+ pipeline = nil;
+ encoder = nil;
+ }
+ } compute;
+
+ // State specific to a blit pass.
+ struct {
+ id<MTLBlitCommandEncoder> encoder = nil;
+ _FORCE_INLINE_ void reset() {
+ encoder = nil;
+ }
+ } blit;
+
+ _FORCE_INLINE_ id<MTLCommandBuffer> get_command_buffer() const {
+ return commandBuffer;
+ }
+
+ void begin();
+ void commit();
+ void end();
+
+ id<MTLBlitCommandEncoder> blit_command_encoder();
+ void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label);
+
+ void bind_pipeline(RDD::PipelineID p_pipeline);
+
+#pragma mark - Render Commands
+
+ void render_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index);
+ void render_clear_attachments(VectorView<RDD::AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects);
+ void render_set_viewport(VectorView<Rect2i> p_viewports);
+ void render_set_scissor(VectorView<Rect2i> p_scissors);
+ void render_set_blend_constants(const Color &p_constants);
+ void render_begin_pass(RDD::RenderPassID p_render_pass,
+ RDD::FramebufferID p_frameBuffer,
+ RDD::CommandBufferType p_cmd_buffer_type,
+ const Rect2i &p_rect,
+ VectorView<RDD::RenderPassClearValue> p_clear_values);
+ void render_next_subpass();
+ void render_draw(uint32_t p_vertex_count,
+ uint32_t p_instance_count,
+ uint32_t p_base_vertex,
+ uint32_t p_first_instance);
+ void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets);
+ void render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset);
+
+ void render_draw_indexed(uint32_t p_index_count,
+ uint32_t p_instance_count,
+ uint32_t p_first_index,
+ int32_t p_vertex_offset,
+ uint32_t p_first_instance);
+
+ void render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride);
+ void render_draw_indexed_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride);
+ void render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride);
+ void render_draw_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride);
+
+ void render_end_pass();
+
+#pragma mark - Compute Commands
+
+ void compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index);
+ void compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
+ void compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset);
+
+ MDCommandBuffer(id<MTLCommandQueue> p_queue, RenderingDeviceDriverMetal *p_device_driver) :
+ device_driver(p_device_driver), queue(p_queue) {
+ type = MDCommandBufferStateType::None;
+ }
+
+ MDCommandBuffer() = default;
+};
+
+// When building against an SDK older than macOS 14.0 or iOS 17.0, map the
+// MTLBindingAccess names onto their MTLArgumentAccess equivalents so the rest of
+// this file can use the MTLBindingAccess spelling unconditionally.
+#if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 140000) || (TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED < 170000)
+#define MTLBindingAccess MTLArgumentAccess
+#define MTLBindingAccessReadOnly MTLArgumentAccessReadOnly
+#define MTLBindingAccessReadWrite MTLArgumentAccessReadWrite
+#define MTLBindingAccessWriteOnly MTLArgumentAccessWriteOnly
+#endif
+
+/// Describes one resource binding (data type, index, access, usage and texture
+/// properties) and knows how to write itself to, and read itself from, a binary stream.
+struct API_AVAILABLE(macos(11.0), ios(14.0)) BindingInfo {
+ MTLDataType dataType = MTLDataTypeNone;
+ uint32_t index = 0;
+ MTLBindingAccess access = MTLBindingAccessReadOnly;
+ MTLResourceUsage usage = 0;
+ MTLTextureType textureType = MTLTextureType2D;
+ spv::ImageFormat imageFormat = spv::ImageFormatUnknown;
+ uint32_t arrayLength = 0;
+ bool isMultisampled = false;
+
+ /// Builds an MTLArgumentDescriptor from the data type, index, access, texture
+ /// type and array length of this binding.
+ inline MTLArgumentDescriptor *new_argument_descriptor() const {
+ MTLArgumentDescriptor *desc = MTLArgumentDescriptor.argumentDescriptor;
+ desc.dataType = dataType;
+ desc.index = index;
+ desc.access = access;
+ desc.textureType = textureType;
+ desc.arrayLength = arrayLength;
+ return desc;
+ }
+
+ /// Number of bytes serialize() is expected to produce.
+ size_t serialize_size() const {
+ return sizeof(uint32_t) * 8 /* 8 uint32_t fields */;
+ }
+
+ /// Writes the fields to p_writer, in the order deserialize() reads them back.
+ // NOTE(review): imageFormat and isMultisampled are written without an explicit
+ // uint32_t cast; confirm the writer emits 4 bytes for each so that
+ // serialize_size() stays accurate.
+ template <typename W>
+ void serialize(W &p_writer) const {
+ p_writer.write((uint32_t)dataType);
+ p_writer.write(index);
+ p_writer.write((uint32_t)access);
+ p_writer.write((uint32_t)usage);
+ p_writer.write((uint32_t)textureType);
+ p_writer.write(imageFormat);
+ p_writer.write(arrayLength);
+ p_writer.write(isMultisampled);
+ }
+
+ /// Reads the fields from p_reader, in the order serialize() wrote them.
+ template <typename R>
+ void deserialize(R &p_reader) {
+ // MTLDataType, MTLBindingAccess, MTLResourceUsage and MTLTextureType are
+ // NSUInteger-backed in the Metal headers. Reading through a `uint32_t &` alias
+ // of such a member would only overwrite part of it (and violates strict
+ // aliasing), so each 32-bit value is read into a temporary and then converted.
+ uint32_t raw = 0;
+
+ p_reader.read(raw);
+ dataType = (MTLDataType)raw;
+
+ p_reader.read(index);
+
+ p_reader.read(raw);
+ access = (MTLBindingAccess)raw;
+
+ p_reader.read(raw);
+ usage = (MTLResourceUsage)raw;
+
+ p_reader.read(raw);
+ textureType = (MTLTextureType)raw;
+
+ p_reader.read(raw);
+ imageFormat = (spv::ImageFormat)raw;
+
+ p_reader.read(arrayLength);
+ p_reader.read(isMultisampled);
+ }
+};
+
+// Short alias for RenderingDeviceCommons.
+using RDC = RenderingDeviceCommons;
+
+// Maps a shader stage to the binding information used for that stage.
+typedef API_AVAILABLE(macos(11.0), ios(14.0)) HashMap<RDC::ShaderStage, BindingInfo> BindingInfoMap;
+
+/// Per-uniform reflection data: its binding number, the stages that use it and
+/// the per-stage binding information.
+struct API_AVAILABLE(macos(11.0), ios(14.0)) UniformInfo {
+ // Initialized so that a default-constructed UniformInfo never exposes an indeterminate value.
+ uint32_t binding = 0;
+ ShaderStageUsage active_stages = None;
+ BindingInfoMap bindings;
+ // NOTE(review): presumably a second binding for uniforms that need two
+ // resources per stage; confirm against the code that fills it in.
+ BindingInfoMap bindings_secondary;
+};
+
+/// Reflection data for one uniform set of a shader.
+struct API_AVAILABLE(macos(11.0), ios(14.0)) UniformSet {
+ // The uniforms that belong to this set.
+ LocalVector<UniformInfo> uniforms;
+ // NOTE(review): presumably the size in bytes of the buffer backing this set; confirm against the code that fills it in.
+ uint32_t buffer_size = 0;
+ // Per-stage offset value. NOTE(review): presumably an offset into the buffer above; confirm.
+ HashMap<RDC::ShaderStage, uint32_t> offsets;
+ // Per-stage Metal argument encoder.
+ HashMap<RDC::ShaderStage, id<MTLArgumentEncoder>> encoders;
+};
+
+struct ShaderCacheEntry;
+
+/// Selects how an MDLibrary is loaded; passed to
+/// +newLibraryWithCacheEntry:device:source:options:strategy:.
+// NOTE(review): the exact behavior of each strategy is implemented elsewhere;
+// presumably LAZY defers compilation until the library is first needed. Confirm.
+enum class ShaderLoadStrategy {
+ DEFAULT,
+ LAZY,
+};
+
+/// A Metal shader library.
+///
+/// Each instance keeps a pointer to the ShaderCacheEntry it was created for.
+@interface MDLibrary : NSObject {
+ ShaderCacheEntry *_entry;
+};
+/// The underlying MTLLibrary.
+- (id<MTLLibrary>)library;
+/// The error associated with this library, if any.
+// NOTE(review): presumably the compilation error; confirm in the implementation.
+- (NSError *)error;
+/// Sets a label on the library.
+- (void)setLabel:(NSString *)label;
+
+/// Creates a library for the given cache entry from Metal shader source, using
+/// the supplied device, compile options and load strategy.
++ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry
+ device:(id<MTLDevice>)device
+ source:(NSString *)source
+ options:(MTLCompileOptions *)options
+ strategy:(ShaderLoadStrategy)strategy;
+@end
+
+/// Holds the raw bytes of a SHA-256 digest.
+struct SHA256Digest {
+ unsigned char data[CC_SHA256_DIGEST_LENGTH];
+
+ /// Returns a CRC-32 of the digest bytes.
+ uint32_t hash() const {
+ uint32_t c = crc32(0, data, CC_SHA256_DIGEST_LENGTH);
+ return c;
+ }
+
+ /// Creates a digest with every byte set to zero.
+ SHA256Digest() {
+ bzero(data, CC_SHA256_DIGEST_LENGTH);
+ }
+
+ /// Computes the SHA-256 digest of p_length bytes starting at p_data.
+ SHA256Digest(const char *p_data, size_t p_length) {
+ CC_SHA256(p_data, (CC_LONG)p_length, data);
+ }
+
+ /// Returns the first four digest bytes as a big-endian 32-bit value.
+ _FORCE_INLINE_ uint32_t short_sha() const {
+ // Assemble the value byte by byte instead of dereferencing the byte array
+ // through a uint32_t pointer, which is an aliasing / alignment violation.
+ // On little-endian targets this equals the previous byte-swapped load.
+ return (uint32_t(data[0]) << 24) | (uint32_t(data[1]) << 16) | (uint32_t(data[2]) << 8) | uint32_t(data[3]);
+ }
+};
+
+/// Equality comparator used when SHA256Digest is a HashMap key.
+template <>
+struct HashMapComparatorDefault<SHA256Digest> {
+ /// Two digests are equal when all of their bytes match.
+ static bool compare(const SHA256Digest &p_lhs, const SHA256Digest &p_rhs) {
+ const int difference = memcmp(p_lhs.data, p_rhs.data, CC_SHA256_DIGEST_LENGTH);
+ return difference == 0;
+ }
+};
+
+/// A cache entry for a Metal shader library.
+struct ShaderCacheEntry {
+ /// The driver this entry was created with.
+ RenderingDeviceDriverMetal &owner;
+ /// A hash of the Metal shader source code.
+ SHA256Digest key;
+ /// Name associated with the entry. NOTE(review): presumably the shader name; confirm.
+ CharString name;
+ /// Shader stage of the library; defaults to the vertex stage.
+ RD::ShaderStage stage = RD::SHADER_STAGE_VERTEX;
+ /// This reference must be weak, to ensure that when the last strong reference to the library
+ /// is released, the cache entry is freed.
+ MDLibrary *__weak library = nil;
+
+ /// Notify the cache that this entry is no longer needed.
+ void notify_free() const;
+
+ /// Creates an entry for the given owner and source hash.
+ ShaderCacheEntry(RenderingDeviceDriverMetal &p_owner, SHA256Digest p_key) :
+ owner(p_owner), key(p_key) {
+ }
+ ~ShaderCacheEntry() = default;
+};
+
+/// Abstract base for shaders: stores the shader name and its uniform set layout,
+/// and requires subclasses to implement push constant encoding.
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDShader {
+public:
+ CharString name;
+ Vector<UniformSet> sets;
+
+ /// Encodes p_data as push constant data using the given command buffer.
+ virtual void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) = 0;
+
+ /// Stores copies of the name and uniform sets.
+ MDShader(CharString p_name, Vector<UniformSet> p_sets) :
+ name(p_name), sets(p_sets) {}
+ virtual ~MDShader() = default;
+};
+
+/// A compute shader: one kernel library plus its push constant layout.
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDComputeShader final : public MDShader {
+public:
+ struct {
+ // Defaults to -1 converted to uint32_t, i.e. UINT32_MAX.
+ // NOTE(review): presumably a "no push constants" sentinel; MDRenderShader
+ // stores the same field as int32_t. Confirm against the code that reads it.
+ uint32_t binding = -1;
+ uint32_t size = 0;
+ } push_constants;
+ // NOTE(review): presumably the threadgroup (local) size of the kernel; confirm.
+ MTLSize local = {};
+
+ MDLibrary *kernel;
+#if DEV_ENABLED
+ // Kernel source text, only stored in DEV_ENABLED builds.
+ CharString kernel_source;
+#endif
+
+ void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
+
+ MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *p_kernel);
+};
+
+/// A render shader: vertex and fragment libraries plus a push constant layout
+/// for each of the two stages.
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderShader final : public MDShader {
+public:
+ struct {
+ // Vertex stage push constants; binding defaults to -1.
+ // NOTE(review): presumably -1 means "not used by this stage"; confirm.
+ struct {
+ int32_t binding = -1;
+ uint32_t size = 0;
+ } vert;
+ // Fragment stage push constants; binding defaults to -1.
+ struct {
+ int32_t binding = -1;
+ uint32_t size = 0;
+ } frag;
+ } push_constants;
+
+ MDLibrary *vert;
+ MDLibrary *frag;
+#if DEV_ENABLED
+ // Shader source text, only stored in DEV_ENABLED builds.
+ CharString vert_source;
+ CharString frag_source;
+#endif
+
+ void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
+
+ MDRenderShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *p_vert, MDLibrary *p_frag);
+};
+
+/// Packs MTLResourceUsage read / write flags for several shader stages into one
+/// 32-bit value: the flags of a stage are shifted left by (stage * 2) bits.
+enum StageResourceUsage : uint32_t {
+ VertexRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_VERTEX * 2),
+ VertexWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_VERTEX * 2),
+ FragmentRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_FRAGMENT * 2),
+ FragmentWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_FRAGMENT * 2),
+ TesselationControlRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2),
+ TesselationControlWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2),
+ TesselationEvaluationRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2),
+ TesselationEvaluationWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2),
+ ComputeRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_COMPUTE * 2),
+ ComputeWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_COMPUTE * 2),
+};
+
+/// ORs the raw bits p_b into p_a and returns p_a.
+_FORCE_INLINE_ StageResourceUsage &operator|=(StageResourceUsage &p_a, uint32_t p_b) {
+ const uint32_t combined = uint32_t(p_a) | p_b;
+ p_a = StageResourceUsage(combined);
+ return p_a;
+}
+
+/// Moves the usage flags p_usage into the bit position reserved for p_stage.
+_FORCE_INLINE_ StageResourceUsage stage_resource_usage(RDC::ShaderStage p_stage, MTLResourceUsage p_usage) {
+ const auto shift = p_stage * 2;
+ return StageResourceUsage(p_usage << shift);
+}
+
+/// Extracts the two usage bits stored for p_stage from the packed value p_usage.
+_FORCE_INLINE_ MTLResourceUsage resource_usage_for_stage(StageResourceUsage p_usage, RDC::ShaderStage p_stage) {
+ const auto shift = p_stage * 2;
+ const auto stage_bits = (p_usage >> shift) & 0b11;
+ return MTLResourceUsage(stage_bits);
+}
+
+/// Equality comparator used when RDD::ShaderID is a HashMap key.
+template <>
+struct HashMapComparatorDefault<RDD::ShaderID> {
+ /// Shader IDs are equal when their `id` members are equal.
+ static bool compare(const RDD::ShaderID &p_lhs, const RDD::ShaderID &p_rhs) {
+ const bool same_id = (p_lhs.id == p_rhs.id);
+ return same_id;
+ }
+};
+
+/// The result of binding a uniform set for one shader: a Metal buffer plus the
+/// resources it refers to, each with the packed per-stage usage flags.
+struct BoundUniformSet {
+ // NOTE(review): presumably the encoded argument buffer; confirm in boundUniformSetForShader().
+ id<MTLBuffer> buffer;
+ HashMap<id<MTLResource>, StageResourceUsage> bound_resources;
+};
+
+/// A uniform set: its set index, the bound uniforms and a per-shader cache of
+/// BoundUniformSet objects.
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDUniformSet {
+public:
+ // Used as an array index and shift amount when the set is bound, so it must
+ // never be left indeterminate.
+ uint32_t index = 0;
+ LocalVector<RDD::BoundUniform> uniforms;
+ HashMap<MDShader *, BoundUniformSet> bound_uniforms;
+
+ /// Returns the BoundUniformSet associated with p_shader.
+ // NOTE(review): presumably creates and caches the entry in bound_uniforms on
+ // first use; confirm in the implementation.
+ BoundUniformSet &boundUniformSetForShader(MDShader *p_shader, id<MTLDevice> p_device);
+};
+
+/// Bit flags describing the aspects of an attachment; values can be combined.
+enum class MDAttachmentType : uint8_t {
+ None = 0,
+ Color = 1 << 0,
+ Depth = 1 << 1,
+ Stencil = 1 << 2,
+};
+
+/// Adds the flags in p_b to p_a through flags::set() and returns p_a.
+_FORCE_INLINE_ MDAttachmentType &operator|=(MDAttachmentType &p_a, MDAttachmentType p_b) {
+ flags::set(p_a, p_b);
+ return p_a;
+}
+
+/// Returns true when p_a and p_b have at least one flag in common.
+_FORCE_INLINE_ bool operator&(MDAttachmentType p_a, MDAttachmentType p_b) {
+ const int shared_bits = uint8_t(p_a) & uint8_t(p_b);
+ return shared_bits != 0;
+}
+
+/// One subpass of a render pass: its index and the attachment references it uses.
+struct MDSubpass {
+ uint32_t subpass_index = 0;
+ LocalVector<RDD::AttachmentReference> input_references;
+ LocalVector<RDD::AttachmentReference> color_references;
+ RDD::AttachmentReference depth_stencil_reference;
+ LocalVector<RDD::AttachmentReference> resolve_references;
+
+ /// Returns the format capabilities required for the attachment at p_index.
+ // NOTE(review): which reference list p_index refers to is decided in the implementation; confirm there.
+ MTLFmtCaps getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const;
+};
+
+/// Describes one render pass attachment: its format, aspect type, load / store
+/// actions and sample count, plus the first and last subpass that use it.
+struct API_AVAILABLE(macos(11.0), ios(14.0)) MDAttachment {
+private:
+ uint32_t index = 0;
+ // Subpass indices compared against by isFirstUseOf() / isLastUseOf().
+ uint32_t firstUseSubpassIndex = 0;
+ uint32_t lastUseSubpassIndex = 0;
+
+public:
+ MTLPixelFormat format = MTLPixelFormatInvalid;
+ MDAttachmentType type = MDAttachmentType::None;
+ MTLLoadAction loadAction = MTLLoadActionDontCare;
+ MTLStoreAction storeAction = MTLStoreActionDontCare;
+ MTLLoadAction stencilLoadAction = MTLLoadActionDontCare;
+ MTLStoreAction stencilStoreAction = MTLStoreActionDontCare;
+ uint32_t samples = 1;
+
+ /*!
+ * @brief Returns true if this attachment is first used in the given subpass.
+ * @param p_subpass The subpass whose index is compared with the first-use index.
+ * @return true when the indices are equal.
+ */
+ _FORCE_INLINE_ bool isFirstUseOf(MDSubpass const &p_subpass) const {
+ return p_subpass.subpass_index == firstUseSubpassIndex;
+ }
+
+ /*!
+ * @brief Returns true if this attachment is last used in the given subpass.
+ * @param p_subpass The subpass whose index is compared with the last-use index.
+ * @return true when the indices are equal.
+ */
+ _FORCE_INLINE_ bool isLastUseOf(MDSubpass const &p_subpass) const {
+ return p_subpass.subpass_index == lastUseSubpassIndex;
+ }
+
+ // NOTE(review): presumably computes the private index / first-use / last-use
+ // members from p_pass; confirm in the implementation.
+ void linkToSubpass(MDRenderPass const &p_pass);
+
+ /** Returns the Metal store action to use for this attachment in the given subpass. */
+ MTLStoreAction getMTLStoreAction(MDSubpass const &p_subpass,
+ bool p_is_rendering_entire_area,
+ bool p_has_resolve,
+ bool p_can_resolve,
+ bool p_is_stencil) const;
+ /** Configures p_desc for this attachment and the texture p_attachment in the given subpass. */
+ // NOTE(review): the meaning of the bool result is defined in the implementation; confirm there.
+ bool configureDescriptor(MTLRenderPassAttachmentDescriptor *p_desc,
+ PixelFormats &p_pf,
+ MDSubpass const &p_subpass,
+ id<MTLTexture> p_attachment,
+ bool p_is_rendering_entire_area,
+ bool p_has_resolve,
+ bool p_can_resolve,
+ bool p_is_stencil) const;
+ /** Returns whether this attachment should be cleared in the subpass. */
+ bool shouldClear(MDSubpass const &p_subpass, bool p_is_stencil) const;
+};
+
+/// A render pass description: its attachments and its subpasses.
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderPass {
+public:
+ Vector<MDAttachment> attachments;
+ Vector<MDSubpass> subpasses;
+
+ /// Returns the sample count of the first attachment, or 1 when the pass has
+ /// no attachments.
+ uint32_t get_sample_count() const {
+ if (attachments.is_empty()) {
+ return 1;
+ }
+ return attachments[0].samples;
+ }
+
+ MDRenderPass(Vector<MDAttachment> &p_attachments, Vector<MDSubpass> &p_subpasses);
+};
+
+/// Base class for pipelines; `type` tells which concrete pipeline class an
+/// instance is, so callers can downcast after checking it.
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDPipeline {
+public:
+ MDPipelineType type;
+
+ explicit MDPipeline(MDPipelineType p_type) :
+ type(p_type) {}
+ virtual ~MDPipeline() = default;
+};
+
+/// A render pipeline: the Metal pipeline and depth / stencil state objects, the
+/// push constant layout, and the raster settings that are applied to a render
+/// command encoder through raster_state.apply().
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderPipeline final : public MDPipeline {
+public:
+ id<MTLRenderPipelineState> state = nil;
+ id<MTLDepthStencilState> depth_stencil = nil;
+ uint32_t push_constant_size = 0;
+ uint32_t push_constant_stages_mask = 0;
+ SampleCount sample_count = SampleCount1;
+
+ struct {
+ MTLCullMode cull_mode = MTLCullModeNone;
+ MTLTriangleFillMode fill_mode = MTLTriangleFillModeFill;
+ MTLDepthClipMode clip_mode = MTLDepthClipModeClip;
+ MTLWinding winding = MTLWindingClockwise;
+ MTLPrimitiveType render_primitive = MTLPrimitiveTypePoint;
+
+ struct {
+ bool enabled = false;
+ } depth_test;
+
+ struct {
+ bool enabled = false;
+ float depth_bias = 0.0f;
+ float slope_scale = 0.0f;
+ float clamp = 0.0f;
+ /// Sets the depth bias on the encoder; does nothing while disabled.
+ _FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const {
+ if (enabled) {
+ [p_enc setDepthBias:depth_bias slopeScale:slope_scale clamp:clamp];
+ }
+ }
+ } depth_bias;
+
+ struct {
+ bool enabled = false;
+ uint32_t front_reference = 0;
+ uint32_t back_reference = 0;
+ /// Sets the stencil reference values on the encoder; does nothing while disabled.
+ _FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const {
+ if (enabled) {
+ [p_enc setStencilFrontReferenceValue:front_reference backReferenceValue:back_reference];
+ }
+ }
+ } stencil;
+
+ struct {
+ bool enabled = false;
+ float r = 0.0f;
+ float g = 0.0f;
+ float b = 0.0f;
+ float a = 0.0f;
+
+ /// Sets the blend color on the encoder. The `enabled` flag is not
+ /// consulted here: the color is always applied.
+ _FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const {
+ [p_enc setBlendColorRed:r green:g blue:b alpha:a];
+ }
+ } blend;
+
+ /// Applies cull mode, fill mode, depth clip mode and winding, followed by
+ /// the depth bias, stencil and blend sub-states, to the encoder.
+ _FORCE_INLINE_ void apply(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) const {
+ [p_enc setCullMode:cull_mode];
+ [p_enc setTriangleFillMode:fill_mode];
+ [p_enc setDepthClipMode:clip_mode];
+ [p_enc setFrontFacingWinding:winding];
+ depth_bias.apply(p_enc);
+ stencil.apply(p_enc);
+ blend.apply(p_enc);
+ }
+
+ } raster_state;
+
+ MDRenderShader *shader = nullptr;
+
+ MDRenderPipeline() :
+ MDPipeline(MDPipelineType::Render) {}
+ ~MDRenderPipeline() final = default;
+};
+
+/// A compute pipeline: the Metal compute pipeline state, the shader it was
+/// built from and the associated compute state.
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDComputePipeline final : public MDPipeline {
+public:
+ id<MTLComputePipelineState> state = nil;
+ struct {
+ // NOTE(review): presumably the threadgroup (local) size used for dispatches; confirm.
+ MTLSize local = {};
+ } compute_state;
+
+ MDComputeShader *shader = nil;
+
+ /// Wraps an existing compute pipeline state.
+ explicit MDComputePipeline(id<MTLComputePipelineState> p_state) :
+ MDPipeline(MDPipelineType::Compute), state(p_state) {}
+ ~MDComputePipeline() final = default;
+};
+
+/// A framebuffer: a list of textures and the framebuffer size.
+class API_AVAILABLE(macos(11.0), ios(14.0)) MDFrameBuffer {
+public:
+ Vector<MTL::Texture> textures;
+ Size2i size;
+ /// Creates a framebuffer from the given textures and size.
+ MDFrameBuffer(Vector<MTL::Texture> p_textures, Size2i p_size) :
+ textures(p_textures), size(p_size) {}
+ /// Creates an empty framebuffer.
+ MDFrameBuffer() {}
+
+ virtual ~MDFrameBuffer() = default;
+};
+
+// These functions are used to convert between Objective-C objects and
+// the RIDs used by Godot, respecting automatic reference counting.
+namespace rid {
+
+// Converts an Objective-C object to a pointer and increments its
+// reference count (the caller becomes responsible for a matching release()).
+_FORCE_INLINE_
+void *owned(id p_id) {
+ return (__bridge_retained void *)p_id;
+}
+
+// Defines a make() overload that retains an object of type FROM via owned()
+// and wraps the resulting pointer in the ID type TO.
+#define MAKE_ID(FROM, TO) \
+ _FORCE_INLINE_ TO make(FROM p_obj) { return TO(owned(p_obj)); }
+
+MAKE_ID(id<MTLTexture>, RDD::TextureID)
+MAKE_ID(id<MTLBuffer>, RDD::BufferID)
+MAKE_ID(id<MTLSamplerState>, RDD::SamplerID)
+MAKE_ID(MTLVertexDescriptor *, RDD::VertexFormatID)
+MAKE_ID(id<MTLCommandQueue>, RDD::CommandPoolID)
+
+// Converts a pointer to an Objective-C object without changing the reference count.
+// Returns nil when the ID is zero.
+_FORCE_INLINE_
+auto get(RDD::ID p_id) {
+ return (p_id.id) ? (__bridge ::id)(void *)p_id.id : nil;
+}
+
+// Converts a pointer to an Objective-C object and transfers ownership back to
+// ARC, balancing the retain performed by owned().
+_FORCE_INLINE_
+auto release(RDD::ID p_id) {
+ return (__bridge_transfer ::id)(void *)p_id.id;
+}
+
+} // namespace rid
+
+#endif // METAL_OBJECTS_H
diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm
new file mode 100644
index 0000000000..abdcccf00c
--- /dev/null
+++ b/drivers/metal/metal_objects.mm
@@ -0,0 +1,1581 @@
+/**************************************************************************/
+/* metal_objects.mm */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+/**************************************************************************/
+/* */
+/* Portions of this code were derived from MoltenVK. */
+/* */
+/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
+/* (http://www.brenwill.com) */
+/* */
+/* Licensed under the Apache License, Version 2.0 (the "License"); */
+/* you may not use this file except in compliance with the License. */
+/* You may obtain a copy of the License at */
+/* */
+/* http://www.apache.org/licenses/LICENSE-2.0 */
+/* */
+/* Unless required by applicable law or agreed to in writing, software */
+/* distributed under the License is distributed on an "AS IS" BASIS, */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
+/* implied. See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/**************************************************************************/
+
+#import "metal_objects.h"
+
+#import "metal_utils.h"
+#import "pixel_formats.h"
+#import "rendering_device_driver_metal.h"
+
+#import <os/signpost.h>
+
+/// Obtains a new Metal command buffer from the queue.
+void MDCommandBuffer::begin() {
+ // A previous command buffer must have been committed before starting a new one.
+ DEV_ASSERT(commandBuffer == nil);
+ commandBuffer = [queue commandBuffer];
+}
+
+/// Ends whichever pass (render, compute or blit) is currently being encoded.
+/// Does nothing when no pass is active.
+void MDCommandBuffer::end() {
+ if (type == MDCommandBufferStateType::Render) {
+ render_end_pass();
+ } else if (type == MDCommandBufferStateType::Compute) {
+ _end_compute_dispatch();
+ } else if (type == MDCommandBufferStateType::Blit) {
+ _end_blit();
+ }
+}
+
+/// Ends the active pass, commits the Metal command buffer and drops the
+/// reference to it so that begin() can be called again.
+void MDCommandBuffer::commit() {
+ end();
+ [commandBuffer commit];
+ commandBuffer = nil;
+}
+
+/// Binds a render or compute pipeline.
+///
+/// For a render pipeline, a render pass must already be active: the encoder is
+/// created on demand and the relevant dirty flags are raised when the pipeline
+/// changes. For a compute pipeline, a compute encoder is created and the
+/// pipeline state is set on it immediately.
+void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
+ MDPipeline *p = (MDPipeline *)(p_pipeline.id);
+
+ // End current encoder if it is a compute encoder or blit encoder,
+ // as they do not have a defined end boundary in the RDD like render.
+ if (type == MDCommandBufferStateType::Compute) {
+ _end_compute_dispatch();
+ } else if (type == MDCommandBufferStateType::Blit) {
+ _end_blit();
+ }
+
+ if (p->type == MDPipelineType::Render) {
+ DEV_ASSERT(type == MDCommandBufferStateType::Render);
+ MDRenderPipeline *rp = (MDRenderPipeline *)p;
+
+ if (render.encoder == nil) {
+ // This condition occurs when there are no attachments when calling render_next_subpass()
+ // and is due to the SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS flag.
+ render.desc.defaultRasterSampleCount = static_cast<NSUInteger>(rp->sample_count);
+
+// NOTE(sgc): This is to test rdar://FB13605547 and will be deleted once fix is confirmed.
+#if 0
+ if (render.pipeline->sample_count == 4) {
+ static id<MTLTexture> tex = nil;
+ static id<MTLTexture> res_tex = nil;
+ static dispatch_once_t onceToken;
+ dispatch_once(&onceToken, ^{
+ Size2i sz = render.frameBuffer->size;
+ MTLTextureDescriptor *td = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm width:sz.width height:sz.height mipmapped:NO];
+ td.textureType = MTLTextureType2DMultisample;
+ td.storageMode = MTLStorageModeMemoryless;
+ td.usage = MTLTextureUsageRenderTarget;
+ td.sampleCount = render.pipeline->sample_count;
+ tex = [device_driver->get_device() newTextureWithDescriptor:td];
+
+ td.textureType = MTLTextureType2D;
+ td.storageMode = MTLStorageModePrivate;
+ td.usage = MTLTextureUsageShaderWrite;
+ td.sampleCount = 1;
+ res_tex = [device_driver->get_device() newTextureWithDescriptor:td];
+ });
+ render.desc.colorAttachments[0].texture = tex;
+ render.desc.colorAttachments[0].loadAction = MTLLoadActionClear;
+ render.desc.colorAttachments[0].storeAction = MTLStoreActionMultisampleResolve;
+
+ render.desc.colorAttachments[0].resolveTexture = res_tex;
+ }
+#endif
+ render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:render.desc];
+ }
+
+ // Only raise dirty flags when the pipeline actually changes.
+ if (render.pipeline != rp) {
+ render.dirty.set_flag((RenderState::DirtyFlag)(RenderState::DIRTY_PIPELINE | RenderState::DIRTY_RASTER));
+ // Mark all uniforms as dirty, as variants of a shader pipeline may have a different entry point ABI,
+ // due to setting force_active_argument_buffer_resources = true for spirv_cross::CompilerMSL::Options.
+ // As a result, uniform sets with the same layout will generate redundant binding warnings when
+ // capturing a Metal frame in Xcode.
+ //
+ // If we don't mark as dirty, then some bindings will generate a validation error.
+ render.mark_uniforms_dirty();
+ // The depth / stencil state only needs re-applying when it differs from the previous pipeline's.
+ if (render.pipeline != nullptr && render.pipeline->depth_stencil != rp->depth_stencil) {
+ render.dirty.set_flag(RenderState::DIRTY_DEPTH);
+ }
+ render.pipeline = rp;
+ }
+ } else if (p->type == MDPipelineType::Compute) {
+ // Any previous compute or blit encoder was ended above, so no pass may be active here.
+ DEV_ASSERT(type == MDCommandBufferStateType::None);
+ type = MDCommandBufferStateType::Compute;
+
+ compute.pipeline = (MDComputePipeline *)p;
+ compute.encoder = commandBuffer.computeCommandEncoder;
+ [compute.encoder setComputePipelineState:compute.pipeline->state];
+ }
+}
+
+/// Returns the active blit encoder, creating one (after ending any render or
+/// compute pass) when no blit pass is currently active.
+id<MTLBlitCommandEncoder> MDCommandBuffer::blit_command_encoder() {
+ // A blit pass is already active: reuse its encoder.
+ if (type == MDCommandBufferStateType::Blit) {
+ return blit.encoder;
+ }
+
+ // End whichever other pass is active before starting the blit pass.
+ if (type == MDCommandBufferStateType::Render) {
+ render_end_pass();
+ } else if (type == MDCommandBufferStateType::Compute) {
+ _end_compute_dispatch();
+ }
+
+ type = MDCommandBufferStateType::Blit;
+ blit.encoder = [commandBuffer blitCommandEncoder];
+ return blit.encoder;
+}
+
+/// Ends the active pass, then encodes a render pass for p_desc that contains no
+/// draw commands. When p_label is not nil, an empty debug group with that label
+/// is recorded inside the pass.
+void MDCommandBuffer::encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label) {
+ if (type == MDCommandBufferStateType::Render) {
+ render_end_pass();
+ } else if (type == MDCommandBufferStateType::Compute) {
+ _end_compute_dispatch();
+ } else if (type == MDCommandBufferStateType::Blit) {
+ _end_blit();
+ }
+
+ id<MTLRenderCommandEncoder> encoder = [commandBuffer renderCommandEncoderWithDescriptor:p_desc];
+ const bool has_label = (p_label != nil);
+ if (has_label) {
+ [encoder pushDebugGroup:p_label];
+ [encoder popDebugGroup];
+ }
+ [encoder endEncoding];
+}
+
+#pragma mark - Render Commands
+
+/// Records p_uniform_set in the slot given by the set's own index and flags it
+/// as dirty when it differs from the set currently stored there.
+/// p_shader and p_set_index are not used by this function.
+void MDCommandBuffer::render_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) {
+ DEV_ASSERT(type == MDCommandBufferStateType::Render);
+
+ MDUniformSet *uniform_set = (MDUniformSet *)(p_uniform_set.id);
+ const uint32_t slot = uniform_set->index;
+
+ // Grow the list so that `slot` is valid, filling every new entry with null.
+ const uint32_t previous_count = render.uniform_sets.size();
+ if (previous_count <= slot) {
+ render.uniform_sets.resize(slot + 1);
+ for (uint32_t i = previous_count; i <= slot; i++) {
+ render.uniform_sets[i] = nullptr;
+ }
+ }
+
+ // Nothing to do when the same set is already bound to the slot.
+ if (render.uniform_sets[slot] == uniform_set) {
+ return;
+ }
+
+ render.uniform_sets[slot] = uniform_set;
+ render.uniform_set_mask |= 1ULL << slot;
+ render.dirty.set_flag(RenderState::DIRTY_UNIFORMS);
+}
+
+/// Clears the given attachments inside the given rectangles by drawing two
+/// triangles (six vertices) per rectangle with a clear pipeline obtained from the
+/// resource cache. Because this overwrites encoder state, the affected dirty
+/// flags are raised afterwards so the next draw re-applies the application state.
+void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) {
+ DEV_ASSERT(type == MDCommandBufferStateType::Render);
+
+ uint32_t vertex_count = p_rects.size() * 6;
+
+ // Runtime-sized stack array holding six vertices per rectangle.
+ simd::float4 vertices[vertex_count];
+ // NOTE(review): only the entries written below are initialized, yet the whole
+ // array is uploaded to the shaders; presumably the clear pipeline only reads
+ // the entries enabled in `key`. Confirm.
+ simd::float4 clear_colors[ClearAttKey::ATTACHMENT_COUNT];
+
+ // Limit the framebuffer rectangle to the render area, then use its far corner as the size.
+ Size2i size = render.frameBuffer->size;
+ Rect2i render_area = render.clip_to_render_area({ { 0, 0 }, size });
+ size = Size2i(render_area.position.x + render_area.size.width, render_area.position.y + render_area.size.height);
+ _populate_vertices(vertices, size, p_rects);
+
+ ClearAttKey key;
+ key.sample_count = render.pass->get_sample_count();
+
+ float depth_value = 0;
+ uint32_t stencil_value = 0;
+
+ for (uint32_t i = 0; i < p_attachment_clears.size(); i++) {
+ RDD::AttachmentClear const &attClear = p_attachment_clears[i];
+ // Color clears name their attachment directly; anything else targets the
+ // depth / stencil attachment of the current subpass.
+ uint32_t attachment_index;
+ if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) {
+ attachment_index = attClear.color_attachment;
+ } else {
+ attachment_index = render.pass->subpasses[render.current_subpass].depth_stencil_reference.attachment;
+ }
+
+ MDAttachment const &mda = render.pass->attachments[attachment_index];
+ if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) {
+ key.set_color_format(attachment_index, mda.format);
+ clear_colors[attachment_index] = {
+ attClear.value.color.r,
+ attClear.value.color.g,
+ attClear.value.color.b,
+ attClear.value.color.a
+ };
+ }
+
+ if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT)) {
+ key.set_depth_format(mda.format);
+ depth_value = attClear.value.depth;
+ }
+
+ if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT)) {
+ key.set_stencil_format(mda.format);
+ stencil_value = attClear.value.stencil;
+ }
+ }
+ // The depth clear value travels in the DEPTH_INDEX slot of the color array.
+ clear_colors[ClearAttKey::DEPTH_INDEX] = {
+ depth_value,
+ depth_value,
+ depth_value,
+ depth_value
+ };
+
+ id<MTLRenderCommandEncoder> enc = render.encoder;
+
+ MDResourceCache &cache = device_driver->get_resource_cache();
+
+ [enc pushDebugGroup:@"ClearAttachments"];
+ [enc setRenderPipelineState:cache.get_clear_render_pipeline_state(key, nil)];
+ [enc setDepthStencilState:cache.get_depth_stencil_state(
+ key.is_depth_enabled(),
+ key.is_stencil_enabled())];
+ [enc setStencilReferenceValue:stencil_value];
+ [enc setCullMode:MTLCullModeNone];
+ [enc setTriangleFillMode:MTLTriangleFillModeFill];
+ [enc setDepthBias:0 slopeScale:0 clamp:0];
+ [enc setViewport:{ 0, 0, (double)size.width, (double)size.height, 0.0, 1.0 }];
+ [enc setScissorRect:{ 0, 0, (NSUInteger)size.width, (NSUInteger)size.height }];
+
+ [enc setVertexBytes:clear_colors length:sizeof(clear_colors) atIndex:0];
+ [enc setFragmentBytes:clear_colors length:sizeof(clear_colors) atIndex:0];
+ [enc setVertexBytes:vertices length:vertex_count * sizeof(vertices[0]) atIndex:device_driver->get_metal_buffer_index_for_vertex_attribute_binding(VERT_CONTENT_BUFFER_INDEX)];
+
+ [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:vertex_count];
+ [enc popDebugGroup];
+
+ // The clear replaced pipeline, depth / stencil, raster, viewport, scissor and
+ // vertex state on the encoder, so flag them for re-application.
+ render.dirty.set_flag((RenderState::DirtyFlag)(RenderState::DIRTY_PIPELINE | RenderState::DIRTY_DEPTH | RenderState::DIRTY_RASTER));
+ render.mark_uniforms_dirty({ 0 }); // Mark index 0 dirty, if there is already a binding for index 0.
+ render.mark_viewport_dirty();
+ render.mark_scissors_dirty();
+ render.mark_vertex_dirty();
+}
+
+// Applies every piece of render state that is flagged dirty to the current
+// render encoder, then clears all dirty flags. Uniform sets are bound first
+// through _render_bind_uniform_sets().
+void MDCommandBuffer::_render_set_dirty_state() {
+	_render_bind_uniform_sets();
+
+	if (render.dirty.has_flag(RenderState::DIRTY_PIPELINE)) {
+		[render.encoder setRenderPipelineState:render.pipeline->state];
+	}
+
+	if (render.dirty.has_flag(RenderState::DIRTY_VIEWPORT)) {
+		[render.encoder setViewports:render.viewports.ptr() count:render.viewports.size()];
+	}
+
+	if (render.dirty.has_flag(RenderState::DIRTY_DEPTH)) {
+		[render.encoder setDepthStencilState:render.pipeline->depth_stencil];
+	}
+
+	if (render.dirty.has_flag(RenderState::DIRTY_RASTER)) {
+		render.pipeline->raster_state.apply(render.encoder);
+	}
+
+	if (render.dirty.has_flag(RenderState::DIRTY_SCISSOR) && !render.scissors.is_empty()) {
+		// Each recorded scissor is clipped to the render area before being handed to Metal.
+		size_t len = render.scissors.size();
+		MTLScissorRect rects[len];
+		for (size_t i = 0; i < len; i++) {
+			rects[i] = render.clip_to_render_area(render.scissors[i]);
+		}
+		[render.encoder setScissorRects:rects count:len];
+	}
+
+	if (render.dirty.has_flag(RenderState::DIRTY_BLEND) && render.blend_constants.has_value()) {
+		[render.encoder setBlendColorRed:render.blend_constants->r green:render.blend_constants->g blue:render.blend_constants->b alpha:render.blend_constants->a];
+	}
+
+	if (render.dirty.has_flag(RenderState::DIRTY_VERTEX)) {
+		uint32_t binding_count = render.vertex_buffers.size();
+		// With no vertex buffers recorded, `binding_count - 1` would wrap around
+		// and produce a bogus Metal buffer index, so skip the bind entirely.
+		if (binding_count > 0) {
+			// Bindings are stored in descending order, so the highest binding maps to the first Metal index.
+			uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(binding_count - 1);
+			[render.encoder setVertexBuffers:render.vertex_buffers.ptr()
+									 offsets:render.vertex_offsets.ptr()
+								   withRange:NSMakeRange(first, binding_count)];
+		}
+	}
+
+	render.dirty.clear();
+}
+
+// Records the given viewports as Metal viewports with a fixed [0, 1] depth
+// range and flags the viewport state as dirty.
+void MDCommandBuffer::render_set_viewport(VectorView<Rect2i> p_viewports) {
+	const uint32_t viewport_count = p_viewports.size();
+	render.viewports.resize(viewport_count);
+
+	for (uint32_t idx = 0; idx < viewport_count; idx++) {
+		Rect2i const &rect = p_viewports[idx];
+
+		MTLViewport viewport;
+		viewport.originX = static_cast<double>(rect.position.x);
+		viewport.originY = static_cast<double>(rect.position.y);
+		viewport.width = static_cast<double>(rect.size.width);
+		viewport.height = static_cast<double>(rect.size.height);
+		viewport.znear = 0.0;
+		viewport.zfar = 1.0;
+
+		render.viewports[idx] = viewport;
+	}
+
+	render.dirty.set_flag(RenderState::DIRTY_VIEWPORT);
+}
+
+// Records the given scissor rectangles and flags the scissor state as dirty.
+// The rectangles are stored unclipped.
+void MDCommandBuffer::render_set_scissor(VectorView<Rect2i> p_scissors) {
+	const uint32_t scissor_count = p_scissors.size();
+	render.scissors.resize(scissor_count);
+
+	for (uint32_t idx = 0; idx < scissor_count; idx++) {
+		Rect2i const &rect = p_scissors[idx];
+		render.scissors[idx] = {
+			.x = static_cast<NSUInteger>(rect.position.x),
+			.y = static_cast<NSUInteger>(rect.position.y),
+			.width = static_cast<NSUInteger>(rect.size.width),
+			.height = static_cast<NSUInteger>(rect.size.height),
+		};
+	}
+
+	render.dirty.set_flag(RenderState::DIRTY_SCISSOR);
+}
+
+// Stores new blend constants and flags the blend state dirty, but only when
+// they differ from the currently recorded value.
+void MDCommandBuffer::render_set_blend_constants(const Color &p_constants) {
+	DEV_ASSERT(type == MDCommandBufferStateType::Render);
+
+	const bool constants_changed = render.blend_constants != p_constants;
+	if (!constants_changed) {
+		return;
+	}
+
+	render.blend_constants = p_constants;
+	render.dirty.set_flag(RenderState::DIRTY_BLEND);
+}
+
+// Binds every uniform set whose bit is set in render.uniform_set_mask to the
+// current render encoder, for the vertex and fragment stages. Does nothing
+// unless DIRTY_UNIFORMS is flagged; the flag and the mask are both consumed.
+void MDCommandBuffer::_render_bind_uniform_sets() {
+	DEV_ASSERT(type == MDCommandBufferStateType::Render);
+	if (!render.dirty.has_flag(RenderState::DIRTY_UNIFORMS)) {
+		return;
+	}
+
+	render.dirty.clear_flag(RenderState::DIRTY_UNIFORMS);
+	const uint64_t dirty_sets = render.uniform_set_mask;
+	render.uniform_set_mask = 0;
+
+	id<MTLRenderCommandEncoder> enc = render.encoder;
+	MDRenderShader *shader = render.pipeline->shader;
+	id<MTLDevice> device = enc.device;
+
+	// Visit the set bits from lowest to highest; `pending &= pending - 1` drops the lowest set bit.
+	for (uint64_t pending = dirty_sets; pending != 0; pending &= pending - 1) {
+		const int slot = __builtin_ctzll(pending);
+
+		MDUniformSet *uniform_set = render.uniform_sets[slot];
+		if (uniform_set == nullptr || uniform_set->index >= (uint32_t)shader->sets.size()) {
+			// Nothing recorded in this slot, or the shader has no such set.
+			continue;
+		}
+		UniformSet const &layout = shader->sets[uniform_set->index];
+
+		BoundUniformSet &bound = uniform_set->boundUniformSetForShader(shader, device);
+
+		// Declare how each referenced resource is used by each render stage.
+		for (KeyValue<id<MTLResource>, StageResourceUsage> const &entry : bound.bound_resources) {
+			const MTLResourceUsage vertex_usage = resource_usage_for_stage(entry.value, RDD::ShaderStage::SHADER_STAGE_VERTEX);
+			if (vertex_usage != 0) {
+				[enc useResource:entry.key usage:vertex_usage stages:MTLRenderStageVertex];
+			}
+			const MTLResourceUsage fragment_usage = resource_usage_for_stage(entry.value, RDD::ShaderStage::SHADER_STAGE_FRAGMENT);
+			if (fragment_usage != 0) {
+				[enc useResource:entry.key usage:fragment_usage stages:MTLRenderStageFragment];
+			}
+		}
+
+		// Bind the argument buffer for each stage that has an offset in it.
+		uint32_t const *vertex_offset = layout.offsets.getptr(RDD::SHADER_STAGE_VERTEX);
+		if (vertex_offset) {
+			[enc setVertexBuffer:bound.buffer offset:*vertex_offset atIndex:uniform_set->index];
+		}
+
+		uint32_t const *fragment_offset = layout.offsets.getptr(RDD::SHADER_STAGE_FRAGMENT);
+		if (fragment_offset) {
+			[enc setFragmentBuffer:bound.buffer offset:*fragment_offset atIndex:uniform_set->index];
+		}
+	}
+}
+
+// Fills p_vertices with the clear-quad vertices of every rectangle in p_rects,
+// written back to back starting at index 0.
+void MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView<Rect2i> p_rects) {
+	uint32_t next_vertex = 0;
+	const uint32_t rect_count = p_rects.size();
+	for (uint32_t r = 0; r < rect_count; r++) {
+		next_vertex = _populate_vertices(p_vertices, next_vertex, p_rects[r], p_fb_size);
+	}
+}
+
+// Writes the six vertices (two triangles) covering p_rect into p_vertices,
+// starting at p_index, and returns the index just past the last vertex written.
+// Positions are expressed in clip space relative to a framebuffer of size p_fb_size.
+uint32_t MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size) {
+	// Edges of the rectangle as a fraction of the attachment size.
+	float left = (float)(p_rect.position.x) / (float)p_fb_size.width;
+	float right = (float)(p_rect.size.width) / (float)p_fb_size.width + left;
+	float bottom = (float)(p_rect.position.y) / (float)p_fb_size.height;
+	float top = (float)(p_rect.size.height) / (float)p_fb_size.height + bottom;
+
+	// Map the [0, 1] fractions onto the [-1, 1] clip-space range.
+	left = (left * 2.0f) - 1.0f;
+	right = (right * 2.0f) - 1.0f;
+	bottom = (bottom * 2.0f) - 1.0f;
+	top = (top * 2.0f) - 1.0f;
+
+	uint32_t cursor = p_index;
+
+	// Appends one vertex at (p_x, p_y) with z = 0 and w = 1.
+	auto emit = [&](float p_x, float p_y) {
+		simd::float4 vertex;
+		vertex.z = 0.0;
+		vertex.w = (float)1;
+		vertex.y = p_y;
+		vertex.x = p_x;
+		p_vertices[cursor++] = vertex;
+	};
+
+	// First triangle: top left, bottom left, bottom right.
+	emit(left, top);
+	emit(left, bottom);
+	emit(right, bottom);
+
+	// Second triangle: bottom right, top right, top left.
+	emit(right, bottom);
+	emit(right, top);
+	emit(left, top);
+
+	return cursor;
+}
+
+// Begins a render pass: ends whatever the command buffer was encoding, records
+// the pass, framebuffer, render area and clear values, then starts the first subpass.
+void MDCommandBuffer::render_begin_pass(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_frameBuffer, RDD::CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RDD::RenderPassClearValue> p_clear_values) {
+	DEV_ASSERT(commandBuffer != nil);
+	end();
+
+	MDFrameBuffer *framebuffer = (MDFrameBuffer *)(p_frameBuffer.id);
+
+	type = MDCommandBufferStateType::Render;
+	render.pass = (MDRenderPass *)(p_render_pass.id);
+	// UINT32_MAX marks "no subpass started yet" for render_next_subpass().
+	render.current_subpass = UINT32_MAX;
+	render.render_area = p_rect;
+
+	const uint32_t clear_value_count = p_clear_values.size();
+	render.clear_values.resize(clear_value_count);
+	for (uint32_t idx = 0; idx < clear_value_count; idx++) {
+		render.clear_values[idx] = p_clear_values[idx];
+	}
+
+	// The whole framebuffer is rendered when the area starts at the origin and matches its size.
+	const bool starts_at_origin = (p_rect.position == Point2i(0, 0));
+	render.is_rendering_entire_area = starts_at_origin && p_rect.size == framebuffer->size;
+	render.frameBuffer = framebuffer;
+	render_next_subpass();
+}
+
+// Finishes encoding the current subpass: checks that every resolve target uses
+// a pixel format that supports resolve, then ends the encoder and drops it.
+void MDCommandBuffer::_end_render_pass() {
+	MDFrameBuffer const &fb_info = *render.frameBuffer;
+	MDRenderPass const &pass_info = *render.pass;
+	MDSubpass const &subpass = pass_info.subpasses[render.current_subpass];
+
+	PixelFormats &pf = device_driver->get_pixel_formats();
+
+	const uint32_t resolve_count = subpass.resolve_references.size();
+	for (uint32_t ref = 0; ref < resolve_count; ref++) {
+		const uint32_t color_index = subpass.color_references[ref].attachment;
+		const uint32_t resolve_index = subpass.resolve_references[ref].attachment;
+		// A color reference and its resolve reference must be used or unused together.
+		DEV_ASSERT((color_index == RDD::AttachmentReference::UNUSED) == (resolve_index == RDD::AttachmentReference::UNUSED));
+
+		const bool has_color_texture = color_index != RDD::AttachmentReference::UNUSED && fb_info.textures[color_index];
+		if (has_color_texture) {
+			id<MTLTexture> resolve_tex = fb_info.textures[resolve_index];
+
+			CRASH_COND_MSG(!flags::all(pf.getCapabilities(resolve_tex.pixelFormat), kMTLFmtCapsResolve), "not implemented: unresolvable texture types");
+			// see: https://github.com/KhronosGroup/MoltenVK/blob/d20d13fe2735adb845636a81522df1b9d89c0fba/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm#L407
+		}
+	}
+
+	[render.encoder endEncoding];
+	render.encoder = nil;
+}
+
+// Clears the render area of every attachment of the current subpass that is
+// configured to be cleared on load. Called when only part of the framebuffer
+// is rendered, where the clear is issued through render_clear_attachments().
+void MDCommandBuffer::_render_clear_render_area() {
+	MDRenderPass const &pass = *render.pass;
+	MDSubpass const &subpass = pass.subpasses[render.current_subpass];
+
+	// First determine attachments that should be cleared.
+	LocalVector<RDD::AttachmentClear> clears;
+	clears.reserve(subpass.color_references.size() + /* possible depth stencil clear */ 1);
+
+	for (uint32_t i = 0; i < subpass.color_references.size(); i++) {
+		uint32_t idx = subpass.color_references[i].attachment;
+		if (idx != RDD::AttachmentReference::UNUSED && pass.attachments[idx].shouldClear(subpass, false)) {
+			clears.push_back({ .aspect = RDD::TEXTURE_ASPECT_COLOR_BIT, .color_attachment = idx, .value = render.clear_values[idx] });
+		}
+	}
+
+	// Only look up the depth/stencil attachment when the subpass actually has one:
+	// indexing pass.attachments with the UNUSED sentinel is out of range.
+	uint32_t ds_index = subpass.depth_stencil_reference.attachment;
+	if (ds_index != RDD::AttachmentReference::UNUSED) {
+		MDAttachment const &attachment = pass.attachments[ds_index];
+		bool shouldClearDepth = attachment.shouldClear(subpass, false);
+		bool shouldClearStencil = attachment.shouldClear(subpass, true);
+		if (shouldClearDepth || shouldClearStencil) {
+			BitField<RDD::TextureAspectBits> bits;
+			if (shouldClearDepth && (attachment.type & MDAttachmentType::Depth)) {
+				bits.set_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT);
+			}
+			if (shouldClearStencil && (attachment.type & MDAttachmentType::Stencil)) {
+				bits.set_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT);
+			}
+
+			clears.push_back({ .aspect = bits, .color_attachment = ds_index, .value = render.clear_values[ds_index] });
+		}
+	}
+
+	if (clears.is_empty()) {
+		return;
+	}
+
+	render_clear_attachments(clears, { render.render_area });
+}
+
+// Moves to the next subpass of the current render pass (or starts the first
+// one), builds the Metal render pass descriptor for it and, when the subpass
+// has at least one attachment, creates the render command encoder.
+void MDCommandBuffer::render_next_subpass() {
+	DEV_ASSERT(commandBuffer != nil);
+
+	if (render.current_subpass != UINT32_MAX) {
+		// A previous subpass is being encoded; finish it before advancing.
+		_end_render_pass();
+		render.current_subpass++;
+	} else {
+		render.current_subpass = 0;
+	}
+
+	MDFrameBuffer const &fb = *render.frameBuffer;
+	MDRenderPass const &pass = *render.pass;
+	MDSubpass const &subpass = pass.subpasses[render.current_subpass];
+
+	MTLRenderPassDescriptor *desc = MTLRenderPassDescriptor.renderPassDescriptor;
+	PixelFormats &pf = device_driver->get_pixel_formats();
+
+	uint32_t attachment_count = 0;
+
+	// Color attachments, each with an optional resolve target.
+	for (uint32_t i = 0; i < subpass.color_references.size(); i++) {
+		const uint32_t color_idx = subpass.color_references[i].attachment;
+		if (color_idx == RDD::AttachmentReference::UNUSED) {
+			continue;
+		}
+
+		attachment_count++;
+		MTLRenderPassColorAttachmentDescriptor *ca = desc.colorAttachments[i];
+
+		uint32_t resolve_idx = subpass.resolve_references.is_empty() ? RDD::AttachmentReference::UNUSED : subpass.resolve_references[i].attachment;
+		const bool has_resolve = resolve_idx != RDD::AttachmentReference::UNUSED;
+		bool can_resolve = true;
+		if (has_resolve) {
+			id<MTLTexture> resolve_tex = fb.textures[resolve_idx];
+			can_resolve = flags::all(pf.getCapabilities(resolve_tex.pixelFormat), kMTLFmtCapsResolve);
+			if (!can_resolve) {
+				CRASH_NOW_MSG("unimplemented: using a texture format that is not supported for resolve");
+			} else {
+				ca.resolveTexture = resolve_tex;
+			}
+		}
+
+		MDAttachment const &attachment = pass.attachments[color_idx];
+
+		id<MTLTexture> tex = fb.textures[color_idx];
+		if ((attachment.type & MDAttachmentType::Color)) {
+			// configureDescriptor() returns true when the attachment is cleared on load.
+			const bool clears_on_load = attachment.configureDescriptor(ca, pf, subpass, tex, render.is_rendering_entire_area, has_resolve, can_resolve, false);
+			if (clears_on_load) {
+				Color clearColor = render.clear_values[color_idx].color;
+				ca.clearColor = MTLClearColorMake(clearColor.r, clearColor.g, clearColor.b, clearColor.a);
+			}
+		}
+	}
+
+	// Depth and stencil share a single attachment reference.
+	const uint32_t ds_idx = subpass.depth_stencil_reference.attachment;
+	if (ds_idx != RDD::AttachmentReference::UNUSED) {
+		attachment_count++;
+		MDAttachment const &attachment = pass.attachments[ds_idx];
+		id<MTLTexture> tex = fb.textures[ds_idx];
+
+		if (attachment.type & MDAttachmentType::Depth) {
+			MTLRenderPassDepthAttachmentDescriptor *da = desc.depthAttachment;
+			if (attachment.configureDescriptor(da, pf, subpass, tex, render.is_rendering_entire_area, false, false, false)) {
+				da.clearDepth = render.clear_values[ds_idx].depth;
+			}
+		}
+
+		if (attachment.type & MDAttachmentType::Stencil) {
+			MTLRenderPassStencilAttachmentDescriptor *sa = desc.stencilAttachment;
+			if (attachment.configureDescriptor(sa, pf, subpass, tex, render.is_rendering_entire_area, false, false, true)) {
+				sa.clearStencil = render.clear_values[ds_idx].stencil;
+			}
+		}
+	}
+
+	// The render target covers the render area, limited to the framebuffer and at least 1x1.
+	desc.renderTargetWidth = MAX((NSUInteger)MIN(render.render_area.position.x + render.render_area.size.width, fb.size.width), 1u);
+	desc.renderTargetHeight = MAX((NSUInteger)MIN(render.render_area.position.y + render.render_area.size.height, fb.size.height), 1u);
+
+	if (attachment_count != 0) {
+		render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:desc];
+
+		if (!render.is_rendering_entire_area) {
+			_render_clear_render_area();
+		}
+		// With a new encoder, all state is dirty.
+		render.dirty.set_flag(RenderState::DIRTY_ALL);
+	} else {
+		// If there are no attachments, delay the creation of the encoder,
+		// so we can use a matching sample count for the pipeline, by setting
+		// the defaultRasterSampleCount from the pipeline's sample count.
+		render.desc = desc;
+	}
+}
+
+// Encodes a non-indexed, instanced draw using the primitive type of the bound
+// pipeline, after flushing any dirty render state.
+void MDCommandBuffer::render_draw(uint32_t p_vertex_count,
+		uint32_t p_instance_count,
+		uint32_t p_base_vertex,
+		uint32_t p_first_instance) {
+	DEV_ASSERT(type == MDCommandBufferStateType::Render);
+	_render_set_dirty_state();
+
+	// Flushing must leave no dirty flag behind.
+	DEV_ASSERT(render.dirty == 0);
+
+	[render.encoder drawPrimitives:render.pipeline->raster_state.render_primitive
+					   vertexStart:p_base_vertex
+					   vertexCount:p_vertex_count
+					 instanceCount:p_instance_count
+					  baseInstance:p_first_instance];
+}
+
+// Records the vertex buffers and offsets to bind. When an encoder already
+// exists they are bound immediately; otherwise the vertex state is flagged
+// dirty so they get bound once an encoder is available.
+void MDCommandBuffer::render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets) {
+	DEV_ASSERT(type == MDCommandBufferStateType::Render);
+
+	render.vertex_buffers.resize(p_binding_count);
+	render.vertex_offsets.resize(p_binding_count);
+
+	if (p_binding_count == 0) {
+		// Nothing to bind; `p_binding_count - 1` below would wrap around and
+		// produce a bogus Metal buffer index.
+		return;
+	}
+
+	// Reverse the buffers, as their bindings are assigned in descending order.
+	for (uint32_t i = 0; i < p_binding_count; i += 1) {
+		render.vertex_buffers[i] = rid::get(p_buffers[p_binding_count - i - 1]);
+		render.vertex_offsets[i] = p_offsets[p_binding_count - i - 1];
+	}
+
+	if (render.encoder) {
+		uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1);
+		[render.encoder setVertexBuffers:render.vertex_buffers.ptr()
+								 offsets:render.vertex_offsets.ptr()
+							   withRange:NSMakeRange(first, p_binding_count)];
+	} else {
+		render.dirty.set_flag(RenderState::DIRTY_VERTEX);
+	}
+}
+
+// Records the index buffer, its element type and its byte offset for later
+// indexed draws. Nothing is sent to the encoder here.
+void MDCommandBuffer::render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset) {
+	DEV_ASSERT(type == MDCommandBufferStateType::Render);
+
+	render.index_buffer = rid::get(p_buffer);
+	// Any format other than 16-bit is treated as 32-bit.
+	if (p_format == RDD::IndexBufferFormat::INDEX_BUFFER_FORMAT_UINT16) {
+		render.index_type = MTLIndexTypeUInt16;
+	} else {
+		render.index_type = MTLIndexTypeUInt32;
+	}
+	render.index_offset = p_offset;
+}
+
+// Encodes an indexed, instanced draw from the recorded index buffer, after
+// flushing any dirty render state. p_first_index is converted into a byte
+// offset and added to the offset recorded by render_bind_index_buffer().
+void MDCommandBuffer::render_draw_indexed(uint32_t p_index_count,
+		uint32_t p_instance_count,
+		uint32_t p_first_index,
+		int32_t p_vertex_offset,
+		uint32_t p_first_instance) {
+	DEV_ASSERT(type == MDCommandBufferStateType::Render);
+	_render_set_dirty_state();
+
+	id<MTLRenderCommandEncoder> enc = render.encoder;
+
+	// Metal takes the offset as NSUInteger; computing it in that width keeps
+	// the sum from being truncated to 32 bits.
+	const NSUInteger index_size = render.index_type == MTLIndexTypeUInt16 ? sizeof(uint16_t) : sizeof(uint32_t);
+	NSUInteger index_offset = render.index_offset;
+	index_offset += (NSUInteger)p_first_index * index_size;
+
+	[enc drawIndexedPrimitives:render.pipeline->raster_state.render_primitive
+					indexCount:p_index_count
+					 indexType:render.index_type
+				   indexBuffer:render.index_buffer
+			 indexBufferOffset:index_offset
+				 instanceCount:p_instance_count
+					baseVertex:p_vertex_offset
+				  baseInstance:p_first_instance];
+}
+
+// Encodes p_draw_count indexed draws whose arguments are read from
+// p_indirect_buffer, starting at p_offset and advancing by p_stride bytes per
+// draw. Dirty render state is flushed first.
+void MDCommandBuffer::render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
+	DEV_ASSERT(type == MDCommandBufferStateType::Render);
+	_render_set_dirty_state();
+
+	id<MTLRenderCommandEncoder> enc = render.encoder;
+
+	id<MTLBuffer> indirect_buffer = rid::get(p_indirect_buffer);
+	NSUInteger indirect_offset = p_offset;
+
+	for (uint32_t i = 0; i < p_draw_count; i++) {
+		// Honor the byte offset recorded by render_bind_index_buffer(), as
+		// render_draw_indexed() does, instead of always reading from offset 0.
+		[enc drawIndexedPrimitives:render.pipeline->raster_state.render_primitive
+						 indexType:render.index_type
+					   indexBuffer:render.index_buffer
+				 indexBufferOffset:render.index_offset
+					indirectBuffer:indirect_buffer
+			  indirectBufferOffset:indirect_offset];
+		indirect_offset += p_stride;
+	}
+}
+
+void MDCommandBuffer::render_draw_indexed_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { // Stub: indexed indirect draws with a draw count read from a buffer are not implemented.
+ ERR_FAIL_MSG("not implemented"); // Only reports the error; no draw is encoded and every parameter is unused.
+}
+
+// Encodes p_draw_count non-indexed draws whose arguments are read from
+// p_indirect_buffer, starting at p_offset and advancing by p_stride bytes per
+// draw. Dirty render state is flushed first.
+void MDCommandBuffer::render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
+	DEV_ASSERT(type == MDCommandBufferStateType::Render);
+	_render_set_dirty_state();
+
+	id<MTLRenderCommandEncoder> enc = render.encoder;
+	id<MTLBuffer> arguments = rid::get(p_indirect_buffer);
+
+	// Metal has no multi-draw call here, so one draw is issued per argument record.
+	NSUInteger argument_offset = p_offset;
+	for (uint32_t draw = 0; draw < p_draw_count; draw++, argument_offset += p_stride) {
+		[enc drawPrimitives:render.pipeline->raster_state.render_primitive
+				  indirectBuffer:arguments
+			indirectBufferOffset:argument_offset];
+	}
+}
+
+void MDCommandBuffer::render_draw_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { // Stub: indirect draws with a draw count read from a buffer are not implemented.
+ ERR_FAIL_MSG("not implemented"); // Only reports the error; no draw is encoded and every parameter is unused.
+}
+
+void MDCommandBuffer::render_end_pass() { // Ends the render encoder and puts the command buffer back into the None state.
+ DEV_ASSERT(type == MDCommandBufferStateType::Render);
+
+ [render.encoder endEncoding];
+ render.reset(); // Resets the `render` state object once encoding has ended.
+ type = MDCommandBufferStateType::None;
+}
+
+#pragma mark - Compute
+
+// Binds a uniform set for the compute stage: declares the compute usage of
+// every resource the set references and binds its argument buffer at
+// p_set_index when the shader has a compute-stage offset for it.
+void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) {
+	DEV_ASSERT(type == MDCommandBufferStateType::Compute);
+
+	id<MTLComputeCommandEncoder> enc = compute.encoder;
+	id<MTLDevice> device = enc.device;
+
+	MDShader *shader = (MDShader *)(p_shader.id);
+	UniformSet const &layout = shader->sets[p_set_index];
+
+	MDUniformSet *uniform_set = (MDUniformSet *)(p_uniform_set.id);
+	BoundUniformSet &bound = uniform_set->boundUniformSetForShader(shader, device);
+
+	for (KeyValue<id<MTLResource>, StageResourceUsage> &entry : bound.bound_resources) {
+		const MTLResourceUsage compute_usage = resource_usage_for_stage(entry.value, RDD::ShaderStage::SHADER_STAGE_COMPUTE);
+		if (compute_usage == 0) {
+			// Not used by the compute stage.
+			continue;
+		}
+		[enc useResource:entry.key usage:compute_usage];
+	}
+
+	uint32_t const *compute_offset = layout.offsets.getptr(RDD::SHADER_STAGE_COMPUTE);
+	if (compute_offset) {
+		[enc setBuffer:bound.buffer offset:*compute_offset atIndex:p_set_index];
+	}
+}
+
+// Dispatches p_x_groups * p_y_groups * p_z_groups threadgroups, each with the
+// local size recorded on the bound compute pipeline.
+void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
+	DEV_ASSERT(type == MDCommandBufferStateType::Compute);
+
+	const MTLSize group_count = MTLSizeMake(p_x_groups, p_y_groups, p_z_groups);
+
+	[compute.encoder dispatchThreadgroups:group_count threadsPerThreadgroup:compute.pipeline->compute_state.local];
+}
+
+// Dispatches threadgroups whose count is read from p_indirect_buffer at
+// p_offset, using the local size recorded on the bound compute pipeline.
+void MDCommandBuffer::compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) {
+	DEV_ASSERT(type == MDCommandBufferStateType::Compute);
+
+	id<MTLBuffer> arguments = rid::get(p_indirect_buffer);
+
+	[compute.encoder dispatchThreadgroupsWithIndirectBuffer:arguments
+									   indirectBufferOffset:p_offset
+									  threadsPerThreadgroup:compute.pipeline->compute_state.local];
+}
+
+void MDCommandBuffer::_end_compute_dispatch() { // Ends the compute encoder and puts the command buffer back into the None state.
+ DEV_ASSERT(type == MDCommandBufferStateType::Compute);
+
+ [compute.encoder endEncoding];
+ compute.reset(); // Resets the `compute` state object once encoding has ended.
+ type = MDCommandBufferStateType::None;
+}
+
+void MDCommandBuffer::_end_blit() { // Ends the blit encoder and puts the command buffer back into the None state.
+ DEV_ASSERT(type == MDCommandBufferStateType::Blit);
+
+ [blit.encoder endEncoding];
+ blit.reset(); // Resets the `blit` state object once encoding has ended.
+ type = MDCommandBufferStateType::None;
+}
+
+MDComputeShader::MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *p_kernel) : // Builds a compute shader from its name, uniform set layouts and kernel library.
+ MDShader(p_name, p_sets), kernel(p_kernel) { // Name and sets are forwarded to the MDShader base; the kernel pointer is stored as given.
+}
+
+// Uploads the push constant words in p_data to the compute encoder of p_cb at
+// this shader's push constant binding. Does nothing when the shader has no
+// push constant binding.
+void MDComputeShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
+	DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Compute);
+
+	// A binding of (uint32_t)-1 marks "no push constants".
+	const bool has_push_constants = push_constants.binding != (uint32_t)-1;
+	if (has_push_constants) {
+		id<MTLComputeCommandEncoder> enc = p_cb->compute.encoder;
+
+		void const *bytes = p_data.ptr();
+		size_t byte_count = p_data.size() * sizeof(uint32_t);
+
+		[enc setBytes:bytes length:byte_count atIndex:push_constants.binding];
+	}
+}
+
+MDRenderShader::MDRenderShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *_Nonnull p_vert, MDLibrary *_Nonnull p_frag) : // Builds a render shader from its name, uniform set layouts and vertex/fragment libraries.
+ MDShader(p_name, p_sets), vert(p_vert), frag(p_frag) { // Name and sets are forwarded to the MDShader base; both library pointers are stored as given.
+}
+
+// Uploads the push constant words in p_data to the render encoder of p_cb, for
+// each of the vertex and fragment stages that has a push constant binding.
+void MDRenderShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
+	DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Render);
+	id<MTLRenderCommandEncoder> enc = p_cb->render.encoder;
+
+	size_t byte_count = p_data.size() * sizeof(uint32_t);
+	void const *bytes = p_data.ptr();
+
+	// The same data is sent to both stages; each stage is skipped unless its binding is greater than -1.
+	const bool vertex_bound = push_constants.vert.binding > -1;
+	if (vertex_bound) {
+		[enc setVertexBytes:bytes length:byte_count atIndex:push_constants.vert.binding];
+	}
+
+	const bool fragment_bound = push_constants.frag.binding > -1;
+	if (fragment_bound) {
+		[enc setFragmentBytes:bytes length:byte_count atIndex:push_constants.frag.binding];
+	}
+}
+
+BoundUniformSet &MDUniformSet::boundUniformSetForShader(MDShader *p_shader, id<MTLDevice> p_device) { // Returns this set's bound form for p_shader, building and caching it on first use.
+ BoundUniformSet *sus = bound_uniforms.getptr(p_shader); // Cached result from an earlier call with the same shader, if any.
+ if (sus != nullptr) {
+ return *sus;
+ }
+
+ UniformSet const &set = p_shader->sets[index]; // Layout of this set as described by the shader.
+
+ HashMap<id<MTLResource>, StageResourceUsage> bound_resources; // Every texture/buffer referenced by the set, with its accumulated per-stage usage.
+ auto add_usage = [&bound_resources](id<MTLResource> __unsafe_unretained res, RDD::ShaderStage stage, MTLResourceUsage usage) { // Merges the usage of `res` in `stage` into bound_resources.
+ StageResourceUsage *sru = bound_resources.getptr(res);
+ if (sru == nullptr) {
+ bound_resources.insert(res, stage_resource_usage(stage, usage));
+ } else {
+ *sru |= stage_resource_usage(stage, usage);
+ }
+ };
+ id<MTLBuffer> enc_buffer = nil;
+ if (set.buffer_size > 0) { // An argument buffer is only created for sets with a non-zero buffer size.
+ MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceHazardTrackingModeTracked;
+ enc_buffer = [p_device newBufferWithLength:set.buffer_size options:options];
+ for (KeyValue<RDC::ShaderStage, id<MTLArgumentEncoder>> const &kv : set.encoders) { // One argument encoder per shader stage.
+ RDD::ShaderStage const stage = kv.key;
+ ShaderStageUsage const stage_usage = ShaderStageUsage(1 << stage);
+ id<MTLArgumentEncoder> const enc = kv.value;
+
+ [enc setArgumentBuffer:enc_buffer offset:set.offsets[stage]]; // All stages share enc_buffer; each one encodes at its own offset.
+
+ for (uint32_t i = 0; i < uniforms.size(); i++) {
+ RDD::BoundUniform const &uniform = uniforms[i];
+ UniformInfo ui = set.uniforms[i]; // Shader-side description of uniform i (taken by value).
+
+ BindingInfo *bi = ui.bindings.getptr(stage);
+ if (bi == nullptr) {
+ // No binding for this stage.
+ continue;
+ }
+
+ if ((ui.active_stages & stage_usage) == 0) {
+ // Not active for this state, so don't bind anything.
+ continue;
+ }
+
+ switch (uniform.type) {
+ case RDD::UNIFORM_TYPE_SAMPLER: {
+ size_t count = uniform.ids.size();
+ id<MTLSamplerState> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLSamplerState> __unsafe_unretained, count);
+ for (size_t j = 0; j < count; j += 1) {
+ objects[j] = rid::get(uniform.ids[j].id);
+ }
+ [enc setSamplerStates:objects withRange:NSMakeRange(bi->index, count)];
+ } break;
+ case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
+ size_t count = uniform.ids.size() / 2; // ids holds (sampler, texture) pairs.
+ id<MTLTexture> __unsafe_unretained *textures = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
+ id<MTLSamplerState> __unsafe_unretained *samplers = ALLOCA_ARRAY(id<MTLSamplerState> __unsafe_unretained, count);
+ for (uint32_t j = 0; j < count; j += 1) {
+ id<MTLSamplerState> sampler = rid::get(uniform.ids[j * 2 + 0]);
+ id<MTLTexture> texture = rid::get(uniform.ids[j * 2 + 1]);
+ samplers[j] = sampler;
+ textures[j] = texture;
+ add_usage(texture, stage, bi->usage);
+ }
+ BindingInfo *sbi = ui.bindings_secondary.getptr(stage); // Samplers are encoded only when a secondary binding exists for this stage.
+ if (sbi) {
+ [enc setSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)];
+ }
+ [enc setTextures:textures
+ withRange:NSMakeRange(bi->index, count)];
+ } break;
+ case RDD::UNIFORM_TYPE_TEXTURE: {
+ size_t count = uniform.ids.size();
+ if (count == 1) {
+ id<MTLTexture> obj = rid::get(uniform.ids[0]);
+ [enc setTexture:obj atIndex:bi->index];
+ add_usage(obj, stage, bi->usage);
+ } else {
+ id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
+ for (size_t j = 0; j < count; j += 1) {
+ id<MTLTexture> obj = rid::get(uniform.ids[j]);
+ objects[j] = obj;
+ add_usage(obj, stage, bi->usage);
+ }
+ [enc setTextures:objects withRange:NSMakeRange(bi->index, count)];
+ }
+ } break;
+ case RDD::UNIFORM_TYPE_IMAGE: {
+ size_t count = uniform.ids.size();
+ if (count == 1) {
+ id<MTLTexture> obj = rid::get(uniform.ids[0]);
+ [enc setTexture:obj atIndex:bi->index];
+ add_usage(obj, stage, bi->usage);
+ BindingInfo *sbi = ui.bindings_secondary.getptr(stage); // A secondary binding, when present, receives the buffer backing the image.
+ if (sbi) {
+ id<MTLTexture> tex = obj.parentTexture ? obj.parentTexture : obj; // For a texture view, use the parent texture.
+ id<MTLBuffer> buf = tex.buffer;
+ if (buf) {
+ [enc setBuffer:buf offset:tex.bufferOffset atIndex:sbi->index];
+ }
+ }
+ } else {
+ id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
+ for (size_t j = 0; j < count; j += 1) {
+ id<MTLTexture> obj = rid::get(uniform.ids[j]);
+ objects[j] = obj;
+ add_usage(obj, stage, bi->usage);
+ }
+ [enc setTextures:objects withRange:NSMakeRange(bi->index, count)];
+ }
+ } break;
+ case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: {
+ ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER");
+ } break;
+ case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
+ ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER");
+ } break;
+ case RDD::UNIFORM_TYPE_IMAGE_BUFFER: {
+ CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER");
+ } break;
+ case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: {
+ id<MTLBuffer> buffer = rid::get(uniform.ids[0]); // Only the first id is used for buffer uniforms.
+ [enc setBuffer:buffer offset:0 atIndex:bi->index];
+ add_usage(buffer, stage, bi->usage);
+ } break;
+ case RDD::UNIFORM_TYPE_STORAGE_BUFFER: {
+ id<MTLBuffer> buffer = rid::get(uniform.ids[0]); // Only the first id is used for buffer uniforms.
+ [enc setBuffer:buffer offset:0 atIndex:bi->index];
+ add_usage(buffer, stage, bi->usage);
+ } break;
+ case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: {
+ size_t count = uniform.ids.size();
+ if (count == 1) {
+ id<MTLTexture> obj = rid::get(uniform.ids[0]);
+ [enc setTexture:obj atIndex:bi->index];
+ add_usage(obj, stage, bi->usage);
+ } else {
+ id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
+ for (size_t j = 0; j < count; j += 1) {
+ id<MTLTexture> obj = rid::get(uniform.ids[j]);
+ objects[j] = obj;
+ add_usage(obj, stage, bi->usage);
+ }
+ [enc setTextures:objects withRange:NSMakeRange(bi->index, count)];
+ }
+ } break;
+ default: {
+ DEV_ASSERT(false);
+ }
+ }
+ }
+ }
+ }
+
+ BoundUniformSet bs = { .buffer = enc_buffer, .bound_resources = bound_resources }; // enc_buffer stays nil when the set has no argument buffer.
+ bound_uniforms.insert(p_shader, bs); // Cache the result so later calls for this shader return early.
+ return bound_uniforms.get(p_shader);
+}
+
+// Returns the pixel format capabilities that attachment p_index needs in this
+// subpass, based on which reference lists (input, color, resolve,
+// depth/stencil) point at it. Returns kMTLFmtCapsNone when it is not referenced.
+MTLFmtCaps MDSubpass::getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const {
+	// True when any reference in the given list points at attachment p_index.
+	auto references_attachment = [p_index](auto const &p_refs) {
+		for (RDD::AttachmentReference const &ref : p_refs) {
+			if (ref.attachment == p_index) {
+				return true;
+			}
+		}
+		return false;
+	};
+
+	MTLFmtCaps caps = kMTLFmtCapsNone;
+
+	if (references_attachment(input_references)) {
+		flags::set(caps, kMTLFmtCapsRead);
+	}
+
+	if (references_attachment(color_references)) {
+		flags::set(caps, kMTLFmtCapsColorAtt);
+	}
+
+	if (references_attachment(resolve_references)) {
+		flags::set(caps, kMTLFmtCapsResolve);
+	}
+
+	if (depth_stencil_reference.attachment == p_index) {
+		flags::set(caps, kMTLFmtCapsDSAtt);
+	}
+
+	return caps;
+}
+
+// Works out the first and last subpass of p_pass that reference this
+// attachment. When no subpass uses it, firstUseSubpassIndex stays UINT32_MAX
+// and lastUseSubpassIndex stays 0.
+void MDAttachment::linkToSubpass(const MDRenderPass &p_pass) {
+	firstUseSubpassIndex = UINT32_MAX;
+	lastUseSubpassIndex = 0;
+
+	for (MDSubpass const &candidate : p_pass.subpasses) {
+		const MTLFmtCaps required = candidate.getRequiredFmtCapsForAttachmentAt(index);
+		if (!required) {
+			// This subpass does not reference the attachment.
+			continue;
+		}
+
+		if (candidate.subpass_index < firstUseSubpassIndex) {
+			firstUseSubpassIndex = candidate.subpass_index;
+		}
+		if (candidate.subpass_index > lastUseSubpassIndex) {
+			lastUseSubpassIndex = candidate.subpass_index;
+		}
+	}
+}
+
+// Picks the Metal store action for this attachment in p_subpass.
+// The contents are always stored when only part of the area is rendered or a
+// later subpass still uses the attachment; otherwise the configured (stencil)
+// store action decides. A resolve is added whenever one is requested and possible.
+MTLStoreAction MDAttachment::getMTLStoreAction(MDSubpass const &p_subpass,
+		bool p_is_rendering_entire_area,
+		bool p_has_resolve,
+		bool p_can_resolve,
+		bool p_is_stencil) const {
+	const bool resolves = p_has_resolve && p_can_resolve;
+	const MTLStoreAction store_with_optional_resolve = resolves ? MTLStoreActionStoreAndMultisampleResolve : MTLStoreActionStore;
+
+	if (!p_is_rendering_entire_area || !isLastUseOf(p_subpass)) {
+		return store_with_optional_resolve;
+	}
+
+	const MTLStoreAction configured = p_is_stencil ? stencilStoreAction : storeAction;
+
+	if (configured == MTLStoreActionStore) {
+		return store_with_optional_resolve;
+	}
+
+	if (configured == MTLStoreActionDontCare) {
+		if (!p_has_resolve) {
+			return MTLStoreActionDontCare;
+		}
+		// A resolve was requested: resolve when possible, otherwise keep the contents.
+		return p_can_resolve ? MTLStoreActionMultisampleResolve : MTLStoreActionStore;
+	}
+
+	// Any other configured action falls back to a plain store.
+	return MTLStoreActionStore;
+}
+
+// Fills in the texture, load action and store action of p_desc for this
+// attachment in p_subpass. Returns true when the chosen load action is a clear,
+// in which case the caller is expected to supply the clear value.
+bool MDAttachment::configureDescriptor(MTLRenderPassAttachmentDescriptor *p_desc,
+		PixelFormats &p_pf,
+		MDSubpass const &p_subpass,
+		id<MTLTexture> p_attachment,
+		bool p_is_rendering_entire_area,
+		bool p_has_resolve,
+		bool p_can_resolve,
+		bool p_is_stencil) const {
+	p_desc.texture = p_attachment;
+
+	// The configured load action only applies when the whole area is rendered
+	// and this is the first subpass using the attachment; otherwise the
+	// existing contents are loaded.
+	const bool use_configured_load = p_is_rendering_entire_area && isFirstUseOf(p_subpass);
+	MTLLoadAction load = MTLLoadActionLoad;
+	if (use_configured_load) {
+		load = p_is_stencil ? stencilLoadAction : loadAction;
+	}
+	p_desc.loadAction = load;
+
+	const MTLPixelFormat format = p_attachment.pixelFormat;
+	const bool is_depth_format = p_pf.isDepthFormat(format);
+	const bool is_stencil_format = p_pf.isStencilFormat(format);
+
+	// Configuring the non-stencil aspect of a stencil-only format: nothing to store.
+	const bool stencil_only_as_depth = is_stencil_format && !p_is_stencil && !is_depth_format;
+	if (stencil_only_as_depth) {
+		p_desc.storeAction = MTLStoreActionDontCare;
+	} else {
+		p_desc.storeAction = getMTLStoreAction(p_subpass, p_is_rendering_entire_area, p_has_resolve, p_can_resolve, p_is_stencil);
+	}
+
+	return load == MTLLoadActionClear;
+}
+
+// Returns true when this attachment must be cleared in p_subpass: the subpass
+// is the first one to use it and the (stencil) load action is a clear.
+bool MDAttachment::shouldClear(const MDSubpass &p_subpass, bool p_is_stencil) const {
+	if (p_subpass.subpass_index == firstUseSubpassIndex) {
+		const MTLLoadAction configured = p_is_stencil ? stencilLoadAction : loadAction;
+		return configured == MTLLoadActionClear;
+	}
+	// Later subpasses never clear the attachment.
+	return false;
+}
+
+MDRenderPass::MDRenderPass(Vector<MDAttachment> &p_attachments, Vector<MDSubpass> &p_subpasses) : // Builds a render pass from its attachments and subpasses.
+ attachments(p_attachments), subpasses(p_subpasses) {
+ for (MDAttachment &att : attachments) { // Let every attachment work out the first and last subpass that uses it.
+ att.linkToSubpass(*this);
+ }
+}
+
+#pragma mark - Resource Factory
+
+// Compiles p_source into a Metal library on the driver's device and returns
+// the function named p_name from it.
+//
+// p_error: optional out-parameter; when not nil it receives any error reported
+// by the compiler. Returns nil when the library could not be created or the
+// function does not exist in it.
+id<MTLFunction> MDResourceFactory::new_func(NSString *p_source, NSString *p_name, NSError **p_error) {
+	// Both results are held in strong locals declared outside the pool.
+	// Writing the autoreleasing out-parameter inside the pool would hand the
+	// caller an object that is released as soon as the pool drains.
+	NSError *err = nil;
+	id<MTLFunction> func = nil;
+
+	@autoreleasepool {
+		MTLCompileOptions *options = [MTLCompileOptions new];
+		id<MTLDevice> device = device_driver->get_device();
+		id<MTLLibrary> mtlLib = [device newLibraryWithSource:p_source
+													 options:options
+													   error:&err];
+		// Messaging a nil library simply yields a nil function.
+		func = [mtlLib newFunctionWithName:p_name];
+	}
+
+	if (err != nil && p_error != nil) {
+		*p_error = err;
+	}
+	return func;
+}
+
+// Builds the vertex function (`vertClear`) used when clearing attachments by drawing.
+// `p_key` is not read by this function; the generated source is the same for every key.
+id<MTLFunction> MDResourceFactory::new_clear_vert_func(ClearAttKey &p_key) {
+ @autoreleasepool {
+ // The only format argument is ClearAttKey::DEPTH_INDEX: the shader reads the red
+ // channel of that entry of the colors buffer and writes it as the position's z,
+ // and takes the render target layer from the w component of the input position.
+ NSString *msl = [NSString stringWithFormat:@R"(
+#include <metal_stdlib>
+using namespace metal;
+
+typedef struct {
+ float4 a_position [[attribute(0)]];
+} AttributesPos;
+
+typedef struct {
+ float4 colors[9];
+} ClearColorsIn;
+
+typedef struct {
+ float4 v_position [[position]];
+ uint layer;
+} VaryingsPos;
+
+vertex VaryingsPos vertClear(AttributesPos attributes [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) {
+ VaryingsPos varyings;
+ varyings.v_position = float4(attributes.a_position.x, -attributes.a_position.y, ccIn.colors[%d].r, 1.0);
+ varyings.layer = uint(attributes.a_position.w);
+ return varyings;
+}
+)",
+ ClearAttKey::DEPTH_INDEX];
+
+ // Compile errors are not reported to the caller (nil error pointer).
+ return new_func(msl, @"vertClear", nil);
+ }
+}
+
+// Builds the fragment function (`fragClear`) used when clearing attachments by drawing.
+// The source is generated per key: one output member and one assignment is emitted for
+// each color attachment that `p_key` reports as enabled.
+id<MTLFunction> MDResourceFactory::new_clear_frag_func(ClearAttKey &p_key) {
+ @autoreleasepool {
+ NSMutableString *msl = [NSMutableString stringWithCapacity:2048];
+
+ // Fixed preamble; ends with the opening of the output struct.
+ [msl appendFormat:@R"(
+#include <metal_stdlib>
+using namespace metal;
+
+typedef struct {
+ float4 v_position [[position]];
+} VaryingsPos;
+
+typedef struct {
+ float4 colors[9];
+} ClearColorsIn;
+
+typedef struct {
+)"];
+
+ // Output struct members: `<type>4 colorN [[color(N)]]` for each enabled attachment.
+ for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) {
+ if (p_key.is_enabled(caIdx)) {
+ NSString *typeStr = get_format_type_string((MTLPixelFormat)p_key.pixel_formats[caIdx]);
+ [msl appendFormat:@" %@4 color%u [[color(%u)]];\n", typeStr, caIdx, caIdx];
+ }
+ }
+ [msl appendFormat:@R"(} ClearColorsOut;
+
+fragment ClearColorsOut fragClear(VaryingsPos varyings [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) {
+
+ ClearColorsOut ccOut;
+)"];
+ // Function body: convert each clear color to the attachment's component type.
+ for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) {
+ if (p_key.is_enabled(caIdx)) {
+ NSString *typeStr = get_format_type_string((MTLPixelFormat)p_key.pixel_formats[caIdx]);
+ [msl appendFormat:@" ccOut.color%u = %@4(ccIn.colors[%u]);\n", caIdx, typeStr, caIdx];
+ }
+ }
+ [msl appendString:@R"( return ccOut;
+})"];
+
+ // Compile errors are not reported to the caller (nil error pointer).
+ return new_func(msl, @"fragClear", nil);
+ }
+}
+
+// Returns the MSL scalar type name used for clear-shader outputs of pixel format `p_fmt`.
+// 8-bit and 16-bit integer formats share the 16-bit `short`/`ushort` types.
+// Unknown formats yield an identifier that is not a valid MSL type.
+NSString *MDResourceFactory::get_format_type_string(MTLPixelFormat p_fmt) {
+	switch (device_driver->get_pixel_formats().getFormatType(p_fmt)) {
+		case MTLFormatType::ColorInt8:
+		case MTLFormatType::ColorInt16:
+			return @"short";
+		case MTLFormatType::ColorUInt8:
+		case MTLFormatType::ColorUInt16:
+			return @"ushort";
+		case MTLFormatType::ColorInt32:
+			return @"int";
+		case MTLFormatType::ColorUInt32:
+			return @"uint";
+		case MTLFormatType::ColorHalf:
+			return @"half";
+		case MTLFormatType::ColorFloat:
+		case MTLFormatType::DepthStencil:
+		case MTLFormatType::Compressed:
+			return @"float";
+		case MTLFormatType::None:
+			return @"unexpected_MTLPixelFormatInvalid";
+	}
+	// No `default:` above, so the compiler keeps warning about unhandled enumerators.
+	// This return covers out-of-range values, which would otherwise fall off the end
+	// of a non-void function (undefined behavior).
+	return @"unexpected_MTLPixelFormatInvalid";
+}
+
+// Creates a depth/stencil state whose tests always pass. Depth writes are enabled when
+// `p_use_depth` is set; when `p_use_stencil` is set both faces replace the stencil
+// value on every outcome, otherwise no stencil descriptor is attached.
+id<MTLDepthStencilState> MDResourceFactory::new_depth_stencil_state(bool p_use_depth, bool p_use_stencil) {
+	// Remains nil when stencil is not used.
+	MTLStencilDescriptor *stencil_desc = nil;
+	if (p_use_stencil) {
+		stencil_desc = [MTLStencilDescriptor new];
+		stencil_desc.stencilCompareFunction = MTLCompareFunctionAlways;
+		stencil_desc.stencilFailureOperation = MTLStencilOperationReplace;
+		stencil_desc.depthFailureOperation = MTLStencilOperationReplace;
+		stencil_desc.depthStencilPassOperation = MTLStencilOperationReplace;
+	}
+
+	MTLDepthStencilDescriptor *ds_desc = [MTLDepthStencilDescriptor new];
+	ds_desc.depthCompareFunction = MTLCompareFunctionAlways;
+	ds_desc.depthWriteEnabled = p_use_depth;
+	ds_desc.frontFaceStencil = stencil_desc;
+	ds_desc.backFaceStencil = stencil_desc;
+
+	return [device_driver->get_device() newDepthStencilStateWithDescriptor:ds_desc];
+}
+
+// Creates the render pipeline state used to clear attachments by drawing, configured
+// from `p_key` (sample count, color formats, depth and stencil formats).
+// Returns the result of -newRenderPipelineStateWithDescriptor:error:, which receives
+// `p_error` directly.
+id<MTLRenderPipelineState> MDResourceFactory::new_clear_pipeline_state(ClearAttKey &p_key, NSError **p_error) {
+ PixelFormats &pixFmts = device_driver->get_pixel_formats();
+
+ // NOTE(review): neither function is checked for nil before being assigned to the descriptor.
+ id<MTLFunction> vtxFunc = new_clear_vert_func(p_key);
+ id<MTLFunction> fragFunc = new_clear_frag_func(p_key);
+ MTLRenderPipelineDescriptor *plDesc = [MTLRenderPipelineDescriptor new];
+ plDesc.label = @"ClearRenderAttachments";
+ plDesc.vertexFunction = vtxFunc;
+ plDesc.fragmentFunction = fragFunc;
+ plDesc.rasterSampleCount = p_key.sample_count;
+ plDesc.inputPrimitiveTopology = MTLPrimitiveTopologyClassTriangle;
+
+ // Every color slot gets the key's pixel format; writes are masked off for slots the key does not enable.
+ for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) {
+ MTLRenderPipelineColorAttachmentDescriptor *colorDesc = plDesc.colorAttachments[caIdx];
+ colorDesc.pixelFormat = (MTLPixelFormat)p_key.pixel_formats[caIdx];
+ colorDesc.writeMask = p_key.is_enabled(caIdx) ? MTLColorWriteMaskAll : MTLColorWriteMaskNone;
+ }
+
+ // Depth and stencil attachment formats are only set when the key's format actually has that aspect.
+ MTLPixelFormat mtlDepthFormat = p_key.depth_format();
+ if (pixFmts.isDepthFormat(mtlDepthFormat)) {
+ plDesc.depthAttachmentPixelFormat = mtlDepthFormat;
+ }
+
+ MTLPixelFormat mtlStencilFormat = p_key.stencil_format();
+ if (pixFmts.isStencilFormat(mtlStencilFormat)) {
+ plDesc.stencilAttachmentPixelFormat = mtlStencilFormat;
+ }
+
+ MTLVertexDescriptor *vtxDesc = plDesc.vertexDescriptor;
+
+ // Vertex attribute descriptors.
+ MTLVertexAttributeDescriptorArray *vaDescArray = vtxDesc.attributes;
+ MTLVertexAttributeDescriptor *vaDesc;
+ NSUInteger vtxBuffIdx = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(VERT_CONTENT_BUFFER_INDEX);
+ NSUInteger vtxStride = 0;
+
+ // Vertex location.
+ // A single float4 attribute at offset 0; vtxStride accumulates the per-vertex size.
+ vaDesc = vaDescArray[0];
+ vaDesc.format = MTLVertexFormatFloat4;
+ vaDesc.bufferIndex = vtxBuffIdx;
+ vaDesc.offset = vtxStride;
+ vtxStride += sizeof(simd::float4);
+
+ // Vertex attribute buffer.
+ MTLVertexBufferLayoutDescriptorArray *vbDescArray = vtxDesc.layouts;
+ MTLVertexBufferLayoutDescriptor *vbDesc = vbDescArray[vtxBuffIdx];
+ vbDesc.stepFunction = MTLVertexStepFunctionPerVertex;
+ vbDesc.stepRate = 1;
+ vbDesc.stride = vtxStride;
+
+ return [device_driver->get_device() newRenderPipelineStateWithDescriptor:plDesc error:p_error];
+}
+
+// Returns the clear pipeline state for `p_key`, creating and caching it on first use.
+// On failure nil is returned and `p_error` is filled in by the factory.
+id<MTLRenderPipelineState> MDResourceCache::get_clear_render_pipeline_state(ClearAttKey &p_key, NSError **p_error) {
+	HashMap::ConstIterator it = clear_states.find(p_key);
+	if (it != clear_states.end()) {
+		return it->value;
+	}
+
+	id<MTLRenderPipelineState> state = resource_factory->new_clear_pipeline_state(p_key, p_error);
+	// Failures are not cached: a cached nil would make every later call for this key
+	// return nil without reporting any error.
+	if (state != nil) {
+		clear_states[p_key] = state;
+	}
+	return state;
+}
+
+// Returns the cached depth/stencil state for the requested combination of depth and
+// stencil usage, creating it through the resource factory on first use.
+id<MTLDepthStencilState> MDResourceCache::get_depth_stencil_state(bool p_use_depth, bool p_use_stencil) {
+	// Select the cache slot matching the (depth, stencil) pair.
+	id<MTLDepthStencilState> __strong *slot = nullptr;
+	if (p_use_depth) {
+		slot = p_use_stencil ? &clear_depth_stencil_state.all : &clear_depth_stencil_state.depth_only;
+	} else {
+		slot = p_use_stencil ? &clear_depth_stencil_state.stencil_only : &clear_depth_stencil_state.none;
+	}
+	DEV_ASSERT(slot != nullptr);
+
+	// Create on first use.
+	if (*slot == nil) {
+		*slot = resource_factory->new_depth_stencil_state(p_use_depth, p_use_stencil);
+	}
+	return *slot;
+}
+
+// Short names for shader stages, indexed by the RD::SHADER_STAGE_* value.
+// Used below as the `stage` field of the shader_compile signpost intervals.
+static const char *SHADER_STAGE_NAMES[] = {
+ [RD::SHADER_STAGE_VERTEX] = "vert",
+ [RD::SHADER_STAGE_FRAGMENT] = "frag",
+ [RD::SHADER_STAGE_TESSELATION_CONTROL] = "tess_ctrl",
+ [RD::SHADER_STAGE_TESSELATION_EVALUATION] = "tess_eval",
+ [RD::SHADER_STAGE_COMPUTE] = "comp",
+};
+
+// Tells the owner to release the shader cache entry stored under this entry's key.
+// Called from -[MDLibrary dealloc].
+void ShaderCacheEntry::notify_free() const {
+ owner.shader_cache_free_entry(key);
+}
+
+// Class extension: initializer used by the MDLibrary subclasses in this file.
+// It stores `entry` and points the entry's `library` field back at the new instance.
+@interface MDLibrary ()
+- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry;
+@end
+
+/// Loads the MTLLibrary when the library is first accessed.
+///
+/// The device, source and options are kept until the first call to -library or -error,
+/// which compiles the source synchronously and then drops them.
+@interface MDLazyLibrary : MDLibrary {
+ id<MTLLibrary> _library;
+ NSError *_error;
+ // Guards `_loaded` and the compile performed in -load.
+ std::shared_mutex _mu;
+ bool _loaded;
+ // Compile inputs; set to nil once the library has been loaded.
+ id<MTLDevice> _device;
+ NSString *_source;
+ MTLCompileOptions *_options;
+}
+- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
+ device:(id<MTLDevice>)device
+ source:(NSString *)source
+ options:(MTLCompileOptions *)options;
+@end
+
+/// Loads the MTLLibrary immediately on initialization, using an asynchronous API.
+///
+/// -library and -error block until the completion handler has run.
+@interface MDImmediateLibrary : MDLibrary {
+ // Results written by the completion handler.
+ id<MTLLibrary> _library;
+ NSError *_error;
+ // `_ready` is written under `_cv_mutex` and is the predicate waited on through `_cv`.
+ std::mutex _cv_mutex;
+ std::condition_variable _cv;
+ // Set by the completion handler after waiters are notified; lets readers skip the lock.
+ std::atomic<bool> _complete;
+ bool _ready;
+}
+- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
+ device:(id<MTLDevice>)device
+ source:(NSString *)source
+ options:(MTLCompileOptions *)options;
+@end
+
+// Base class of the shader library wrappers. -library and -error must be provided by
+// subclasses; the implementations here crash with "Not implemented".
+@implementation MDLibrary
+
+/// Creates the concrete library for `strategy`: LAZY yields an MDLazyLibrary; DEFAULT
+/// and any other value yield an MDImmediateLibrary.
++ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry
+		device:(id<MTLDevice>)device
+		source:(NSString *)source
+		options:(MTLCompileOptions *)options
+		strategy:(ShaderLoadStrategy)strategy {
+	if (strategy == ShaderLoadStrategy::LAZY) {
+		return [[MDLazyLibrary alloc] initWithCacheEntry:entry device:device source:source options:options];
+	}
+	return [[MDImmediateLibrary alloc] initWithCacheEntry:entry device:device source:source options:options];
+}
+
+/// Must be overridden by subclasses.
+- (id<MTLLibrary>)library {
+	CRASH_NOW_MSG("Not implemented");
+	return nil;
+}
+
+/// Must be overridden by subclasses.
+- (NSError *)error {
+	CRASH_NOW_MSG("Not implemented");
+	return nil;
+}
+
+/// Intentionally does nothing.
+- (void)setLabel:(NSString *)label {
+}
+
+/// Stores the cache entry and registers this instance as the entry's library.
+- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry {
+	self = [super init];
+	_entry = entry;
+	entry->library = self;
+	return self;
+}
+
+/// Notifies the cache entry that this library is going away.
+- (void)dealloc {
+	_entry->notify_free();
+}
+
+@end
+
+@implementation MDImmediateLibrary
+
+/// Starts compiling `source` asynchronously on `device`. The completion handler stores
+/// the resulting library and error, then wakes up any thread blocked in -library or -error.
+- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
+		device:(id<MTLDevice>)device
+		source:(NSString *)source
+		options:(MTLCompileOptions *)options {
+	self = [super initWithCacheEntry:entry];
+	_complete = false;
+	_ready = false;
+
+	// The signpost interval is keyed on this instance's address.
+	__block os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)self;
+	os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile",
+			"shader_name=%{public}s stage=%{public}s hash=%X",
+			entry->name.get_data(), SHADER_STAGE_NAMES[entry->stage], entry->key.short_sha());
+
+	[device newLibraryWithSource:source
+					 options:options
+		   completionHandler:^(id<MTLLibrary> library, NSError *error) {
+			   os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile");
+			   self->_library = library;
+			   self->_error = error;
+			   if (error) {
+				   // vformat() performs the printf-style substitution; String::format()
+				   // takes a values container plus a placeholder and would leave the
+				   // "%s" markers in the message untouched.
+				   ERR_PRINT(vformat(U"Error compiling shader %s: %s", entry->name.get_data(), error.localizedDescription.UTF8String));
+			   }
+
+			   // Publish the results: `_ready` is the condition-variable predicate and
+			   // must be written under the mutex.
+			   {
+				   std::lock_guard<std::mutex> lock(self->_cv_mutex);
+				   self->_ready = true;
+			   }
+			   self->_cv.notify_all();
+			   // Set last, so a reader that sees `_complete` also sees the results above.
+			   self->_complete = true;
+		   }];
+	return self;
+}
+
+/// Returns the compiled library, blocking until compilation has finished.
+- (id<MTLLibrary>)library {
+	// Fast path: skip the lock once the completion handler has fully run.
+	if (!_complete) {
+		std::unique_lock<std::mutex> lock(_cv_mutex);
+		_cv.wait(lock, [&] { return _ready; });
+	}
+	return _library;
+}
+
+/// Returns the compile error (if any), blocking until compilation has finished.
+- (NSError *)error {
+	if (!_complete) {
+		std::unique_lock<std::mutex> lock(_cv_mutex);
+		_cv.wait(lock, [&] { return _ready; });
+	}
+	return _error;
+}
+
+@end
+
+@implementation MDLazyLibrary
+
+/// Stores the compile inputs; nothing is compiled until -library or -error is called.
+- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
+		device:(id<MTLDevice>)device
+		source:(NSString *)source
+		options:(MTLCompileOptions *)options {
+	self = [super initWithCacheEntry:entry];
+	_device = device;
+	_source = source;
+	_options = options;
+
+	return self;
+}
+
+/// Compiles the source on first call; later calls return immediately.
+- (void)load {
+	// Cheap check under a shared lock for the common, already-loaded case.
+	{
+		std::shared_lock<std::shared_mutex> lock(_mu);
+		if (_loaded) {
+			return;
+		}
+	}
+
+	// Re-check under the exclusive lock: another thread may have loaded in between.
+	std::unique_lock<std::shared_mutex> lock(_mu);
+	if (_loaded) {
+		return;
+	}
+
+	__block os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)self;
+	os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile",
+			"shader_name=%{public}s stage=%{public}s hash=%X",
+			_entry->name.get_data(), SHADER_STAGE_NAMES[_entry->stage], _entry->key.short_sha());
+	// ARC does not allow the address of an ivar to be passed as the write-back error
+	// parameter, so the error is received in a local and then stored.
+	NSError *error = nil;
+	_library = [_device newLibraryWithSource:_source options:_options error:&error];
+	// Keep the error so that -error can report it.
+	_error = error;
+	os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile");
+	// The compile inputs are no longer needed.
+	_device = nil;
+	_source = nil;
+	_options = nil;
+	_loaded = true;
+}
+
+/// Returns the compiled library, compiling it first if necessary.
+- (id<MTLLibrary>)library {
+	[self load];
+	return _library;
+}
+
+/// Returns the compile error (if any), compiling first if necessary.
+- (NSError *)error {
+	[self load];
+	return _error;
+}
+
+@end
diff --git a/drivers/metal/metal_utils.h b/drivers/metal/metal_utils.h
new file mode 100644
index 0000000000..f3ee395d04
--- /dev/null
+++ b/drivers/metal/metal_utils.h
@@ -0,0 +1,101 @@
+/**************************************************************************/
+/* metal_utils.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#ifndef METAL_UTILS_H
+#define METAL_UTILS_H
+
+#import <os/log.h>
+
+#include <functional>
+#include <type_traits>
+#include <utility>
+
+#pragma mark - Boolean flags
+
+namespace flags {
+
+/*! Turns on, in p_value, every bit that is set in p_mask. */
+template <typename Tv, typename Tm>
+void set(Tv &p_value, Tm p_mask) {
+	using T = std::underlying_type_t<Tv>;
+	const T current = static_cast<T>(p_value);
+	const T bits = static_cast<T>(p_mask);
+	p_value = static_cast<Tv>(current | bits);
+}
+
+/*! Turns off, in p_value, every bit that is set in p_mask. */
+template <typename Tv, typename Tm>
+void clear(Tv &p_value, Tm p_mask) {
+	using T = std::underlying_type_t<Tv>;
+	const T current = static_cast<T>(p_value);
+	const T bits = static_cast<T>(p_mask);
+	p_value = static_cast<Tv>(current & ~bits);
+}
+
+/*! Returns true if at least one bit of p_mask is set in p_value. */
+template <typename Tv, typename Tm>
+static constexpr bool any(Tv p_value, const Tm p_mask) {
+	return 0 != (p_value & p_mask);
+}
+
+/*! Returns true if every bit of p_mask is set in p_value. */
+template <typename Tv, typename Tm>
+static constexpr bool all(Tv p_value, const Tm p_mask) {
+	return p_mask == (p_value & p_mask);
+}
+
+} //namespace flags
+
+#pragma mark - Alignment and Offsets
+
+/*! Returns true if p_value has exactly one bit set; zero is not a power of two. */
+static constexpr bool is_power_of_two(uint64_t p_value) {
+	// Clearing the lowest set bit leaves zero only when a single bit was set.
+	return p_value != 0 && (p_value & (p_value - 1)) == 0;
+}
+
+/*!
+ * Rounds p_value up to the next multiple of p_alignment.
+ *
+ * p_alignment must be a power of two, or zero, which means "no alignment" and
+ * returns p_value unchanged.
+ */
+static constexpr uint64_t round_up_to_alignment(uint64_t p_value, uint64_t p_alignment) {
+	// Handle zero before the assertion: zero is an accepted input but not a power of
+	// two, so asserting first would reject it in dev builds.
+	if (p_alignment == 0) {
+		return p_value;
+	}
+
+	DEV_ASSERT(is_power_of_two(p_alignment));
+
+	const uint64_t mask = p_alignment - 1;
+	return (p_value + mask) & ~mask;
+}
+
+/*!
+ * Runs a callable when the object goes out of scope.
+ *
+ * Normally used through the DEFER macro. Instances cannot be copied, so the
+ * callable runs exactly once.
+ */
+class Defer {
+public:
+	Defer(std::function<void()> func) :
+			func_(std::move(func)) {}
+	~Defer() {
+		// An empty std::function would throw from the destructor if invoked.
+		if (func_) {
+			func_();
+		}
+	}
+
+	// A copy would run the deferred action a second time.
+	Defer(const Defer &) = delete;
+	Defer &operator=(const Defer &) = delete;
+
+private:
+	std::function<void()> func_;
+};
+
+// CONCAT pastes two tokens after macro expansion; the extra level of indirection is
+// what lets __LINE__ expand to its number before pasting.
+#define CONCAT_INTERNAL(x, y) x##y
+#define CONCAT(x, y) CONCAT_INTERNAL(x, y)
+// Declares a uniquely named (per source line) const reference bound to a temporary
+// Defer. Usage: DEFER([&] { cleanup(); });
+#define DEFER const Defer &CONCAT(defer__, __LINE__) = Defer
+
+// Log handles shared by the Metal driver; the definitions live in another translation unit.
+extern os_log_t LOG_DRIVER;
+// Used for dynamic tracing.
+extern os_log_t LOG_INTERVALS;
+
+#endif // METAL_UTILS_H
diff --git a/drivers/metal/pixel_formats.h b/drivers/metal/pixel_formats.h
new file mode 100644
index 0000000000..167c3d5600
--- /dev/null
+++ b/drivers/metal/pixel_formats.h
@@ -0,0 +1,416 @@
+/**************************************************************************/
+/* pixel_formats.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+/**************************************************************************/
+/* */
+/* Portions of this code were derived from MoltenVK. */
+/* */
+/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
+/* (http://www.brenwill.com) */
+/* */
+/* Licensed under the Apache License, Version 2.0 (the "License"); */
+/* you may not use this file except in compliance with the License. */
+/* You may obtain a copy of the License at */
+/* */
+/* http://www.apache.org/licenses/LICENSE-2.0 */
+/* */
+/* Unless required by applicable law or agreed to in writing, software */
+/* distributed under the License is distributed on an "AS IS" BASIS, */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
+/* implied. See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/**************************************************************************/
+
+#ifndef PIXEL_FORMATS_H
+#define PIXEL_FORMATS_H
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+
+#import "servers/rendering/rendering_device.h"
+
+#import <Metal/Metal.h>
+
+// Sizes of the lookup tables declared in PixelFormats below.
+// _mtlPixelFormatCount: capacity of the pixel format description table.
+static const uint32_t _mtlPixelFormatCount = 256;
+// _mtlPixelFormatCoreCount: number of low-valued pixel formats mapped through a plain array.
+static const uint32_t _mtlPixelFormatCoreCount = MTLPixelFormatX32_Stencil8 + 2; // The actual last enum value is not available on iOS.
+// _mtlVertexFormatCount: capacity of the vertex format tables.
+static const uint32_t _mtlVertexFormatCount = MTLVertexFormatHalf + 1;
+
+#pragma mark -
+#pragma mark Metal format capabilities
+
+/*! Bit flags describing what a Metal format can be used for, plus common combinations. */
+typedef enum : uint16_t {
+
+ kMTLFmtCapsNone = 0,
+ /*! The format can be used in a shader read operation. */
+ kMTLFmtCapsRead = (1 << 0),
+ /*! The format can be used in a shader filter operation during sampling. */
+ kMTLFmtCapsFilter = (1 << 1),
+ /*! The format can be used in a shader write operation. */
+ kMTLFmtCapsWrite = (1 << 2),
+ /*! The format can be used with atomic operations. */
+ kMTLFmtCapsAtomic = (1 << 3),
+ /*! The format can be used as a color attachment. */
+ kMTLFmtCapsColorAtt = (1 << 4),
+ /*! The format can be used as a depth-stencil attachment. */
+ kMTLFmtCapsDSAtt = (1 << 5),
+ /*! The format can be used with blend operations. */
+ kMTLFmtCapsBlend = (1 << 6),
+ /*! The format can be used as a destination for multisample antialias (MSAA) data. */
+ kMTLFmtCapsMSAA = (1 << 7),
+ /*! The format can be used as a resolve attachment. */
+ kMTLFmtCapsResolve = (1 << 8),
+ /*! Presumably: the format can be used as a vertex attribute format (TODO confirm against usage). */
+ kMTLFmtCapsVertex = (1 << 9),
+
+ // Combinations of the single-bit capabilities above, for color formats.
+ kMTLFmtCapsRF = (kMTLFmtCapsRead | kMTLFmtCapsFilter),
+ kMTLFmtCapsRC = (kMTLFmtCapsRead | kMTLFmtCapsColorAtt),
+ kMTLFmtCapsRCB = (kMTLFmtCapsRC | kMTLFmtCapsBlend),
+ kMTLFmtCapsRCM = (kMTLFmtCapsRC | kMTLFmtCapsMSAA),
+ kMTLFmtCapsRCMB = (kMTLFmtCapsRCM | kMTLFmtCapsBlend),
+ kMTLFmtCapsRWC = (kMTLFmtCapsRC | kMTLFmtCapsWrite),
+ kMTLFmtCapsRWCB = (kMTLFmtCapsRWC | kMTLFmtCapsBlend),
+ kMTLFmtCapsRWCM = (kMTLFmtCapsRWC | kMTLFmtCapsMSAA),
+ kMTLFmtCapsRWCMB = (kMTLFmtCapsRWCM | kMTLFmtCapsBlend),
+ kMTLFmtCapsRFCMRB = (kMTLFmtCapsRCMB | kMTLFmtCapsFilter | kMTLFmtCapsResolve),
+ kMTLFmtCapsRFWCMB = (kMTLFmtCapsRWCMB | kMTLFmtCapsFilter),
+ kMTLFmtCapsAll = (kMTLFmtCapsRFWCMB | kMTLFmtCapsResolve),
+
+ // Combinations built on the depth-stencil attachment capability.
+ kMTLFmtCapsDRM = (kMTLFmtCapsDSAtt | kMTLFmtCapsRead | kMTLFmtCapsMSAA),
+ kMTLFmtCapsDRFM = (kMTLFmtCapsDRM | kMTLFmtCapsFilter),
+ kMTLFmtCapsDRMR = (kMTLFmtCapsDRM | kMTLFmtCapsResolve),
+ kMTLFmtCapsDRFMR = (kMTLFmtCapsDRFM | kMTLFmtCapsResolve),
+
+ // Both aliases resolve to read + filter.
+ kMTLFmtCapsChromaSubsampling = kMTLFmtCapsRF,
+ kMTLFmtCapsMultiPlanar = kMTLFmtCapsChromaSubsampling,
+} MTLFmtCaps;
+
+/*! Returns the union of two capability sets. */
+inline MTLFmtCaps operator|(MTLFmtCaps p_left, MTLFmtCaps p_right) {
+	const uint32_t combined = static_cast<uint32_t>(p_left) | static_cast<uint32_t>(p_right);
+	return static_cast<MTLFmtCaps>(combined);
+}
+
+/*! Adds the capabilities in p_right to p_left and returns p_left. */
+inline MTLFmtCaps &operator|=(MTLFmtCaps &p_left, MTLFmtCaps p_right) {
+	p_left = p_left | p_right;
+	return p_left;
+}
+
+#pragma mark -
+#pragma mark Metal view classes
+
+/**
+ * Classification stored per Metal format in MTLFormatDesc::mtlViewClass.
+ * NOTE(review): presumably formats sharing a class can be reinterpreted as texture
+ * views of one another; confirm against the code that reads this value.
+ */
+enum class MTLViewClass : uint8_t {
+ None,
+ Color8,
+ Color16,
+ Color32,
+ Color64,
+ Color128,
+ PVRTC_RGB_2BPP,
+ PVRTC_RGB_4BPP,
+ PVRTC_RGBA_2BPP,
+ PVRTC_RGBA_4BPP,
+ EAC_R11,
+ EAC_RG11,
+ EAC_RGBA8,
+ ETC2_RGB8,
+ ETC2_RGB8A1,
+ ASTC_4x4,
+ ASTC_5x4,
+ ASTC_5x5,
+ ASTC_6x5,
+ ASTC_6x6,
+ ASTC_8x5,
+ ASTC_8x6,
+ ASTC_8x8,
+ ASTC_10x5,
+ ASTC_10x6,
+ ASTC_10x8,
+ ASTC_10x10,
+ ASTC_12x10,
+ ASTC_12x12,
+ BC1_RGBA,
+ BC2_RGBA,
+ BC3_RGBA,
+ BC4_R,
+ BC5_RG,
+ BC6H_RGB,
+ BC7_RGBA,
+ Depth24_Stencil8,
+ Depth32_Stencil8,
+ BGRA10_XR,
+ BGR10_XR
+};
+
+#pragma mark -
+#pragma mark Format descriptors
+
+/**
+ * Enumerates the data type of a format.
+ * Returned by PixelFormats::getFormatType() and stored in DataFormatDesc::formatType.
+ */
+enum class MTLFormatType {
+ None, /**< Format type is unknown. */
+ ColorHalf, /**< A 16-bit floating point color. */
+ ColorFloat, /**< A 32-bit floating point color. */
+ ColorInt8, /**< A signed 8-bit integer color. */
+ ColorUInt8, /**< An unsigned 8-bit integer color. */
+ ColorInt16, /**< A signed 16-bit integer color. */
+ ColorUInt16, /**< An unsigned 16-bit integer color. */
+ ColorInt32, /**< A signed 32-bit integer color. */
+ ColorUInt32, /**< An unsigned 32-bit integer color. */
+ DepthStencil, /**< A depth and stencil value. */
+ Compressed, /**< A block-compressed color. */
+};
+
+/** A two-dimensional size; used by DataFormatDesc as the texel dimensions of a block. */
+typedef struct Extent2D {
+ uint32_t width;
+ uint32_t height;
+} Extent2D;
+
+/** Describes the properties of a DataFormat, including the corresponding Metal pixel and vertex format. */
+typedef struct DataFormatDesc {
+ RD::DataFormat dataFormat;
+ MTLPixelFormat mtlPixelFormat;
+ MTLPixelFormat mtlPixelFormatSubstitute;
+ MTLVertexFormat mtlVertexFormat;
+ MTLVertexFormat mtlVertexFormatSubstitute;
+ uint8_t chromaSubsamplingPlaneCount;
+ uint8_t chromaSubsamplingComponentBits;
+ Extent2D blockTexelSize;
+ uint32_t bytesPerBlock;
+ MTLFormatType formatType;
+ const char *name;
+ bool hasReportedSubstitution;
+
+ /** Bytes per block divided by the number of texels in a block; may be fractional. */
+ inline double bytesPerTexel() const { return (double)bytesPerBlock / (double)(blockTexelSize.width * blockTexelSize.height); }
+
+ /** True when a Metal pixel format is assigned, or the format has more than one chroma-subsampling plane. */
+ inline bool isSupported() const { return (mtlPixelFormat != MTLPixelFormatInvalid || chromaSubsamplingPlaneCount > 1); }
+ /** True when supported, or when a substitute Metal pixel format is assigned. */
+ inline bool isSupportedOrSubstitutable() const { return isSupported() || (mtlPixelFormatSubstitute != MTLPixelFormatInvalid); }
+
+ /** True when a Metal vertex format is assigned. */
+ inline bool vertexIsSupported() const { return (mtlVertexFormat != MTLVertexFormatInvalid); }
+ /** True when a Metal vertex format, or a substitute vertex format, is assigned. */
+ inline bool vertexIsSupportedOrSubstitutable() const { return vertexIsSupported() || (mtlVertexFormatSubstitute != MTLVertexFormatInvalid); }
+} DataFormatDesc;
+
+/** Describes the properties of a MTLPixelFormat or MTLVertexFormat. */
+typedef struct MTLFormatDesc {
+ // The same storage holds either a pixel format or a vertex format, depending on
+ // which table the descriptor belongs to.
+ union {
+ MTLPixelFormat mtlPixelFormat;
+ MTLVertexFormat mtlVertexFormat;
+ };
+ RD::DataFormat dataFormat;
+ MTLFmtCaps mtlFmtCaps;
+ MTLViewClass mtlViewClass;
+ MTLPixelFormat mtlPixelFormatLinear;
+ const char *name = nullptr;
+
+ /** True when a format is assigned and it has at least one capability. */
+ inline bool isSupported() const { return (mtlPixelFormat != MTLPixelFormatInvalid) && (mtlFmtCaps != kMTLFmtCapsNone); }
+} MTLFormatDesc;
+
+/**
+ * Maps between Godot DataFormat values and Metal pixel/vertex formats, and answers
+ * size and capability queries for both, for the MTLDevice given at construction.
+ */
+class API_AVAILABLE(macos(11.0), ios(14.0)) PixelFormats {
+ using DataFormat = RD::DataFormat;
+
+public:
+ /** Returns whether the DataFormat is supported by the GPU bound to this instance. */
+ bool isSupported(DataFormat p_format);
+
+ /** Returns whether the DataFormat is supported by this implementation, or can be substituted by one that is. */
+ bool isSupportedOrSubstitutable(DataFormat p_format);
+
+ /** Returns whether the specified Metal MTLPixelFormat can be used as a depth format. */
+ _FORCE_INLINE_ bool isDepthFormat(MTLPixelFormat p_format) {
+ switch (p_format) {
+ case MTLPixelFormatDepth32Float:
+ case MTLPixelFormatDepth16Unorm:
+ case MTLPixelFormatDepth32Float_Stencil8:
+#if TARGET_OS_OSX
+ case MTLPixelFormatDepth24Unorm_Stencil8:
+#endif
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ /** Returns whether the specified Metal MTLPixelFormat can be used as a stencil format. */
+ _FORCE_INLINE_ bool isStencilFormat(MTLPixelFormat p_format) {
+ switch (p_format) {
+ case MTLPixelFormatStencil8:
+#if TARGET_OS_OSX
+ case MTLPixelFormatDepth24Unorm_Stencil8:
+ case MTLPixelFormatX24_Stencil8:
+#endif
+ case MTLPixelFormatDepth32Float_Stencil8:
+ case MTLPixelFormatX32_Stencil8:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ /** Returns whether the specified Metal MTLPixelFormat is a PVRTC format. */
+ bool isPVRTCFormat(MTLPixelFormat p_format);
+
+ /** Returns the format type corresponding to the specified Godot pixel format. */
+ MTLFormatType getFormatType(DataFormat p_format);
+
+ /** Returns the format type corresponding to the specified Metal MTLPixelFormat. */
+ MTLFormatType getFormatType(MTLPixelFormat p_formt);
+
+ /**
+ * Returns the Metal MTLPixelFormat corresponding to the specified Godot pixel format,
+ * or returns MTLPixelFormatInvalid if no corresponding MTLPixelFormat exists.
+ */
+ MTLPixelFormat getMTLPixelFormat(DataFormat p_format);
+
+ /**
+ * Returns the DataFormat corresponding to the specified Metal MTLPixelFormat,
+ * or returns DATA_FORMAT_MAX if no corresponding DataFormat exists.
+ */
+ DataFormat getDataFormat(MTLPixelFormat p_format);
+
+ /**
+ * Returns the size, in bytes, of a texel block of the specified Godot pixel format.
+ * For uncompressed formats, the returned value corresponds to the size in bytes of a single texel.
+ */
+ uint32_t getBytesPerBlock(DataFormat p_format);
+
+ /**
+ * Returns the size, in bytes, of a texel block of the specified Metal format.
+ * For uncompressed formats, the returned value corresponds to the size in bytes of a single texel.
+ */
+ uint32_t getBytesPerBlock(MTLPixelFormat p_format);
+
+ /** Returns the number of planes of the specified chroma-subsampling (YCbCr) DataFormat. */
+ uint8_t getChromaSubsamplingPlaneCount(DataFormat p_format);
+
+ /** Returns the number of bits per channel of the specified chroma-subsampling (YCbCr) DataFormat. */
+ uint8_t getChromaSubsamplingComponentBits(DataFormat p_format);
+
+ /**
+ * Returns the size, in bytes, of a texel of the specified Godot format.
+ * The returned value may be fractional for certain compressed formats.
+ */
+ float getBytesPerTexel(DataFormat p_format);
+
+ /**
+ * Returns the size, in bytes, of a texel of the specified Metal format.
+ * The returned value may be fractional for certain compressed formats.
+ */
+ float getBytesPerTexel(MTLPixelFormat p_format);
+
+ /**
+ * Returns the size, in bytes, of a row of texels of the specified Godot pixel format.
+ *
+ * For compressed formats, this takes into consideration the compression block size,
+ * and p_texels_per_row should specify the width in texels, not blocks. The result is rounded
+ * up if p_texels_per_row is not an integer multiple of the compression block width.
+ */
+ size_t getBytesPerRow(DataFormat p_format, uint32_t p_texels_per_row);
+
+ /**
+ * Returns the size, in bytes, of a row of texels of the specified Metal format.
+ *
+ * For compressed formats, this takes into consideration the compression block size,
+ * and p_texels_per_row should specify the width in texels, not blocks. The result is rounded
+ * up if p_texels_per_row is not an integer multiple of the compression block width.
+ */
+ size_t getBytesPerRow(MTLPixelFormat p_format, uint32_t p_texels_per_row);
+
+ /**
+ * Returns the size, in bytes, of a texture layer of the specified Godot pixel format.
+ *
+ * For compressed formats, this takes into consideration the compression block size,
+ * and p_texel_rows_per_layer should specify the height in texels, not blocks. The result is
+ * rounded up if p_texel_rows_per_layer is not an integer multiple of the compression block height.
+ */
+ size_t getBytesPerLayer(DataFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer);
+
+ /**
+ * Returns the size, in bytes, of a texture layer of the specified Metal format.
+ *
+ * For compressed formats, this takes into consideration the compression block size,
+ * and p_texel_rows_per_layer should specify the height in texels, not blocks. The result is
+ * rounded up if p_texel_rows_per_layer is not an integer multiple of the compression block height.
+ */
+ size_t getBytesPerLayer(MTLPixelFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer);
+
+ /** Returns the Metal format capabilities supported by the specified Godot format, without substitution. */
+ MTLFmtCaps getCapabilities(DataFormat p_format, bool p_extended = false);
+
+ /** Returns the Metal format capabilities supported by the specified Metal format. */
+ MTLFmtCaps getCapabilities(MTLPixelFormat p_format, bool p_extended = false);
+
+ /**
+ * Returns the Metal MTLVertexFormat corresponding to the specified
+ * DataFormat as used as a vertex attribute format.
+ */
+ MTLVertexFormat getMTLVertexFormat(DataFormat p_format);
+
+#pragma mark Construction
+
+ explicit PixelFormats(id<MTLDevice> p_device);
+
+protected:
+ id<MTLDevice> device;
+
+ DataFormatDesc &getDataFormatDesc(DataFormat p_format);
+ DataFormatDesc &getDataFormatDesc(MTLPixelFormat p_format);
+ MTLFormatDesc &getMTLPixelFormatDesc(MTLPixelFormat p_format);
+ MTLFormatDesc &getMTLVertexFormatDesc(MTLVertexFormat p_format);
+ void initDataFormatCapabilities();
+ void initMTLPixelFormatCapabilities();
+ void initMTLVertexFormatCapabilities();
+ void buildMTLFormatMaps();
+ void buildDFFormatMaps();
+ void modifyMTLFormatCapabilities();
+ void modifyMTLFormatCapabilities(id<MTLDevice> p_device);
+ void addMTLPixelFormatCapabilities(id<MTLDevice> p_device,
+ MTLFeatureSet p_feature_set,
+ MTLPixelFormat p_format,
+ MTLFmtCaps p_caps);
+ void addMTLPixelFormatCapabilities(id<MTLDevice> p_device,
+ MTLGPUFamily p_family,
+ MTLPixelFormat p_format,
+ MTLFmtCaps p_caps);
+ void disableMTLPixelFormatCapabilities(MTLPixelFormat p_format,
+ MTLFmtCaps p_caps);
+ void disableAllMTLPixelFormatCapabilities(MTLPixelFormat p_format);
+ void addMTLVertexFormatCapabilities(id<MTLDevice> p_device,
+ MTLFeatureSet p_feature_set,
+ MTLVertexFormat p_format,
+ MTLFmtCaps p_caps);
+
+ // Description tables: one entry per DataFormat, and fixed-capacity tables for Metal
+ // pixel and vertex formats.
+ DataFormatDesc _dataFormatDescriptions[RD::DATA_FORMAT_MAX];
+ MTLFormatDesc _mtlPixelFormatDescriptions[_mtlPixelFormatCount];
+ MTLFormatDesc _mtlVertexFormatDescriptions[_mtlVertexFormatCount];
+
+ // Most Metal formats have small values and are mapped by simple lookup array.
+ // Outliers are mapped by a map.
+ uint16_t _mtlFormatDescIndicesByMTLPixelFormatsCore[_mtlPixelFormatCoreCount];
+ HashMap<uint32_t, uint32_t> _mtlFormatDescIndicesByMTLPixelFormatsExt;
+
+ uint16_t _mtlFormatDescIndicesByMTLVertexFormats[_mtlVertexFormatCount];
+};
+
+#pragma clang diagnostic pop
+
+#endif // PIXEL_FORMATS_H
diff --git a/drivers/metal/pixel_formats.mm b/drivers/metal/pixel_formats.mm
new file mode 100644
index 0000000000..ac737b3f0a
--- /dev/null
+++ b/drivers/metal/pixel_formats.mm
@@ -0,0 +1,1298 @@
+/**************************************************************************/
+/* pixel_formats.mm */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+/**************************************************************************/
+/* */
+/* Portions of this code were derived from MoltenVK. */
+/* */
+/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
+/* (http://www.brenwill.com) */
+/* */
+/* Licensed under the Apache License, Version 2.0 (the "License"); */
+/* you may not use this file except in compliance with the License. */
+/* You may obtain a copy of the License at */
+/* */
+/* http://www.apache.org/licenses/LICENSE-2.0 */
+/* */
+/* Unless required by applicable law or agreed to in writing, software */
+/* distributed under the License is distributed on an "AS IS" BASIS, */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
+/* implied. See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/**************************************************************************/
+
+#import "pixel_formats.h"
+
+#import "metal_utils.h"
+
+#if TARGET_OS_IPHONE || TARGET_OS_TV // iOS/tvOS builds: provide fallback definitions for Metal format names used by this file.
+#if !(__IPHONE_OS_VERSION_MAX_ALLOWED >= 160400) // iOS/tvOS 16.4
+#define MTLPixelFormatBC1_RGBA MTLPixelFormatInvalid
+#define MTLPixelFormatBC1_RGBA_sRGB MTLPixelFormatInvalid
+#define MTLPixelFormatBC2_RGBA MTLPixelFormatInvalid
+#define MTLPixelFormatBC2_RGBA_sRGB MTLPixelFormatInvalid
+#define MTLPixelFormatBC3_RGBA MTLPixelFormatInvalid
+#define MTLPixelFormatBC3_RGBA_sRGB MTLPixelFormatInvalid
+#define MTLPixelFormatBC4_RUnorm MTLPixelFormatInvalid
+#define MTLPixelFormatBC4_RSnorm MTLPixelFormatInvalid
+#define MTLPixelFormatBC5_RGUnorm MTLPixelFormatInvalid
+#define MTLPixelFormatBC5_RGSnorm MTLPixelFormatInvalid
+#define MTLPixelFormatBC6H_RGBUfloat MTLPixelFormatInvalid
+#define MTLPixelFormatBC6H_RGBFloat MTLPixelFormatInvalid
+#define MTLPixelFormatBC7_RGBAUnorm MTLPixelFormatInvalid
+#define MTLPixelFormatBC7_RGBAUnorm_sRGB MTLPixelFormatInvalid
+#endif // !(__IPHONE_OS_VERSION_MAX_ALLOWED >= 160400)
+
+#define MTLPixelFormatDepth16Unorm_Stencil8 MTLPixelFormatDepth32Float_Stencil8 // Aliased to another real format, not to Invalid.
+#define MTLPixelFormatDepth24Unorm_Stencil8 MTLPixelFormatInvalid
+#define MTLPixelFormatX24_Stencil8 MTLPixelFormatInvalid
+#endif // TARGET_OS_IPHONE || TARGET_OS_TV
+
+#if TARGET_OS_TV // tvOS builds: every ASTC HDR format name resolves to MTLPixelFormatInvalid.
+#define MTLPixelFormatASTC_4x4_HDR MTLPixelFormatInvalid
+#define MTLPixelFormatASTC_5x4_HDR MTLPixelFormatInvalid
+#define MTLPixelFormatASTC_5x5_HDR MTLPixelFormatInvalid
+#define MTLPixelFormatASTC_6x5_HDR MTLPixelFormatInvalid
+#define MTLPixelFormatASTC_6x6_HDR MTLPixelFormatInvalid
+#define MTLPixelFormatASTC_8x5_HDR MTLPixelFormatInvalid
+#define MTLPixelFormatASTC_8x6_HDR MTLPixelFormatInvalid
+#define MTLPixelFormatASTC_8x8_HDR MTLPixelFormatInvalid
+#define MTLPixelFormatASTC_10x5_HDR MTLPixelFormatInvalid
+#define MTLPixelFormatASTC_10x6_HDR MTLPixelFormatInvalid
+#define MTLPixelFormatASTC_10x8_HDR MTLPixelFormatInvalid
+#define MTLPixelFormatASTC_10x10_HDR MTLPixelFormatInvalid
+#define MTLPixelFormatASTC_12x10_HDR MTLPixelFormatInvalid
+#define MTLPixelFormatASTC_12x12_HDR MTLPixelFormatInvalid
+#endif // TARGET_OS_TV
+
+#if !((__MAC_OS_X_VERSION_MAX_ALLOWED >= 140000) || (__IPHONE_OS_VERSION_MAX_ALLOWED >= 170000)) // Xcode 15
+#define MTLVertexFormatFloatRG11B10 MTLVertexFormatInvalid
+#define MTLVertexFormatFloatRGB9E5 MTLVertexFormatInvalid
+#endif // SDK version check above; both vertex format names fall back to MTLVertexFormatInvalid.
+
+/**
+ * Picks the capability value that matches the OS this file is compiled for.
+ * The decision is made by the preprocessor: iOS/tvOS builds (excluding Mac
+ * Catalyst) get p_iOS_val, macOS builds get p_macOS_val, and any other
+ * target fails to compile.
+ */
+_FORCE_INLINE_ constexpr MTLFmtCaps select_platform_caps(MTLFmtCaps p_macOS_val, MTLFmtCaps p_iOS_val) {
+#if !TARGET_OS_MACCATALYST && (TARGET_OS_IOS || TARGET_OS_TV)
+	return p_iOS_val;
+#elif TARGET_OS_OSX
+	return p_macOS_val;
+#else
+#error "unsupported platform"
+#endif
+}
+
+// Zero-fills p_count consecutive objects of type T starting at p_val.
+// The fill is a raw memset, so it writes sizeof(T) * p_count bytes of zeros.
+template <typename T>
+void clear(T *p_val, size_t p_count = 1) {
+	const size_t byte_count = sizeof(T) * p_count;
+	memset(p_val, 0, byte_count);
+}
+
+#pragma mark -
+#pragma mark PixelFormats
+
+// Returns what the descriptor registered for p_format reports from isSupported().
+bool PixelFormats::isSupported(DataFormat p_format) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return desc.isSupported();
+}
+
+// Returns what the descriptor registered for p_format reports from isSupportedOrSubstitutable().
+bool PixelFormats::isSupportedOrSubstitutable(DataFormat p_format) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return desc.isSupportedOrSubstitutable();
+}
+
+// Reports whether p_format is one of the eight PVRTC pixel formats
+// (the RGB and RGBA 2BPP/4BPP variants, with and without the _sRGB suffix).
+bool PixelFormats::isPVRTCFormat(MTLPixelFormat p_format) {
+	bool is_pvrtc = false;
+	switch (p_format) {
+		case MTLPixelFormatPVRTC_RGB_2BPP:
+		case MTLPixelFormatPVRTC_RGB_2BPP_sRGB:
+		case MTLPixelFormatPVRTC_RGB_4BPP:
+		case MTLPixelFormatPVRTC_RGB_4BPP_sRGB:
+		case MTLPixelFormatPVRTC_RGBA_2BPP:
+		case MTLPixelFormatPVRTC_RGBA_2BPP_sRGB:
+		case MTLPixelFormatPVRTC_RGBA_4BPP:
+		case MTLPixelFormatPVRTC_RGBA_4BPP_sRGB:
+			is_pvrtc = true;
+			break;
+		default:
+			break;
+	}
+	return is_pvrtc;
+}
+
+// Returns the formatType field of the descriptor registered for p_format.
+MTLFormatType PixelFormats::getFormatType(DataFormat p_format) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return desc.formatType;
+}
+
+// Returns the formatType field of the DataFormat descriptor that the
+// given MTLPixelFormat maps to (resolved through getDataFormatDesc()).
+MTLFormatType PixelFormats::getFormatType(MTLPixelFormat p_format) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return desc.formatType;
+}
+
+// Resolves the MTLPixelFormat to use for a DataFormat. When the descriptor has
+// no direct mapping, its substitute format is returned instead, provided the
+// substitution conditions below hold.
+MTLPixelFormat PixelFormats::getMTLPixelFormat(DataFormat p_format) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+
+	// A direct mapping is returned as-is.
+	if (desc.mtlPixelFormat != MTLPixelFormatInvalid) {
+		return desc.mtlPixelFormat;
+	}
+
+	// Substitution requires a DataFormat other than DATA_FORMAT_MAX and
+	// a chroma-subsampling plane count of at most one.
+	const bool can_substitute = p_format != RD::DATA_FORMAT_MAX && desc.chromaSubsamplingPlaneCount <= 1;
+	return can_substitute ? desc.mtlPixelFormatSubstitute : MTLPixelFormatInvalid;
+}
+
+// Returns the dataFormat field of the Metal descriptor registered for p_format.
+RD::DataFormat PixelFormats::getDataFormat(MTLPixelFormat p_format) {
+	MTLFormatDesc &mtl_desc = getMTLPixelFormatDesc(p_format);
+	return mtl_desc.dataFormat;
+}
+
+// Returns the bytesPerBlock field of the descriptor registered for p_format.
+uint32_t PixelFormats::getBytesPerBlock(DataFormat p_format) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return desc.bytesPerBlock;
+}
+
+// Returns the bytesPerBlock field of the DataFormat descriptor that the
+// given MTLPixelFormat maps to.
+uint32_t PixelFormats::getBytesPerBlock(MTLPixelFormat p_format) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return desc.bytesPerBlock;
+}
+
+// Returns the chromaSubsamplingPlaneCount field of the descriptor registered for p_format.
+uint8_t PixelFormats::getChromaSubsamplingPlaneCount(DataFormat p_format) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return desc.chromaSubsamplingPlaneCount;
+}
+
+// Returns the chromaSubsamplingComponentBits field of the descriptor registered for p_format.
+uint8_t PixelFormats::getChromaSubsamplingComponentBits(DataFormat p_format) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return desc.chromaSubsamplingComponentBits;
+}
+
+// Returns the result of bytesPerTexel() on the descriptor registered for p_format.
+float PixelFormats::getBytesPerTexel(DataFormat p_format) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return desc.bytesPerTexel();
+}
+
+// Returns the result of bytesPerTexel() on the DataFormat descriptor that the
+// given MTLPixelFormat maps to.
+float PixelFormats::getBytesPerTexel(MTLPixelFormat p_format) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return desc.bytesPerTexel();
+}
+
+// Computes the byte size of one row of p_texels_per_row texels:
+// the texel count is divided by the block width (rounding up) to get the
+// number of blocks, which is then multiplied by the bytes per block.
+size_t PixelFormats::getBytesPerRow(DataFormat p_format, uint32_t p_texels_per_row) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return desc.bytesPerBlock * Math::division_round_up(p_texels_per_row, desc.blockTexelSize.width);
+}
+
+// Computes the byte size of one row of p_texels_per_row texels, using the
+// DataFormat descriptor that the given MTLPixelFormat maps to:
+// blocks per row (rounded up) multiplied by the bytes per block.
+size_t PixelFormats::getBytesPerRow(MTLPixelFormat p_format, uint32_t p_texels_per_row) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return desc.bytesPerBlock * Math::division_round_up(p_texels_per_row, desc.blockTexelSize.width);
+}
+
+// Computes the byte size of one layer: the texel row count is divided by the
+// block height (rounding up) to get block rows, each costing p_bytes_per_row.
+size_t PixelFormats::getBytesPerLayer(DataFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return p_bytes_per_row * Math::division_round_up(p_texel_rows_per_layer, desc.blockTexelSize.height);
+}
+
+// Computes the byte size of one layer using the DataFormat descriptor that the
+// given MTLPixelFormat maps to: block rows (rounded up) times p_bytes_per_row.
+size_t PixelFormats::getBytesPerLayer(MTLPixelFormat p_format, size_t p_bytes_per_row, uint32_t p_texel_rows_per_layer) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return p_bytes_per_row * Math::division_round_up(p_texel_rows_per_layer, desc.blockTexelSize.height);
+}
+
+// Returns the capabilities of the MTLPixelFormat stored directly in the
+// descriptor for p_format, forwarding p_extended to the MTLPixelFormat overload.
+MTLFmtCaps PixelFormats::getCapabilities(DataFormat p_format, bool p_extended) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+	return getCapabilities(desc.mtlPixelFormat, p_extended);
+}
+
+// Returns the capability flags recorded for p_format. When p_extended is set
+// and the format belongs to a view class other than None, the flags of every
+// pixel format descriptor sharing that view class are OR-ed in as well.
+MTLFmtCaps PixelFormats::getCapabilities(MTLPixelFormat p_format, bool p_extended) {
+	MTLFormatDesc &desc = getMTLPixelFormatDesc(p_format);
+	MTLFmtCaps merged = desc.mtlFmtCaps;
+
+	const bool widen_to_view_class = p_extended && desc.mtlViewClass != MTLViewClass::None;
+	if (widen_to_view_class) {
+		for (MTLFormatDesc &candidate : _mtlPixelFormatDescriptions) {
+			if (candidate.mtlViewClass != desc.mtlViewClass) {
+				continue;
+			}
+			merged |= candidate.mtlFmtCaps;
+		}
+	}
+	return merged;
+}
+
+// Resolves the MTLVertexFormat for a DataFormat. If the descriptor has no
+// direct vertex format, a warning is printed; when the descriptor reports that
+// a substitute is usable, the substitute is returned and named in the warning,
+// otherwise MTLVertexFormatInvalid is returned.
+MTLVertexFormat PixelFormats::getMTLVertexFormat(DataFormat p_format) {
+	DataFormatDesc &desc = getDataFormatDesc(p_format);
+
+	// Direct mapping available: nothing to report.
+	if (desc.mtlVertexFormat != MTLVertexFormatInvalid) {
+		return desc.mtlVertexFormat;
+	}
+
+	MTLVertexFormat resolved = MTLVertexFormatInvalid;
+
+	String warning;
+	warning += "DataFormat ";
+	warning += desc.name;
+	warning += " is not supported for vertex buffers on this device.";
+
+	if (desc.vertexIsSupportedOrSubstitutable()) {
+		resolved = desc.mtlVertexFormatSubstitute;
+
+		// Look up the DataFormat behind the substitute so the message can name it.
+		DataFormatDesc &substitute_desc = getDataFormatDesc(getMTLVertexFormatDesc(resolved).dataFormat);
+		warning += " Using DataFormat ";
+		warning += substitute_desc.name;
+		warning += " instead.";
+	}
+	WARN_PRINT(warning);
+
+	return resolved;
+}
+
+// Returns a mutable reference to the descriptor slot for p_format.
+// The index is checked against RD::DATA_FORMAT_MAX via CRASH_BAD_INDEX_MSG before the array is touched.
+DataFormatDesc &PixelFormats::getDataFormatDesc(DataFormat p_format) {
+	CRASH_BAD_INDEX_MSG(p_format, RD::DATA_FORMAT_MAX, "Attempting to describe an invalid DataFormat");
+	DataFormatDesc &desc = _dataFormatDescriptions[p_format];
+	return desc;
+}
+
+// Returns the DataFormat descriptor for a MTLPixelFormat by first resolving the
+// Metal descriptor and then following its dataFormat field.
+DataFormatDesc &PixelFormats::getDataFormatDesc(MTLPixelFormat p_format) {
+	MTLFormatDesc &mtl_desc = getMTLPixelFormatDesc(p_format);
+	return getDataFormatDesc(mtl_desc.dataFormat);
+}
+
+// Return a reference to the Metal format descriptor corresponding to the MTLPixelFormat.
+// Format values below _mtlPixelFormatCoreCount are resolved through the core index array;
+// all other values are resolved through the extension hash map. A value that is not
+// present in the map resolves to descriptor index 0 (the index a value-initialized
+// map entry would hold).
+MTLFormatDesc &PixelFormats::getMTLPixelFormatDesc(MTLPixelFormat p_format) {
+	uint16_t fmt_idx = 0;
+	if (p_format < _mtlPixelFormatCoreCount) {
+		fmt_idx = _mtlFormatDescIndicesByMTLPixelFormatsCore[p_format];
+	} else {
+		// Use getptr() rather than operator[]: operator[] on a non-const HashMap
+		// inserts a default entry for a missing key, so a plain lookup of an
+		// unknown format would mutate (and keep growing) the map.
+		const uint32_t *ext_idx = _mtlFormatDescIndicesByMTLPixelFormatsExt.getptr(p_format);
+		if (ext_idx != nullptr) {
+			fmt_idx = *ext_idx;
+		}
+	}
+	return _mtlPixelFormatDescriptions[fmt_idx];
+}
+
+// Looks up the Metal format descriptor for a MTLVertexFormat.
+// Values at or beyond _mtlVertexFormatCount resolve to descriptor index 0.
+MTLFormatDesc &PixelFormats::getMTLVertexFormatDesc(MTLVertexFormat p_format) {
+	uint16_t desc_idx = 0;
+	if (p_format < _mtlVertexFormatCount) {
+		desc_idx = _mtlFormatDescIndicesByMTLVertexFormats[p_format];
+	}
+	return _mtlVertexFormatDescriptions[desc_idx];
+}
+
+PixelFormats::PixelFormats(id<MTLDevice> p_device) :
+ device(p_device) { // Stores the Metal device in the `device` member, then runs the table setup steps below in a fixed order.
+ initMTLPixelFormatCapabilities(); // Metal-side steps come first: pixel formats, ...
+ initMTLVertexFormatCapabilities(); // ... then vertex formats.
+ buildMTLFormatMaps(); // NOTE(review): presumably fills the index tables read by getMTLPixelFormatDesc()/getMTLVertexFormatDesc() - confirm.
+ modifyMTLFormatCapabilities(); // Called after the Metal maps are built; the no-argument overload is used here.
+
+ initDataFormatCapabilities(); // DataFormat-side steps run last, after every Metal-side step above.
+ buildDFFormatMaps();
+}
+
+#define addDfFormatDescFull(DATA_FMT, MTL_FMT, MTL_FMT_ALT, MTL_VTX_FMT, MTL_VTX_FMT_ALT, CSPC, CSCB, BLK_W, BLK_H, BLK_BYTE_CNT, MVK_FMT_TYPE) \
+ CRASH_BAD_INDEX_MSG(RD::DATA_FORMAT_##DATA_FMT, RD::DATA_FORMAT_MAX, "Attempting to describe too many DataFormats"); \
+ _dataFormatDescriptions[RD::DATA_FORMAT_##DATA_FMT] = { RD::DATA_FORMAT_##DATA_FMT, MTLPixelFormat##MTL_FMT, MTLPixelFormat##MTL_FMT_ALT, MTLVertexFormat##MTL_VTX_FMT, MTLVertexFormat##MTL_VTX_FMT_ALT, \
+ CSPC, CSCB, { BLK_W, BLK_H }, BLK_BYTE_CNT, MTLFormatType::MVK_FMT_TYPE, "DATA_FORMAT_" #DATA_FMT, false } // addDfFormatDescFull: index-checks RD::DATA_FORMAT_<DATA_FMT>, then stores a complete descriptor in that slot; short names are token-pasted onto their enum prefixes.
+
+#define addDataFormatDesc(DATA_FMT, MTL_FMT, MTL_FMT_ALT, MTL_VTX_FMT, MTL_VTX_FMT_ALT, BLK_W, BLK_H, BLK_BYTE_CNT, MVK_FMT_TYPE) \
+ addDfFormatDescFull(DATA_FMT, MTL_FMT, MTL_FMT_ALT, MTL_VTX_FMT, MTL_VTX_FMT_ALT, 0, 0, BLK_W, BLK_H, BLK_BYTE_CNT, MVK_FMT_TYPE) // addDataFormatDesc: same as addDfFormatDescFull with the CSPC and CSCB arguments fixed at 0.
+
+#define addDfFormatDescChromaSubsampling(DATA_FMT, MTL_FMT, CSPC, CSCB, BLK_W, BLK_H, BLK_BYTE_CNT) \
+ addDfFormatDescFull(DATA_FMT, MTL_FMT, Invalid, Invalid, Invalid, CSPC, CSCB, BLK_W, BLK_H, BLK_BYTE_CNT, ColorFloat) // addDfFormatDescChromaSubsampling: substitute pixel format and both vertex formats are Invalid; format type is always ColorFloat.
+
+void PixelFormats::initDataFormatCapabilities() {
+ clear(_dataFormatDescriptions, RD::DATA_FORMAT_MAX);
+
+ addDataFormatDesc(R4G4_UNORM_PACK8, Invalid, Invalid, Invalid, Invalid, 1, 1, 1, ColorFloat);
+ addDataFormatDesc(R4G4B4A4_UNORM_PACK16, ABGR4Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);
+ addDataFormatDesc(B4G4R4A4_UNORM_PACK16, Invalid, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);
+
+ addDataFormatDesc(R5G6B5_UNORM_PACK16, B5G6R5Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);
+ addDataFormatDesc(B5G6R5_UNORM_PACK16, Invalid, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);
+ addDataFormatDesc(R5G5B5A1_UNORM_PACK16, A1BGR5Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);
+ addDataFormatDesc(B5G5R5A1_UNORM_PACK16, Invalid, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);
+ addDataFormatDesc(A1R5G5B5_UNORM_PACK16, BGR5A1Unorm, Invalid, Invalid, Invalid, 1, 1, 2, ColorFloat);
+
+ addDataFormatDesc(R8_UNORM, R8Unorm, Invalid, UCharNormalized, UChar2Normalized, 1, 1, 1, ColorFloat);
+ addDataFormatDesc(R8_SNORM, R8Snorm, Invalid, CharNormalized, Char2Normalized, 1, 1, 1, ColorFloat);
+ addDataFormatDesc(R8_USCALED, Invalid, Invalid, UChar, UChar2, 1, 1, 1, ColorFloat);
+ addDataFormatDesc(R8_SSCALED, Invalid, Invalid, Char, Char2, 1, 1, 1, ColorFloat);
+ addDataFormatDesc(R8_UINT, R8Uint, Invalid, UChar, UChar2, 1, 1, 1, ColorUInt8);
+ addDataFormatDesc(R8_SINT, R8Sint, Invalid, Char, Char2, 1, 1, 1, ColorInt8);
+ addDataFormatDesc(R8_SRGB, R8Unorm_sRGB, Invalid, UCharNormalized, UChar2Normalized, 1, 1, 1, ColorFloat);
+
+ addDataFormatDesc(R8G8_UNORM, RG8Unorm, Invalid, UChar2Normalized, Invalid, 1, 1, 2, ColorFloat);
+ addDataFormatDesc(R8G8_SNORM, RG8Snorm, Invalid, Char2Normalized, Invalid, 1, 1, 2, ColorFloat);
+ addDataFormatDesc(R8G8_USCALED, Invalid, Invalid, UChar2, Invalid, 1, 1, 2, ColorFloat);
+ addDataFormatDesc(R8G8_SSCALED, Invalid, Invalid, Char2, Invalid, 1, 1, 2, ColorFloat);
+ addDataFormatDesc(R8G8_UINT, RG8Uint, Invalid, UChar2, Invalid, 1, 1, 2, ColorUInt8);
+ addDataFormatDesc(R8G8_SINT, RG8Sint, Invalid, Char2, Invalid, 1, 1, 2, ColorInt8);
+ addDataFormatDesc(R8G8_SRGB, RG8Unorm_sRGB, Invalid, UChar2Normalized, Invalid, 1, 1, 2, ColorFloat);
+
+ addDataFormatDesc(R8G8B8_UNORM, Invalid, Invalid, UChar3Normalized, Invalid, 1, 1, 3, ColorFloat);
+ addDataFormatDesc(R8G8B8_SNORM, Invalid, Invalid, Char3Normalized, Invalid, 1, 1, 3, ColorFloat);
+ addDataFormatDesc(R8G8B8_USCALED, Invalid, Invalid, UChar3, Invalid, 1, 1, 3, ColorFloat);
+ addDataFormatDesc(R8G8B8_SSCALED, Invalid, Invalid, Char3, Invalid, 1, 1, 3, ColorFloat);
+ addDataFormatDesc(R8G8B8_UINT, Invalid, Invalid, UChar3, Invalid, 1, 1, 3, ColorUInt8);
+ addDataFormatDesc(R8G8B8_SINT, Invalid, Invalid, Char3, Invalid, 1, 1, 3, ColorInt8);
+ addDataFormatDesc(R8G8B8_SRGB, Invalid, Invalid, UChar3Normalized, Invalid, 1, 1, 3, ColorFloat);
+
+ addDataFormatDesc(B8G8R8_UNORM, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorFloat);
+ addDataFormatDesc(B8G8R8_SNORM, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorFloat);
+ addDataFormatDesc(B8G8R8_USCALED, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorFloat);
+ addDataFormatDesc(B8G8R8_SSCALED, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorFloat);
+ addDataFormatDesc(B8G8R8_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorUInt8);
+ addDataFormatDesc(B8G8R8_SINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorInt8);
+ addDataFormatDesc(B8G8R8_SRGB, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, ColorFloat);
+
+ addDataFormatDesc(R8G8B8A8_UNORM, RGBA8Unorm, Invalid, UChar4Normalized, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(R8G8B8A8_SNORM, RGBA8Snorm, Invalid, Char4Normalized, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(R8G8B8A8_USCALED, Invalid, Invalid, UChar4, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(R8G8B8A8_SSCALED, Invalid, Invalid, Char4, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(R8G8B8A8_UINT, RGBA8Uint, Invalid, UChar4, Invalid, 1, 1, 4, ColorUInt8);
+ addDataFormatDesc(R8G8B8A8_SINT, RGBA8Sint, Invalid, Char4, Invalid, 1, 1, 4, ColorInt8);
+ addDataFormatDesc(R8G8B8A8_SRGB, RGBA8Unorm_sRGB, Invalid, UChar4Normalized, Invalid, 1, 1, 4, ColorFloat);
+
+ addDataFormatDesc(B8G8R8A8_UNORM, BGRA8Unorm, Invalid, UChar4Normalized_BGRA, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(B8G8R8A8_SNORM, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(B8G8R8A8_USCALED, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(B8G8R8A8_SSCALED, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(B8G8R8A8_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorUInt8);
+ addDataFormatDesc(B8G8R8A8_SINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorInt8);
+ addDataFormatDesc(B8G8R8A8_SRGB, BGRA8Unorm_sRGB, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
+
+ addDataFormatDesc(A8B8G8R8_UNORM_PACK32, RGBA8Unorm, Invalid, UChar4Normalized, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(A8B8G8R8_SNORM_PACK32, RGBA8Snorm, Invalid, Char4Normalized, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(A8B8G8R8_USCALED_PACK32, Invalid, Invalid, UChar4, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(A8B8G8R8_SSCALED_PACK32, Invalid, Invalid, Char4, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(A8B8G8R8_UINT_PACK32, RGBA8Uint, Invalid, UChar4, Invalid, 1, 1, 4, ColorUInt8);
+ addDataFormatDesc(A8B8G8R8_SINT_PACK32, RGBA8Sint, Invalid, Char4, Invalid, 1, 1, 4, ColorInt8);
+ addDataFormatDesc(A8B8G8R8_SRGB_PACK32, RGBA8Unorm_sRGB, Invalid, UChar4Normalized, Invalid, 1, 1, 4, ColorFloat);
+
+ addDataFormatDesc(A2R10G10B10_UNORM_PACK32, BGR10A2Unorm, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(A2R10G10B10_SNORM_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(A2R10G10B10_USCALED_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(A2R10G10B10_SSCALED_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(A2R10G10B10_UINT_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorUInt16);
+ addDataFormatDesc(A2R10G10B10_SINT_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorInt16);
+
+ addDataFormatDesc(A2B10G10R10_UNORM_PACK32, RGB10A2Unorm, Invalid, UInt1010102Normalized, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(A2B10G10R10_SNORM_PACK32, Invalid, Invalid, Int1010102Normalized, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(A2B10G10R10_USCALED_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(A2B10G10R10_SSCALED_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(A2B10G10R10_UINT_PACK32, RGB10A2Uint, Invalid, Invalid, Invalid, 1, 1, 4, ColorUInt16);
+ addDataFormatDesc(A2B10G10R10_SINT_PACK32, Invalid, Invalid, Invalid, Invalid, 1, 1, 4, ColorInt16);
+
+ addDataFormatDesc(R16_UNORM, R16Unorm, Invalid, UShortNormalized, UShort2Normalized, 1, 1, 2, ColorFloat);
+ addDataFormatDesc(R16_SNORM, R16Snorm, Invalid, ShortNormalized, Short2Normalized, 1, 1, 2, ColorFloat);
+ addDataFormatDesc(R16_USCALED, Invalid, Invalid, UShort, UShort2, 1, 1, 2, ColorFloat);
+ addDataFormatDesc(R16_SSCALED, Invalid, Invalid, Short, Short2, 1, 1, 2, ColorFloat);
+ addDataFormatDesc(R16_UINT, R16Uint, Invalid, UShort, UShort2, 1, 1, 2, ColorUInt16);
+ addDataFormatDesc(R16_SINT, R16Sint, Invalid, Short, Short2, 1, 1, 2, ColorInt16);
+ addDataFormatDesc(R16_SFLOAT, R16Float, Invalid, Half, Half2, 1, 1, 2, ColorFloat);
+
+ addDataFormatDesc(R16G16_UNORM, RG16Unorm, Invalid, UShort2Normalized, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(R16G16_SNORM, RG16Snorm, Invalid, Short2Normalized, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(R16G16_USCALED, Invalid, Invalid, UShort2, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(R16G16_SSCALED, Invalid, Invalid, Short2, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(R16G16_UINT, RG16Uint, Invalid, UShort2, Invalid, 1, 1, 4, ColorUInt16);
+ addDataFormatDesc(R16G16_SINT, RG16Sint, Invalid, Short2, Invalid, 1, 1, 4, ColorInt16);
+ addDataFormatDesc(R16G16_SFLOAT, RG16Float, Invalid, Half2, Invalid, 1, 1, 4, ColorFloat);
+
+ addDataFormatDesc(R16G16B16_UNORM, Invalid, Invalid, UShort3Normalized, Invalid, 1, 1, 6, ColorFloat);
+ addDataFormatDesc(R16G16B16_SNORM, Invalid, Invalid, Short3Normalized, Invalid, 1, 1, 6, ColorFloat);
+ addDataFormatDesc(R16G16B16_USCALED, Invalid, Invalid, UShort3, Invalid, 1, 1, 6, ColorFloat);
+ addDataFormatDesc(R16G16B16_SSCALED, Invalid, Invalid, Short3, Invalid, 1, 1, 6, ColorFloat);
+ addDataFormatDesc(R16G16B16_UINT, Invalid, Invalid, UShort3, Invalid, 1, 1, 6, ColorUInt16);
+ addDataFormatDesc(R16G16B16_SINT, Invalid, Invalid, Short3, Invalid, 1, 1, 6, ColorInt16);
+ addDataFormatDesc(R16G16B16_SFLOAT, Invalid, Invalid, Half3, Invalid, 1, 1, 6, ColorFloat);
+
+ addDataFormatDesc(R16G16B16A16_UNORM, RGBA16Unorm, Invalid, UShort4Normalized, Invalid, 1, 1, 8, ColorFloat);
+ addDataFormatDesc(R16G16B16A16_SNORM, RGBA16Snorm, Invalid, Short4Normalized, Invalid, 1, 1, 8, ColorFloat);
+ addDataFormatDesc(R16G16B16A16_USCALED, Invalid, Invalid, UShort4, Invalid, 1, 1, 8, ColorFloat);
+ addDataFormatDesc(R16G16B16A16_SSCALED, Invalid, Invalid, Short4, Invalid, 1, 1, 8, ColorFloat);
+ addDataFormatDesc(R16G16B16A16_UINT, RGBA16Uint, Invalid, UShort4, Invalid, 1, 1, 8, ColorUInt16);
+ addDataFormatDesc(R16G16B16A16_SINT, RGBA16Sint, Invalid, Short4, Invalid, 1, 1, 8, ColorInt16);
+ addDataFormatDesc(R16G16B16A16_SFLOAT, RGBA16Float, Invalid, Half4, Invalid, 1, 1, 8, ColorFloat);
+
+ addDataFormatDesc(R32_UINT, R32Uint, Invalid, UInt, Invalid, 1, 1, 4, ColorUInt32);
+ addDataFormatDesc(R32_SINT, R32Sint, Invalid, Int, Invalid, 1, 1, 4, ColorInt32);
+ addDataFormatDesc(R32_SFLOAT, R32Float, Invalid, Float, Invalid, 1, 1, 4, ColorFloat);
+
+ addDataFormatDesc(R32G32_UINT, RG32Uint, Invalid, UInt2, Invalid, 1, 1, 8, ColorUInt32);
+ addDataFormatDesc(R32G32_SINT, RG32Sint, Invalid, Int2, Invalid, 1, 1, 8, ColorInt32);
+ addDataFormatDesc(R32G32_SFLOAT, RG32Float, Invalid, Float2, Invalid, 1, 1, 8, ColorFloat);
+
+ addDataFormatDesc(R32G32B32_UINT, Invalid, Invalid, UInt3, Invalid, 1, 1, 12, ColorUInt32);
+ addDataFormatDesc(R32G32B32_SINT, Invalid, Invalid, Int3, Invalid, 1, 1, 12, ColorInt32);
+ addDataFormatDesc(R32G32B32_SFLOAT, Invalid, Invalid, Float3, Invalid, 1, 1, 12, ColorFloat);
+
+ addDataFormatDesc(R32G32B32A32_UINT, RGBA32Uint, Invalid, UInt4, Invalid, 1, 1, 16, ColorUInt32);
+ addDataFormatDesc(R32G32B32A32_SINT, RGBA32Sint, Invalid, Int4, Invalid, 1, 1, 16, ColorInt32);
+ addDataFormatDesc(R32G32B32A32_SFLOAT, RGBA32Float, Invalid, Float4, Invalid, 1, 1, 16, ColorFloat);
+
+ addDataFormatDesc(R64_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 8, ColorFloat);
+ addDataFormatDesc(R64_SINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 8, ColorFloat);
+ addDataFormatDesc(R64_SFLOAT, Invalid, Invalid, Invalid, Invalid, 1, 1, 8, ColorFloat);
+
+ addDataFormatDesc(R64G64_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 16, ColorFloat);
+ addDataFormatDesc(R64G64_SINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 16, ColorFloat);
+ addDataFormatDesc(R64G64_SFLOAT, Invalid, Invalid, Invalid, Invalid, 1, 1, 16, ColorFloat);
+
+ addDataFormatDesc(R64G64B64_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 24, ColorFloat);
+ addDataFormatDesc(R64G64B64_SINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 24, ColorFloat);
+ addDataFormatDesc(R64G64B64_SFLOAT, Invalid, Invalid, Invalid, Invalid, 1, 1, 24, ColorFloat);
+
+ addDataFormatDesc(R64G64B64A64_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 32, ColorFloat);
+ addDataFormatDesc(R64G64B64A64_SINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 32, ColorFloat);
+ addDataFormatDesc(R64G64B64A64_SFLOAT, Invalid, Invalid, Invalid, Invalid, 1, 1, 32, ColorFloat);
+
+ addDataFormatDesc(B10G11R11_UFLOAT_PACK32, RG11B10Float, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
+ addDataFormatDesc(E5B9G9R9_UFLOAT_PACK32, RGB9E5Float, Invalid, Invalid, Invalid, 1, 1, 4, ColorFloat);
+
+ addDataFormatDesc(D32_SFLOAT, Depth32Float, Invalid, Invalid, Invalid, 1, 1, 4, DepthStencil);
+ addDataFormatDesc(D32_SFLOAT_S8_UINT, Depth32Float_Stencil8, Invalid, Invalid, Invalid, 1, 1, 5, DepthStencil);
+
+ addDataFormatDesc(S8_UINT, Stencil8, Invalid, Invalid, Invalid, 1, 1, 1, DepthStencil);
+
+ addDataFormatDesc(D16_UNORM, Depth16Unorm, Depth32Float, Invalid, Invalid, 1, 1, 2, DepthStencil);
+ addDataFormatDesc(D16_UNORM_S8_UINT, Invalid, Invalid, Invalid, Invalid, 1, 1, 3, DepthStencil);
+ addDataFormatDesc(D24_UNORM_S8_UINT, Depth24Unorm_Stencil8, Depth32Float_Stencil8, Invalid, Invalid, 1, 1, 4, DepthStencil);
+
+ addDataFormatDesc(X8_D24_UNORM_PACK32, Invalid, Depth24Unorm_Stencil8, Invalid, Invalid, 1, 1, 4, DepthStencil);
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunguarded-availability"
+
+ addDataFormatDesc(BC1_RGB_UNORM_BLOCK, BC1_RGBA, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
+ addDataFormatDesc(BC1_RGB_SRGB_BLOCK, BC1_RGBA_sRGB, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
+ addDataFormatDesc(BC1_RGBA_UNORM_BLOCK, BC1_RGBA, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
+ addDataFormatDesc(BC1_RGBA_SRGB_BLOCK, BC1_RGBA_sRGB, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
+
+ addDataFormatDesc(BC2_UNORM_BLOCK, BC2_RGBA, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+ addDataFormatDesc(BC2_SRGB_BLOCK, BC2_RGBA_sRGB, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+
+ addDataFormatDesc(BC3_UNORM_BLOCK, BC3_RGBA, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+ addDataFormatDesc(BC3_SRGB_BLOCK, BC3_RGBA_sRGB, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+
+ addDataFormatDesc(BC4_UNORM_BLOCK, BC4_RUnorm, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
+ addDataFormatDesc(BC4_SNORM_BLOCK, BC4_RSnorm, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
+
+ addDataFormatDesc(BC5_UNORM_BLOCK, BC5_RGUnorm, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+ addDataFormatDesc(BC5_SNORM_BLOCK, BC5_RGSnorm, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+
+ addDataFormatDesc(BC6H_UFLOAT_BLOCK, BC6H_RGBUfloat, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+ addDataFormatDesc(BC6H_SFLOAT_BLOCK, BC6H_RGBFloat, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+
+ addDataFormatDesc(BC7_UNORM_BLOCK, BC7_RGBAUnorm, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+ addDataFormatDesc(BC7_SRGB_BLOCK, BC7_RGBAUnorm_sRGB, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+
+#pragma clang diagnostic pop
+
+ addDataFormatDesc(ETC2_R8G8B8_UNORM_BLOCK, ETC2_RGB8, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
+ addDataFormatDesc(ETC2_R8G8B8_SRGB_BLOCK, ETC2_RGB8_sRGB, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
+ addDataFormatDesc(ETC2_R8G8B8A1_UNORM_BLOCK, ETC2_RGB8A1, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
+ addDataFormatDesc(ETC2_R8G8B8A1_SRGB_BLOCK, ETC2_RGB8A1_sRGB, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
+
+ addDataFormatDesc(ETC2_R8G8B8A8_UNORM_BLOCK, EAC_RGBA8, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+ addDataFormatDesc(ETC2_R8G8B8A8_SRGB_BLOCK, EAC_RGBA8_sRGB, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+
+ addDataFormatDesc(EAC_R11_UNORM_BLOCK, EAC_R11Unorm, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
+ addDataFormatDesc(EAC_R11_SNORM_BLOCK, EAC_R11Snorm, Invalid, Invalid, Invalid, 4, 4, 8, Compressed);
+
+ addDataFormatDesc(EAC_R11G11_UNORM_BLOCK, EAC_RG11Unorm, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+ addDataFormatDesc(EAC_R11G11_SNORM_BLOCK, EAC_RG11Snorm, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+
+ addDataFormatDesc(ASTC_4x4_UNORM_BLOCK, ASTC_4x4_LDR, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+ addDataFormatDesc(ASTC_4x4_SRGB_BLOCK, ASTC_4x4_sRGB, Invalid, Invalid, Invalid, 4, 4, 16, Compressed);
+ addDataFormatDesc(ASTC_5x4_UNORM_BLOCK, ASTC_5x4_LDR, Invalid, Invalid, Invalid, 5, 4, 16, Compressed);
+ addDataFormatDesc(ASTC_5x4_SRGB_BLOCK, ASTC_5x4_sRGB, Invalid, Invalid, Invalid, 5, 4, 16, Compressed);
+ addDataFormatDesc(ASTC_5x5_UNORM_BLOCK, ASTC_5x5_LDR, Invalid, Invalid, Invalid, 5, 5, 16, Compressed);
+ addDataFormatDesc(ASTC_5x5_SRGB_BLOCK, ASTC_5x5_sRGB, Invalid, Invalid, Invalid, 5, 5, 16, Compressed);
+ addDataFormatDesc(ASTC_6x5_UNORM_BLOCK, ASTC_6x5_LDR, Invalid, Invalid, Invalid, 6, 5, 16, Compressed);
+ addDataFormatDesc(ASTC_6x5_SRGB_BLOCK, ASTC_6x5_sRGB, Invalid, Invalid, Invalid, 6, 5, 16, Compressed);
+ addDataFormatDesc(ASTC_6x6_UNORM_BLOCK, ASTC_6x6_LDR, Invalid, Invalid, Invalid, 6, 6, 16, Compressed);
+ addDataFormatDesc(ASTC_6x6_SRGB_BLOCK, ASTC_6x6_sRGB, Invalid, Invalid, Invalid, 6, 6, 16, Compressed);
+ addDataFormatDesc(ASTC_8x5_UNORM_BLOCK, ASTC_8x5_LDR, Invalid, Invalid, Invalid, 8, 5, 16, Compressed);
+ addDataFormatDesc(ASTC_8x5_SRGB_BLOCK, ASTC_8x5_sRGB, Invalid, Invalid, Invalid, 8, 5, 16, Compressed);
+ addDataFormatDesc(ASTC_8x6_UNORM_BLOCK, ASTC_8x6_LDR, Invalid, Invalid, Invalid, 8, 6, 16, Compressed);
+ addDataFormatDesc(ASTC_8x6_SRGB_BLOCK, ASTC_8x6_sRGB, Invalid, Invalid, Invalid, 8, 6, 16, Compressed);
+ addDataFormatDesc(ASTC_8x8_UNORM_BLOCK, ASTC_8x8_LDR, Invalid, Invalid, Invalid, 8, 8, 16, Compressed);
+ addDataFormatDesc(ASTC_8x8_SRGB_BLOCK, ASTC_8x8_sRGB, Invalid, Invalid, Invalid, 8, 8, 16, Compressed);
+ addDataFormatDesc(ASTC_10x5_UNORM_BLOCK, ASTC_10x5_LDR, Invalid, Invalid, Invalid, 10, 5, 16, Compressed);
+ addDataFormatDesc(ASTC_10x5_SRGB_BLOCK, ASTC_10x5_sRGB, Invalid, Invalid, Invalid, 10, 5, 16, Compressed);
+ addDataFormatDesc(ASTC_10x6_UNORM_BLOCK, ASTC_10x6_LDR, Invalid, Invalid, Invalid, 10, 6, 16, Compressed);
+ addDataFormatDesc(ASTC_10x6_SRGB_BLOCK, ASTC_10x6_sRGB, Invalid, Invalid, Invalid, 10, 6, 16, Compressed);
+ addDataFormatDesc(ASTC_10x8_UNORM_BLOCK, ASTC_10x8_LDR, Invalid, Invalid, Invalid, 10, 8, 16, Compressed);
+ addDataFormatDesc(ASTC_10x8_SRGB_BLOCK, ASTC_10x8_sRGB, Invalid, Invalid, Invalid, 10, 8, 16, Compressed);
+ addDataFormatDesc(ASTC_10x10_UNORM_BLOCK, ASTC_10x10_LDR, Invalid, Invalid, Invalid, 10, 10, 16, Compressed);
+ addDataFormatDesc(ASTC_10x10_SRGB_BLOCK, ASTC_10x10_sRGB, Invalid, Invalid, Invalid, 10, 10, 16, Compressed);
+ addDataFormatDesc(ASTC_12x10_UNORM_BLOCK, ASTC_12x10_LDR, Invalid, Invalid, Invalid, 12, 10, 16, Compressed);
+ addDataFormatDesc(ASTC_12x10_SRGB_BLOCK, ASTC_12x10_sRGB, Invalid, Invalid, Invalid, 12, 10, 16, Compressed);
+ addDataFormatDesc(ASTC_12x12_UNORM_BLOCK, ASTC_12x12_LDR, Invalid, Invalid, Invalid, 12, 12, 16, Compressed);
+ addDataFormatDesc(ASTC_12x12_SRGB_BLOCK, ASTC_12x12_sRGB, Invalid, Invalid, Invalid, 12, 12, 16, Compressed);
+
+ addDfFormatDescChromaSubsampling(G8B8G8R8_422_UNORM, GBGR422, 1, 8, 2, 1, 4);
+ addDfFormatDescChromaSubsampling(B8G8R8G8_422_UNORM, BGRG422, 1, 8, 2, 1, 4);
+ addDfFormatDescChromaSubsampling(G8_B8_R8_3PLANE_420_UNORM, Invalid, 3, 8, 2, 2, 6);
+ addDfFormatDescChromaSubsampling(G8_B8R8_2PLANE_420_UNORM, Invalid, 2, 8, 2, 2, 6);
+ addDfFormatDescChromaSubsampling(G8_B8_R8_3PLANE_422_UNORM, Invalid, 3, 8, 2, 1, 4);
+ addDfFormatDescChromaSubsampling(G8_B8R8_2PLANE_422_UNORM, Invalid, 2, 8, 2, 1, 4);
+ addDfFormatDescChromaSubsampling(G8_B8_R8_3PLANE_444_UNORM, Invalid, 3, 8, 1, 1, 3);
+ addDfFormatDescChromaSubsampling(R10X6_UNORM_PACK16, R16Unorm, 0, 10, 1, 1, 2);
+ addDfFormatDescChromaSubsampling(R10X6G10X6_UNORM_2PACK16, RG16Unorm, 0, 10, 1, 1, 4);
+ addDfFormatDescChromaSubsampling(R10X6G10X6B10X6A10X6_UNORM_4PACK16, RGBA16Unorm, 0, 10, 1, 1, 8);
+ addDfFormatDescChromaSubsampling(G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, Invalid, 1, 10, 2, 1, 8);
+ addDfFormatDescChromaSubsampling(B10X6G10X6R10X6G10X6_422_UNORM_4PACK16, Invalid, 1, 10, 2, 1, 8);
+ addDfFormatDescChromaSubsampling(G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16, Invalid, 3, 10, 2, 2, 12);
+ addDfFormatDescChromaSubsampling(G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, Invalid, 2, 10, 2, 2, 12);
+ addDfFormatDescChromaSubsampling(G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16, Invalid, 3, 10, 2, 1, 8);
+ addDfFormatDescChromaSubsampling(G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, Invalid, 2, 10, 2, 1, 8);
+ addDfFormatDescChromaSubsampling(G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16, Invalid, 3, 10, 1, 1, 6);
+ addDfFormatDescChromaSubsampling(R12X4_UNORM_PACK16, R16Unorm, 0, 12, 1, 1, 2);
+ addDfFormatDescChromaSubsampling(R12X4G12X4_UNORM_2PACK16, RG16Unorm, 0, 12, 1, 1, 4);
+ addDfFormatDescChromaSubsampling(R12X4G12X4B12X4A12X4_UNORM_4PACK16, RGBA16Unorm, 0, 12, 1, 1, 8);
+ addDfFormatDescChromaSubsampling(G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, Invalid, 1, 12, 2, 1, 8);
+ addDfFormatDescChromaSubsampling(B12X4G12X4R12X4G12X4_422_UNORM_4PACK16, Invalid, 1, 12, 2, 1, 8);
+ addDfFormatDescChromaSubsampling(G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16, Invalid, 3, 12, 2, 2, 12);
+ addDfFormatDescChromaSubsampling(G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, Invalid, 2, 12, 2, 2, 12);
+ addDfFormatDescChromaSubsampling(G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16, Invalid, 3, 12, 2, 1, 8);
+ addDfFormatDescChromaSubsampling(G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, Invalid, 2, 12, 2, 1, 8);
+ addDfFormatDescChromaSubsampling(G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16, Invalid, 3, 12, 1, 1, 6);
+ addDfFormatDescChromaSubsampling(G16B16G16R16_422_UNORM, Invalid, 1, 16, 2, 1, 8);
+ addDfFormatDescChromaSubsampling(B16G16R16G16_422_UNORM, Invalid, 1, 16, 2, 1, 8);
+ addDfFormatDescChromaSubsampling(G16_B16_R16_3PLANE_420_UNORM, Invalid, 3, 16, 2, 2, 12);
+ addDfFormatDescChromaSubsampling(G16_B16R16_2PLANE_420_UNORM, Invalid, 2, 16, 2, 2, 12);
+ addDfFormatDescChromaSubsampling(G16_B16_R16_3PLANE_422_UNORM, Invalid, 3, 16, 2, 1, 8);
+ addDfFormatDescChromaSubsampling(G16_B16R16_2PLANE_422_UNORM, Invalid, 2, 16, 2, 1, 8);
+ addDfFormatDescChromaSubsampling(G16_B16_R16_3PLANE_444_UNORM, Invalid, 3, 16, 1, 1, 6);
+}
+
+// Appends one entry to _mtlPixelFormatDescriptions at the caller's local `fmtIdx` (post-incremented),
+// after CRASH_BAD_INDEX_MSG has checked fmtIdx against _mtlPixelFormatCount.
+// MTL_FMT and MTL_FMT_LINEAR are pasted onto `MTLPixelFormat`, VIEW_CLASS is used as `MTLViewClass::VIEW_CLASS`,
+// and the caps arguments are pasted onto `kMTLFmtCaps`; select_platform_caps() receives the macOS caps first.
+// The second field is always RD::DATA_FORMAT_MAX (presumably "no RD data format linked yet" - confirm against the struct).
+// NOTE(review): expands to two statements without a do { } while (0) wrapper, so use it only as a full statement in a braced scope.
+#define addMTLPixelFormatDescFull(MTL_FMT, VIEW_CLASS, IOS_CAPS, MACOS_CAPS, MTL_FMT_LINEAR) \
+ CRASH_BAD_INDEX_MSG(fmtIdx, _mtlPixelFormatCount, "Adding too many pixel formats"); \
+ _mtlPixelFormatDescriptions[fmtIdx++] = { .mtlPixelFormat = MTLPixelFormat##MTL_FMT, RD::DATA_FORMAT_MAX, select_platform_caps(kMTLFmtCaps##MACOS_CAPS, kMTLFmtCaps##IOS_CAPS), MTLViewClass::VIEW_CLASS, MTLPixelFormat##MTL_FMT_LINEAR, "MTLPixelFormat" #MTL_FMT }
+
+// Describes a pixel format that is its own linear format: forwards to addMTLPixelFormatDescFull with MTL_FMT repeated as MTL_FMT_LINEAR.
+#define addMTLPixelFormatDesc(MTL_FMT, VIEW_CLASS, IOS_CAPS, MACOS_CAPS) \
+ addMTLPixelFormatDescFull(MTL_FMT, VIEW_CLASS, IOS_CAPS, MACOS_CAPS, MTL_FMT)
+
+// Describes an sRGB pixel format; MTL_FMT_LINEAR names its linear counterpart and is forwarded unchanged to addMTLPixelFormatDescFull.
+#define addMTLPixelFormatDescSRGB(MTL_FMT, VIEW_CLASS, IOS_CAPS, MACOS_CAPS, MTL_FMT_LINEAR) \
+ addMTLPixelFormatDescFull(MTL_FMT, VIEW_CLASS, IOS_CAPS, MACOS_CAPS, MTL_FMT_LINEAR)
+
+// Populates _mtlPixelFormatDescriptions with one baseline entry per MTLPixelFormat listed below.
+// Each row reads (format, view class, iOS caps, macOS caps); the SRGB variant takes a fifth argument
+// naming the linear counterpart of the format. Rows are stored in listing order, starting at index 0.
+void PixelFormats::initMTLPixelFormatCapabilities() {
+ clear(_mtlPixelFormatDescriptions, _mtlPixelFormatCount);
+
+ uint32_t fmtIdx = 0;
+
+ // When adding to this list, make sure _mtlPixelFormatCount is large enough for the format count.
+
+ // MTLPixelFormatInvalid must come first.
+ addMTLPixelFormatDesc(Invalid, None, None, None);
+
+ // Ordinary 8-bit pixel formats.
+ addMTLPixelFormatDesc(A8Unorm, Color8, RF, RF);
+ addMTLPixelFormatDesc(R8Unorm, Color8, All, All);
+ addMTLPixelFormatDescSRGB(R8Unorm_sRGB, Color8, RFCMRB, None, R8Unorm);
+ addMTLPixelFormatDesc(R8Snorm, Color8, RFWCMB, All);
+ addMTLPixelFormatDesc(R8Uint, Color8, RWCM, RWCM);
+ addMTLPixelFormatDesc(R8Sint, Color8, RWCM, RWCM);
+
+ // Ordinary 16-bit pixel formats.
+ addMTLPixelFormatDesc(R16Unorm, Color16, RFWCMB, All);
+ addMTLPixelFormatDesc(R16Snorm, Color16, RFWCMB, All);
+ addMTLPixelFormatDesc(R16Uint, Color16, RWCM, RWCM);
+ addMTLPixelFormatDesc(R16Sint, Color16, RWCM, RWCM);
+ addMTLPixelFormatDesc(R16Float, Color16, All, All);
+
+ addMTLPixelFormatDesc(RG8Unorm, Color16, All, All);
+ addMTLPixelFormatDescSRGB(RG8Unorm_sRGB, Color16, RFCMRB, None, RG8Unorm);
+ addMTLPixelFormatDesc(RG8Snorm, Color16, RFWCMB, All);
+ addMTLPixelFormatDesc(RG8Uint, Color16, RWCM, RWCM);
+ addMTLPixelFormatDesc(RG8Sint, Color16, RWCM, RWCM);
+
+ // Packed 16-bit pixel formats.
+ addMTLPixelFormatDesc(B5G6R5Unorm, Color16, RFCMRB, None);
+ addMTLPixelFormatDesc(A1BGR5Unorm, Color16, RFCMRB, None);
+ addMTLPixelFormatDesc(ABGR4Unorm, Color16, RFCMRB, None);
+ addMTLPixelFormatDesc(BGR5A1Unorm, Color16, RFCMRB, None);
+
+ // Ordinary 32-bit pixel formats.
+ addMTLPixelFormatDesc(R32Uint, Color32, RC, RWCM);
+ addMTLPixelFormatDesc(R32Sint, Color32, RC, RWCM);
+ addMTLPixelFormatDesc(R32Float, Color32, RCMB, All);
+
+ addMTLPixelFormatDesc(RG16Unorm, Color32, RFWCMB, All);
+ addMTLPixelFormatDesc(RG16Snorm, Color32, RFWCMB, All);
+ addMTLPixelFormatDesc(RG16Uint, Color32, RWCM, RWCM);
+ addMTLPixelFormatDesc(RG16Sint, Color32, RWCM, RWCM);
+ addMTLPixelFormatDesc(RG16Float, Color32, All, All);
+
+ addMTLPixelFormatDesc(RGBA8Unorm, Color32, All, All);
+ addMTLPixelFormatDescSRGB(RGBA8Unorm_sRGB, Color32, RFCMRB, RFCMRB, RGBA8Unorm);
+ addMTLPixelFormatDesc(RGBA8Snorm, Color32, RFWCMB, All);
+ addMTLPixelFormatDesc(RGBA8Uint, Color32, RWCM, RWCM);
+ addMTLPixelFormatDesc(RGBA8Sint, Color32, RWCM, RWCM);
+
+ addMTLPixelFormatDesc(BGRA8Unorm, Color32, All, All);
+ addMTLPixelFormatDescSRGB(BGRA8Unorm_sRGB, Color32, RFCMRB, RFCMRB, BGRA8Unorm);
+
+ // Packed 32-bit pixel formats.
+ addMTLPixelFormatDesc(RGB10A2Unorm, Color32, RFCMRB, All);
+ addMTLPixelFormatDesc(RGB10A2Uint, Color32, RCM, RWCM);
+ addMTLPixelFormatDesc(RG11B10Float, Color32, RFCMRB, All);
+ addMTLPixelFormatDesc(RGB9E5Float, Color32, RFCMRB, RF);
+
+ // Ordinary 64-bit pixel formats.
+ addMTLPixelFormatDesc(RG32Uint, Color64, RC, RWCM);
+ addMTLPixelFormatDesc(RG32Sint, Color64, RC, RWCM);
+ addMTLPixelFormatDesc(RG32Float, Color64, RCB, All);
+
+ addMTLPixelFormatDesc(RGBA16Unorm, Color64, RFWCMB, All);
+ addMTLPixelFormatDesc(RGBA16Snorm, Color64, RFWCMB, All);
+ addMTLPixelFormatDesc(RGBA16Uint, Color64, RWCM, RWCM);
+ addMTLPixelFormatDesc(RGBA16Sint, Color64, RWCM, RWCM);
+ addMTLPixelFormatDesc(RGBA16Float, Color64, All, All);
+
+ // Ordinary 128-bit pixel formats.
+ addMTLPixelFormatDesc(RGBA32Uint, Color128, RC, RWCM);
+ addMTLPixelFormatDesc(RGBA32Sint, Color128, RC, RWCM);
+ addMTLPixelFormatDesc(RGBA32Float, Color128, RC, All);
+
+ // Compressed pixel formats.
+ addMTLPixelFormatDesc(PVRTC_RGBA_2BPP, PVRTC_RGBA_2BPP, RF, None);
+ addMTLPixelFormatDescSRGB(PVRTC_RGBA_2BPP_sRGB, PVRTC_RGBA_2BPP, RF, None, PVRTC_RGBA_2BPP);
+ addMTLPixelFormatDesc(PVRTC_RGBA_4BPP, PVRTC_RGBA_4BPP, RF, None);
+ addMTLPixelFormatDescSRGB(PVRTC_RGBA_4BPP_sRGB, PVRTC_RGBA_4BPP, RF, None, PVRTC_RGBA_4BPP);
+
+ addMTLPixelFormatDesc(ETC2_RGB8, ETC2_RGB8, RF, None);
+ addMTLPixelFormatDescSRGB(ETC2_RGB8_sRGB, ETC2_RGB8, RF, None, ETC2_RGB8);
+ addMTLPixelFormatDesc(ETC2_RGB8A1, ETC2_RGB8A1, RF, None);
+ addMTLPixelFormatDescSRGB(ETC2_RGB8A1_sRGB, ETC2_RGB8A1, RF, None, ETC2_RGB8A1);
+ addMTLPixelFormatDesc(EAC_RGBA8, EAC_RGBA8, RF, None);
+ addMTLPixelFormatDescSRGB(EAC_RGBA8_sRGB, EAC_RGBA8, RF, None, EAC_RGBA8);
+ addMTLPixelFormatDesc(EAC_R11Unorm, EAC_R11, RF, None);
+ addMTLPixelFormatDesc(EAC_R11Snorm, EAC_R11, RF, None);
+ addMTLPixelFormatDesc(EAC_RG11Unorm, EAC_RG11, RF, None);
+ addMTLPixelFormatDesc(EAC_RG11Snorm, EAC_RG11, RF, None);
+
+ // ASTC formats are registered with no capabilities on either platform.
+ addMTLPixelFormatDesc(ASTC_4x4_LDR, ASTC_4x4, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_4x4_sRGB, ASTC_4x4, None, None, ASTC_4x4_LDR);
+ addMTLPixelFormatDesc(ASTC_4x4_HDR, ASTC_4x4, None, None);
+ addMTLPixelFormatDesc(ASTC_5x4_LDR, ASTC_5x4, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_5x4_sRGB, ASTC_5x4, None, None, ASTC_5x4_LDR);
+ addMTLPixelFormatDesc(ASTC_5x4_HDR, ASTC_5x4, None, None);
+ addMTLPixelFormatDesc(ASTC_5x5_LDR, ASTC_5x5, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_5x5_sRGB, ASTC_5x5, None, None, ASTC_5x5_LDR);
+ addMTLPixelFormatDesc(ASTC_5x5_HDR, ASTC_5x5, None, None);
+ addMTLPixelFormatDesc(ASTC_6x5_LDR, ASTC_6x5, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_6x5_sRGB, ASTC_6x5, None, None, ASTC_6x5_LDR);
+ addMTLPixelFormatDesc(ASTC_6x5_HDR, ASTC_6x5, None, None);
+ addMTLPixelFormatDesc(ASTC_6x6_LDR, ASTC_6x6, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_6x6_sRGB, ASTC_6x6, None, None, ASTC_6x6_LDR);
+ addMTLPixelFormatDesc(ASTC_6x6_HDR, ASTC_6x6, None, None);
+ addMTLPixelFormatDesc(ASTC_8x5_LDR, ASTC_8x5, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_8x5_sRGB, ASTC_8x5, None, None, ASTC_8x5_LDR);
+ addMTLPixelFormatDesc(ASTC_8x5_HDR, ASTC_8x5, None, None);
+ addMTLPixelFormatDesc(ASTC_8x6_LDR, ASTC_8x6, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_8x6_sRGB, ASTC_8x6, None, None, ASTC_8x6_LDR);
+ addMTLPixelFormatDesc(ASTC_8x6_HDR, ASTC_8x6, None, None);
+ addMTLPixelFormatDesc(ASTC_8x8_LDR, ASTC_8x8, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_8x8_sRGB, ASTC_8x8, None, None, ASTC_8x8_LDR);
+ addMTLPixelFormatDesc(ASTC_8x8_HDR, ASTC_8x8, None, None);
+ addMTLPixelFormatDesc(ASTC_10x5_LDR, ASTC_10x5, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_10x5_sRGB, ASTC_10x5, None, None, ASTC_10x5_LDR);
+ addMTLPixelFormatDesc(ASTC_10x5_HDR, ASTC_10x5, None, None);
+ addMTLPixelFormatDesc(ASTC_10x6_LDR, ASTC_10x6, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_10x6_sRGB, ASTC_10x6, None, None, ASTC_10x6_LDR);
+ addMTLPixelFormatDesc(ASTC_10x6_HDR, ASTC_10x6, None, None);
+ addMTLPixelFormatDesc(ASTC_10x8_LDR, ASTC_10x8, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_10x8_sRGB, ASTC_10x8, None, None, ASTC_10x8_LDR);
+ addMTLPixelFormatDesc(ASTC_10x8_HDR, ASTC_10x8, None, None);
+ addMTLPixelFormatDesc(ASTC_10x10_LDR, ASTC_10x10, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_10x10_sRGB, ASTC_10x10, None, None, ASTC_10x10_LDR);
+ addMTLPixelFormatDesc(ASTC_10x10_HDR, ASTC_10x10, None, None);
+ addMTLPixelFormatDesc(ASTC_12x10_LDR, ASTC_12x10, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_12x10_sRGB, ASTC_12x10, None, None, ASTC_12x10_LDR);
+ addMTLPixelFormatDesc(ASTC_12x10_HDR, ASTC_12x10, None, None);
+ addMTLPixelFormatDesc(ASTC_12x12_LDR, ASTC_12x12, None, None);
+ addMTLPixelFormatDescSRGB(ASTC_12x12_sRGB, ASTC_12x12, None, None, ASTC_12x12_LDR);
+ addMTLPixelFormatDesc(ASTC_12x12_HDR, ASTC_12x12, None, None);
+
+ // The BC enumerators below are referenced without an availability guard, so the
+ // -Wunguarded-availability warning is silenced for this group only.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunguarded-availability"
+
+ addMTLPixelFormatDesc(BC1_RGBA, BC1_RGBA, RF, RF);
+ addMTLPixelFormatDescSRGB(BC1_RGBA_sRGB, BC1_RGBA, RF, RF, BC1_RGBA);
+ addMTLPixelFormatDesc(BC2_RGBA, BC2_RGBA, RF, RF);
+ addMTLPixelFormatDescSRGB(BC2_RGBA_sRGB, BC2_RGBA, RF, RF, BC2_RGBA);
+ addMTLPixelFormatDesc(BC3_RGBA, BC3_RGBA, RF, RF);
+ addMTLPixelFormatDescSRGB(BC3_RGBA_sRGB, BC3_RGBA, RF, RF, BC3_RGBA);
+ addMTLPixelFormatDesc(BC4_RUnorm, BC4_R, RF, RF);
+ addMTLPixelFormatDesc(BC4_RSnorm, BC4_R, RF, RF);
+ addMTLPixelFormatDesc(BC5_RGUnorm, BC5_RG, RF, RF);
+ addMTLPixelFormatDesc(BC5_RGSnorm, BC5_RG, RF, RF);
+ addMTLPixelFormatDesc(BC6H_RGBUfloat, BC6H_RGB, RF, RF);
+ addMTLPixelFormatDesc(BC6H_RGBFloat, BC6H_RGB, RF, RF);
+ addMTLPixelFormatDesc(BC7_RGBAUnorm, BC7_RGBA, RF, RF);
+ addMTLPixelFormatDescSRGB(BC7_RGBAUnorm_sRGB, BC7_RGBA, RF, RF, BC7_RGBAUnorm);
+
+#pragma clang diagnostic pop
+
+ // YUV pixel formats.
+ addMTLPixelFormatDesc(GBGR422, None, RF, RF);
+ addMTLPixelFormatDesc(BGRG422, None, RF, RF);
+
+ // Extended range and wide color pixel formats.
+ addMTLPixelFormatDesc(BGRA10_XR, BGRA10_XR, None, None);
+ addMTLPixelFormatDescSRGB(BGRA10_XR_sRGB, BGRA10_XR, None, None, BGRA10_XR);
+ addMTLPixelFormatDesc(BGR10_XR, BGR10_XR, None, None);
+ addMTLPixelFormatDescSRGB(BGR10_XR_sRGB, BGR10_XR, None, None, BGR10_XR);
+ addMTLPixelFormatDesc(BGR10A2Unorm, Color32, None, None);
+
+ // Depth and stencil pixel formats.
+ addMTLPixelFormatDesc(Depth16Unorm, None, None, None);
+ addMTLPixelFormatDesc(Depth32Float, None, DRM, DRFMR);
+ addMTLPixelFormatDesc(Stencil8, None, DRM, DRMR);
+ addMTLPixelFormatDesc(Depth24Unorm_Stencil8, Depth24_Stencil8, None, None);
+ addMTLPixelFormatDesc(Depth32Float_Stencil8, Depth32_Stencil8, DRM, DRFMR);
+ addMTLPixelFormatDesc(X24_Stencil8, Depth24_Stencil8, None, DRMR);
+ addMTLPixelFormatDesc(X32_Stencil8, Depth32_Stencil8, DRM, DRMR);
+
+ // When adding to this list, make sure _mtlPixelFormatCount is large enough for the format count.
+}
+
+// Appends one entry to _mtlVertexFormatDescriptions at the caller's local `fmtIdx` (post-incremented),
+// after CRASH_BAD_INDEX_MSG has checked fmtIdx against _mtlVertexFormatCount.
+// MTL_VTX_FMT is pasted onto `MTLVertexFormat`; the caps arguments are pasted onto `kMTLFmtCaps` and handed
+// to select_platform_caps() macOS-first. The remaining positional fields are fixed to RD::DATA_FORMAT_MAX,
+// MTLViewClass::None and MTLPixelFormatInvalid.
+// NOTE(review): expands to two statements without a do { } while (0) wrapper, so use it only as a full statement in a braced scope.
+#define addMTLVertexFormatDesc(MTL_VTX_FMT, IOS_CAPS, MACOS_CAPS) \
+ CRASH_BAD_INDEX_MSG(fmtIdx, _mtlVertexFormatCount, "Attempting to describe too many MTLVertexFormats"); \
+ _mtlVertexFormatDescriptions[fmtIdx++] = { .mtlVertexFormat = MTLVertexFormat##MTL_VTX_FMT, RD::DATA_FORMAT_MAX, select_platform_caps(kMTLFmtCaps##MACOS_CAPS, kMTLFmtCaps##IOS_CAPS), MTLViewClass::None, MTLPixelFormatInvalid, "MTLVertexFormat" #MTL_VTX_FMT }
+
+// Populates _mtlVertexFormatDescriptions with one baseline entry per MTLVertexFormat listed below.
+// Each row reads (format, iOS caps, macOS caps). Rows are stored in listing order, starting at index 0.
+void PixelFormats::initMTLVertexFormatCapabilities() {
+ clear(_mtlVertexFormatDescriptions, _mtlVertexFormatCount);
+
+ uint32_t fmtIdx = 0;
+
+ // When adding to this list, make sure _mtlVertexFormatCount is large enough for the format count.
+
+ // MTLVertexFormatInvalid must come first.
+ addMTLVertexFormatDesc(Invalid, None, None);
+
+ addMTLVertexFormatDesc(UChar2Normalized, Vertex, Vertex);
+ addMTLVertexFormatDesc(Char2Normalized, Vertex, Vertex);
+ addMTLVertexFormatDesc(UChar2, Vertex, Vertex);
+ addMTLVertexFormatDesc(Char2, Vertex, Vertex);
+
+ addMTLVertexFormatDesc(UChar3Normalized, Vertex, Vertex);
+ addMTLVertexFormatDesc(Char3Normalized, Vertex, Vertex);
+ addMTLVertexFormatDesc(UChar3, Vertex, Vertex);
+ addMTLVertexFormatDesc(Char3, Vertex, Vertex);
+
+ addMTLVertexFormatDesc(UChar4Normalized, Vertex, Vertex);
+ addMTLVertexFormatDesc(Char4Normalized, Vertex, Vertex);
+ addMTLVertexFormatDesc(UChar4, Vertex, Vertex);
+ addMTLVertexFormatDesc(Char4, Vertex, Vertex);
+
+ addMTLVertexFormatDesc(UInt1010102Normalized, Vertex, Vertex);
+ addMTLVertexFormatDesc(Int1010102Normalized, Vertex, Vertex);
+
+ addMTLVertexFormatDesc(UShort2Normalized, Vertex, Vertex);
+ addMTLVertexFormatDesc(Short2Normalized, Vertex, Vertex);
+ addMTLVertexFormatDesc(UShort2, Vertex, Vertex);
+ addMTLVertexFormatDesc(Short2, Vertex, Vertex);
+ addMTLVertexFormatDesc(Half2, Vertex, Vertex);
+
+ addMTLVertexFormatDesc(UShort3Normalized, Vertex, Vertex);
+ addMTLVertexFormatDesc(Short3Normalized, Vertex, Vertex);
+ addMTLVertexFormatDesc(UShort3, Vertex, Vertex);
+ addMTLVertexFormatDesc(Short3, Vertex, Vertex);
+ addMTLVertexFormatDesc(Half3, Vertex, Vertex);
+
+ addMTLVertexFormatDesc(UShort4Normalized, Vertex, Vertex);
+ addMTLVertexFormatDesc(Short4Normalized, Vertex, Vertex);
+ addMTLVertexFormatDesc(UShort4, Vertex, Vertex);
+ addMTLVertexFormatDesc(Short4, Vertex, Vertex);
+ addMTLVertexFormatDesc(Half4, Vertex, Vertex);
+
+ addMTLVertexFormatDesc(UInt, Vertex, Vertex);
+ addMTLVertexFormatDesc(Int, Vertex, Vertex);
+ addMTLVertexFormatDesc(Float, Vertex, Vertex);
+
+ addMTLVertexFormatDesc(UInt2, Vertex, Vertex);
+ addMTLVertexFormatDesc(Int2, Vertex, Vertex);
+ addMTLVertexFormatDesc(Float2, Vertex, Vertex);
+
+ addMTLVertexFormatDesc(UInt3, Vertex, Vertex);
+ addMTLVertexFormatDesc(Int3, Vertex, Vertex);
+ addMTLVertexFormatDesc(Float3, Vertex, Vertex);
+
+ addMTLVertexFormatDesc(UInt4, Vertex, Vertex);
+ addMTLVertexFormatDesc(Int4, Vertex, Vertex);
+ addMTLVertexFormatDesc(Float4, Vertex, Vertex);
+
+ // The remaining formats are registered with no capabilities on either platform.
+ addMTLVertexFormatDesc(UCharNormalized, None, None);
+ addMTLVertexFormatDesc(CharNormalized, None, None);
+ addMTLVertexFormatDesc(UChar, None, None);
+ addMTLVertexFormatDesc(Char, None, None);
+
+ addMTLVertexFormatDesc(UShortNormalized, None, None);
+ addMTLVertexFormatDesc(ShortNormalized, None, None);
+ addMTLVertexFormatDesc(UShort, None, None);
+ addMTLVertexFormatDesc(Short, None, None);
+ addMTLVertexFormatDesc(Half, None, None);
+
+ addMTLVertexFormatDesc(UChar4Normalized_BGRA, None, None);
+
+ // When adding to this list, make sure _mtlVertexFormatCount is large enough for the format count.
+}
+
+// Builds the reverse lookups from a Metal format value to the index of its entry in the
+// pixel-format and vertex-format description tables.
+void PixelFormats::buildMTLFormatMaps() {
+	// Reset both direct-index tables so every format starts out undefined/invalid.
+	clear(_mtlFormatDescIndicesByMTLPixelFormatsCore, _mtlPixelFormatCoreCount);
+	clear(_mtlFormatDescIndicesByMTLVertexFormats, _mtlVertexFormatCount);
+
+	// Pixel formats: values below _mtlPixelFormatCoreCount index the core array directly;
+	// larger outlier values are stored in the Ext map instead.
+	for (uint32_t desc_idx = 0; desc_idx < _mtlPixelFormatCount; ++desc_idx) {
+		MTLPixelFormat pix_fmt = _mtlPixelFormatDescriptions[desc_idx].mtlPixelFormat;
+		if (!pix_fmt) {
+			// A zero format value gets no lookup entry.
+			continue;
+		}
+		if (pix_fmt >= _mtlPixelFormatCoreCount) {
+			_mtlFormatDescIndicesByMTLPixelFormatsExt[pix_fmt] = desc_idx;
+		} else {
+			_mtlFormatDescIndicesByMTLPixelFormatsCore[pix_fmt] = desc_idx;
+		}
+	}
+
+	// Vertex formats: every non-zero value indexes the table directly.
+	for (uint32_t desc_idx = 0; desc_idx < _mtlVertexFormatCount; ++desc_idx) {
+		MTLVertexFormat vtx_fmt = _mtlVertexFormatDescriptions[desc_idx].mtlVertexFormat;
+		if (!vtx_fmt) {
+			continue;
+		}
+		_mtlFormatDescIndicesByMTLVertexFormats[vtx_fmt] = desc_idx;
+	}
+}
+
+// Grants extra capability flags to a MTLPixelFormat, but only when the device reports the given feature set.
+void PixelFormats::addMTLPixelFormatCapabilities(id<MTLDevice> p_device,
+		MTLFeatureSet p_feature_set,
+		MTLPixelFormat p_format,
+		MTLFmtCaps p_caps) {
+	// Nothing to add when the feature set is not supported.
+	if (![p_device supportsFeatureSet:p_feature_set]) {
+		return;
+	}
+	auto &fmt_desc = getMTLPixelFormatDesc(p_format);
+	flags::set(fmt_desc.mtlFmtCaps, p_caps);
+}
+
+// Grants extra capability flags to a MTLPixelFormat, but only when the device belongs to the given GPU family.
+void PixelFormats::addMTLPixelFormatCapabilities(id<MTLDevice> p_device,
+		MTLGPUFamily p_family,
+		MTLPixelFormat p_format,
+		MTLFmtCaps p_caps) {
+	// Nothing to add when the GPU family is not supported.
+	if (![p_device supportsFamily:p_family]) {
+		return;
+	}
+	auto &fmt_desc = getMTLPixelFormatDesc(p_format);
+	flags::set(fmt_desc.mtlFmtCaps, p_caps);
+}
+
+// Removes the given capability flags from a Metal pixel format's descriptor.
+void PixelFormats::disableMTLPixelFormatCapabilities(MTLPixelFormat p_format, MTLFmtCaps p_caps) {
+	auto &fmt_desc = getMTLPixelFormatDesc(p_format);
+	flags::clear(fmt_desc.mtlFmtCaps, p_caps);
+}
+
+// Strips every capability from a Metal pixel format by resetting its caps to kMTLFmtCapsNone.
+void PixelFormats::disableAllMTLPixelFormatCapabilities(MTLPixelFormat p_format) {
+	auto &fmt_desc = getMTLPixelFormatDesc(p_format);
+	fmt_desc.mtlFmtCaps = kMTLFmtCapsNone;
+}
+
+// Grants extra capability flags to a MTLVertexFormat, but only when the device reports the given feature set.
+void PixelFormats::addMTLVertexFormatCapabilities(id<MTLDevice> p_device,
+		MTLFeatureSet p_feature_set,
+		MTLVertexFormat p_format,
+		MTLFmtCaps p_caps) {
+	// Nothing to add when the feature set is not supported.
+	if (![p_device supportsFeatureSet:p_feature_set]) {
+		return;
+	}
+	auto &fmt_desc = getMTLVertexFormatDesc(p_format);
+	flags::set(fmt_desc.mtlFmtCaps, p_caps);
+}
+
+// Convenience overload: forwards to modifyMTLFormatCapabilities(id<MTLDevice>) with `device`
+// (presumably the MTLDevice held by this PixelFormats instance - confirm against the class declaration).
+void PixelFormats::modifyMTLFormatCapabilities() {
+ modifyMTLFormatCapabilities(device);
+}
+
+// Reports whether the device advertises BC texture compression.
+// The query is compiled only for macOS, or for iOS when building against an SDK of at least 16.4,
+// and is executed only when the running OS passes the @available check; in every other case the answer is false.
+bool supports_bc_texture_compression(id<MTLDevice> p_device) {
+	bool bc_supported = false;
+#if (TARGET_OS_OSX || (TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED >= 160400))
+	if (@available(macOS 11.0, iOS 16.4, *)) {
+		bc_supported = p_device.supportsBCTextureCompression;
+	}
+#endif
+	return bc_supported;
+}
+
+// Shorthand wrappers used while adjusting per-device capabilities. They paste their arguments onto the
+// `MTLFeatureSet_` / `MTLGPUFamily` / `MTLPixelFormat` / `MTLVertexFormat` / `kMTLFmtCaps` prefixes.
+// The three add* macros reference `p_device`, which must be in scope at the expansion site.
+
+// Adds CAPS to pixel format MTL_FMT if p_device supports feature set MTLFeatureSet_<FEAT_SET>.
+#define addFeatSetMTLPixFmtCaps(FEAT_SET, MTL_FMT, CAPS) \
+ addMTLPixelFormatCapabilities(p_device, MTLFeatureSet_##FEAT_SET, MTLPixelFormat##MTL_FMT, kMTLFmtCaps##CAPS)
+
+// Adds CAPS to vertex format MTL_FMT if p_device supports feature set MTLFeatureSet_<FEAT_SET>.
+#define addFeatSetMTLVtxFmtCaps(FEAT_SET, MTL_FMT, CAPS) \
+ addMTLVertexFormatCapabilities(p_device, MTLFeatureSet_##FEAT_SET, MTLVertexFormat##MTL_FMT, kMTLFmtCaps##CAPS)
+
+// Adds CAPS to pixel format MTL_FMT if p_device supports GPU family MTLGPUFamily<GPU_FAM>.
+#define addGPUMTLPixFmtCaps(GPU_FAM, MTL_FMT, CAPS) \
+ addMTLPixelFormatCapabilities(p_device, MTLGPUFamily##GPU_FAM, MTLPixelFormat##MTL_FMT, kMTLFmtCaps##CAPS)
+
+// Removes every capability from pixel format MTL_FMT.
+#define disableAllMTLPixFmtCaps(MTL_FMT) \
+ disableAllMTLPixelFormatCapabilities(MTLPixelFormat##MTL_FMT)
+
+// Removes only CAPS from pixel format MTL_FMT.
+#define disableMTLPixFmtCaps(MTL_FMT, CAPS) \
+ disableMTLPixelFormatCapabilities(MTLPixelFormat##MTL_FMT, kMTLFmtCaps##CAPS)
+
+void PixelFormats::modifyMTLFormatCapabilities(id<MTLDevice> p_device) {
+ if (!supports_bc_texture_compression(p_device)) {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunguarded-availability"
+
+ disableAllMTLPixFmtCaps(BC1_RGBA);
+ disableAllMTLPixFmtCaps(BC1_RGBA_sRGB);
+ disableAllMTLPixFmtCaps(BC2_RGBA);
+ disableAllMTLPixFmtCaps(BC2_RGBA_sRGB);
+ disableAllMTLPixFmtCaps(BC3_RGBA);
+ disableAllMTLPixFmtCaps(BC3_RGBA_sRGB);
+ disableAllMTLPixFmtCaps(BC4_RUnorm);
+ disableAllMTLPixFmtCaps(BC4_RSnorm);
+ disableAllMTLPixFmtCaps(BC5_RGUnorm);
+ disableAllMTLPixFmtCaps(BC5_RGSnorm);
+ disableAllMTLPixFmtCaps(BC6H_RGBUfloat);
+ disableAllMTLPixFmtCaps(BC6H_RGBFloat);
+ disableAllMTLPixFmtCaps(BC7_RGBAUnorm);
+ disableAllMTLPixFmtCaps(BC7_RGBAUnorm_sRGB);
+
+#pragma clang diagnostic pop
+ }
+
+ if (!p_device.supports32BitMSAA) {
+ disableMTLPixFmtCaps(R32Uint, MSAA);
+ disableMTLPixFmtCaps(R32Uint, Resolve);
+ disableMTLPixFmtCaps(R32Sint, MSAA);
+ disableMTLPixFmtCaps(R32Sint, Resolve);
+ disableMTLPixFmtCaps(R32Float, MSAA);
+ disableMTLPixFmtCaps(R32Float, Resolve);
+ disableMTLPixFmtCaps(RG32Uint, MSAA);
+ disableMTLPixFmtCaps(RG32Uint, Resolve);
+ disableMTLPixFmtCaps(RG32Sint, MSAA);
+ disableMTLPixFmtCaps(RG32Sint, Resolve);
+ disableMTLPixFmtCaps(RG32Float, MSAA);
+ disableMTLPixFmtCaps(RG32Float, Resolve);
+ disableMTLPixFmtCaps(RGBA32Uint, MSAA);
+ disableMTLPixFmtCaps(RGBA32Uint, Resolve);
+ disableMTLPixFmtCaps(RGBA32Sint, MSAA);
+ disableMTLPixFmtCaps(RGBA32Sint, Resolve);
+ disableMTLPixFmtCaps(RGBA32Float, MSAA);
+ disableMTLPixFmtCaps(RGBA32Float, Resolve);
+ }
+
+ if (!p_device.supports32BitFloatFiltering) {
+ disableMTLPixFmtCaps(R32Float, Filter);
+ disableMTLPixFmtCaps(RG32Float, Filter);
+ disableMTLPixFmtCaps(RGBA32Float, Filter);
+ }
+
+#if TARGET_OS_OSX
+ addGPUMTLPixFmtCaps(Apple1, R32Uint, Atomic);
+ addGPUMTLPixFmtCaps(Apple1, R32Sint, Atomic);
+
+ if (p_device.isDepth24Stencil8PixelFormatSupported) {
+ addGPUMTLPixFmtCaps(Apple1, Depth24Unorm_Stencil8, DRFMR);
+ }
+
+ addFeatSetMTLPixFmtCaps(macOS_GPUFamily1_v2, Depth16Unorm, DRFMR);
+
+ addFeatSetMTLPixFmtCaps(macOS_GPUFamily1_v3, BGR10A2Unorm, RFCMRB);
+
+ addGPUMTLPixFmtCaps(Apple5, R8Unorm_sRGB, All);
+
+ addGPUMTLPixFmtCaps(Apple5, RG8Unorm_sRGB, All);
+
+ addGPUMTLPixFmtCaps(Apple5, B5G6R5Unorm, RFCMRB);
+ addGPUMTLPixFmtCaps(Apple5, A1BGR5Unorm, RFCMRB);
+ addGPUMTLPixFmtCaps(Apple5, ABGR4Unorm, RFCMRB);
+ addGPUMTLPixFmtCaps(Apple5, BGR5A1Unorm, RFCMRB);
+
+ addGPUMTLPixFmtCaps(Apple5, RGBA8Unorm_sRGB, All);
+ addGPUMTLPixFmtCaps(Apple5, BGRA8Unorm_sRGB, All);
+
+ // Blending is actually supported for this format, but format channels cannot be individually write-enabled during blending.
+ // Disabling blending is the least-intrusive way to handle this in a Godot-friendly way.
+ addGPUMTLPixFmtCaps(Apple5, RGB9E5Float, All);
+ disableMTLPixFmtCaps(RGB9E5Float, Blend);
+
+ addGPUMTLPixFmtCaps(Apple5, PVRTC_RGBA_2BPP, RF);
+ addGPUMTLPixFmtCaps(Apple5, PVRTC_RGBA_2BPP_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple5, PVRTC_RGBA_4BPP, RF);
+ addGPUMTLPixFmtCaps(Apple5, PVRTC_RGBA_4BPP_sRGB, RF);
+
+ addGPUMTLPixFmtCaps(Apple5, ETC2_RGB8, RF);
+ addGPUMTLPixFmtCaps(Apple5, ETC2_RGB8_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple5, ETC2_RGB8A1, RF);
+ addGPUMTLPixFmtCaps(Apple5, ETC2_RGB8A1_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple5, EAC_RGBA8, RF);
+ addGPUMTLPixFmtCaps(Apple5, EAC_RGBA8_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple5, EAC_R11Unorm, RF);
+ addGPUMTLPixFmtCaps(Apple5, EAC_R11Snorm, RF);
+ addGPUMTLPixFmtCaps(Apple5, EAC_RG11Unorm, RF);
+ addGPUMTLPixFmtCaps(Apple5, EAC_RG11Snorm, RF);
+
+ addGPUMTLPixFmtCaps(Apple5, ASTC_4x4_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_4x4_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_4x4_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_5x4_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_5x4_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_5x4_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_5x5_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_5x5_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_5x5_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_6x5_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_6x5_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_6x5_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_6x6_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_6x6_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_6x6_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_8x5_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_8x5_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_8x5_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_8x6_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_8x6_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_8x6_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_8x8_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_8x8_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_8x8_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_10x5_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_10x5_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_10x5_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_10x6_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_10x6_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_10x6_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_10x8_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_10x8_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_10x8_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_10x10_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_10x10_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_10x10_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_12x10_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_12x10_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_12x10_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_12x12_LDR, RF);
+ addGPUMTLPixFmtCaps(Apple5, ASTC_12x12_sRGB, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_12x12_HDR, RF);
+
+ addGPUMTLPixFmtCaps(Apple5, BGRA10_XR, All);
+ addGPUMTLPixFmtCaps(Apple5, BGRA10_XR_sRGB, All);
+ addGPUMTLPixFmtCaps(Apple5, BGR10_XR, All);
+ addGPUMTLPixFmtCaps(Apple5, BGR10_XR_sRGB, All);
+
+ addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, UCharNormalized, Vertex);
+ addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, CharNormalized, Vertex);
+ addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, UChar, Vertex);
+ addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, Char, Vertex);
+ addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, UShortNormalized, Vertex);
+ addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, ShortNormalized, Vertex);
+ addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, UShort, Vertex);
+ addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, Short, Vertex);
+ addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, Half, Vertex);
+ addFeatSetMTLVtxFmtCaps(macOS_GPUFamily1_v3, UChar4Normalized_BGRA, Vertex);
+#endif
+
+#if TARGET_OS_IOS && !TARGET_OS_MACCATALYST
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v3, R8Unorm_sRGB, All);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, R8Unorm_sRGB, All);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, R8Snorm, All);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v3, RG8Unorm_sRGB, All);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, RG8Unorm_sRGB, All);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, RG8Snorm, All);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, R32Uint, RWC);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, R32Uint, Atomic);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, R32Sint, RWC);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, R32Sint, Atomic);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, R32Float, RWCMB);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v3, RGBA8Unorm_sRGB, All);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, RGBA8Unorm_sRGB, All);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, RGBA8Snorm, All);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v3, BGRA8Unorm_sRGB, All);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, BGRA8Unorm_sRGB, All);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, RGB10A2Unorm, All);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, RGB10A2Uint, RWCM);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, RG11B10Float, All);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, RGB9E5Float, All);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, RG32Uint, RWC);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, RG32Sint, RWC);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, RG32Float, RWCB);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, RGBA32Uint, RWC);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, RGBA32Sint, RWC);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v2, RGBA32Float, RWC);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_4x4_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_4x4_sRGB, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_5x4_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_5x4_sRGB, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_5x5_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_5x5_sRGB, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_6x5_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_6x5_sRGB, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_6x6_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_6x6_sRGB, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_8x5_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_8x5_sRGB, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_8x6_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_8x6_sRGB, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_8x8_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_8x8_sRGB, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x5_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x5_sRGB, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x6_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x6_sRGB, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x8_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x8_sRGB, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x10_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_10x10_sRGB, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_12x10_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_12x10_sRGB, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_12x12_LDR, RF);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily2_v1, ASTC_12x12_sRGB, RF);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, Depth32Float, DRMR);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, Depth32Float_Stencil8, DRMR);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v1, Stencil8, DRMR);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v2, BGRA10_XR, All);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v2, BGRA10_XR_sRGB, All);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v2, BGR10_XR, All);
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily3_v2, BGR10_XR_sRGB, All);
+
+ addFeatSetMTLPixFmtCaps(iOS_GPUFamily1_v4, BGR10A2Unorm, All);
+
+ addGPUMTLPixFmtCaps(Apple6, ASTC_4x4_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_5x4_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_5x5_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_6x5_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_6x6_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_8x5_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_8x6_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_8x8_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_10x5_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_10x6_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_10x8_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_10x10_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_12x10_HDR, RF);
+ addGPUMTLPixFmtCaps(Apple6, ASTC_12x12_HDR, RF);
+
+ addGPUMTLPixFmtCaps(Apple1, Depth16Unorm, DRFM);
+ addGPUMTLPixFmtCaps(Apple3, Depth16Unorm, DRFMR);
+
+ // Vertex formats.
+ addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, UCharNormalized, Vertex);
+ addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, CharNormalized, Vertex);
+ addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, UChar, Vertex);
+ addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, Char, Vertex);
+ addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, UShortNormalized, Vertex);
+ addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, ShortNormalized, Vertex);
+ addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, UShort, Vertex);
+ addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, Short, Vertex);
+ addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, Half, Vertex);
+ addFeatSetMTLVtxFmtCaps(iOS_GPUFamily1_v4, UChar4Normalized_BGRA, Vertex);
+
+// Disable for iOS simulator last.
+#if TARGET_OS_SIMULATOR
+ if (![mtlDevice supportsFamily:MTLGPUFamilyApple5]) {
+ disableAllMTLPixFmtCaps(R8Unorm_sRGB);
+ disableAllMTLPixFmtCaps(RG8Unorm_sRGB);
+ disableAllMTLPixFmtCaps(B5G6R5Unorm);
+ disableAllMTLPixFmtCaps(A1BGR5Unorm);
+ disableAllMTLPixFmtCaps(ABGR4Unorm);
+ disableAllMTLPixFmtCaps(BGR5A1Unorm);
+
+ disableAllMTLPixFmtCaps(BGRA10_XR);
+ disableAllMTLPixFmtCaps(BGRA10_XR_sRGB);
+ disableAllMTLPixFmtCaps(BGR10_XR);
+ disableAllMTLPixFmtCaps(BGR10_XR_sRGB);
+
+ disableAllMTLPixFmtCaps(GBGR422);
+ disableAllMTLPixFmtCaps(BGRG422);
+
+ disableMTLPixFmtCaps(RGB9E5Float, ColorAtt);
+
+ disableMTLPixFmtCaps(R8Unorm_sRGB, Write);
+ disableMTLPixFmtCaps(RG8Unorm_sRGB, Write);
+ disableMTLPixFmtCaps(RGBA8Unorm_sRGB, Write);
+ disableMTLPixFmtCaps(BGRA8Unorm_sRGB, Write);
+ disableMTLPixFmtCaps(PVRTC_RGBA_2BPP_sRGB, Write);
+ disableMTLPixFmtCaps(PVRTC_RGBA_4BPP_sRGB, Write);
+ disableMTLPixFmtCaps(ETC2_RGB8_sRGB, Write);
+ disableMTLPixFmtCaps(ETC2_RGB8A1_sRGB, Write);
+ disableMTLPixFmtCaps(EAC_RGBA8_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_4x4_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_5x4_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_5x5_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_6x5_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_6x6_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_8x5_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_8x6_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_8x8_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_10x5_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_10x6_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_10x8_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_10x10_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_12x10_sRGB, Write);
+ disableMTLPixFmtCaps(ASTC_12x12_sRGB, Write);
+ }
+#endif
+#endif
+}
+
+// Drop the capability-table helper names so they are not visible past this point.
+// The table above uses addGPUMTLPixFmtCaps, so that name is undefined here as well;
+// addGPUOSMTLPixFmtCaps is kept for safety (undefining an undefined name is a no-op).
+#undef addFeatSetMTLPixFmtCaps
+#undef addGPUMTLPixFmtCaps
+#undef addGPUOSMTLPixFmtCaps
+#undef disableMTLPixFmtCaps
+#undef disableAllMTLPixFmtCaps
+#undef addFeatSetMTLVtxFmtCaps
+
+// Links each Godot DataFormat description with the Metal pixel and vertex format descriptions it refers to.
+void PixelFormats::buildDFFormatMaps() {
+	// Walk the DataFormat description table once. Entries whose dataFormat is DATA_FORMAT_MAX are skipped.
+	// For every other entry:
+	//  - the primary Metal pixel/vertex description gets this DataFormat as its back reference,
+	//    unless a back reference was already recorded;
+	//  - any Metal format (primary or substitute) that is not supported is reset to the Invalid value.
+	for (uint32_t idx = 0; idx < RD::DATA_FORMAT_MAX; idx++) {
+		DataFormatDesc &desc = _dataFormatDescriptions[idx];
+		const DataFormat format = desc.dataFormat;
+		if (format == RD::DATA_FORMAT_MAX) {
+			continue;
+		}
+
+		// Primary pixel format: record the back reference, then validate.
+		if (desc.mtlPixelFormat) {
+			MTLFormatDesc &pixelDesc = getMTLPixelFormatDesc(desc.mtlPixelFormat);
+			if (pixelDesc.dataFormat == RD::DATA_FORMAT_MAX) {
+				pixelDesc.dataFormat = format;
+			}
+			if (!pixelDesc.isSupported()) {
+				desc.mtlPixelFormat = MTLPixelFormatInvalid;
+			}
+		}
+
+		// Substitute pixel format: validate only, no back reference is recorded.
+		if (desc.mtlPixelFormatSubstitute && !getMTLPixelFormatDesc(desc.mtlPixelFormatSubstitute).isSupported()) {
+			desc.mtlPixelFormatSubstitute = MTLPixelFormatInvalid;
+		}
+
+		// Primary vertex format: record the back reference, then validate.
+		if (desc.mtlVertexFormat) {
+			MTLFormatDesc &vertexDesc = getMTLVertexFormatDesc(desc.mtlVertexFormat);
+			if (vertexDesc.dataFormat == RD::DATA_FORMAT_MAX) {
+				vertexDesc.dataFormat = format;
+			}
+			if (!vertexDesc.isSupported()) {
+				desc.mtlVertexFormat = MTLVertexFormatInvalid;
+			}
+		}
+
+		// Substitute vertex format: validate only, no back reference is recorded.
+		if (desc.mtlVertexFormatSubstitute && !getMTLVertexFormatDesc(desc.mtlVertexFormatSubstitute).isSupported()) {
+			desc.mtlVertexFormatSubstitute = MTLVertexFormatInvalid;
+		}
+	}
+}
diff --git a/drivers/metal/rendering_context_driver_metal.h b/drivers/metal/rendering_context_driver_metal.h
new file mode 100644
index 0000000000..0363ab111a
--- /dev/null
+++ b/drivers/metal/rendering_context_driver_metal.h
@@ -0,0 +1,206 @@
+/**************************************************************************/
+/* rendering_context_driver_metal.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#ifndef RENDERING_CONTEXT_DRIVER_METAL_H
+#define RENDERING_CONTEXT_DRIVER_METAL_H
+
+#ifdef METAL_ENABLED
+
+#import "rendering_device_driver_metal.h"
+
+#import "servers/rendering/rendering_context_driver.h"
+
+#import <CoreGraphics/CGGeometry.h>
+#import <Metal/Metal.h>
+#import <QuartzCore/CALayer.h>
+
+@class CAMetalLayer;
+@protocol CAMetalDrawable;
+class PixelFormats;
+class MDResourceCache;
+
+// Metal implementation of RenderingContextDriver.
+// It holds one MTLDevice and one Device description, and declares the surface types
+// (Surface / SurfaceLayer) used to acquire and present frame buffers.
+class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingContextDriverMetal : public RenderingContextDriver {
+protected:
+	id<MTLDevice> metal_device = nil;
+	Device device; // There is only one device on Apple Silicon.
+
+public:
+	Error initialize() final override;
+	const Device &device_get(uint32_t p_device_index) const final override;
+	uint32_t device_get_count() const final override;
+	// Presentation is reported as supported for every device/surface combination.
+	bool device_supports_present(uint32_t p_device_index, SurfaceID p_surface) const final override { return true; }
+	RenderingDeviceDriver *driver_create() final override;
+	void driver_free(RenderingDeviceDriver *p_driver) final override;
+	SurfaceID surface_create(const void *p_platform_data) final override;
+	void surface_set_size(SurfaceID p_surface, uint32_t p_width, uint32_t p_height) final override;
+	void surface_set_vsync_mode(SurfaceID p_surface, DisplayServer::VSyncMode p_vsync_mode) final override;
+	DisplayServer::VSyncMode surface_get_vsync_mode(SurfaceID p_surface) const final override;
+	uint32_t surface_get_width(SurfaceID p_surface) const final override;
+	uint32_t surface_get_height(SurfaceID p_surface) const final override;
+	void surface_set_needs_resize(SurfaceID p_surface, bool p_needs_resize) final override;
+	bool surface_get_needs_resize(SurfaceID p_surface) const final override;
+	void surface_destroy(SurfaceID p_surface) final override;
+	bool is_debug_utils_enabled() const final override { return true; }
+
+#pragma mark - Metal-specific methods
+
+	// Platform-specific data for the windows embedded in this driver.
+	// The layer is not retained by this struct.
+	struct WindowPlatformData {
+		CAMetalLayer *__unsafe_unretained layer;
+	};
+
+	// Abstract surface: stores the requested size, vsync mode and resize flag, and defines
+	// the operations used to (re)configure, acquire and present frame buffers.
+	class Surface {
+	protected:
+		id<MTLDevice> device;
+
+	public:
+		uint32_t width = 0;
+		uint32_t height = 0;
+		DisplayServer::VSyncMode vsync_mode = DisplayServer::VSYNC_ENABLED;
+		bool needs_resize = false;
+
+		Surface(id<MTLDevice> p_device) :
+				device(p_device) {}
+		virtual ~Surface() = default;
+
+		// All surfaces use BGRA8Unorm.
+		MTLPixelFormat get_pixel_format() const { return MTLPixelFormatBGRA8Unorm; }
+		virtual Error resize(uint32_t p_desired_framebuffer_count) = 0;
+		virtual RDD::FramebufferID acquire_next_frame_buffer() = 0;
+		virtual void present(MDCommandBuffer *p_cmd_buffer) = 0;
+	};
+
+	// Surface backed by a CAMetalLayer.
+	class SurfaceLayer : public Surface {
+		CAMetalLayer *__unsafe_unretained layer = nil;
+		LocalVector<MDFrameBuffer> frame_buffers;
+		LocalVector<id<MTLDrawable>> drawables;
+		// Ring bookkeeping over frame_buffers/drawables:
+		//  - rear: slot of the most recently acquired frame buffer. It starts at uint32_t(-1)
+		//    so that the first `(rear + 1) % size` wraps around to slot 0.
+		//  - front: slot that the next present() call consumes.
+		//  - count: number of slots acquired but not yet presented.
+		uint32_t rear = -1;
+		uint32_t front = 0;
+		uint32_t count = 0;
+
+	public:
+		// Configures the layer for presentation with the given device.
+		SurfaceLayer(CAMetalLayer *p_layer, id<MTLDevice> p_device) :
+				Surface(p_device), layer(p_layer) {
+			layer.allowsNextDrawableTimeout = YES;
+			layer.framebufferOnly = YES;
+			layer.opaque = OS::get_singleton()->is_layered_allowed() ? NO : YES;
+			layer.pixelFormat = get_pixel_format();
+			layer.device = p_device;
+		}
+
+		~SurfaceLayer() override {
+			layer = nil;
+		}
+
+		// Applies the current width/height/vsync_mode to the layer and sizes the frame buffer ring.
+		// Returns ERR_SKIP when either dimension is 0, OK otherwise.
+		Error resize(uint32_t p_desired_framebuffer_count) override final {
+			if (width == 0 || height == 0) {
+				// Very likely the window is minimized, don't create a swap chain.
+				return ERR_SKIP;
+			}
+
+			CGSize drawableSize = CGSizeMake(width, height);
+			CGSize current = layer.drawableSize;
+			if (!CGSizeEqualToSize(current, drawableSize)) {
+				layer.drawableSize = drawableSize;
+			}
+
+			// CAMetalLayer only accepts a maximumDrawableCount of 2 or 3, so clamp the
+			// request into that range instead of letting an invalid value reach the layer.
+			p_desired_framebuffer_count = MIN(3U, p_desired_framebuffer_count);
+			if (p_desired_framebuffer_count < 2U) {
+				p_desired_framebuffer_count = 2U;
+			}
+			layer.maximumDrawableCount = p_desired_framebuffer_count;
+
+#if TARGET_OS_OSX
+			// Display sync is only supported on macOS.
+			switch (vsync_mode) {
+				case DisplayServer::VSYNC_MAILBOX:
+				case DisplayServer::VSYNC_ADAPTIVE:
+				case DisplayServer::VSYNC_ENABLED:
+					layer.displaySyncEnabled = YES;
+					break;
+				case DisplayServer::VSYNC_DISABLED:
+					layer.displaySyncEnabled = NO;
+					break;
+			}
+#endif
+			drawables.resize(p_desired_framebuffer_count);
+			frame_buffers.resize(p_desired_framebuffer_count);
+			for (uint32_t i = 0; i < p_desired_framebuffer_count; i++) {
+				// Reserve space for the drawable texture.
+				frame_buffers[i].textures.resize(1);
+			}
+
+			return OK;
+		}
+
+		// Acquires the next drawable from the layer and returns the frame buffer wrapping its texture.
+		// Returns an empty FramebufferID when every slot is already in use or no drawable is available.
+		RDD::FramebufferID acquire_next_frame_buffer() override final {
+			if (count == frame_buffers.size()) {
+				return RDD::FramebufferID();
+			}
+
+			// Fetch the drawable before touching the ring state: if the layer has none to give,
+			// rear/count must stay untouched, otherwise a later present() would consume a slot
+			// that never received a drawable.
+			id<CAMetalDrawable> drawable = layer.nextDrawable;
+			ERR_FAIL_NULL_V_MSG(drawable, RDD::FramebufferID(), "no drawable available");
+
+			rear = (rear + 1) % frame_buffers.size();
+			count++;
+
+			MDFrameBuffer &frame_buffer = frame_buffers[rear];
+			frame_buffer.size = Size2i(width, height);
+			drawables[rear] = drawable;
+			frame_buffer.textures.write[0] = drawable.texture;
+
+			return RDD::FramebufferID(&frame_buffer);
+		}
+
+		// Schedules presentation of the oldest acquired drawable on the given command buffer.
+		// Does nothing when no frame buffer is currently acquired.
+		void present(MDCommandBuffer *p_cmd_buffer) override final {
+			if (count == 0) {
+				return;
+			}
+
+			// Release texture and drawable.
+			frame_buffers[front].textures.write[0] = nil;
+			id<MTLDrawable> drawable = drawables[front];
+			drawables[front] = nil;
+
+			count--;
+			front = (front + 1) % frame_buffers.size();
+
+			[p_cmd_buffer->get_command_buffer() presentDrawable:drawable];
+		}
+	};
+
+	id<MTLDevice> get_metal_device() const { return metal_device; }
+
+#pragma mark - Initialization
+
+	RenderingContextDriverMetal();
+	~RenderingContextDriverMetal() override;
+};
+
+#endif // METAL_ENABLED
+
+#endif // RENDERING_CONTEXT_DRIVER_METAL_H
diff --git a/drivers/metal/rendering_context_driver_metal.mm b/drivers/metal/rendering_context_driver_metal.mm
new file mode 100644
index 0000000000..b257d7142a
--- /dev/null
+++ b/drivers/metal/rendering_context_driver_metal.mm
@@ -0,0 +1,134 @@
+/**************************************************************************/
+/* rendering_context_driver_metal.mm */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#import "rendering_context_driver_metal.h"
+
+// MTLDevice sub-protocol that declares -setShouldMaximizeConcurrentCompilation: when building
+// for macOS with a maximum allowed SDK version below 13.3, so the selector can be sent through
+// an id<MTLDeviceEx> cast (presumably because older SDK headers do not declare it — confirm).
+@protocol MTLDeviceEx <MTLDevice>
+#if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 130300
+- (void)setShouldMaximizeConcurrentCompilation:(BOOL)p_enabled;
+#endif
+@end
+
+// Nothing to set up here; members use their in-class initializers.
+RenderingContextDriverMetal::RenderingContextDriverMetal() = default;
+
+// No explicit teardown is performed by the destructor.
+RenderingContextDriverMetal::~RenderingContextDriverMetal() = default;
+
+// Creates the system default Metal device and fills in the Device description exposed by this driver.
+// Returns ERR_CANT_CREATE when no default Metal device is available, OK otherwise.
+Error RenderingContextDriverMetal::initialize() {
+	metal_device = MTLCreateSystemDefaultDevice();
+	// MTLCreateSystemDefaultDevice() yields nil when the system has no usable Metal device;
+	// bail out before the device is queried below.
+	ERR_FAIL_NULL_V_MSG(metal_device, ERR_CANT_CREATE, "Unable to create the default Metal device.");
+#if TARGET_OS_OSX
+	if (@available(macOS 13.3, *)) {
+		[id<MTLDeviceEx>(metal_device) setShouldMaximizeConcurrentCompilation:YES];
+	}
+#endif
+	device.type = DEVICE_TYPE_INTEGRATED_GPU;
+	device.vendor = VENDOR_APPLE;
+	device.workarounds = Workarounds();
+
+	// Build a display name of the form "<device name> (Apple<N>)", where N is the highest
+	// supported GPU family expressed relative to MTLGPUFamilyApple1.
+	MetalDeviceProperties props(metal_device);
+	int version = (int)props.features.highestFamily - (int)MTLGPUFamilyApple1 + 1;
+	device.name = vformat("%s (Apple%d)", metal_device.name.UTF8String, version);
+
+	return OK;
+}
+
+// Returns the single Device description held by this driver.
+// p_device_index is only checked by a dev-build assertion (it must be 0); it does not select anything.
+const RenderingContextDriver::Device &RenderingContextDriverMetal::device_get(uint32_t p_device_index) const {
+ DEV_ASSERT(p_device_index < 1);
+ return device;
+}
+
+// Reports how many devices this driver exposes, which is always exactly one.
+uint32_t RenderingContextDriverMetal::device_get_count() const {
+	const uint32_t exposed_device_count = 1;
+	return exposed_device_count;
+}
+
+// Allocates a new Metal rendering device driver bound to this context driver.
+RenderingDeviceDriver *RenderingContextDriverMetal::driver_create() {
+	RenderingDeviceDriverMetal *created_driver = memnew(RenderingDeviceDriverMetal(this));
+	return created_driver;
+}
+
+// Destroys a driver with memdelete(); p_driver is passed straight through without a null check.
+void RenderingContextDriverMetal::driver_free(RenderingDeviceDriver *p_driver) {
+ memdelete(p_driver);
+}
+
+// Creates a SurfaceLayer for the CAMetalLayer carried by p_platform_data and returns it as a SurfaceID.
+// p_platform_data is interpreted as a WindowPlatformData and is dereferenced without a null check.
+RenderingContextDriver::SurfaceID RenderingContextDriverMetal::surface_create(const void *p_platform_data) {
+	const WindowPlatformData *platform_data = static_cast<const WindowPlatformData *>(p_platform_data);
+	Surface *created_surface = memnew(SurfaceLayer(platform_data->layer, metal_device));
+	return SurfaceID(created_surface);
+}
+
+// Stores a new size on the surface and flags it for resize; a call that repeats the current size is a no-op.
+void RenderingContextDriverMetal::surface_set_size(SurfaceID p_surface, uint32_t p_width, uint32_t p_height) {
+	Surface *target = (Surface *)(p_surface);
+	const bool size_changed = target->width != p_width || target->height != p_height;
+	if (size_changed) {
+		target->width = p_width;
+		target->height = p_height;
+		target->needs_resize = true;
+	}
+}
+
+// Stores a new vsync mode on the surface and flags it for resize; a call that repeats the current mode is a no-op.
+void RenderingContextDriverMetal::surface_set_vsync_mode(SurfaceID p_surface, DisplayServer::VSyncMode p_vsync_mode) {
+	Surface *target = (Surface *)(p_surface);
+	if (target->vsync_mode != p_vsync_mode) {
+		target->vsync_mode = p_vsync_mode;
+		target->needs_resize = true;
+	}
+}
+
+// Returns the vsync mode currently stored on the surface.
+DisplayServer::VSyncMode RenderingContextDriverMetal::surface_get_vsync_mode(SurfaceID p_surface) const {
+	const Surface *target = (const Surface *)(p_surface);
+	return target->vsync_mode;
+}
+
+// Returns the width currently stored on the surface.
+uint32_t RenderingContextDriverMetal::surface_get_width(SurfaceID p_surface) const {
+	const Surface *target = (const Surface *)(p_surface);
+	return target->width;
+}
+
+// Returns the height currently stored on the surface.
+uint32_t RenderingContextDriverMetal::surface_get_height(SurfaceID p_surface) const {
+	const Surface *target = (const Surface *)(p_surface);
+	return target->height;
+}
+
+// Overwrites the surface's needs_resize flag with the given value.
+void RenderingContextDriverMetal::surface_set_needs_resize(SurfaceID p_surface, bool p_needs_resize) {
+	((Surface *)(p_surface))->needs_resize = p_needs_resize;
+}
+
+// Returns the surface's needs_resize flag.
+bool RenderingContextDriverMetal::surface_get_needs_resize(SurfaceID p_surface) const {
+	const Surface *target = (const Surface *)(p_surface);
+	return target->needs_resize;
+}
+
+// Destroys the Surface object that the given SurfaceID refers to.
+void RenderingContextDriverMetal::surface_destroy(SurfaceID p_surface) {
+	memdelete((Surface *)(p_surface));
+}
diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h
new file mode 100644
index 0000000000..7c23624e43
--- /dev/null
+++ b/drivers/metal/rendering_device_driver_metal.h
@@ -0,0 +1,437 @@
+/**************************************************************************/
+/* rendering_device_driver_metal.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#ifndef RENDERING_DEVICE_DRIVER_METAL_H
+#define RENDERING_DEVICE_DRIVER_METAL_H
+
+#import "metal_objects.h"
+
+#import "servers/rendering/rendering_device_driver.h"
+
+#import <Metal/Metal.h>
+#import <spirv.hpp>
+#import <variant>
+
+#ifdef DEBUG_ENABLED
+#ifndef _DEBUG
+#define _DEBUG
+#endif
+#endif
+
+class RenderingContextDriverMetal;
+
+class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public RenderingDeviceDriver {
+ friend struct ShaderCacheEntry;
+
+ template <typename T>
+ using Result = std::variant<T, Error>;
+
+#pragma mark - Generic
+
+ RenderingContextDriverMetal *context_driver = nullptr;
+ RenderingContextDriver::Device context_device;
+ id<MTLDevice> device = nil;
+
+ uint32_t version_major = 2;
+ uint32_t version_minor = 0;
+ MetalDeviceProperties *metal_device_properties = nullptr;
+ PixelFormats *pixel_formats = nullptr;
+ std::unique_ptr<MDResourceCache> resource_cache;
+
+ RDD::Capabilities capabilities;
+ RDD::MultiviewCapabilities multiview_capabilities;
+
+ id<MTLBinaryArchive> archive = nil;
+ uint32_t archive_count = 0;
+
+ id<MTLCommandQueue> device_queue = nil;
+ id<MTLCaptureScope> device_scope = nil;
+
+ String pipeline_cache_id;
+
+ Error _create_device();
+ Error _check_capabilities();
+
+#pragma mark - Shader Cache
+
+ ShaderLoadStrategy _shader_load_strategy = ShaderLoadStrategy::DEFAULT;
+
+ /**
+ * The shader cache is a map of hashes of the Metal source to shader cache entries.
+ *
+ * To prevent unbounded growth of the cache, cache entries are automatically freed when
+ * there are no more references to the MDLibrary associated with the cache entry.
+ */
+ HashMap<SHA256Digest, ShaderCacheEntry *, HashableHasher<SHA256Digest>> _shader_cache;
+ void shader_cache_free_entry(const SHA256Digest &key);
+
+public:
+ Error initialize(uint32_t p_device_index, uint32_t p_frame_count) override final;
+
+#pragma mark - Memory
+
+#pragma mark - Buffers
+
+public:
+ virtual BufferID buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) override final;
+ virtual bool buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) override final;
+ virtual void buffer_free(BufferID p_buffer) override final;
+ virtual uint64_t buffer_get_allocation_size(BufferID p_buffer) override final;
+ virtual uint8_t *buffer_map(BufferID p_buffer) override final;
+ virtual void buffer_unmap(BufferID p_buffer) override final;
+
+#pragma mark - Texture
+
+private:
+ // Returns true if the texture is a valid linear format.
+ Result<bool> is_valid_linear(TextureFormat const &p_format) const;
+ void _get_sub_resource(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) const;
+
+public:
+ virtual TextureID texture_create(const TextureFormat &p_format, const TextureView &p_view) override final;
+ virtual TextureID texture_create_from_extension(uint64_t p_native_texture, TextureType p_type, DataFormat p_format, uint32_t p_array_layers, bool p_depth_stencil) override final;
+ virtual TextureID texture_create_shared(TextureID p_original_texture, const TextureView &p_view) override final;
+ virtual TextureID texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) override final;
+ virtual void texture_free(TextureID p_texture) override final;
+ virtual uint64_t texture_get_allocation_size(TextureID p_texture) override final;
+ virtual void texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) override final;
+ virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) override final;
+ virtual void texture_unmap(TextureID p_texture) override final;
+ virtual BitField<TextureUsageBits> texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final;
+ virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) override final;
+
+#pragma mark - Sampler
+
+public:
+ virtual SamplerID sampler_create(const SamplerState &p_state) final override;
+ virtual void sampler_free(SamplerID p_sampler) final override;
+ virtual bool sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_filter) override final;
+
+#pragma mark - Vertex Array
+
+private:
+public:
+ virtual VertexFormatID vertex_format_create(VectorView<VertexAttribute> p_vertex_attribs) override final;
+ virtual void vertex_format_free(VertexFormatID p_vertex_format) override final;
+
+#pragma mark - Barriers
+
+ virtual void command_pipeline_barrier(
+ CommandBufferID p_cmd_buffer,
+ BitField<PipelineStageBits> p_src_stages,
+ BitField<PipelineStageBits> p_dst_stages,
+ VectorView<MemoryBarrier> p_memory_barriers,
+ VectorView<BufferBarrier> p_buffer_barriers,
+ VectorView<TextureBarrier> p_texture_barriers) override final;
+
+#pragma mark - Fences
+
+private:
+ // Fence state: wraps a dispatch semaphore that is created with an initial value of 0.
+ struct Fence {
+ dispatch_semaphore_t semaphore;
+ Fence() :
+ semaphore(dispatch_semaphore_create(0)) {}
+ };
+
+public:
+ virtual FenceID fence_create() override final;
+ virtual Error fence_wait(FenceID p_fence) override final;
+ virtual void fence_free(FenceID p_fence) override final;
+
+#pragma mark - Semaphores
+
+public:
+ virtual SemaphoreID semaphore_create() override final;
+ virtual void semaphore_free(SemaphoreID p_semaphore) override final;
+
+#pragma mark - Commands
+ // ----- QUEUE FAMILY -----
+
+ virtual CommandQueueFamilyID command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface = 0) override final;
+
+ // ----- QUEUE -----
+public:
+ virtual CommandQueueID command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue = false) override final;
+ virtual Error command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) override final;
+ virtual void command_queue_free(CommandQueueID p_cmd_queue) override final;
+
+ // ----- POOL -----
+
+ virtual CommandPoolID command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) override final;
+ virtual void command_pool_free(CommandPoolID p_cmd_pool) override final;
+
+ // ----- BUFFER -----
+
+private:
+ // Used to maintain references.
+ Vector<MDCommandBuffer *> command_buffers;
+
+public:
+ virtual CommandBufferID command_buffer_create(CommandPoolID p_cmd_pool) override final;
+ virtual bool command_buffer_begin(CommandBufferID p_cmd_buffer) override final;
+ virtual bool command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) override final;
+ virtual void command_buffer_end(CommandBufferID p_cmd_buffer) override final;
+ virtual void command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) override final;
+
+#pragma mark - Swapchain
+
+private:
+ // Swap chain record: a surface ID, a render pass ID and a data format.
+ // A new record starts with a default-constructed SurfaceID, a null render pass and DATA_FORMAT_MAX.
+ struct SwapChain {
+ RenderingContextDriver::SurfaceID surface = RenderingContextDriver::SurfaceID();
+ RenderPassID render_pass;
+ RDD::DataFormat data_format = DATA_FORMAT_MAX;
+ SwapChain() :
+ render_pass(nullptr) {}
+ };
+
+ void _swap_chain_release(SwapChain *p_swap_chain);
+ void _swap_chain_release_buffers(SwapChain *p_swap_chain);
+
+public:
+ virtual SwapChainID swap_chain_create(RenderingContextDriver::SurfaceID p_surface) override final;
+ virtual Error swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) override final;
+ virtual FramebufferID swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) override final;
+ virtual RenderPassID swap_chain_get_render_pass(SwapChainID p_swap_chain) override final;
+ virtual DataFormat swap_chain_get_format(SwapChainID p_swap_chain) override final;
+ virtual void swap_chain_free(SwapChainID p_swap_chain) override final;
+
+#pragma mark - Frame Buffer
+
+ virtual FramebufferID framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height) override final;
+ virtual void framebuffer_free(FramebufferID p_framebuffer) override final;
+
+#pragma mark - Shader
+
+private:
+ // Serialization types need access to private state.
+
+ friend struct ShaderStageData;
+ friend struct SpecializationConstantData;
+ friend struct UniformData;
+ friend struct ShaderBinaryData;
+ friend struct PushConstantData;
+
+private:
+ Error _reflect_spirv16(VectorView<ShaderStageSPIRVData> p_spirv, ShaderReflection &r_reflection);
+
+public:
+ virtual String shader_get_binary_cache_key() override final;
+ virtual Vector<uint8_t> shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) override final;
+ virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name) override final;
+ virtual void shader_free(ShaderID p_shader) override final;
+ virtual void shader_destroy_modules(ShaderID p_shader) override final;
+
+#pragma mark - Uniform Set
+
+public:
+ virtual UniformSetID uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index) override final;
+ virtual void uniform_set_free(UniformSetID p_uniform_set) override final;
+
+#pragma mark - Commands
+
+ virtual void command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
+
+#pragma mark Transfer
+
+private:
+ enum class CopySource {
+ Buffer,
+ Texture,
+ };
+ void _copy_texture_buffer(CommandBufferID p_cmd_buffer,
+ CopySource p_source,
+ TextureID p_texture,
+ BufferID p_buffer,
+ VectorView<BufferTextureCopyRegion> p_regions);
+
+public:
+ virtual void command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) override final;
+ virtual void command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView<BufferCopyRegion> p_regions) override final;
+
+ virtual void command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<TextureCopyRegion> p_regions) override final;
+ virtual void command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) override final;
+ virtual void command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) override final;
+
+ virtual void command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<BufferTextureCopyRegion> p_regions) override final;
+ virtual void command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView<BufferTextureCopyRegion> p_regions) override final;
+
+#pragma mark Pipeline
+
+private:
+ Result<id<MTLFunction>> _create_function(MDLibrary *p_library, NSString *p_name, VectorView<PipelineSpecializationConstant> &p_specialization_constants);
+
+public:
+ virtual void pipeline_free(PipelineID p_pipeline_id) override final;
+
+ // ----- BINDING -----
+
+ virtual void command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_first_index, VectorView<uint32_t> p_data) override final;
+
+ // ----- CACHE -----
+private:
+ String _pipeline_get_cache_path() const;
+
+public:
+ virtual bool pipeline_cache_create(const Vector<uint8_t> &p_data) override final;
+ virtual void pipeline_cache_free() override final;
+ virtual size_t pipeline_cache_query_size() override final;
+ virtual Vector<uint8_t> pipeline_cache_serialize() override final;
+
+#pragma mark Rendering
+
+ // ----- SUBPASS -----
+
+ virtual RenderPassID render_pass_create(VectorView<Attachment> p_attachments, VectorView<Subpass> p_subpasses, VectorView<SubpassDependency> p_subpass_dependencies, uint32_t p_view_count) override final;
+ virtual void render_pass_free(RenderPassID p_render_pass) override final;
+
+ // ----- COMMANDS -----
+
+public:
+ virtual void command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RenderPassClearValue> p_clear_values) override final;
+ virtual void command_end_render_pass(CommandBufferID p_cmd_buffer) override final;
+ virtual void command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) override final;
+ virtual void command_render_set_viewport(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_viewports) override final;
+ virtual void command_render_set_scissor(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_scissors) override final;
+ virtual void command_render_clear_attachments(CommandBufferID p_cmd_buffer, VectorView<AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) override final;
+
+ // Binding.
+ virtual void command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
+ virtual void command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
+
+ // Drawing.
+ virtual void command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) override final;
+ virtual void command_render_draw_indexed(CommandBufferID p_cmd_buffer, uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index, int32_t p_vertex_offset, uint32_t p_first_instance) override final;
+ virtual void command_render_draw_indexed_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final;
+ virtual void command_render_draw_indexed_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final;
+ virtual void command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final;
+ virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final;
+
+ // Buffer binding.
+ virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) override final;
+ virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) override final;
+
+ // Dynamic state.
+ virtual void command_render_set_blend_constants(CommandBufferID p_cmd_buffer, const Color &p_constants) override final;
+ virtual void command_render_set_line_width(CommandBufferID p_cmd_buffer, float p_width) override final;
+
+ // ----- PIPELINE -----
+
+ virtual PipelineID render_pipeline_create(
+ ShaderID p_shader,
+ VertexFormatID p_vertex_format,
+ RenderPrimitive p_render_primitive,
+ PipelineRasterizationState p_rasterization_state,
+ PipelineMultisampleState p_multisample_state,
+ PipelineDepthStencilState p_depth_stencil_state,
+ PipelineColorBlendState p_blend_state,
+ VectorView<int32_t> p_color_attachments,
+ BitField<PipelineDynamicStateFlags> p_dynamic_state,
+ RenderPassID p_render_pass,
+ uint32_t p_render_subpass,
+ VectorView<PipelineSpecializationConstant> p_specialization_constants) override final;
+
+#pragma mark - Compute
+
+ // ----- COMMANDS -----
+
+ // Binding.
+ virtual void command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
+ virtual void command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
+
+ // Dispatching.
+ virtual void command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) override final;
+ virtual void command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) override final;
+
+ // ----- PIPELINE -----
+
+ virtual PipelineID compute_pipeline_create(ShaderID p_shader, VectorView<PipelineSpecializationConstant> p_specialization_constants) override final;
+
+#pragma mark - Queries
+
+ // ----- TIMESTAMP -----
+
+ // Basic.
+ virtual QueryPoolID timestamp_query_pool_create(uint32_t p_query_count) override final;
+ virtual void timestamp_query_pool_free(QueryPoolID p_pool_id) override final;
+ virtual void timestamp_query_pool_get_results(QueryPoolID p_pool_id, uint32_t p_query_count, uint64_t *r_results) override final;
+ virtual uint64_t timestamp_query_result_to_time(uint64_t p_result) override final;
+
+ // Commands.
+ virtual void command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) override final;
+ virtual void command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) override final;
+
+#pragma mark - Labels
+
+ virtual void command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) override final;
+ virtual void command_end_label(CommandBufferID p_cmd_buffer) override final;
+
+#pragma mark - Debug
+
+ virtual void command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) override final;
+
+#pragma mark - Submission
+
+ virtual void begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) override final;
+ virtual void end_segment() override final;
+
+#pragma mark - Miscellaneous
+
+ virtual void set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) override final;
+ virtual uint64_t get_resource_native_handle(DriverResource p_type, ID p_driver_id) override final;
+ virtual uint64_t get_total_memory_used() override final;
+ virtual uint64_t limit_get(Limit p_limit) override final;
+ virtual uint64_t api_trait_get(ApiTrait p_trait) override final;
+ virtual bool has_feature(Features p_feature) override final;
+ virtual const MultiviewCapabilities &get_multiview_capabilities() override final;
+ // Reports the name of the graphics API implemented by this driver; always the literal "Metal".
+ virtual String get_api_name() const override final { return "Metal"; };
+ virtual String get_api_version() const override final;
+ virtual String get_pipeline_cache_uuid() const override final;
+ virtual const Capabilities &get_capabilities() const override final;
+ virtual bool is_composite_alpha_supported(CommandQueueID p_queue) const override final;
+
+ // Metal-specific.
+ // Accessors that expose driver members to other code.
+ // get_device() returns the `device` member as-is; the three reference-returning getters
+ // dereference their member pointers without a null check.
+ id<MTLDevice> get_device() const { return device; }
+ PixelFormats &get_pixel_formats() const { return *pixel_formats; }
+ MDResourceCache &get_resource_cache() const { return *resource_cache; }
+ MetalDeviceProperties const &get_device_properties() const { return *metal_device_properties; }
+
+	// Maps a vertex attribute binding to a Metal buffer index by counting down from the
+	// last per-stage buffer slot (maxPerStageBufferCount - 1): binding 0 gets the last slot,
+	// binding 1 the one before it, and so on.
+	_FORCE_INLINE_ uint32_t get_metal_buffer_index_for_vertex_attribute_binding(uint32_t p_binding) {
+		const auto last_buffer_index = metal_device_properties->limits.maxPerStageBufferCount - 1;
+		return last_buffer_index - p_binding;
+	}
+
+ size_t get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const;
+ size_t get_texel_buffer_alignment_for_format(MTLPixelFormat p_format) const;
+
+ /******************/
+ RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver);
+ ~RenderingDeviceDriverMetal();
+};
+
+#endif // RENDERING_DEVICE_DRIVER_METAL_H
diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm
new file mode 100644
index 0000000000..9d691a0d23
--- /dev/null
+++ b/drivers/metal/rendering_device_driver_metal.mm
@@ -0,0 +1,3965 @@
+/**************************************************************************/
+/* rendering_device_driver_metal.mm */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+/**************************************************************************/
+/* */
+/* Portions of this code were derived from MoltenVK. */
+/* */
+/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
+/* (http://www.brenwill.com) */
+/* */
+/* Licensed under the Apache License, Version 2.0 (the "License"); */
+/* you may not use this file except in compliance with the License. */
+/* You may obtain a copy of the License at */
+/* */
+/* http://www.apache.org/licenses/LICENSE-2.0 */
+/* */
+/* Unless required by applicable law or agreed to in writing, software */
+/* distributed under the License is distributed on an "AS IS" BASIS, */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
+/* implied. See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/**************************************************************************/
+
+#import "rendering_device_driver_metal.h"
+
+#import "pixel_formats.h"
+#import "rendering_context_driver_metal.h"
+
+#import "core/io/compression.h"
+#import "core/io/marshalls.h"
+#import "core/string/ustring.h"
+#import "core/templates/hash_map.h"
+
+#import <Metal/MTLTexture.h>
+#import <Metal/Metal.h>
+#import <os/log.h>
+#import <os/signpost.h>
+#import <spirv_msl.hpp>
+#import <spirv_parser.hpp>
+
+#pragma mark - Logging
+
+os_log_t LOG_DRIVER;
+// Used for dynamic tracing.
+os_log_t LOG_INTERVALS;
+
+// Creates the two os_log handles declared above. The constructor attribute makes the
+// toolchain run this automatically at load time, so no explicit call is needed.
+__attribute__((constructor)) static void InitializeLogging(void) {
+	// Both handles share the same subsystem identifier and differ only in category.
+	static const char *const subsystem = "org.godotengine.godot.metal";
+	LOG_DRIVER = os_log_create(subsystem, OS_LOG_CATEGORY_POINTS_OF_INTEREST);
+	LOG_INTERVALS = os_log_create(subsystem, "events");
+}
+
+/*****************/
+/**** GENERIC ****/
+/*****************/
+
+// RDD::CompareOperator == VkCompareOp.
+// Compile-time checks pairing every RDD::COMPARE_OP_* member with its MTLCompareFunction*
+// counterpart; the build fails if a pair stops matching.
+// NOTE(review): ENUM_MEMBERS_EQUAL is defined elsewhere; presumably it compares the numeric values - confirm.
+static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NEVER, MTLCompareFunctionNever));
+static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS, MTLCompareFunctionLess));
+static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_EQUAL, MTLCompareFunctionEqual));
+static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS_OR_EQUAL, MTLCompareFunctionLessEqual));
+static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER, MTLCompareFunctionGreater));
+static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NOT_EQUAL, MTLCompareFunctionNotEqual));
+static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER_OR_EQUAL, MTLCompareFunctionGreaterEqual));
+static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_ALWAYS, MTLCompareFunctionAlways));
+
+// Returns the extent of mip level p_level of p_tex: each dimension of the texture is
+// shifted right by p_level and clamped so that it never drops below 1.
+_FORCE_INLINE_ MTLSize mipmapLevelSizeFromTexture(id<MTLTexture> p_tex, NSUInteger p_level) {
+	return MTLSizeMake(
+			MAX(p_tex.width >> p_level, 1UL),
+			MAX(p_tex.height >> p_level, 1UL),
+			MAX(p_tex.depth >> p_level, 1UL));
+}
+
+// Returns the extent of mip level p_level for a base extent of p_size.
+// Level 0 is the base extent itself; for deeper levels every dimension is shifted right
+// by p_level and clamped so that it never drops below 1.
+_FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_level) {
+	// p_size is a by-value copy, so it can be adjusted in place.
+	if (p_level != 0) {
+		p_size.width = MAX(p_size.width >> p_level, 1UL);
+		p_size.height = MAX(p_size.height >> p_level, 1UL);
+		p_size.depth = MAX(p_size.depth >> p_level, 1UL);
+	}
+	return p_size;
+}
+
+// Two MTLSize values are equal when width, height and depth all match.
+_FORCE_INLINE_ static bool operator==(MTLSize p_a, MTLSize p_b) {
+	if (p_a.width != p_b.width) {
+		return false;
+	}
+	if (p_a.height != p_b.height) {
+		return false;
+	}
+	return p_a.depth == p_b.depth;
+}
+
+/*****************/
+/**** BUFFERS ****/
+/*****************/
+
+// Allocates an MTLBuffer of p_size bytes on the driver's device.
+// CPU allocations use shared storage, GPU allocations use private storage; hazard tracking
+// is always enabled. p_usage is not consulted.
+// Returns an empty BufferID (after logging) when Metal fails to create the buffer.
+RDD::BufferID RenderingDeviceDriverMetal::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) {
+	MTLResourceOptions resource_options = MTLResourceHazardTrackingModeTracked;
+	if (p_allocation_type == MEMORY_ALLOCATION_TYPE_CPU) {
+		resource_options |= MTLResourceStorageModeShared;
+	} else if (p_allocation_type == MEMORY_ALLOCATION_TYPE_GPU) {
+		resource_options |= MTLResourceStorageModePrivate;
+	}
+
+	id<MTLBuffer> obj = [device newBufferWithLength:p_size options:resource_options];
+	ERR_FAIL_NULL_V_MSG(obj, BufferID(), "Can't create buffer of size: " + itos(p_size));
+	return rid::make(obj);
+}
+
+// Intentionally a no-op in this driver: both arguments are ignored and true is always returned.
+bool RenderingDeviceDriverMetal::buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) {
+ // Nothing to do.
+ return true;
+}
+
+// Frees a buffer by handing its ID to rid::release.
+void RenderingDeviceDriverMetal::buffer_free(BufferID p_buffer) {
+ rid::release(p_buffer);
+}
+
+// Returns the allocatedSize reported by the MTLBuffer behind p_buffer.
+uint64_t RenderingDeviceDriverMetal::buffer_get_allocation_size(BufferID p_buffer) {
+	id<MTLBuffer> buffer = rid::get(p_buffer);
+	const uint64_t allocated_bytes = [buffer allocatedSize];
+	return allocated_bytes;
+}
+
+// Returns the CPU-visible contents pointer of a buffer.
+// Only buffers with shared storage can be mapped; any other storage mode logs an error
+// and yields nullptr.
+uint8_t *RenderingDeviceDriverMetal::buffer_map(BufferID p_buffer) {
+	id<MTLBuffer> obj = rid::get(p_buffer);
+	ERR_FAIL_COND_V_MSG(obj.storageMode != MTLStorageModeShared, nullptr, "Unable to map private buffers");
+	void *mapped = obj.contents;
+	return static_cast<uint8_t *>(mapped);
+}
+
+// Counterpart of buffer_map. Intentionally empty: buffer_map only returns the buffer's
+// contents pointer, so there is nothing to undo here.
+void RenderingDeviceDriverMetal::buffer_unmap(BufferID p_buffer) {
+ // Nothing to do.
+}
+
+#pragma mark - Texture
+
+#pragma mark - Format Conversions
+
+// Lookup table from the RD texture type to the Metal texture type; texture_create indexes it
+// with p_format.texture_type. It is sized by RD::TEXTURE_TYPE_MAX.
+// NOTE(review): entry order presumably mirrors the RD texture type enum declaration - confirm when that enum changes.
+static const MTLTextureType TEXTURE_TYPE[RD::TEXTURE_TYPE_MAX] = {
+ MTLTextureType1D,
+ MTLTextureType2D,
+ MTLTextureType3D,
+ MTLTextureTypeCube,
+ MTLTextureType1DArray,
+ MTLTextureType2DArray,
+ MTLTextureTypeCubeArray,
+};
+
+// Decides whether p_format describes a texture that must be created as a linear texture.
+//
+// Returns:
+// - false when TEXTURE_USAGE_CPU_READ_BIT is not requested (not a linear texture);
+// - true when it is requested and every linear-texture restriction below is met;
+// - ERR_CANT_CREATE (after logging) when it is requested but a restriction is violated.
+RenderingDeviceDriverMetal::Result<bool> RenderingDeviceDriverMetal::is_valid_linear(TextureFormat const &p_format) const {
+	if (!flags::any(p_format.usage_bits, TEXTURE_USAGE_CPU_READ_BIT)) {
+		return false;
+	}
+
+	PixelFormats &pf = *pixel_formats;
+	MTLFormatType ft = pf.getFormatType(p_format.format);
+
+	// Requesting a linear format, which has further restrictions, similar to Vulkan
+	// when specifying VK_IMAGE_TILING_LINEAR.
+
+	ERR_FAIL_COND_V_MSG(p_format.texture_type != TEXTURE_TYPE_2D, ERR_CANT_CREATE, "Linear (TEXTURE_USAGE_CPU_READ_BIT) textures must be 2D");
+	// Reject the format types that cannot be linear. Each check must fire when the type IS
+	// the forbidden one; testing for inequality would reject every format, since no type
+	// can equal both DepthStencil and Compressed.
+	ERR_FAIL_COND_V_MSG(ft == MTLFormatType::DepthStencil, ERR_CANT_CREATE, "Linear (TEXTURE_USAGE_CPU_READ_BIT) textures must not be a depth/stencil format");
+	ERR_FAIL_COND_V_MSG(ft == MTLFormatType::Compressed, ERR_CANT_CREATE, "Linear (TEXTURE_USAGE_CPU_READ_BIT) textures must not be a compressed format");
+	ERR_FAIL_COND_V_MSG(p_format.mipmaps != 1, ERR_CANT_CREATE, "Linear (TEXTURE_USAGE_CPU_READ_BIT) textures must have 1 mipmap level");
+	ERR_FAIL_COND_V_MSG(p_format.array_layers != 1, ERR_CANT_CREATE, "Linear (TEXTURE_USAGE_CPU_READ_BIT) textures must have 1 array layer");
+	ERR_FAIL_COND_V_MSG(p_format.samples != TEXTURE_SAMPLES_1, ERR_CANT_CREATE, "Linear (TEXTURE_USAGE_CPU_READ_BIT) textures must have 1 sample");
+
+	return true;
+}
+
+// Creates a Metal texture for p_format with the component swizzle from p_view.
+// The work is done in four steps: fill in an MTLTextureDescriptor (type, pixel format, extent,
+// sample count, swizzle), translate the RD usage bits into storage mode and MTLTextureUsage,
+// decide whether the texture must be backed by an MTLBuffer, and finally allocate it.
+// Returns an empty TextureID (after logging) when the request is unsupported or allocation fails.
+RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p_format, const TextureView &p_view) {
+ MTLTextureDescriptor *desc = [MTLTextureDescriptor new];
+ desc.textureType = TEXTURE_TYPE[p_format.texture_type];
+
+ PixelFormats &formats = *pixel_formats;
+ desc.pixelFormat = formats.getMTLPixelFormat(p_format.format);
+ MTLFmtCaps format_caps = formats.getCapabilities(desc.pixelFormat);
+
+ desc.width = p_format.width;
+ desc.height = p_format.height;
+ desc.depth = p_format.depth;
+ desc.mipmapLevelCount = p_format.mipmaps;
+
+ // arrayLength is only assigned for array types. For cube arrays the RD layer count is
+ // divided by 6, i.e. the descriptor receives the number of cubes rather than faces.
+ if (p_format.texture_type == TEXTURE_TYPE_1D_ARRAY ||
+ p_format.texture_type == TEXTURE_TYPE_2D_ARRAY) {
+ desc.arrayLength = p_format.array_layers;
+ } else if (p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY) {
+ desc.arrayLength = p_format.array_layers / 6;
+ }
+
+ // TODO(sgc): Evaluate lossy texture support (perhaps as a project option?)
+ // https://developer.apple.com/videos/play/tech-talks/10876?time=459
+ // desc.compressionType = MTLTextureCompressionTypeLossy;
+
+ // Multisampling: use the nearest sample count the device supports. Only 2D and 2D array
+ // textures are switched to their multisample variants; any other type keeps the
+ // single-sample descriptor and only emits a warning.
+ if (p_format.samples > TEXTURE_SAMPLES_1) {
+ SampleCount supported = (*metal_device_properties).find_nearest_supported_sample_count(p_format.samples);
+
+ if (supported > SampleCount1) {
+ bool ok = p_format.texture_type == TEXTURE_TYPE_2D || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY;
+ if (ok) {
+ switch (p_format.texture_type) {
+ case TEXTURE_TYPE_2D:
+ desc.textureType = MTLTextureType2DMultisample;
+ break;
+ case TEXTURE_TYPE_2D_ARRAY:
+ desc.textureType = MTLTextureType2DMultisampleArray;
+ break;
+ default:
+ break;
+ }
+ desc.sampleCount = (NSUInteger)supported;
+ if (p_format.mipmaps > 1) {
+ // For a buffer-backed or multi-sample texture, the value must be 1.
+ WARN_PRINT("mipmaps == 1 for multi-sample textures");
+ desc.mipmapLevelCount = 1;
+ }
+ } else {
+ WARN_PRINT("Unsupported multi-sample texture type; disabling multi-sample");
+ }
+ }
+ }
+
+ // Table from RD swizzle value to Metal swizzle. Slot 0 (identity) holds a placeholder that is
+ // never read: the expressions below substitute each channel's own component for identity.
+ static const MTLTextureSwizzle COMPONENT_SWIZZLE[TEXTURE_SWIZZLE_MAX] = {
+ static_cast<MTLTextureSwizzle>(255), // IDENTITY
+ MTLTextureSwizzleZero,
+ MTLTextureSwizzleOne,
+ MTLTextureSwizzleRed,
+ MTLTextureSwizzleGreen,
+ MTLTextureSwizzleBlue,
+ MTLTextureSwizzleAlpha,
+ };
+
+ MTLTextureSwizzleChannels swizzle = MTLTextureSwizzleChannelsMake(
+ p_view.swizzle_r != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_r] : MTLTextureSwizzleRed,
+ p_view.swizzle_g != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_g] : MTLTextureSwizzleGreen,
+ p_view.swizzle_b != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_b] : MTLTextureSwizzleBlue,
+ p_view.swizzle_a != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_a] : MTLTextureSwizzleAlpha);
+
+ // Represents a swizzle operation that is a no-op.
+ static MTLTextureSwizzleChannels IDENTITY_SWIZZLE = {
+ .red = MTLTextureSwizzleRed,
+ .green = MTLTextureSwizzleGreen,
+ .blue = MTLTextureSwizzleBlue,
+ .alpha = MTLTextureSwizzleAlpha,
+ };
+
+ // The descriptor's swizzle is only touched when the requested swizzle differs from the no-op.
+ bool no_swizzle = memcmp(&IDENTITY_SWIZZLE, &swizzle, sizeof(MTLTextureSwizzleChannels)) == 0;
+ if (!no_swizzle) {
+ desc.swizzle = swizzle;
+ }
+
+ // Usage.
+ // CPU-readable textures use shared storage; everything else is private.
+ MTLResourceOptions options = MTLResourceCPUCacheModeDefaultCache | MTLResourceHazardTrackingModeTracked;
+ if (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) {
+ options |= MTLResourceStorageModeShared;
+ } else {
+ options |= MTLResourceStorageModePrivate;
+ }
+ desc.resourceOptions = options;
+
+ if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) {
+ desc.usage |= MTLTextureUsageShaderRead;
+ }
+
+ if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) {
+ desc.usage |= MTLTextureUsageShaderWrite;
+ }
+
+ if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_ATOMIC_BIT) {
+ desc.usage |= MTLTextureUsageShaderWrite;
+ }
+
+ // Render-target usage is only requested when the pixel format reports color or
+ // depth/stencil attachment capability.
+ bool can_be_attachment = flags::any(format_caps, (kMTLFmtCapsColorAtt | kMTLFmtCapsDSAtt));
+
+ if (flags::any(p_format.usage_bits, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
+ can_be_attachment) {
+ desc.usage |= MTLTextureUsageRenderTarget;
+ }
+
+ if (p_format.usage_bits & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT) {
+ desc.usage |= MTLTextureUsageShaderRead;
+ }
+
+ // VRS attachments are rejected outright.
+ if (p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) {
+ ERR_FAIL_V_MSG(RDD::TextureID(), "unsupported: TEXTURE_USAGE_VRS_ATTACHMENT_BIT");
+ }
+
+ if (flags::any(p_format.usage_bits, TEXTURE_USAGE_CAN_UPDATE_BIT | TEXTURE_USAGE_CAN_COPY_TO_BIT) &&
+ can_be_attachment && no_swizzle) {
+ // Per MoltenVK, can be cleared as a render attachment.
+ desc.usage |= MTLTextureUsageRenderTarget;
+ }
+ if (p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_FROM_BIT) {
+ // Covered by blits.
+ }
+
+ // Create texture views with a different component layout.
+ if (!p_format.shareable_formats.is_empty()) {
+ desc.usage |= MTLTextureUsagePixelFormatView;
+ }
+
+ // Allocate memory.
+
+ // is_valid_linear yields either a bool or an Error; an Error aborts creation here.
+ bool is_linear;
+ {
+ Result<bool> is_linear_or_err = is_valid_linear(p_format);
+ ERR_FAIL_COND_V(std::holds_alternative<Error>(is_linear_or_err), TextureID());
+ is_linear = std::get<bool>(is_linear_or_err);
+ }
+
+ // Check if it is a linear format for atomic operations and therefore needs a buffer,
+ // as generally Metal does not support atomic operations on textures.
+ bool needs_buffer = is_linear || (p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D && flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && (p_format.format == DATA_FORMAT_R32_UINT || p_format.format == DATA_FORMAT_R32_SINT));
+
+ id<MTLTexture> obj = nil;
+ if (needs_buffer) {
+ // Linear textures are restricted to 2D textures, a single mipmap level and a single array layer.
+ // The row stride is padded up to the texel-buffer alignment required for the format.
+ MTLPixelFormat pixel_format = desc.pixelFormat;
+ size_t row_alignment = get_texel_buffer_alignment_for_format(p_format.format);
+ size_t bytes_per_row = formats.getBytesPerRow(pixel_format, p_format.width);
+ bytes_per_row = round_up_to_alignment(bytes_per_row, row_alignment);
+ size_t bytes_per_layer = formats.getBytesPerLayer(pixel_format, bytes_per_row, p_format.height);
+ size_t byte_count = bytes_per_layer * p_format.depth * p_format.array_layers;
+
+ // NOTE(review): `buf` is not checked for nil; messaging nil should yield a nil `obj`,
+ // which the ERR_FAIL_NULL_V_MSG below then reports - confirm this is the intended path.
+ id<MTLBuffer> buf = [device newBufferWithLength:byte_count options:options];
+ obj = [buf newTextureWithDescriptor:desc offset:0 bytesPerRow:bytes_per_row];
+ } else {
+ obj = [device newTextureWithDescriptor:desc];
+ }
+ ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create texture.");
+
+ return rid::make(obj);
+}
+
+// Not implemented in this driver: every call logs "not implemented" and returns an empty
+// TextureID. None of the arguments are inspected.
+RDD::TextureID RenderingDeviceDriverMetal::texture_create_from_extension(uint64_t p_native_texture, TextureType p_type, DataFormat p_format, uint32_t p_array_layers, bool p_depth_stencil) {
+ ERR_FAIL_V_MSG(RDD::TextureID(), "not implemented");
+}
+
+// Creates a texture view that spans all mip levels and all slices (0..arrayLength) of
+// p_original_texture, keeping its texture type but using the pixel format and component
+// swizzle requested in p_view.
+// Returns an empty TextureID (after logging) when Metal cannot create the view.
+RDD::TextureID RenderingDeviceDriverMetal::texture_create_shared(TextureID p_original_texture, const TextureView &p_view) {
+	id<MTLTexture> src_texture = rid::get(p_original_texture);
+
+#if DEV_ENABLED
+	if (src_texture.sampleCount > 1) {
+		// TODO(sgc): is it ok to create a shared texture from a multi-sample texture?
+		WARN_PRINT("Is it safe to create a shared texture from multi-sample texture?");
+	}
+#endif
+
+	MTLPixelFormat format = pixel_formats->getMTLPixelFormat(p_view.format);
+
+	// Indexed by the RD swizzle value. Slot 0 (identity) is a placeholder that is never read,
+	// because identity resolves to the channel's own component below.
+	static const MTLTextureSwizzle component_swizzle[TEXTURE_SWIZZLE_MAX] = {
+		static_cast<MTLTextureSwizzle>(255), // IDENTITY
+		MTLTextureSwizzleZero,
+		MTLTextureSwizzleOne,
+		MTLTextureSwizzleRed,
+		MTLTextureSwizzleGreen,
+		MTLTextureSwizzleBlue,
+		MTLTextureSwizzleAlpha,
+	};
+
+	// Resolves one channel: identity keeps the channel's own component, anything else is
+	// looked up in the table.
+	const auto resolve_channel = [](auto p_requested, MTLTextureSwizzle p_own_component) -> MTLTextureSwizzle {
+		if (p_requested == TEXTURE_SWIZZLE_IDENTITY) {
+			return p_own_component;
+		}
+		return component_swizzle[p_requested];
+	};
+
+	MTLTextureSwizzleChannels swizzle = MTLTextureSwizzleChannelsMake(
+			resolve_channel(p_view.swizzle_r, MTLTextureSwizzleRed),
+			resolve_channel(p_view.swizzle_g, MTLTextureSwizzleGreen),
+			resolve_channel(p_view.swizzle_b, MTLTextureSwizzleBlue),
+			resolve_channel(p_view.swizzle_a, MTLTextureSwizzleAlpha));
+
+	const NSRange all_levels = NSMakeRange(0, src_texture.mipmapLevelCount);
+	const NSRange all_slices = NSMakeRange(0, src_texture.arrayLength);
+	id<MTLTexture> obj = [src_texture newTextureViewWithPixelFormat:format
+														textureType:src_texture.textureType
+															 levels:all_levels
+															 slices:all_slices
+															swizzle:swizzle];
+	ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create shared texture");
+	return rid::make(obj);
+}
+
+// Creates a texture view over p_mipmaps mip levels starting at p_mipmap and p_layers slices
+// starting at p_layer of p_original_texture. The view's texture type follows p_slice_type
+// and its pixel format and component swizzle come from p_view.
+// Returns an empty TextureID (after logging) for TEXTURE_SLICE_MAX or when Metal cannot
+// create the view.
+RDD::TextureID RenderingDeviceDriverMetal::texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) {
+	id<MTLTexture> src_texture = rid::get(p_original_texture);
+
+	// Fallback view type, indexed by the source texture's MTLTextureType. It only survives
+	// when p_slice_type matches none of the cases handled below.
+	static const MTLTextureType VIEW_TYPES[] = {
+		MTLTextureType1D, // MTLTextureType1D
+		MTLTextureType1D, // MTLTextureType1DArray
+		MTLTextureType2D, // MTLTextureType2D
+		MTLTextureType2D, // MTLTextureType2DArray
+		MTLTextureType2D, // MTLTextureType2DMultisample
+		MTLTextureType2D, // MTLTextureTypeCube
+		MTLTextureType2D, // MTLTextureTypeCubeArray
+		MTLTextureType2D, // MTLTextureType3D
+		MTLTextureType2D, // MTLTextureType2DMultisampleArray
+	};
+
+	MTLTextureType textureType = VIEW_TYPES[src_texture.textureType];
+	switch (p_slice_type) {
+		case TEXTURE_SLICE_2D:
+			textureType = MTLTextureType2D;
+			break;
+		case TEXTURE_SLICE_3D:
+			textureType = MTLTextureType3D;
+			break;
+		case TEXTURE_SLICE_CUBEMAP:
+			textureType = MTLTextureTypeCube;
+			break;
+		case TEXTURE_SLICE_2D_ARRAY:
+			textureType = MTLTextureType2DArray;
+			break;
+		case TEXTURE_SLICE_MAX:
+			ERR_FAIL_V_MSG(TextureID(), "Invalid texture slice type");
+			break;
+	}
+
+	MTLPixelFormat format = pixel_formats->getMTLPixelFormat(p_view.format);
+
+	// Indexed by the RD swizzle value. Slot 0 (identity) is a placeholder that is never read,
+	// because identity resolves to the channel's own component below.
+	static const MTLTextureSwizzle component_swizzle[TEXTURE_SWIZZLE_MAX] = {
+		static_cast<MTLTextureSwizzle>(255), // IDENTITY
+		MTLTextureSwizzleZero,
+		MTLTextureSwizzleOne,
+		MTLTextureSwizzleRed,
+		MTLTextureSwizzleGreen,
+		MTLTextureSwizzleBlue,
+		MTLTextureSwizzleAlpha,
+	};
+
+	// Resolves one channel: identity keeps the channel's own component, anything else is
+	// looked up in the table.
+	const auto resolve_channel = [](auto p_requested, MTLTextureSwizzle p_own_component) -> MTLTextureSwizzle {
+		if (p_requested == TEXTURE_SWIZZLE_IDENTITY) {
+			return p_own_component;
+		}
+		return component_swizzle[p_requested];
+	};
+
+	MTLTextureSwizzleChannels swizzle = MTLTextureSwizzleChannelsMake(
+			resolve_channel(p_view.swizzle_r, MTLTextureSwizzleRed),
+			resolve_channel(p_view.swizzle_g, MTLTextureSwizzleGreen),
+			resolve_channel(p_view.swizzle_b, MTLTextureSwizzleBlue),
+			resolve_channel(p_view.swizzle_a, MTLTextureSwizzleAlpha));
+
+	const NSRange level_range = NSMakeRange(p_mipmap, p_mipmaps);
+	const NSRange slice_range = NSMakeRange(p_layer, p_layers);
+	id<MTLTexture> obj = [src_texture newTextureViewWithPixelFormat:format
+														textureType:textureType
+															 levels:level_range
+															 slices:slice_range
+															swizzle:swizzle];
+	ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create shared texture");
+	return rid::make(obj);
+}
+
+// Frees a texture by handing its ID to rid::release.
+void RenderingDeviceDriverMetal::texture_free(TextureID p_texture) {
+ rid::release(p_texture);
+}
+
+// Returns the allocatedSize reported by the MTLTexture behind p_texture.
+uint64_t RenderingDeviceDriverMetal::texture_get_allocation_size(TextureID p_texture) {
+	id<MTLTexture> texture = rid::get(p_texture);
+	const uint64_t allocated_bytes = [texture allocatedSize];
+	return allocated_bytes;
+}
+
+// Computes the byte layout of one subresource (mip level + layer) of p_texture, assuming the
+// texture data is laid out mip level by mip level, each level holding all of its layers, with
+// rows padded to the texel-buffer alignment of the pixel format.
+//
+// r_layout is reset and then filled with:
+// - offset: bytes from the start of the data to the requested layer of the requested mip level;
+// - size: bytes of one layer of that mip level (all depth slices);
+// - row_pitch / depth_pitch: padded bytes per row / per depth slice;
+// - layer_pitch: size multiplied by the texture's arrayLength.
+void RenderingDeviceDriverMetal::_get_sub_resource(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) const {
+	id<MTLTexture> obj = rid::get(p_texture);
+
+	*r_layout = {};
+
+	PixelFormats &pf = *pixel_formats;
+
+	size_t row_alignment = get_texel_buffer_alignment_for_format(obj.pixelFormat);
+	size_t offset = 0;
+	size_t array_layers = obj.arrayLength;
+	MTLSize size = MTLSizeMake(obj.width, obj.height, obj.depth);
+	MTLPixelFormat pixel_format = obj.pixelFormat;
+
+	// First skip over the mipmap levels.
+	for (uint32_t mipLvl = 0; mipLvl < p_subresource.mipmap; mipLvl++) {
+		MTLSize mip_size = mipmapLevelSizeFromSize(size, mipLvl);
+		size_t bytes_per_row = pf.getBytesPerRow(pixel_format, mip_size.width);
+		bytes_per_row = round_up_to_alignment(bytes_per_row, row_alignment);
+		size_t bytes_per_layer = pf.getBytesPerLayer(pixel_format, bytes_per_row, mip_size.height);
+		offset += bytes_per_layer * mip_size.depth * array_layers;
+	}
+
+	// Get current mipmap.
+	MTLSize mip_size = mipmapLevelSizeFromSize(size, p_subresource.mipmap);
+	size_t bytes_per_row = pf.getBytesPerRow(pixel_format, mip_size.width);
+	bytes_per_row = round_up_to_alignment(bytes_per_row, row_alignment);
+	size_t bytes_per_layer = pf.getBytesPerLayer(pixel_format, bytes_per_row, mip_size.height);
+	r_layout->size = bytes_per_layer * mip_size.depth;
+	// Skip the layers that precede the requested one. Subtracting 1 here would wrap the
+	// unsigned offset for layer 0 and land one byte short for every other layer.
+	r_layout->offset = offset + (r_layout->size * p_subresource.layer);
+	r_layout->depth_pitch = bytes_per_layer;
+	r_layout->row_pitch = bytes_per_row;
+	// NOTE(review): this is the size of the whole mip level rather than the distance between
+	// two consecutive layers - confirm that is what callers expect from layer_pitch.
+	r_layout->layer_pitch = r_layout->size * array_layers;
+}
+
+// Fills r_layout with the copy layout of one mip level of p_texture, using the generic image
+// size helpers. Only textures whose resource options include private storage are handled;
+// any other texture triggers CRASH_NOW_MSG.
+void RenderingDeviceDriverMetal::texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) {
+	id<MTLTexture> obj = rid::get(p_texture);
+	*r_layout = {};
+
+	if ((obj.resourceOptions & MTLResourceStorageModePrivate) == 0) {
+		CRASH_NOW_MSG("need to calculate layout for shared texture");
+		return;
+	}
+
+	const MTLSize base_size = MTLSizeMake(obj.width, obj.height, obj.depth);
+
+	PixelFormats &pf = *pixel_formats;
+	const DataFormat format = pf.getDataFormat(obj.pixelFormat);
+
+	// The offset of a mip level is the total size of all the levels that precede it.
+	if (p_subresource.mipmap > 0) {
+		r_layout->offset = get_image_format_required_size(format, base_size.width, base_size.height, base_size.depth, p_subresource.mipmap);
+	}
+
+	const MTLSize mip_size = mipmapLevelSizeFromSize(base_size, p_subresource.mipmap);
+
+	uint32_t block_width = 0, block_height = 0;
+	get_compressed_image_format_block_dimensions(format, block_width, block_height);
+	uint32_t size_block_width = 0, size_block_height = 0;
+	r_layout->size = get_image_format_required_size(format, mip_size.width, mip_size.height, mip_size.depth, 1, &size_block_width, &size_block_height);
+
+	// Pitches are derived by dividing the level size by the number of block rows, depth
+	// slices and array layers respectively.
+	r_layout->row_pitch = r_layout->size / ((size_block_height / block_height) * mip_size.depth);
+	r_layout->depth_pitch = r_layout->size / mip_size.depth;
+	r_layout->layer_pitch = r_layout->size / obj.arrayLength;
+}
+
+// Returns a CPU pointer to the start of p_subresource inside the MTLBuffer that backs
+// p_texture. Logs an error and returns nullptr when the texture was not created from a buffer.
+uint8_t *RenderingDeviceDriverMetal::texture_map(TextureID p_texture, const TextureSubresource &p_subresource) {
+	id<MTLTexture> obj = rid::get(p_texture);
+	ERR_FAIL_NULL_V_MSG(obj.buffer, nullptr, "texture is not created from a buffer");
+
+	// The byte offset of the subresource comes from _get_sub_resource, which is the single
+	// place where the mip/layer offset arithmetic lives.
+	TextureCopyableLayout layout;
+	_get_sub_resource(p_texture, p_subresource, &layout);
+
+	// TODO: Confirm with rendering team that there is no other way Godot may attempt to map a texture with multiple mipmaps or array layers.
+
+	// NOTE: It is not possible to create a buffer-backed texture with mipmaps or array layers,
+	// as noted in the is_valid_linear function, so the offset calculation SHOULD always be zero.
+
+	return (uint8_t *)(obj.buffer.contents) + layout.offset;
+}
+
+// Counterpart of texture_map. Intentionally empty: texture_map only returns a pointer into
+// the backing buffer's contents, so there is nothing to undo here.
+void RenderingDeviceDriverMetal::texture_unmap(TextureID p_texture) {
+ // Nothing to do.
+}
+
+// Reports which texture usage bits can be used with p_format.
+// A format without a Metal pixel format supports nothing (0). Otherwise every bit starts out
+// set and the ones the format's capabilities rule out are cleared; VRS is always cleared.
+// p_cpu_readable is not consulted.
+BitField<RDD::TextureUsageBits> RenderingDeviceDriverMetal::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) {
+	PixelFormats &pf = *pixel_formats;
+	if (pf.getMTLPixelFormat(p_format) == MTLPixelFormatInvalid) {
+		return 0;
+	}
+
+	MTLFmtCaps caps = pf.getCapabilities(p_format);
+
+	// Everything supported by default makes an all-or-nothing check easier for the caller.
+	BitField<RDD::TextureUsageBits> supported = INT64_MAX;
+	supported.clear_flag(TEXTURE_USAGE_VRS_ATTACHMENT_BIT); // No VRS support for Metal.
+
+	// Clears p_usage unless the format advertises p_capability.
+	const auto require_capability = [&](auto p_capability, auto p_usage) {
+		if (!flags::any(caps, p_capability)) {
+			supported.clear_flag(p_usage);
+		}
+	};
+
+	require_capability(kMTLFmtCapsColorAtt, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT);
+	require_capability(kMTLFmtCapsDSAtt, TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
+	require_capability(kMTLFmtCapsRead, TEXTURE_USAGE_SAMPLING_BIT);
+	require_capability(kMTLFmtCapsAtomic, TEXTURE_USAGE_STORAGE_ATOMIC_BIT);
+
+	return supported;
+}
+
+// Always answers "yes": any texture may be shared with `p_format`, and
+// `r_raw_reinterpretation` is always reported as false.
+bool RenderingDeviceDriverMetal::texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) {
+	r_raw_reinterpretation = false;
+	return true;
+}
+
+#pragma mark - Sampler
+
+// Translation tables from RD enum values to their Metal equivalents.
+// Each table is sized by the matching RD::*_MAX constant and is meant to be
+// indexed directly with the RD enum value, so the entry order matters.
+
+// Indexed by the RD compare operator (see sampler_create()).
+static const MTLCompareFunction COMPARE_OPERATORS[RD::COMPARE_OP_MAX] = {
+	MTLCompareFunctionNever,
+	MTLCompareFunctionLess,
+	MTLCompareFunctionEqual,
+	MTLCompareFunctionLessEqual,
+	MTLCompareFunctionGreater,
+	MTLCompareFunctionNotEqual,
+	MTLCompareFunctionGreaterEqual,
+	MTLCompareFunctionAlways,
+};
+
+// Sized for the RD stencil operations.
+static const MTLStencilOperation STENCIL_OPERATIONS[RD::STENCIL_OP_MAX] = {
+	MTLStencilOperationKeep,
+	MTLStencilOperationZero,
+	MTLStencilOperationReplace,
+	MTLStencilOperationIncrementClamp,
+	MTLStencilOperationDecrementClamp,
+	MTLStencilOperationInvert,
+	MTLStencilOperationIncrementWrap,
+	MTLStencilOperationDecrementWrap,
+};
+
+// Sized for the RD blend factors.
+static const MTLBlendFactor BLEND_FACTORS[RD::BLEND_FACTOR_MAX] = {
+	MTLBlendFactorZero,
+	MTLBlendFactorOne,
+	MTLBlendFactorSourceColor,
+	MTLBlendFactorOneMinusSourceColor,
+	MTLBlendFactorDestinationColor,
+	MTLBlendFactorOneMinusDestinationColor,
+	MTLBlendFactorSourceAlpha,
+	MTLBlendFactorOneMinusSourceAlpha,
+	MTLBlendFactorDestinationAlpha,
+	MTLBlendFactorOneMinusDestinationAlpha,
+	MTLBlendFactorBlendColor,
+	MTLBlendFactorOneMinusBlendColor,
+	MTLBlendFactorBlendAlpha,
+	MTLBlendFactorOneMinusBlendAlpha,
+	MTLBlendFactorSourceAlphaSaturated,
+	MTLBlendFactorSource1Color,
+	MTLBlendFactorOneMinusSource1Color,
+	MTLBlendFactorSource1Alpha,
+	MTLBlendFactorOneMinusSource1Alpha,
+};
+
+// Sized for the RD blend operations.
+static const MTLBlendOperation BLEND_OPERATIONS[RD::BLEND_OP_MAX] = {
+	MTLBlendOperationAdd,
+	MTLBlendOperationSubtract,
+	MTLBlendOperationReverseSubtract,
+	MTLBlendOperationMin,
+	MTLBlendOperationMax,
+};
+
+// Indexed by the RD sampler repeat mode (see sampler_create()).
+static const API_AVAILABLE(macos(11.0), ios(14.0)) MTLSamplerAddressMode ADDRESS_MODES[RD::SAMPLER_REPEAT_MODE_MAX] = {
+	MTLSamplerAddressModeRepeat,
+	MTLSamplerAddressModeMirrorRepeat,
+	MTLSamplerAddressModeClampToEdge,
+	MTLSamplerAddressModeClampToBorderColor,
+	MTLSamplerAddressModeMirrorClampToEdge,
+};
+
+// Indexed by the RD sampler border color (see sampler_create()).
+// Every Metal border color is listed twice, so each consecutive pair of RD
+// values resolves to the same Metal color.
+static const API_AVAILABLE(macos(11.0), ios(14.0)) MTLSamplerBorderColor SAMPLER_BORDER_COLORS[RD::SAMPLER_BORDER_COLOR_MAX] = {
+	MTLSamplerBorderColorTransparentBlack,
+	MTLSamplerBorderColorTransparentBlack,
+	MTLSamplerBorderColorOpaqueBlack,
+	MTLSamplerBorderColorOpaqueBlack,
+	MTLSamplerBorderColorOpaqueWhite,
+	MTLSamplerBorderColorOpaqueWhite,
+};
+
+// Builds a Metal sampler state from the API-agnostic `p_state`.
+//
+// Filters, address modes, compare function, LOD clamps, border color and
+// coordinate normalization are translated one-to-one. LOD bias has no
+// counterpart here and only produces a verbose warning when it is non-zero.
+//
+// Returns a default-constructed SamplerID (after logging an error) when Metal
+// fails to create the sampler state.
+RDD::SamplerID RenderingDeviceDriverMetal::sampler_create(const SamplerState &p_state) {
+	MTLSamplerDescriptor *desc = [MTLSamplerDescriptor new];
+	desc.supportArgumentBuffers = YES;
+
+	desc.magFilter = p_state.mag_filter == SAMPLER_FILTER_LINEAR ? MTLSamplerMinMagFilterLinear : MTLSamplerMinMagFilterNearest;
+	desc.minFilter = p_state.min_filter == SAMPLER_FILTER_LINEAR ? MTLSamplerMinMagFilterLinear : MTLSamplerMinMagFilterNearest;
+	desc.mipFilter = p_state.mip_filter == SAMPLER_FILTER_LINEAR ? MTLSamplerMipFilterLinear : MTLSamplerMipFilterNearest;
+
+	desc.sAddressMode = ADDRESS_MODES[p_state.repeat_u];
+	desc.tAddressMode = ADDRESS_MODES[p_state.repeat_v];
+	desc.rAddressMode = ADDRESS_MODES[p_state.repeat_w];
+
+	if (p_state.use_anisotropy) {
+		// Metal only accepts maxAnisotropy values between 1 and 16 (inclusive).
+		// Keep out-of-range requests inside that range rather than handing Metal
+		// an invalid descriptor.
+		auto max_anisotropy = p_state.anisotropy_max;
+		if (max_anisotropy < 1) {
+			max_anisotropy = 1;
+		} else if (max_anisotropy > 16) {
+			max_anisotropy = 16;
+		}
+		desc.maxAnisotropy = max_anisotropy;
+	}
+
+	desc.compareFunction = COMPARE_OPERATORS[p_state.compare_op];
+
+	desc.lodMinClamp = p_state.min_lod;
+	desc.lodMaxClamp = p_state.max_lod;
+
+	desc.borderColor = SAMPLER_BORDER_COLORS[p_state.border_color];
+
+	desc.normalizedCoordinates = !p_state.unnormalized_uvw;
+
+	if (p_state.lod_bias != 0.0) {
+		WARN_VERBOSE("Metal does not support LOD bias for samplers.");
+	}
+
+	id<MTLSamplerState> obj = [device newSamplerStateWithDescriptor:desc];
+	ERR_FAIL_NULL_V_MSG(obj, SamplerID(), "newSamplerStateWithDescriptor failed");
+	return rid::make(obj);
+}
+
+// Frees a sampler by handing its ID to rid::release().
+void RenderingDeviceDriverMetal::sampler_free(SamplerID p_sampler) {
+	rid::release(p_sampler);
+}
+
+// Tells whether textures of `p_format` can be sampled with `p_filter`.
+// Nearest filtering is always accepted; linear filtering requires the format
+// to report the filter capability. Any other filter value is rejected.
+bool RenderingDeviceDriverMetal::sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_filter) {
+	switch (p_filter) {
+		case SAMPLER_FILTER_NEAREST:
+			return true;
+		case SAMPLER_FILTER_LINEAR: {
+			MTLFmtCaps caps = pixel_formats->getCapabilities(p_format);
+			return flags::any(caps, kMTLFmtCapsFilter);
+		}
+	}
+
+	// Reached only for a filter value outside the cases above. Returning here
+	// avoids flowing off the end of a non-void function, which is undefined
+	// behavior.
+	return false;
+}
+
+#pragma mark - Vertex Array
+
+// Creates a Metal vertex descriptor from the given attribute list.
+//
+// Each attribute is written at its shader `location`, while its buffer layout
+// uses the Metal buffer index derived from the attribute's position in
+// `p_vertex_attribs`. A stride of 0 produces a constant-step layout.
+//
+// Fails with a default-constructed VertexFormatID if any attribute uses a format
+// whose vertex size is 0.
+RDD::VertexFormatID RenderingDeviceDriverMetal::vertex_format_create(VectorView<VertexAttribute> p_vertex_attribs) {
+	MTLVertexDescriptor *desc = MTLVertexDescriptor.vertexDescriptor;
+
+	for (uint32_t i = 0; i < p_vertex_attribs.size(); i++) {
+		VertexAttribute const &attrib = p_vertex_attribs[i];
+
+		ERR_FAIL_COND_V_MSG(get_format_vertex_size(attrib.format) == 0, VertexFormatID(),
+				"Data format for attachment (" + itos(i) + "), '" + FORMAT_NAMES[attrib.format] + "', is not valid for a vertex array.");
+
+		// Attribute description, addressed by shader location.
+		MTLVertexAttributeDescriptor *mtl_attrib = desc.attributes[attrib.location];
+		mtl_attrib.format = pixel_formats->getMTLVertexFormat(attrib.format);
+		mtl_attrib.offset = attrib.offset;
+		const uint32_t buffer_index = get_metal_buffer_index_for_vertex_attribute_binding(i);
+		mtl_attrib.bufferIndex = buffer_index;
+
+		// Buffer layout, addressed by the Metal buffer index.
+		MTLVertexBufferLayoutDescriptor *layout = desc.layouts[buffer_index];
+		if (attrib.stride != 0) {
+			layout.stepFunction = attrib.frequency == VERTEX_FREQUENCY_VERTEX ? MTLVertexStepFunctionPerVertex : MTLVertexStepFunctionPerInstance;
+			layout.stepRate = 1;
+			layout.stride = attrib.stride;
+		} else {
+			// No stride: the same value is used for every vertex.
+			layout.stepFunction = MTLVertexStepFunctionConstant;
+			layout.stepRate = 0;
+			layout.stride = pixel_formats->getBytesPerBlock(attrib.format);
+		}
+	}
+
+	return rid::make(desc);
+}
+
+// Frees a vertex format by handing its ID to rid::release().
+void RenderingDeviceDriverMetal::vertex_format_free(VertexFormatID p_vertex_format) {
+	rid::release(p_vertex_format);
+}
+
+#pragma mark - Barriers
+
+// Pipeline barriers are not implemented by this driver: every argument is
+// ignored and a one-time warning is printed instead.
+void RenderingDeviceDriverMetal::command_pipeline_barrier(
+		CommandBufferID p_cmd_buffer,
+		BitField<PipelineStageBits> p_src_stages,
+		BitField<PipelineStageBits> p_dst_stages,
+		VectorView<MemoryBarrier> p_memory_barriers,
+		VectorView<BufferBarrier> p_buffer_barriers,
+		VectorView<TextureBarrier> p_texture_barriers) {
+	WARN_PRINT_ONCE("not implemented");
+}
+
+#pragma mark - Fences
+
+// Allocates a new Fence and returns its address wrapped in a FenceID.
+// The fence must later be destroyed with fence_free().
+RDD::FenceID RenderingDeviceDriverMetal::fence_create() {
+	Fence *new_fence = memnew(Fence);
+	return FenceID(new_fence);
+}
+
+// Blocks the calling thread until the fence's semaphore is signaled.
+// Always returns OK.
+Error RenderingDeviceDriverMetal::fence_wait(FenceID p_fence) {
+	Fence *waited_fence = (Fence *)(p_fence.id);
+
+	// There is no timeout, which is what makes this function infallible.
+	dispatch_semaphore_wait(waited_fence->semaphore, DISPATCH_TIME_FOREVER);
+
+	return OK;
+}
+
+// Destroys a fence previously returned by fence_create().
+void RenderingDeviceDriverMetal::fence_free(FenceID p_fence) {
+	memdelete((Fence *)(p_fence.id));
+}
+
+#pragma mark - Semaphores
+
+// Returns a placeholder semaphore; no backing object is created.
+RDD::SemaphoreID RenderingDeviceDriverMetal::semaphore_create() {
+	// Metal doesn't use semaphores, as their purpose within Godot is to ensure ordering of command buffer execution.
+	// The constant ID below only serves as a non-zero handle.
+	return SemaphoreID(1);
+}
+
+// Semaphores are placeholders (see semaphore_create()), so there is nothing to release.
+void RenderingDeviceDriverMetal::semaphore_free(SemaphoreID p_semaphore) {
+	// Intentionally empty.
+}
+
+#pragma mark - Queues
+
+// Picks a queue family ID for the requested capabilities.
+// Graphics wins whenever it is requested or a surface is supplied; otherwise
+// compute is preferred over transfer. With no recognized request, a
+// default-constructed CommandQueueFamilyID is returned.
+RDD::CommandQueueFamilyID RenderingDeviceDriverMetal::command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface) {
+	const bool wants_graphics = p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_GRAPHICS_BIT) || (p_surface != 0);
+	if (wants_graphics) {
+		return CommandQueueFamilyID(COMMAND_QUEUE_FAMILY_GRAPHICS_BIT);
+	}
+
+	if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_COMPUTE_BIT)) {
+		return CommandQueueFamilyID(COMMAND_QUEUE_FAMILY_COMPUTE_BIT);
+	}
+
+	if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_TRANSFER_BIT)) {
+		return CommandQueueFamilyID(COMMAND_QUEUE_FAMILY_TRANSFER_BIT);
+	}
+
+	return CommandQueueFamilyID();
+}
+
+// Returns a constant placeholder queue ID; both arguments are ignored and no
+// object is created.
+RDD::CommandQueueID RenderingDeviceDriverMetal::command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue) {
+	return CommandQueueID(1);
+}
+
+// Commits the given command buffers in order and presents the given swap chains.
+//
+// All buffers but the last are committed immediately. The last buffer gets the
+// fence completion handler (when a fence is supplied) and the present requests
+// before it is committed. The two semaphore lists are ignored.
+//
+// Returns OK in every case, including when no command buffers are supplied
+// (in which case nothing is done at all).
+Error RenderingDeviceDriverMetal::command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID>, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID>, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) {
+	const uint32_t buffer_count = p_cmd_buffers.size();
+	if (buffer_count == 0) {
+		return OK;
+	}
+
+	const uint32_t last_index = buffer_count - 1;
+	for (uint32_t i = 0; i < last_index; i++) {
+		MDCommandBuffer *early_buffer = (MDCommandBuffer *)(p_cmd_buffers[i].id);
+		early_buffer->commit();
+	}
+
+	// The last command buffer will signal the fence and semaphores.
+	MDCommandBuffer *last_buffer = (MDCommandBuffer *)(p_cmd_buffers[last_index].id);
+	Fence *fence = (Fence *)(p_cmd_fence.id);
+	if (fence != nullptr) {
+		[last_buffer->get_command_buffer() addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
+			dispatch_semaphore_signal(fence->semaphore);
+		}];
+	}
+
+	const uint32_t swap_chain_count = p_swap_chains.size();
+	for (uint32_t i = 0; i < swap_chain_count; i++) {
+		SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id);
+		RenderingContextDriverMetal::Surface *surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface);
+		surface->present(last_buffer);
+	}
+
+	last_buffer->commit();
+
+	if (swap_chain_count > 0) {
+		// Presenting is used as the signal that a frame has ended, so the
+		// device scope is closed and reopened here.
+		[device_scope endScope];
+		[device_scope beginScope];
+	}
+
+	return OK;
+}
+
+// Queue IDs are placeholders (see command_queue_create()), so nothing is released here.
+void RenderingDeviceDriverMetal::command_queue_free(CommandQueueID p_cmd_queue) {
+	// Intentionally empty.
+}
+
+#pragma mark - Command Buffers
+
+// ----- POOL -----
+
+// Returns a pool ID that wraps the driver's `device_queue`.
+// Only primary command buffer types are expected (checked in dev builds).
+RDD::CommandPoolID RenderingDeviceDriverMetal::command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) {
+	DEV_ASSERT(p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY);
+	return rid::make(device_queue);
+}
+
+// Frees a command pool by handing its ID to rid::release().
+void RenderingDeviceDriverMetal::command_pool_free(CommandPoolID p_cmd_pool) {
+	rid::release(p_cmd_pool);
+}
+
+// ----- BUFFER -----
+
+// Creates an MDCommandBuffer on the queue wrapped by `p_cmd_pool`, records it
+// in `command_buffers`, and returns its address as the command buffer ID.
+RDD::CommandBufferID RenderingDeviceDriverMetal::command_buffer_create(CommandPoolID p_cmd_pool) {
+	id<MTLCommandQueue> pool_queue = rid::get(p_cmd_pool);
+	MDCommandBuffer *cmd_buffer = new MDCommandBuffer(pool_queue, this);
+	command_buffers.push_back(cmd_buffer);
+	return CommandBufferID(cmd_buffer);
+}
+
+// Begins recording on the given command buffer. Always returns true.
+bool RenderingDeviceDriverMetal::command_buffer_begin(CommandBufferID p_cmd_buffer) {
+	MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffer.id);
+	cmd_buffer->begin();
+	return true;
+}
+
+// Secondary command buffers are not implemented: logs an error and returns false.
+bool RenderingDeviceDriverMetal::command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) {
+	ERR_FAIL_V_MSG(false, "not implemented");
+}
+
+// Ends recording on the given command buffer.
+void RenderingDeviceDriverMetal::command_buffer_end(CommandBufferID p_cmd_buffer) {
+	MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffer.id);
+	cmd_buffer->end();
+}
+
+// Secondary command buffers are not implemented: logs an error and does nothing else.
+void RenderingDeviceDriverMetal::command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) {
+	ERR_FAIL_MSG("not implemented");
+}
+
+#pragma mark - Swap Chain
+
+// Releases what the swap chain holds; currently this only forwards to
+// _swap_chain_release_buffers().
+void RenderingDeviceDriverMetal::_swap_chain_release(SwapChain *p_swap_chain) {
+	_swap_chain_release_buffers(p_swap_chain);
+}
+
+// Performs no work: this function has nothing to release for the swap chain.
+void RenderingDeviceDriverMetal::_swap_chain_release_buffers(SwapChain *p_swap_chain) {
+	// Intentionally empty.
+}
+
+// Creates a swap chain for `p_surface` together with the render pass used to
+// draw into it. The render pass has a single color attachment (1 sample,
+// cleared on load, stored on completion) in the surface's data format.
+//
+// Returns a default-constructed SwapChainID if the render pass cannot be created.
+RDD::SwapChainID RenderingDeviceDriverMetal::swap_chain_create(RenderingContextDriver::SurfaceID p_surface) {
+	RenderingContextDriverMetal::Surface const *metal_surface = (RenderingContextDriverMetal::Surface *)(p_surface);
+
+	// Describe the single color attachment the swap chain renders to.
+	RDD::Attachment color_attachment;
+	color_attachment.format = pixel_formats->getDataFormat(metal_surface->get_pixel_format());
+	color_attachment.samples = RDD::TEXTURE_SAMPLES_1;
+	color_attachment.load_op = RDD::ATTACHMENT_LOAD_OP_CLEAR;
+	color_attachment.store_op = RDD::ATTACHMENT_STORE_OP_STORE;
+
+	// One subpass that references attachment 0 as its color target.
+	RDD::AttachmentReference color_ref;
+	color_ref.attachment = 0;
+	color_ref.aspect.set_flag(RDD::TEXTURE_ASPECT_COLOR_BIT);
+
+	RDD::Subpass subpass;
+	subpass.color_references.push_back(color_ref);
+
+	RenderPassID render_pass = render_pass_create(color_attachment, subpass, {}, 1);
+	ERR_FAIL_COND_V(!render_pass, SwapChainID());
+
+	// The swap chain stays empty until it is resized.
+	SwapChain *swap_chain = memnew(SwapChain);
+	swap_chain->surface = p_surface;
+	swap_chain->data_format = color_attachment.format;
+	swap_chain->render_pass = render_pass;
+	return SwapChainID(swap_chain);
+}
+
+// Resizes the surface behind `p_swap_chain` for the desired framebuffer count
+// and clears the surface's "needs resize" flag. Always returns OK.
+Error RenderingDeviceDriverMetal::swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) {
+	DEV_ASSERT(p_cmd_queue.id != 0);
+	DEV_ASSERT(p_swap_chain.id != 0);
+
+	SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);
+
+	RenderingContextDriverMetal::Surface *metal_surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface);
+	metal_surface->resize(p_desired_framebuffer_count);
+
+	// The resize has been carried out, so the surface no longer needs one.
+	context_driver->surface_set_needs_resize(swap_chain->surface, false);
+
+	return OK;
+}
+
+// Acquires the next framebuffer of the swap chain's surface.
+// If the surface is flagged as needing a resize, `r_resize_required` is set to
+// true and a default-constructed FramebufferID is returned instead; otherwise
+// `r_resize_required` is left untouched.
+RDD::FramebufferID RenderingDeviceDriverMetal::swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) {
+	DEV_ASSERT(p_cmd_queue.id != 0);
+	DEV_ASSERT(p_swap_chain.id != 0);
+
+	SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);
+
+	const bool needs_resize = context_driver->surface_get_needs_resize(swap_chain->surface);
+	if (needs_resize) {
+		r_resize_required = true;
+		return FramebufferID();
+	}
+
+	RenderingContextDriverMetal::Surface *surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface);
+	return surface->acquire_next_frame_buffer();
+}
+
+// Returns the render pass stored in the swap chain (set by swap_chain_create()).
+RDD::RenderPassID RenderingDeviceDriverMetal::swap_chain_get_render_pass(SwapChainID p_swap_chain) {
+	return ((const SwapChain *)(p_swap_chain.id))->render_pass;
+}
+
+// Returns the data format stored in the swap chain (set by swap_chain_create()).
+RDD::DataFormat RenderingDeviceDriverMetal::swap_chain_get_format(SwapChainID p_swap_chain) {
+	return ((const SwapChain *)(p_swap_chain.id))->data_format;
+}
+
+// Destroys a swap chain: releases what it holds, frees its render pass, then
+// deletes the SwapChain object itself.
+void RenderingDeviceDriverMetal::swap_chain_free(SwapChainID p_swap_chain) {
+	SwapChain *doomed = (SwapChain *)(p_swap_chain.id);
+	_swap_chain_release(doomed);
+	render_pass_free(doomed->render_pass);
+	memdelete(doomed);
+}
+
+#pragma mark - Frame buffer
+
+// Creates a framebuffer of `p_width` x `p_height` from the given attachment
+// textures. Attachment `i` is compared against attachment `i` of the render
+// pass; in dev builds, a nil texture or a sample count mismatch on a
+// multisampled attachment produces a warning, but never aborts creation.
+//
+// The returned ID wraps a heap-allocated MDFrameBuffer; destroy it with
+// framebuffer_free().
+RDD::FramebufferID RenderingDeviceDriverMetal::framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height) {
+	MDRenderPass *render_pass = (MDRenderPass *)(p_render_pass.id);
+
+	Vector<MTL::Texture> textures;
+	textures.resize(p_attachments.size());
+
+	for (uint32_t i = 0; i < p_attachments.size(); i++) {
+		MDAttachment const &expected = render_pass->attachments[i];
+		id<MTLTexture> texture = rid::get(p_attachments[i]);
+
+		if (texture == nil) {
+#if DEV_ENABLED
+			WARN_PRINT("Invalid texture for attachment " + itos(i));
+#endif
+		}
+
+		// Only multisampled attachments have their sample count verified.
+		if (expected.samples > 1 && texture.sampleCount != expected.samples) {
+#if DEV_ENABLED
+			WARN_PRINT("Mismatched sample count for attachment " + itos(i) + "; expected " + itos(expected.samples) + ", got " + itos(texture.sampleCount));
+#endif
+		}
+
+		textures.write[i] = texture;
+	}
+
+	MDFrameBuffer *framebuffer = new MDFrameBuffer(textures, Size2i(p_width, p_height));
+	return FramebufferID(framebuffer);
+}
+
+// Deletes the MDFrameBuffer allocated by framebuffer_create().
+void RenderingDeviceDriverMetal::framebuffer_free(FramebufferID p_framebuffer) {
+	delete (MDFrameBuffer *)(p_framebuffer.id);
+}
+
+#pragma mark - Shader
+
+// Version of the serialized shader binary layout. It is embedded in the key
+// returned by shader_get_binary_cache_key(), so changing it changes that key.
+const uint32_t SHADER_BINARY_VERSION = 1;
+
+// region Serialization
+
+class BufWriter;
+
+// A type is Serializable when it can report its serialized size as a size_t
+// and can write itself into a BufWriter.
+template <typename T>
+concept Serializable = requires(T t, BufWriter &p_writer) {
+	{ t.serialize_size() } -> std::same_as<size_t>;
+	{ t.serialize(p_writer) } -> std::same_as<void>;
+};
+
+// Sequential binary writer over a caller-provided, fixed-size byte buffer.
+//
+// Every write appends at the current position and advances it. The writer never
+// grows the buffer: capacity is only verified with DEV_ASSERT, so the caller is
+// responsible for supplying a buffer that is large enough.
+//
+// Encoding summary:
+// - uint32_t / int / RD::ShaderStage: 4 bytes via encode_uint32().
+// - bool: a single byte (1 or 0).
+// - uint64_t / float / double: via the matching encode_*() helper.
+// - Strings and byte vectors: a uint32_t length followed by the raw bytes.
+// - Other vectors and hash maps: a uint32_t element count followed by the elements.
+class BufWriter {
+	uint8_t *data = nullptr;
+	uint64_t length = 0; // Length of data.
+	uint64_t pos = 0;
+
+public:
+	BufWriter(uint8_t *p_data, uint64_t p_length) :
+			data(p_data), length(p_length) {}
+
+	// Delegates to the value's own serialize().
+	template <Serializable T>
+	void write(T const &p_value) {
+		p_value.serialize(*this);
+	}
+
+	_FORCE_INLINE_ void write(uint32_t p_value) {
+		DEV_ASSERT(pos + sizeof(uint32_t) <= length);
+		pos += encode_uint32(p_value, data + pos);
+	}
+
+	_FORCE_INLINE_ void write(RD::ShaderStage p_value) {
+		write((uint32_t)p_value);
+	}
+
+	// Booleans occupy exactly one byte.
+	_FORCE_INLINE_ void write(bool p_value) {
+		DEV_ASSERT(pos + sizeof(uint8_t) <= length);
+		data[pos] = p_value ? 1 : 0;
+		pos++;
+	}
+
+	_FORCE_INLINE_ void write(int p_value) {
+		write((uint32_t)p_value);
+	}
+
+	_FORCE_INLINE_ void write(uint64_t p_value) {
+		DEV_ASSERT(pos + sizeof(uint64_t) <= length);
+		pos += encode_uint64(p_value, data + pos);
+	}
+
+	_FORCE_INLINE_ void write(float p_value) {
+		DEV_ASSERT(pos + sizeof(float) <= length);
+		pos += encode_float(p_value, data + pos);
+	}
+
+	_FORCE_INLINE_ void write(double p_value) {
+		DEV_ASSERT(pos + sizeof(double) <= length);
+		pos += encode_double(p_value, data + pos);
+	}
+
+	// Writes the string ZSTD-compressed, laid out as:
+	// uncompressed size (uint32), compressed size (uint32), compressed bytes.
+	void write_compressed(CharString const &p_string) {
+		write(p_string.length()); // Uncompressed size.
+
+		DEV_ASSERT(pos + sizeof(uint32_t) + Compression::get_max_compressed_buffer_size(p_string.length(), Compression::MODE_ZSTD) <= length);
+
+		// The compressed size is only known after compressing, so reserve its
+		// slot now and fill it in afterwards.
+		uint8_t *compressed_size_slot = data + pos;
+		pos += sizeof(uint32_t);
+
+		int compressed_size = Compression::compress(data + pos, reinterpret_cast<uint8_t const *>(p_string.ptr()), p_string.length(), Compression::MODE_ZSTD);
+		encode_uint32(compressed_size, compressed_size_slot);
+		pos += compressed_size;
+	}
+
+	// Length-prefixed string; no terminating NUL is written.
+	void write(CharString const &p_string) {
+		write_buffer(reinterpret_cast<const uint8_t *>(p_string.ptr()), p_string.length());
+	}
+
+	// Element count followed by every element, in order.
+	template <typename T>
+	void write(VectorView<T> p_vector) {
+		write(p_vector.size());
+		for (uint32_t i = 0; i < p_vector.size(); i++) {
+			T const &element = p_vector[i];
+			write(element);
+		}
+	}
+
+	// Byte vectors are copied as one block rather than element by element.
+	void write(VectorView<uint8_t> p_vector) {
+		write_buffer(p_vector.ptr(), p_vector.size());
+	}
+
+	// Entry count followed by alternating key / value pairs.
+	template <typename K, typename V>
+	void write(HashMap<K, V> const &p_map) {
+		write(p_map.size());
+		for (KeyValue<K, V> const &entry : p_map) {
+			write(entry.key);
+			write(entry.value);
+		}
+	}
+
+	// Number of bytes written so far.
+	uint64_t get_pos() const {
+		return pos;
+	}
+
+	// Total capacity of the underlying buffer.
+	uint64_t get_length() const {
+		return length;
+	}
+
+private:
+	// Writes a uint32_t length prefix followed by `p_length` raw bytes.
+	void write_buffer(uint8_t const *p_buffer, uint32_t p_length) {
+		write(p_length);
+
+		DEV_ASSERT(pos + p_length <= length);
+		memcpy(data + pos, p_buffer, p_length);
+		pos += p_length;
+	}
+};
+
+class BufReader;
+
+// A type is Deserializable when it can report its serialized size as a size_t
+// and can populate itself from a BufReader.
+template <typename T>
+concept Deserializable = requires(T t, BufReader &p_reader) {
+	{ t.serialize_size() } -> std::same_as<size_t>;
+	{ t.deserialize(p_reader) } -> std::same_as<void>;
+};
+
+// Sequential binary reader over a caller-provided byte buffer; the counterpart
+// of BufWriter.
+//
+// Errors are reported through `status` rather than return values. The status is
+// sticky: once a read fails, every later bounds check fails as well, so the
+// remaining reads are skipped and the caller can check `status` once at the end.
+// Scalar destinations are left untouched when a read is skipped.
+class BufReader {
+	uint8_t const *data = nullptr;
+	uint64_t length = 0;
+	uint64_t pos = 0;
+
+	// Returns true when `p_size` more bytes can be consumed.
+	// Fails if a previous read already failed, or flags SHORT_BUFFER when the
+	// buffer does not hold that many remaining bytes.
+	bool check_length(size_t p_size) {
+		if (status != Status::OK) {
+			return false;
+		}
+
+		if (pos + p_size > length) {
+			status = Status::SHORT_BUFFER;
+			return false;
+		}
+		return true;
+	}
+
+#define CHECK(p_size)          \
+	if (!check_length(p_size)) \
+	return
+
+public:
+	enum class Status {
+		OK,
+		SHORT_BUFFER,
+		BAD_COMPRESSION,
+	};
+
+	Status status = Status::OK;
+
+	BufReader(uint8_t const *p_data, uint64_t p_length) :
+			data(p_data), length(p_length) {}
+
+	// Delegates to the value's own deserialize().
+	template <Deserializable T>
+	void read(T &p_value) {
+		p_value.deserialize(*this);
+	}
+
+	_FORCE_INLINE_ void read(uint32_t &p_val) {
+		CHECK(sizeof(uint32_t));
+
+		p_val = decode_uint32(data + pos);
+		pos += sizeof(uint32_t);
+	}
+
+	_FORCE_INLINE_ void read(RD::ShaderStage &p_val) {
+		// Zero-initialized so a skipped read never leaves an indeterminate value.
+		uint32_t val = 0;
+		read(val);
+		if (status != Status::OK) {
+			// Like the other scalar reads, leave the destination untouched on failure.
+			return;
+		}
+		p_val = (RD::ShaderStage)val;
+	}
+
+	// Booleans occupy one byte; any non-zero byte reads as true.
+	_FORCE_INLINE_ void read(bool &p_val) {
+		CHECK(sizeof(uint8_t));
+
+		p_val = *(data + pos) > 0;
+		pos += 1;
+	}
+
+	_FORCE_INLINE_ void read(uint64_t &p_val) {
+		CHECK(sizeof(uint64_t));
+
+		p_val = decode_uint64(data + pos);
+		pos += sizeof(uint64_t);
+	}
+
+	_FORCE_INLINE_ void read(float &p_val) {
+		CHECK(sizeof(float));
+
+		p_val = decode_float(data + pos);
+		pos += sizeof(float);
+	}
+
+	_FORCE_INLINE_ void read(double &p_val) {
+		CHECK(sizeof(double));
+
+		p_val = decode_double(data + pos);
+		pos += sizeof(double);
+	}
+
+	// Reads a length-prefixed string and appends the terminating NUL.
+	void read(CharString &p_val) {
+		// Zero-initialized: if the length read is skipped, the check below must
+		// not observe an indeterminate value.
+		uint32_t len = 0;
+		read(len);
+		CHECK(len);
+		p_val.resize(len + 1 /* NUL */);
+		memcpy(p_val.ptrw(), data + pos, len);
+		p_val.set(len, 0);
+		pos += len;
+	}
+
+	// Reads a string stored as: uncompressed size, compressed size, ZSTD payload.
+	// Sets BAD_COMPRESSION when the payload does not decompress to exactly the
+	// recorded uncompressed size.
+	void read_compressed(CharString &p_val) {
+		// Both sizes are zero-initialized for the same reason as in read(CharString &).
+		uint32_t len = 0;
+		read(len);
+		uint32_t comp_size = 0;
+		read(comp_size);
+
+		CHECK(comp_size);
+
+		p_val.resize(len + 1 /* NUL */);
+		uint32_t bytes = (uint32_t)Compression::decompress(reinterpret_cast<uint8_t *>(p_val.ptrw()), len, data + pos, comp_size, Compression::MODE_ZSTD);
+		if (bytes != len) {
+			status = Status::BAD_COMPRESSION;
+			return;
+		}
+		p_val.set(len, 0);
+		pos += comp_size;
+	}
+
+	// Length-prefixed byte vector, copied as one block.
+	void read(LocalVector<uint8_t> &p_val) {
+		uint32_t len = 0;
+		read(len);
+		CHECK(len);
+		p_val.resize(len);
+		memcpy(p_val.ptr(), data + pos, len);
+		pos += len;
+	}
+
+	// Element count followed by the elements.
+	// The CHECK only requires one byte per element, which is a lower bound;
+	// each element read performs its own bounds check.
+	template <typename T>
+	void read(LocalVector<T> &p_val) {
+		uint32_t len = 0;
+		read(len);
+		CHECK(len);
+		p_val.resize(len);
+		for (uint32_t i = 0; i < len; i++) {
+			read(p_val[i]);
+		}
+	}
+
+	// Entry count followed by alternating key / value pairs.
+	template <typename K, typename V>
+	void read(HashMap<K, V> &p_map) {
+		uint32_t len = 0;
+		read(len);
+		CHECK(len);
+		p_map.reserve(len);
+		for (uint32_t i = 0; i < len; i++) {
+			K key;
+			read(key);
+			V value;
+			read(value);
+			p_map[key] = value;
+		}
+	}
+
+#undef CHECK
+};
+
+// NOTE(review): presumably a reserved specialization constant ID related to
+// R32UI alignment; its use is outside this section, so confirm at the call site.
+const uint32_t R32UI_ALIGNMENT_CONSTANT_ID = 65535;
+
+// Three-component size (x, y, z), serialized as three consecutive uint32_t values.
+struct ComputeSize {
+	uint32_t x = 0;
+	uint32_t y = 0;
+	uint32_t z = 0;
+
+	// Number of bytes written by serialize().
+	size_t serialize_size() const {
+		return 3 * sizeof(uint32_t);
+	}
+
+	// Writes x, y and z, in that order.
+	void serialize(BufWriter &p_writer) const {
+		p_writer.write(x);
+		p_writer.write(y);
+		p_writer.write(z);
+	}
+
+	// Reads x, y and z, in that order.
+	void deserialize(BufReader &p_reader) {
+		p_reader.read(x);
+		p_reader.read(y);
+		p_reader.read(z);
+	}
+};
+
+// One shader stage of a serialized shader: the stage, its entry point name and
+// its source. The source is stored ZSTD-compressed.
+struct ShaderStageData {
+	RD::ShaderStage stage = RD::ShaderStage::SHADER_STAGE_MAX;
+	CharString entry_point_name;
+	CharString source;
+
+	// Upper bound on the serialized size: the compressed source is accounted for
+	// with the maximum size the compressor may need.
+	size_t serialize_size() const {
+		int max_compressed_size = Compression::get_max_compressed_buffer_size(source.length(), Compression::MODE_ZSTD);
+
+		size_t size = 0;
+		size += sizeof(uint32_t); // Stage.
+		size += sizeof(uint32_t); // Length prefix of entry_point_name.
+		size += entry_point_name.length();
+		size += sizeof(uint32_t); // Uncompressed source size.
+		size += sizeof(uint32_t); // Compressed source size.
+		size += max_compressed_size;
+		return size;
+	}
+
+	void serialize(BufWriter &p_writer) const {
+		p_writer.write((uint32_t)stage);
+		p_writer.write(entry_point_name);
+		p_writer.write_compressed(source);
+	}
+
+	// Reads the fields in the same order serialize() writes them.
+	void deserialize(BufReader &p_reader) {
+		p_reader.read((uint32_t &)stage);
+		p_reader.read(entry_point_name);
+		p_reader.read_compressed(source);
+	}
+};
+
+// Serialized description of one specialization constant.
+struct SpecializationConstantData {
+	uint32_t constant_id = UINT32_MAX;
+	RD::PipelineSpecializationConstantType type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT;
+	ShaderStageUsage stages = ShaderStageUsage::None;
+	// Specifies the stages the constant is used by Metal.
+	ShaderStageUsage used_stages = ShaderStageUsage::None;
+	uint32_t int_value = UINT32_MAX;
+
+	// Sum of the field sizes, listed in the order serialize() writes them.
+	size_t serialize_size() const {
+		size_t size = 0;
+		size += sizeof(constant_id);
+		size += sizeof(uint32_t); // type
+		size += sizeof(stages);
+		size += sizeof(used_stages);
+		size += sizeof(int_value);
+		return size;
+	}
+
+	void serialize(BufWriter &p_writer) const {
+		p_writer.write(constant_id);
+		p_writer.write((uint32_t)type);
+		p_writer.write(stages);
+		p_writer.write(used_stages);
+		p_writer.write(int_value);
+	}
+
+	// Reads the fields in the same order serialize() writes them; the enum
+	// fields are read as raw uint32_t values.
+	void deserialize(BufReader &p_reader) {
+		p_reader.read(constant_id);
+		p_reader.read((uint32_t &)type);
+		p_reader.read((uint32_t &)stages);
+		p_reader.read((uint32_t &)used_stages);
+		p_reader.read(int_value);
+	}
+};
+
+// Serialized description of one uniform, including its per-stage binding
+// information (primary and secondary).
+struct API_AVAILABLE(macos(11.0), ios(14.0)) UniformData {
+	RD::UniformType type = RD::UniformType::UNIFORM_TYPE_MAX;
+	uint32_t binding = UINT32_MAX;
+	bool writable = false;
+	uint32_t length = UINT32_MAX;
+	ShaderStageUsage stages = ShaderStageUsage::None;
+	// Specifies the stages the uniform data is
+	// used by the Metal shader.
+	ShaderStageUsage active_stages = ShaderStageUsage::None;
+	BindingInfoMap bindings;
+	BindingInfoMap bindings_secondary;
+
+	// Size reserved for this uniform when serialized.
+	// `writable` is budgeted as 4 bytes although BufWriter stores a bool in a
+	// single byte, so the result is an upper bound rather than an exact size.
+	size_t serialize_size() const {
+		// Size of one binding map: entry count, then a uint32_t key plus the
+		// serialized BindingInfo for every entry.
+		auto binding_map_size = [](BindingInfoMap const &p_map) {
+			size_t map_size = sizeof(uint32_t); // Entry count.
+			map_size += sizeof(uint32_t) * p_map.size(); // Total size of keys.
+			for (KeyValue<RD::ShaderStage, BindingInfo> const &entry : p_map) {
+				map_size += entry.value.serialize_size();
+			}
+			return map_size;
+		};
+
+		// type, binding, writable, length, stages, active_stages.
+		size_t size = sizeof(uint32_t) * 6;
+		size += binding_map_size(bindings);
+		size += binding_map_size(bindings_secondary);
+		return size;
+	}
+
+	void serialize(BufWriter &p_writer) const {
+		p_writer.write((uint32_t)type);
+		p_writer.write(binding);
+		p_writer.write(writable);
+		p_writer.write(length);
+		p_writer.write(stages);
+		p_writer.write(active_stages);
+		p_writer.write(bindings);
+		p_writer.write(bindings_secondary);
+	}
+
+	// Reads the fields in the same order serialize() writes them; the enum
+	// fields are read as raw uint32_t values.
+	void deserialize(BufReader &p_reader) {
+		p_reader.read((uint32_t &)type);
+		p_reader.read(binding);
+		p_reader.read(writable);
+		p_reader.read(length);
+		p_reader.read((uint32_t &)stages);
+		p_reader.read((uint32_t &)active_stages);
+		p_reader.read(bindings);
+		p_reader.read(bindings_secondary);
+	}
+};
+
+// A uniform set: its index plus the uniforms it contains.
+struct API_AVAILABLE(macos(11.0), ios(14.0)) UniformSetData {
+	uint32_t index = UINT32_MAX;
+	LocalVector<UniformData> uniforms;
+
+	// Index, uniform count, then the size of every uniform.
+	size_t serialize_size() const {
+		size_t size = sizeof(uint32_t) // index
+				+ sizeof(uint32_t); // uniforms.size()
+		for (UniformData const &uniform : uniforms) {
+			size += uniform.serialize_size();
+		}
+		return size;
+	}
+
+	void serialize(BufWriter &p_writer) const {
+		p_writer.write(index);
+		p_writer.write(VectorView(uniforms));
+	}
+
+	void deserialize(BufReader &p_reader) {
+		p_reader.read(index);
+		p_reader.read(uniforms);
+	}
+};
+
+// Serialized push constant description: its size, the stages involved, and a
+// per-stage uint32_t binding value (`msl_binding`).
+struct PushConstantData {
+	uint32_t size = UINT32_MAX;
+	ShaderStageUsage stages = ShaderStageUsage::None;
+	ShaderStageUsage used_stages = ShaderStageUsage::None;
+	HashMap<RD::ShaderStage, uint32_t> msl_binding;
+
+	// Three scalar fields, the map's entry count, then one uint32_t key and one
+	// uint32_t value per map entry.
+	size_t serialize_size() const {
+		size_t total = 0;
+		total += sizeof(uint32_t); // size
+		total += sizeof(uint32_t); // stages
+		total += sizeof(uint32_t); // used_stages
+		total += sizeof(uint32_t); // msl_binding.size()
+		total += sizeof(uint32_t) * msl_binding.size(); // keys
+		total += sizeof(uint32_t) * msl_binding.size(); // values
+		return total;
+	}
+
+	void serialize(BufWriter &p_writer) const {
+		p_writer.write(size);
+		p_writer.write((uint32_t)stages);
+		p_writer.write((uint32_t)used_stages);
+		p_writer.write(msl_binding);
+	}
+
+	// Reads the fields in the same order serialize() writes them.
+	void deserialize(BufReader &p_reader) {
+		p_reader.read(size);
+		p_reader.read((uint32_t &)stages);
+		p_reader.read((uint32_t &)used_stages);
+		p_reader.read(msl_binding);
+	}
+};
+
+// Root object of the serialized shader binary: shader-wide metadata followed by
+// the stages, specialization constants and uniform sets.
+struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
+	CharString shader_name;
+	// The Metal language version specified when compiling SPIR-V to MSL.
+	// Format is major * 10000 + minor * 100 + patch.
+	uint32_t msl_version = UINT32_MAX;
+	uint32_t vertex_input_mask = UINT32_MAX;
+	uint32_t fragment_output_mask = UINT32_MAX;
+	uint32_t spirv_specialization_constants_ids_mask = UINT32_MAX;
+	uint32_t is_compute = UINT32_MAX;
+	ComputeSize compute_local_size;
+	PushConstantData push_constant;
+	LocalVector<ShaderStageData> stages;
+	LocalVector<SpecializationConstantData> constants;
+	LocalVector<UniformSetData> uniforms;
+
+	// Converts `msl_version` into an MTLLanguageVersion value, built as
+	// (major << 16) + minor. The patch component is not used.
+	MTLLanguageVersion get_msl_version() const {
+		const uint32_t major = msl_version / 10000;
+		const uint32_t minor = (msl_version / 100) % 100;
+		return MTLLanguageVersion((major << 16) + minor);
+	}
+
+	// Size to reserve for the whole serialized shader: the fixed-size fields
+	// plus the size reported by every nested element.
+	size_t serialize_size() const {
+		size_t size = sizeof(uint32_t) + shader_name.length(); // shader_name
+
+		// msl_version, vertex_input_mask, fragment_output_mask,
+		// spirv_specialization_constants_ids_mask, is_compute.
+		size += sizeof(uint32_t) * 5;
+
+		size += compute_local_size.serialize_size();
+		size += push_constant.serialize_size();
+
+		size += sizeof(uint32_t); // stages.size()
+		for (ShaderStageData const &stage_data : stages) {
+			size += stage_data.serialize_size();
+		}
+
+		size += sizeof(uint32_t); // constants.size()
+		for (SpecializationConstantData const &constant : constants) {
+			size += constant.serialize_size();
+		}
+
+		size += sizeof(uint32_t); // uniforms.size()
+		for (UniformSetData const &uniform_set : uniforms) {
+			size += uniform_set.serialize_size();
+		}
+
+		return size;
+	}
+
+	void serialize(BufWriter &p_writer) const {
+		p_writer.write(shader_name);
+		p_writer.write(msl_version);
+		p_writer.write(vertex_input_mask);
+		p_writer.write(fragment_output_mask);
+		p_writer.write(spirv_specialization_constants_ids_mask);
+		p_writer.write(is_compute);
+		p_writer.write(compute_local_size);
+		p_writer.write(push_constant);
+		p_writer.write(VectorView(stages));
+		p_writer.write(VectorView(constants));
+		p_writer.write(VectorView(uniforms));
+	}
+
+	// Reads the fields in the same order serialize() writes them.
+	void deserialize(BufReader &p_reader) {
+		p_reader.read(shader_name);
+		p_reader.read(msl_version);
+		p_reader.read(vertex_input_mask);
+		p_reader.read(fragment_output_mask);
+		p_reader.read(spirv_specialization_constants_ids_mask);
+		p_reader.read(is_compute);
+		p_reader.read(compute_local_size);
+		p_reader.read(push_constant);
+		p_reader.read(stages);
+		p_reader.read(constants);
+		p_reader.read(uniforms);
+	}
+};
+
+// endregion
+
+// Returns the cache key for compiled shader binaries: the fixed "Metal-SV"
+// prefix followed by the shader binary version.
+String RenderingDeviceDriverMetal::shader_get_binary_cache_key() {
+	String cache_key = "Metal-SV";
+	cache_key += uitos(SHADER_BINARY_VERSION);
+	return cache_key;
+}
+
+Error RenderingDeviceDriverMetal::_reflect_spirv16(VectorView<ShaderStageSPIRVData> p_spirv, ShaderReflection &r_reflection) {
+ using namespace spirv_cross;
+ using spirv_cross::Resource;
+
+ r_reflection = {};
+
+ for (uint32_t i = 0; i < p_spirv.size(); i++) {
+ ShaderStageSPIRVData const &v = p_spirv[i];
+ ShaderStage stage = v.shader_stage;
+ uint32_t const *const ir = reinterpret_cast<uint32_t const *const>(v.spirv.ptr());
+ size_t word_count = v.spirv.size() / sizeof(uint32_t);
+ Parser parser(ir, word_count);
+ try {
+ parser.parse();
+ } catch (CompilerError &e) {
+ ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Failed to parse IR at stage " + String(SHADER_STAGE_NAMES[stage]) + ": " + e.what());
+ }
+
+ ShaderStage stage_flag = (ShaderStage)(1 << p_spirv[i].shader_stage);
+
+ if (p_spirv[i].shader_stage == SHADER_STAGE_COMPUTE) {
+ r_reflection.is_compute = true;
+ ERR_FAIL_COND_V_MSG(p_spirv.size() != 1, FAILED,
+ "Compute shaders can only receive one stage, dedicated to compute.");
+ }
+ ERR_FAIL_COND_V_MSG(r_reflection.stages.has_flag(stage_flag), FAILED,
+ "Stage " + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + " submitted more than once.");
+
+ ParsedIR &pir = parser.get_parsed_ir();
+ using BT = SPIRType::BaseType;
+
+ Compiler compiler(std::move(pir));
+
+ if (r_reflection.is_compute) {
+ r_reflection.compute_local_size[0] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 0);
+ r_reflection.compute_local_size[1] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 1);
+ r_reflection.compute_local_size[2] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 2);
+ }
+
+ // Parse bindings.
+
+ auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) {
+ uint32_t res = -1;
+ if (compiler.has_decoration(id, decoration)) {
+ res = compiler.get_decoration(id, decoration);
+ }
+ return res;
+ };
+
+ // Always clearer than a boolean.
+ enum class Writable {
+ No,
+ Maybe,
+ };
+
+ // clang-format off
+ enum {
+ SPIRV_WORD_SIZE = sizeof(uint32_t),
+ SPIRV_DATA_ALIGNMENT = 4 * SPIRV_WORD_SIZE,
+ };
+ // clang-format on
+
+ auto process_uniforms = [&r_reflection, &compiler, &get_decoration, stage, stage_flag](SmallVector<Resource> &resources, Writable writable, std::function<RDD::UniformType(SPIRType const &)> uniform_type) {
+ for (Resource const &res : resources) {
+ ShaderUniform uniform;
+
+ std::string const &name = compiler.get_name(res.id);
+ uint32_t set = get_decoration(res.id, spv::DecorationDescriptorSet);
+ ERR_FAIL_COND_V_MSG(set == (uint32_t)-1, FAILED, "No descriptor set found");
+ ERR_FAIL_COND_V_MSG(set >= MAX_UNIFORM_SETS, FAILED, "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' uses a set (" + itos(set) + ") index larger than what is supported (" + itos(MAX_UNIFORM_SETS) + ").");
+
+ uniform.binding = get_decoration(res.id, spv::DecorationBinding);
+ ERR_FAIL_COND_V_MSG(uniform.binding == (uint32_t)-1, FAILED, "No binding found");
+
+ SPIRType const &a_type = compiler.get_type(res.type_id);
+ uniform.type = uniform_type(a_type);
+
+ // Update length.
+ switch (a_type.basetype) {
+ case BT::Struct: {
+ if (uniform.type == UNIFORM_TYPE_STORAGE_BUFFER) {
+ // Consistent with spirv_reflect.
+ uniform.length = 0;
+ } else {
+ uniform.length = round_up_to_alignment(compiler.get_declared_struct_size(a_type), SPIRV_DATA_ALIGNMENT);
+ }
+ } break;
+ case BT::Image:
+ case BT::Sampler:
+ case BT::SampledImage: {
+ uniform.length = 1;
+ for (uint32_t const &a : a_type.array) {
+ uniform.length *= a;
+ }
+ } break;
+ default:
+ break;
+ }
+
+ // Update writable.
+ if (writable == Writable::Maybe) {
+ if (a_type.basetype == BT::Struct) {
+ Bitset flags = compiler.get_buffer_block_flags(res.id);
+ uniform.writable = !compiler.has_decoration(res.id, spv::DecorationNonWritable) && !flags.get(spv::DecorationNonWritable);
+ } else if (a_type.basetype == BT::Image) {
+ if (a_type.image.access == spv::AccessQualifierMax) {
+ uniform.writable = !compiler.has_decoration(res.id, spv::DecorationNonWritable);
+ } else {
+ uniform.writable = a_type.image.access != spv::AccessQualifierReadOnly;
+ }
+ }
+ }
+
+ if (set < (uint32_t)r_reflection.uniform_sets.size()) {
+ // Check if this already exists.
+ bool exists = false;
+ for (uint32_t k = 0; k < r_reflection.uniform_sets[set].size(); k++) {
+ if (r_reflection.uniform_sets[set][k].binding == uniform.binding) {
+ // Already exists, verify that it's the same type.
+ ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].type != uniform.type, FAILED,
+ "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different uniform type.");
+
+ // Also, verify that it's the same size.
+ ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].length != uniform.length, FAILED,
+ "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different uniform size.");
+
+ // Also, verify that it has the same writability.
+ ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].writable != uniform.writable, FAILED,
+ "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different writability.");
+
+ // Just append stage mask and continue.
+ r_reflection.uniform_sets.write[set].write[k].stages.set_flag(stage_flag);
+ exists = true;
+ break;
+ }
+ }
+
+ if (exists) {
+ continue; // Merged.
+ }
+ }
+
+ uniform.stages.set_flag(stage_flag);
+
+ if (set >= (uint32_t)r_reflection.uniform_sets.size()) {
+ r_reflection.uniform_sets.resize(set + 1);
+ }
+
+ r_reflection.uniform_sets.write[set].push_back(uniform);
+ }
+
+ return OK;
+ };
+
+ ShaderResources resources = compiler.get_shader_resources();
+
+ process_uniforms(resources.uniform_buffers, Writable::No, [](SPIRType const &a_type) {
+ DEV_ASSERT(a_type.basetype == BT::Struct);
+ return UNIFORM_TYPE_UNIFORM_BUFFER;
+ });
+
+ process_uniforms(resources.storage_buffers, Writable::Maybe, [](SPIRType const &a_type) {
+ DEV_ASSERT(a_type.basetype == BT::Struct);
+ return UNIFORM_TYPE_STORAGE_BUFFER;
+ });
+
+ process_uniforms(resources.storage_images, Writable::Maybe, [](SPIRType const &a_type) {
+ DEV_ASSERT(a_type.basetype == BT::Image);
+ if (a_type.image.dim == spv::DimBuffer) {
+ return UNIFORM_TYPE_IMAGE_BUFFER;
+ } else {
+ return UNIFORM_TYPE_IMAGE;
+ }
+ });
+
+ process_uniforms(resources.sampled_images, Writable::No, [](SPIRType const &a_type) {
+ DEV_ASSERT(a_type.basetype == BT::SampledImage);
+ return UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
+ });
+
+ process_uniforms(resources.separate_images, Writable::No, [](SPIRType const &a_type) {
+ DEV_ASSERT(a_type.basetype == BT::Image);
+ if (a_type.image.dim == spv::DimBuffer) {
+ return UNIFORM_TYPE_TEXTURE_BUFFER;
+ } else {
+ return UNIFORM_TYPE_TEXTURE;
+ }
+ });
+
+ process_uniforms(resources.separate_samplers, Writable::No, [](SPIRType const &a_type) {
+ DEV_ASSERT(a_type.basetype == BT::Sampler);
+ return UNIFORM_TYPE_SAMPLER;
+ });
+
+ process_uniforms(resources.subpass_inputs, Writable::No, [](SPIRType const &a_type) {
+ DEV_ASSERT(a_type.basetype == BT::Image && a_type.image.dim == spv::DimSubpassData);
+ return UNIFORM_TYPE_INPUT_ATTACHMENT;
+ });
+
+ if (!resources.push_constant_buffers.empty()) {
+ // There can be only one push constant block.
+ Resource const &res = resources.push_constant_buffers.front();
+
+ size_t push_constant_size = round_up_to_alignment(compiler.get_declared_struct_size(compiler.get_type(res.base_type_id)), SPIRV_DATA_ALIGNMENT);
+ ERR_FAIL_COND_V_MSG(r_reflection.push_constant_size && r_reflection.push_constant_size != push_constant_size, FAILED,
+ "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "': Push constant block must be the same across shader stages.");
+
+ r_reflection.push_constant_size = push_constant_size;
+ r_reflection.push_constant_stages.set_flag(stage_flag);
+ }
+
+ ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), FAILED, "Atomic counters not supported");
+ ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), FAILED, "Acceleration structures not supported");
+ ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), FAILED, "Shader record buffers not supported");
+
+ if (stage == SHADER_STAGE_VERTEX && !resources.stage_inputs.empty()) {
+ for (Resource const &res : resources.stage_inputs) {
+ SPIRType a_type = compiler.get_type(res.base_type_id);
+ uint32_t loc = get_decoration(res.id, spv::DecorationLocation);
+ if (loc != (uint32_t)-1) {
+ r_reflection.vertex_input_mask |= 1 << loc;
+ }
+ }
+ }
+
+ if (stage == SHADER_STAGE_FRAGMENT && !resources.stage_outputs.empty()) {
+ for (Resource const &res : resources.stage_outputs) {
+ SPIRType a_type = compiler.get_type(res.base_type_id);
+ uint32_t loc = get_decoration(res.id, spv::DecorationLocation);
+ uint32_t built_in = spv::BuiltIn(get_decoration(res.id, spv::DecorationBuiltIn));
+ if (loc != (uint32_t)-1 && built_in != spv::BuiltInFragDepth) {
+ r_reflection.fragment_output_mask |= 1 << loc;
+ }
+ }
+ }
+
+ // Specialization constants.
+ for (SpecializationConstant const &constant : compiler.get_specialization_constants()) {
+ int32_t existing = -1;
+ ShaderSpecializationConstant sconst;
+ SPIRConstant &spc = compiler.get_constant(constant.id);
+ SPIRType const &spct = compiler.get_type(spc.constant_type);
+
+ sconst.constant_id = constant.constant_id;
+ sconst.int_value = 0;
+
+ switch (spct.basetype) {
+ case BT::Boolean: {
+ sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL;
+ sconst.bool_value = spc.scalar() != 0;
+ } break;
+ case BT::Int:
+ case BT::UInt: {
+ sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT;
+ sconst.int_value = spc.scalar();
+ } break;
+ case BT::Float: {
+ sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT;
+ sconst.float_value = spc.scalar_f32();
+ } break;
+ default:
+ ERR_FAIL_V_MSG(FAILED, "Unsupported specialization constant type");
+ }
+ sconst.stages.set_flag(stage_flag);
+
+ for (uint32_t k = 0; k < r_reflection.specialization_constants.size(); k++) {
+ if (r_reflection.specialization_constants[k].constant_id == sconst.constant_id) {
+ ERR_FAIL_COND_V_MSG(r_reflection.specialization_constants[k].type != sconst.type, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their types differ.");
+ ERR_FAIL_COND_V_MSG(r_reflection.specialization_constants[k].int_value != sconst.int_value, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their default values differ.");
+ existing = k;
+ break;
+ }
+ }
+
+ if (existing > 0) {
+ r_reflection.specialization_constants.write[existing].stages.set_flag(stage_flag);
+ } else {
+ r_reflection.specialization_constants.push_back(sconst);
+ }
+ }
+
+ r_reflection.stages.set_flag(stage_flag);
+ }
+
+ // Sort all uniform_sets.
+ for (uint32_t i = 0; i < r_reflection.uniform_sets.size(); i++) {
+ r_reflection.uniform_sets.write[i].sort();
+ }
+
+ return OK;
+}
+
+// Translates the given SPIR-V stages to MSL with SPIRV-Cross and packs the
+// generated sources, together with the reflection data, into the driver's
+// shader binary format.
+//
+// p_spirv: one entry per shader stage (stage identifier plus SPIR-V bytes).
+// p_shader_name: stored in the binary; "unnamed" is used when it is empty.
+//
+// Returns the serialized binary ("GMSL" magic, SHADER_BINARY_VERSION, then the
+// ShaderBinaryData payload), or an empty vector when reflection, IR parsing or
+// binding resolution fails.
+Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) {
+ using Result = ::Vector<uint8_t>;
+ using namespace spirv_cross;
+ using spirv_cross::CompilerMSL;
+ using spirv_cross::Resource;
+
+ // Gather the driver-agnostic reflection first; the MSL specific data is layered on top of it below.
+ ShaderReflection spirv_data;
+ ERR_FAIL_COND_V(_reflect_spirv16(p_spirv, spirv_data), Result());
+
+ ShaderBinaryData bin_data{};
+ if (!p_shader_name.is_empty()) {
+ bin_data.shader_name = p_shader_name.utf8();
+ } else {
+ bin_data.shader_name = "unnamed";
+ }
+
+ bin_data.vertex_input_mask = spirv_data.vertex_input_mask;
+ bin_data.fragment_output_mask = spirv_data.fragment_output_mask;
+ bin_data.compute_local_size = ComputeSize{
+ .x = spirv_data.compute_local_size[0],
+ .y = spirv_data.compute_local_size[1],
+ .z = spirv_data.compute_local_size[2],
+ };
+ bin_data.is_compute = spirv_data.is_compute;
+ bin_data.push_constant.size = spirv_data.push_constant_size;
+ bin_data.push_constant.stages = (ShaderStageUsage)(uint8_t)spirv_data.push_constant_stages;
+
+ // Mirror every reflected uniform set / uniform into the binary representation.
+ for (uint32_t i = 0; i < spirv_data.uniform_sets.size(); i++) {
+ const ::Vector<ShaderUniform> &spirv_set = spirv_data.uniform_sets[i];
+ UniformSetData set{ .index = i };
+ for (const ShaderUniform &spirv_uniform : spirv_set) {
+ UniformData binding{};
+ binding.type = spirv_uniform.type;
+ binding.binding = spirv_uniform.binding;
+ binding.writable = spirv_uniform.writable;
+ binding.stages = (ShaderStageUsage)(uint8_t)spirv_uniform.stages;
+ binding.length = spirv_uniform.length;
+ set.uniforms.push_back(binding);
+ }
+ bin_data.uniforms.push_back(set);
+ }
+
+ // Mirror the reflected specialization constants and remember which ids are in use.
+ for (const ShaderSpecializationConstant &spirv_sc : spirv_data.specialization_constants) {
+ SpecializationConstantData spec_constant{};
+ spec_constant.type = spirv_sc.type;
+ spec_constant.constant_id = spirv_sc.constant_id;
+ spec_constant.int_value = spirv_sc.int_value;
+ spec_constant.stages = (ShaderStageUsage)(uint8_t)spirv_sc.stages;
+ bin_data.constants.push_back(spec_constant);
+ bin_data.spirv_specialization_constants_ids_mask |= (1 << spirv_sc.constant_id);
+ }
+
+ // Reflection using SPIRV-Cross:
+ // https://github.com/KhronosGroup/SPIRV-Cross/wiki/Reflection-API-user-guide
+
+ CompilerMSL::Options msl_options{};
+ msl_options.set_msl_version(version_major, version_minor);
+ if (version_major == 3 && version_minor >= 1) {
+ // TODO(sgc): Restrict to Metal 3.0 for now, until bugs in SPIRV-cross image atomics are resolved.
+ msl_options.set_msl_version(3, 0);
+ }
+ bin_data.msl_version = msl_options.msl_version;
+#if TARGET_OS_OSX
+ msl_options.platform = CompilerMSL::Options::macOS;
+#else
+ msl_options.platform = CompilerMSL::Options::iOS;
+#endif
+
+#if TARGET_OS_IOS
+ msl_options.ios_use_simdgroup_functions = (*metal_device_properties).features.simdPermute;
+#endif
+
+ msl_options.argument_buffers = true;
+ msl_options.force_active_argument_buffer_resources = true; // Same as MoltenVK when using argument buffers.
+ // msl_options.pad_argument_buffer_resources = true; // Same as MoltenVK when using argument buffers.
+ msl_options.texture_buffer_native = true; // Enable texture buffer support.
+ msl_options.use_framebuffer_fetch_subpasses = false;
+ msl_options.pad_fragment_output_components = true;
+ msl_options.r32ui_alignment_constant_id = R32UI_ALIGNMENT_CONSTANT_ID;
+ msl_options.agx_manual_cube_grad_fixup = true;
+
+ CompilerGLSL::Options options{};
+ options.vertex.flip_vert_y = true;
+#if DEV_ENABLED
+ options.emit_line_directives = true;
+#endif
+
+ // Compile each stage to MSL and record the resource indices SPIRV-Cross assigned.
+ for (uint32_t i = 0; i < p_spirv.size(); i++) {
+ ShaderStageSPIRVData const &v = p_spirv[i];
+ ShaderStage stage = v.shader_stage;
+ char const *stage_name = SHADER_STAGE_NAMES[stage];
+ uint32_t const *const ir = reinterpret_cast<uint32_t const *const>(v.spirv.ptr());
+ size_t word_count = v.spirv.size() / sizeof(uint32_t);
+ Parser parser(ir, word_count);
+ try {
+ parser.parse();
+ } catch (CompilerError &e) {
+ ERR_FAIL_V_MSG(Result(), "Failed to parse IR at stage " + String(SHADER_STAGE_NAMES[stage]) + ": " + e.what());
+ }
+
+ CompilerMSL compiler(std::move(parser.get_parsed_ir()));
+ compiler.set_msl_options(msl_options);
+ compiler.set_common_options(options);
+
+ std::unordered_set<VariableID> active = compiler.get_active_interface_variables();
+ ShaderResources resources = compiler.get_shader_resources();
+
+ std::string source = compiler.compile();
+
+ ERR_FAIL_COND_V_MSG(compiler.get_entry_points_and_stages().size() != 1, Result(), "Expected a single entry point and stage.");
+
+ EntryPoint &entry_point_stage = compiler.get_entry_points_and_stages().front();
+ SPIREntryPoint &entry_point = compiler.get_entry_point(entry_point_stage.name, entry_point_stage.execution_model);
+
+ // Process specialization constants.
+ if (!compiler.get_specialization_constants().empty()) {
+ for (SpecializationConstant const &constant : compiler.get_specialization_constants()) {
+ LocalVector<SpecializationConstantData>::Iterator res = bin_data.constants.begin();
+ while (res != bin_data.constants.end()) {
+ if (res->constant_id == constant.constant_id) {
+ res->used_stages |= 1 << stage;
+ break;
+ }
+ ++res;
+ }
+ if (res == bin_data.constants.end()) {
+ WARN_PRINT(String(stage_name) + ": unable to find constant_id: " + itos(constant.constant_id));
+ }
+ }
+ }
+
+ // Process bindings.
+
+ LocalVector<UniformSetData> &uniform_sets = bin_data.uniforms;
+ using BT = SPIRType::BaseType;
+
+ // Always clearer than a boolean.
+ enum class Writable {
+ No,
+ Maybe,
+ };
+
+ // Returns the value of the decoration, or (uint32_t)-1 when the
+ // decoration is not present on the given id.
+ auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) {
+ uint32_t res = -1;
+ if (compiler.has_decoration(id, decoration)) {
+ res = compiler.get_decoration(id, decoration);
+ }
+ return res;
+ };
+
+ // Fills in the per-stage BindingInfo of every resource in the list; fails
+ // when a resource has no matching UniformData from the reflection pass.
+ auto descriptor_bindings = [&compiler, &active, &uniform_sets, stage, &get_decoration](SmallVector<Resource> &resources, Writable writable) {
+ for (Resource const &res : resources) {
+ uint32_t dset = get_decoration(res.id, spv::DecorationDescriptorSet);
+ uint32_t dbin = get_decoration(res.id, spv::DecorationBinding);
+ UniformData *found = nullptr;
+ if (dset != (uint32_t)-1 && dbin != (uint32_t)-1 && dset < uniform_sets.size()) {
+ UniformSetData &set = uniform_sets[dset];
+ LocalVector<UniformData>::Iterator pos = set.uniforms.begin();
+ while (pos != set.uniforms.end()) {
+ if (dbin == pos->binding) {
+ found = &(*pos);
+ break;
+ }
+ ++pos;
+ }
+ }
+
+ ERR_FAIL_NULL_V_MSG(found, ERR_CANT_CREATE, "UniformData not found");
+
+ bool is_active = active.find(res.id) != active.end();
+ if (is_active) {
+ found->active_stages |= 1 << stage;
+ }
+
+ BindingInfo primary{};
+
+ SPIRType const &a_type = compiler.get_type(res.type_id);
+ BT basetype = a_type.basetype;
+
+ switch (basetype) {
+ case BT::Struct: {
+ primary.dataType = MTLDataTypePointer;
+ } break;
+
+ case BT::Image:
+ case BT::SampledImage: {
+ primary.dataType = MTLDataTypeTexture;
+ } break;
+
+ case BT::Sampler: {
+ primary.dataType = MTLDataTypeSampler;
+ } break;
+
+ default: {
+ ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unexpected BaseType");
+ } break;
+ }
+
+ // Find array length.
+ if (basetype == BT::Image || basetype == BT::SampledImage) {
+ primary.arrayLength = 1;
+ for (uint32_t const &a : a_type.array) {
+ primary.arrayLength *= a;
+ }
+ primary.isMultisampled = a_type.image.ms;
+
+ SPIRType::ImageType const &image = a_type.image;
+ primary.imageFormat = image.format;
+
+ switch (image.dim) {
+ case spv::Dim1D: {
+ if (image.arrayed) {
+ primary.textureType = MTLTextureType1DArray;
+ } else {
+ primary.textureType = MTLTextureType1D;
+ }
+ } break;
+ case spv::DimSubpassData: {
+ DISPATCH_FALLTHROUGH;
+ }
+ case spv::Dim2D: {
+ if (image.arrayed && image.ms) {
+ primary.textureType = MTLTextureType2DMultisampleArray;
+ } else if (image.arrayed) {
+ primary.textureType = MTLTextureType2DArray;
+ } else if (image.ms) {
+ primary.textureType = MTLTextureType2DMultisample;
+ } else {
+ primary.textureType = MTLTextureType2D;
+ }
+ } break;
+ case spv::Dim3D: {
+ primary.textureType = MTLTextureType3D;
+ } break;
+ case spv::DimCube: {
+ // An arrayed cube image is a cube array in Metal; a plain cube
+ // must be typed explicitly too, rather than left at the default.
+ if (image.arrayed) {
+ primary.textureType = MTLTextureTypeCubeArray;
+ } else {
+ primary.textureType = MTLTextureTypeCube;
+ }
+ } break;
+ case spv::DimRect: {
+ } break;
+ case spv::DimBuffer: {
+ // VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER
+ primary.textureType = MTLTextureTypeTextureBuffer;
+ } break;
+ case spv::DimMax: {
+ // Add all enumerations to silence the compiler warning
+ // and generate future warnings, should a new one be added.
+ } break;
+ }
+ }
+
+ // Update writable.
+ if (writable == Writable::Maybe) {
+ if (basetype == BT::Struct) {
+ Bitset flags = compiler.get_buffer_block_flags(res.id);
+ if (!flags.get(spv::DecorationNonWritable)) {
+ if (flags.get(spv::DecorationNonReadable)) {
+ primary.access = MTLBindingAccessWriteOnly;
+ } else {
+ primary.access = MTLBindingAccessReadWrite;
+ }
+ }
+ } else if (basetype == BT::Image) {
+ switch (a_type.image.access) {
+ case spv::AccessQualifierWriteOnly:
+ primary.access = MTLBindingAccessWriteOnly;
+ break;
+ case spv::AccessQualifierReadWrite:
+ primary.access = MTLBindingAccessReadWrite;
+ break;
+ case spv::AccessQualifierReadOnly:
+ break;
+ case spv::AccessQualifierMax:
+ DISPATCH_FALLTHROUGH;
+ default:
+ if (!compiler.has_decoration(res.id, spv::DecorationNonWritable)) {
+ if (compiler.has_decoration(res.id, spv::DecorationNonReadable)) {
+ primary.access = MTLBindingAccessWriteOnly;
+ } else {
+ primary.access = MTLBindingAccessReadWrite;
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ // Derive the resource usage flags from the access mode chosen above.
+ switch (primary.access) {
+ case MTLBindingAccessReadOnly:
+ primary.usage = MTLResourceUsageRead;
+ break;
+ case MTLBindingAccessWriteOnly:
+ primary.usage = MTLResourceUsageWrite;
+ break;
+ case MTLBindingAccessReadWrite:
+ primary.usage = MTLResourceUsageRead | MTLResourceUsageWrite;
+ break;
+ }
+
+ primary.index = compiler.get_automatic_msl_resource_binding(res.id);
+
+ found->bindings[stage] = primary;
+
+ // A sampled image contains two bindings, the primary
+ // is to the image, and the secondary is to the associated sampler.
+ if (basetype == BT::SampledImage) {
+ uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id);
+ if (binding != (uint32_t)-1) {
+ found->bindings_secondary[stage] = BindingInfo{
+ .dataType = MTLDataTypeSampler,
+ .index = binding,
+ .access = MTLBindingAccessReadOnly,
+ };
+ }
+ }
+
+ // An image may have a secondary binding if it is used
+ // for atomic operations.
+ if (basetype == BT::Image) {
+ uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id);
+ if (binding != (uint32_t)-1) {
+ found->bindings_secondary[stage] = BindingInfo{
+ .dataType = MTLDataTypePointer,
+ .index = binding,
+ .access = MTLBindingAccessReadWrite,
+ };
+ }
+ }
+ }
+ return Error::OK;
+ };
+
+ if (!resources.uniform_buffers.empty()) {
+ Error err = descriptor_bindings(resources.uniform_buffers, Writable::No);
+ ERR_FAIL_COND_V(err != OK, Result());
+ }
+ if (!resources.storage_buffers.empty()) {
+ Error err = descriptor_bindings(resources.storage_buffers, Writable::Maybe);
+ ERR_FAIL_COND_V(err != OK, Result());
+ }
+ if (!resources.storage_images.empty()) {
+ Error err = descriptor_bindings(resources.storage_images, Writable::Maybe);
+ ERR_FAIL_COND_V(err != OK, Result());
+ }
+ if (!resources.sampled_images.empty()) {
+ Error err = descriptor_bindings(resources.sampled_images, Writable::No);
+ ERR_FAIL_COND_V(err != OK, Result());
+ }
+ if (!resources.separate_images.empty()) {
+ Error err = descriptor_bindings(resources.separate_images, Writable::No);
+ ERR_FAIL_COND_V(err != OK, Result());
+ }
+ if (!resources.separate_samplers.empty()) {
+ Error err = descriptor_bindings(resources.separate_samplers, Writable::No);
+ ERR_FAIL_COND_V(err != OK, Result());
+ }
+ if (!resources.subpass_inputs.empty()) {
+ Error err = descriptor_bindings(resources.subpass_inputs, Writable::No);
+ ERR_FAIL_COND_V(err != OK, Result());
+ }
+
+ if (!resources.push_constant_buffers.empty()) {
+ for (Resource const &res : resources.push_constant_buffers) {
+ uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id);
+ if (binding != (uint32_t)-1) {
+ bin_data.push_constant.used_stages |= 1 << stage;
+ bin_data.push_constant.msl_binding[stage] = binding;
+ }
+ }
+ }
+
+ ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), Result(), "Atomic counters not supported");
+ ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), Result(), "Acceleration structures not supported");
+ ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), Result(), "Shader record buffers not supported");
+
+ if (!resources.stage_inputs.empty()) {
+ for (Resource const &res : resources.stage_inputs) {
+ uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id);
+ if (binding != (uint32_t)-1) {
+ bin_data.vertex_input_mask |= 1 << binding;
+ }
+ }
+ }
+
+ ShaderStageData stage_data;
+ stage_data.stage = v.shader_stage;
+ stage_data.entry_point_name = entry_point.name.c_str();
+ stage_data.source = source.c_str();
+ bin_data.stages.push_back(stage_data);
+ }
+
+ // 8 extra bytes: 4 for the "GMSL" magic and 4 for the binary version.
+ size_t vec_size = bin_data.serialize_size() + 8;
+
+ ::Vector<uint8_t> ret;
+ ret.resize(vec_size);
+ BufWriter writer(ret.ptrw(), vec_size);
+ const uint8_t HEADER[4] = { 'G', 'M', 'S', 'L' };
+ writer.write(*(uint32_t *)HEADER);
+ writer.write(SHADER_BINARY_VERSION);
+ bin_data.serialize(writer);
+ // Shrink to the number of bytes the writer actually produced.
+ ret.resize(writer.get_pos());
+
+ return ret;
+}
+
+// Removes the cache entry stored under `key`, clears its library reference
+// and deletes the entry. Does nothing when the key is not in the cache.
+void RenderingDeviceDriverMetal::shader_cache_free_entry(const SHA256Digest &key) {
+ ShaderCacheEntry **slot = _shader_cache.getptr(key);
+ if (slot == nullptr) {
+ return;
+ }
+
+ // Copy the pointer out before erasing, since `slot` points into the map.
+ ShaderCacheEntry *cached = *slot;
+ _shader_cache.erase(key);
+ cached->library = nil;
+ memdelete(cached);
+}
+
+// Creates a driver shader object from a binary that starts with the "GMSL"
+// magic followed by SHADER_BINARY_VERSION and a serialized ShaderBinaryData.
+//
+// p_shader_binary: the serialized shader binary.
+// r_shader_desc: reset, then filled with uniform sets, specialization
+// constants, input/output masks, compute info and push constant size.
+// r_name: not assigned anywhere in this function body.
+//
+// Returns the new shader wrapped in a ShaderID, or an empty ShaderID() when
+// the header, version or payload cannot be read.
+RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name) {
+ r_shader_desc = {}; // Driver-agnostic.
+
+ const uint8_t *binptr = p_shader_binary.ptr();
+ uint32_t binsize = p_shader_binary.size();
+
+ // Validate the magic and the version before touching the payload.
+ BufReader reader(binptr, binsize);
+ uint8_t header[4];
+ reader.read((uint32_t &)header);
+ ERR_FAIL_COND_V_MSG(memcmp(header, "GMSL", 4) != 0, ShaderID(), "Invalid header");
+ uint32_t version = 0;
+ reader.read(version);
+ ERR_FAIL_COND_V_MSG(version != SHADER_BINARY_VERSION, ShaderID(), "Invalid shader binary version");
+
+ ShaderBinaryData binary_data;
+ binary_data.deserialize(reader);
+ // The reader records failures in `status`; check it once after deserializing.
+ switch (reader.status) {
+ case BufReader::Status::OK:
+ break;
+ case BufReader::Status::BAD_COMPRESSION:
+ ERR_FAIL_V_MSG(ShaderID(), "Invalid compressed data");
+ case BufReader::Status::SHORT_BUFFER:
+ ERR_FAIL_V_MSG(ShaderID(), "Unexpected end of buffer");
+ }
+
+ MTLCompileOptions *options = [MTLCompileOptions new];
+ options.languageVersion = binary_data.get_msl_version();
+ HashMap<ShaderStage, MDLibrary *> libraries;
+
+ // Resolve one library per stage, reusing a cached one when the source digest matches.
+ for (ShaderStageData &shader_data : binary_data.stages) {
+ SHA256Digest key = SHA256Digest(shader_data.source.ptr(), shader_data.source.length());
+
+ if (ShaderCacheEntry **p = _shader_cache.getptr(key); p != nullptr) {
+ libraries[shader_data.stage] = (*p)->library;
+ continue;
+ }
+
+ NSString *source = [[NSString alloc] initWithBytes:(void *)shader_data.source.ptr()
+ length:shader_data.source.length()
+ encoding:NSUTF8StringEncoding];
+
+ ShaderCacheEntry *cd = memnew(ShaderCacheEntry(*this, key));
+ cd->name = binary_data.shader_name;
+ cd->stage = shader_data.stage;
+
+ MDLibrary *library = [MDLibrary newLibraryWithCacheEntry:cd
+ device:device
+ source:source
+ options:options
+ strategy:_shader_load_strategy];
+ _shader_cache[key] = cd;
+ libraries[shader_data.stage] = library;
+ }
+
+ Vector<UniformSet> uniform_sets;
+ uniform_sets.resize(binary_data.uniforms.size());
+
+ r_shader_desc.uniform_sets.resize(binary_data.uniforms.size());
+
+ // Create sets.
+ // Each set is written at the slot given by its stored `index`, both in the
+ // driver-side list and in the description returned to the caller.
+ for (UniformSetData &uniform_set : binary_data.uniforms) {
+ UniformSet &set = uniform_sets.write[uniform_set.index];
+ set.uniforms.resize(uniform_set.uniforms.size());
+
+ Vector<ShaderUniform> &uset = r_shader_desc.uniform_sets.write[uniform_set.index];
+ uset.resize(uniform_set.uniforms.size());
+
+ for (uint32_t i = 0; i < uniform_set.uniforms.size(); i++) {
+ UniformData &uniform = uniform_set.uniforms[i];
+
+ // Driver-agnostic description of the uniform.
+ ShaderUniform su;
+ su.type = uniform.type;
+ su.writable = uniform.writable;
+ su.length = uniform.length;
+ su.binding = uniform.binding;
+ su.stages = uniform.stages;
+ uset.write[i] = su;
+
+ // Metal-side info: per-stage primary and secondary bindings.
+ UniformInfo ui;
+ ui.binding = uniform.binding;
+ ui.active_stages = uniform.active_stages;
+ for (KeyValue<RDC::ShaderStage, BindingInfo> &kv : uniform.bindings) {
+ ui.bindings.insert(kv.key, kv.value);
+ }
+ for (KeyValue<RDC::ShaderStage, BindingInfo> &kv : uniform.bindings_secondary) {
+ ui.bindings_secondary.insert(kv.key, kv.value);
+ }
+ set.uniforms[i] = ui;
+ }
+ }
+ for (UniformSetData &uniform_set : binary_data.uniforms) {
+ UniformSet &set = uniform_sets.write[uniform_set.index];
+
+ // Make encoders.
+ // One argument encoder is built per (set, stage) pair that has bindings.
+ for (ShaderStageData const &stage_data : binary_data.stages) {
+ ShaderStage stage = stage_data.stage;
+ NSMutableArray<MTLArgumentDescriptor *> *descriptors = [NSMutableArray new];
+
+ for (UniformInfo const &uniform : set.uniforms) {
+ BindingInfo const *binding_info = uniform.bindings.getptr(stage);
+ if (binding_info == nullptr)
+ continue;
+
+ [descriptors addObject:binding_info->new_argument_descriptor()];
+ BindingInfo const *secondary_binding_info = uniform.bindings_secondary.getptr(stage);
+ if (secondary_binding_info != nullptr) {
+ [descriptors addObject:secondary_binding_info->new_argument_descriptor()];
+ }
+ }
+
+ if (descriptors.count == 0) {
+ // No bindings.
+ continue;
+ }
+ // Sort by index.
+ [descriptors sortUsingComparator:^NSComparisonResult(MTLArgumentDescriptor *a, MTLArgumentDescriptor *b) {
+ if (a.index < b.index) {
+ return NSOrderedAscending;
+ } else if (a.index > b.index) {
+ return NSOrderedDescending;
+ } else {
+ return NSOrderedSame;
+ }
+ }];
+
+ // The stage's encoded data starts at the current end of the set's
+ // buffer, which then grows by the encoder's encoded length.
+ id<MTLArgumentEncoder> enc = [device newArgumentEncoderWithArguments:descriptors];
+ set.encoders[stage] = enc;
+ set.offsets[stage] = set.buffer_size;
+ set.buffer_size += enc.encodedLength;
+ }
+ }
+
+ // Copy the specialization constants into the returned description.
+ r_shader_desc.specialization_constants.resize(binary_data.constants.size());
+ for (uint32_t i = 0; i < binary_data.constants.size(); i++) {
+ SpecializationConstantData &c = binary_data.constants[i];
+
+ ShaderSpecializationConstant sc;
+ sc.type = c.type;
+ sc.constant_id = c.constant_id;
+ sc.int_value = c.int_value;
+ sc.stages = c.stages;
+ r_shader_desc.specialization_constants.write[i] = sc;
+ }
+
+ // Build either a compute or a render shader object; push constant info is
+ // only set for stages that have an MSL binding recorded in the binary.
+ MDShader *shader = nullptr;
+ if (binary_data.is_compute) {
+ MDComputeShader *cs = new MDComputeShader(binary_data.shader_name, uniform_sets, libraries[ShaderStage::SHADER_STAGE_COMPUTE]);
+
+ uint32_t *binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_COMPUTE);
+ if (binding) {
+ cs->push_constants.size = binary_data.push_constant.size;
+ cs->push_constants.binding = *binding;
+ }
+
+ cs->local = MTLSizeMake(binary_data.compute_local_size.x, binary_data.compute_local_size.y, binary_data.compute_local_size.z);
+#if DEV_ENABLED
+ cs->kernel_source = binary_data.stages[0].source;
+#endif
+ shader = cs;
+ } else {
+ MDRenderShader *rs = new MDRenderShader(binary_data.shader_name, uniform_sets, libraries[ShaderStage::SHADER_STAGE_VERTEX], libraries[ShaderStage::SHADER_STAGE_FRAGMENT]);
+
+ uint32_t *vert_binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_VERTEX);
+ if (vert_binding) {
+ rs->push_constants.vert.size = binary_data.push_constant.size;
+ rs->push_constants.vert.binding = *vert_binding;
+ }
+ uint32_t *frag_binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_FRAGMENT);
+ if (frag_binding) {
+ rs->push_constants.frag.size = binary_data.push_constant.size;
+ rs->push_constants.frag.binding = *frag_binding;
+ }
+
+#if DEV_ENABLED
+ for (ShaderStageData &stage_data : binary_data.stages) {
+ if (stage_data.stage == ShaderStage::SHADER_STAGE_VERTEX) {
+ rs->vert_source = stage_data.source;
+ } else if (stage_data.stage == ShaderStage::SHADER_STAGE_FRAGMENT) {
+ rs->frag_source = stage_data.source;
+ }
+ }
+#endif
+ shader = rs;
+ }
+
+ // Remaining driver-agnostic fields of the description.
+ r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask;
+ r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask;
+ r_shader_desc.is_compute = binary_data.is_compute;
+ r_shader_desc.compute_local_size[0] = binary_data.compute_local_size.x;
+ r_shader_desc.compute_local_size[1] = binary_data.compute_local_size.y;
+ r_shader_desc.compute_local_size[2] = binary_data.compute_local_size.z;
+ r_shader_desc.push_constant_size = binary_data.push_constant.size;
+
+ return ShaderID(shader);
+}
+
+// Deletes the MDShader object that the given shader id refers to.
+void RenderingDeviceDriverMetal::shader_free(ShaderID p_shader) {
+ delete (MDShader *)p_shader.id;
+}
+
+// Currently a no-op: nothing is released for the given shader here yet.
+void RenderingDeviceDriverMetal::shader_destroy_modules(ShaderID p_shader) {
+ // TODO.
+}
+
+/*********************/
+/**** UNIFORM SET ****/
+/*********************/
+
+// Creates a uniform set object holding a copy of the bound uniforms and the
+// set index. `p_shader` is not used by this function.
+RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index) {
+ MDUniformSet *created = new MDUniformSet();
+
+ // Copy the view element by element into a Vector the set can keep.
+ Vector<BoundUniform> copied;
+ copied.resize(p_uniforms.size());
+ for (uint32_t idx = 0; idx < p_uniforms.size(); ++idx) {
+ copied.write[idx] = p_uniforms[idx];
+ }
+
+ created->uniforms = copied;
+ created->index = p_set_index;
+ return UniformSetID(created);
+}
+
+// Deletes the MDUniformSet object that the given uniform set id refers to.
+void RenderingDeviceDriverMetal::uniform_set_free(UniformSetID p_uniform_set) {
+ delete (MDUniformSet *)p_uniform_set.id;
+}
+
+// Intentionally empty: this driver performs no work in this hook.
+void RenderingDeviceDriverMetal::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
+}
+
+#pragma mark - Transfer
+
+// Encodes a blit command that fills `p_size` bytes of the buffer, starting at
+// `p_offset`, with zeros.
+void RenderingDeviceDriverMetal::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) {
+ MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
+ id<MTLBuffer> target = rid::get(p_buffer);
+
+ id<MTLBlitCommandEncoder> encoder = cb->blit_command_encoder();
+ NSRange fill_range = NSMakeRange(p_offset, p_size);
+ [encoder fillBuffer:target range:fill_range value:0];
+}
+
+// Encodes one buffer-to-buffer blit copy per region, in the order given.
+void RenderingDeviceDriverMetal::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView<BufferCopyRegion> p_regions) {
+ MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
+ id<MTLBuffer> source_buffer = rid::get(p_src_buffer);
+ id<MTLBuffer> target_buffer = rid::get(p_dst_buffer);
+
+ id<MTLBlitCommandEncoder> encoder = cb->blit_command_encoder();
+
+ for (uint32_t r = 0; r < p_regions.size(); ++r) {
+ const BufferCopyRegion &span = p_regions[r];
+ [encoder copyFromBuffer:source_buffer
+ sourceOffset:span.src_offset
+ toBuffer:target_buffer
+ destinationOffset:span.dst_offset
+ size:span.size];
+ }
+}
+
+// Converts a Vector3i into an MTLSize (x -> width, y -> height, z -> depth).
+MTLSize MTLSizeFromVector3i(Vector3i p_size) {
+ MTLSize converted = MTLSizeMake(p_size.x, p_size.y, p_size.z);
+ return converted;
+}
+
+ MTLOrigin MTLOriginFromVector3i(Vector3i p_origin) {
+ 	// Component mapping: x, y and z are carried over unchanged.
+ 	MTLOrigin mtl_origin = MTLOriginMake(p_origin.x, p_origin.y, p_origin.z);
+ 	return mtl_origin;
+ }
+
+ // Shrinks p_size on each axis so that origin + size does not go past p_max_size.
+ static inline MTLSize clampMTLSize(MTLSize p_size, MTLOrigin p_origin, MTLSize p_max_size) {
+ 	// Room left on each axis between the origin and the maximum extent.
+ 	const NSUInteger room_x = p_max_size.width - p_origin.x;
+ 	const NSUInteger room_y = p_max_size.height - p_origin.y;
+ 	const NSUInteger room_z = p_max_size.depth - p_origin.z;
+ 	return MTLSizeMake(MIN(p_size.width, room_x), MIN(p_size.height, room_y), MIN(p_size.depth, room_z));
+ }
+
+ void RenderingDeviceDriverMetal::command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<TextureCopyRegion> p_regions) { // Encodes texture-to-texture blit copies for every region in p_regions; the two layout parameters are not read here.
+ MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ id<MTLTexture> src = rid::get(p_src_texture);
+ id<MTLTexture> dst = rid::get(p_dst_texture);
+
+ id<MTLBlitCommandEncoder> blit = cmd->blit_command_encoder();
+ PixelFormats &pf = *pixel_formats;
+
+ MTLPixelFormat src_fmt = src.pixelFormat;
+ bool src_is_compressed = pf.getFormatType(src_fmt) == MTLFormatType::Compressed;
+ MTLPixelFormat dst_fmt = dst.pixelFormat;
+ bool dst_is_compressed = pf.getFormatType(dst_fmt) == MTLFormatType::Compressed;
+
+ // Validate copy: both textures must have the same sample count and the same bytes-per-block.
+ if (src.sampleCount != dst.sampleCount || pf.getBytesPerBlock(src_fmt) != pf.getBytesPerBlock(dst_fmt)) {
+ ERR_FAIL_MSG("Cannot copy between incompatible pixel formats, such as formats of different pixel sizes, or between images with different sample counts.");
+ }
+
+ // If source and destination have different formats and at least one is compressed, a temporary buffer is required.
+ bool need_tmp_buffer = (src_fmt != dst_fmt) && (src_is_compressed || dst_is_compressed);
+ if (need_tmp_buffer) {
+ ERR_FAIL_MSG("not implemented: copy with intermediate buffer"); // This path is rejected rather than handled.
+ }
+
+ if (src_fmt != dst_fmt) {
+ // Map the source pixel format to the dst through a texture view on the source texture.
+ src = [src newTextureViewWithPixelFormat:dst_fmt];
+ }
+
+ for (uint32_t i = 0; i < p_regions.size(); i++) {
+ TextureCopyRegion region = p_regions[i];
+
+ MTLSize extent = MTLSizeFromVector3i(region.size);
+
+ // If copies can be performed using direct texture-texture copying, do so.
+ uint32_t src_level = region.src_subresources.mipmap;
+ uint32_t src_base_layer = region.src_subresources.base_layer;
+ MTLSize src_extent = mipmapLevelSizeFromTexture(src, src_level);
+ uint32_t dst_level = region.dst_subresources.mipmap;
+ uint32_t dst_base_layer = region.dst_subresources.base_layer;
+ MTLSize dst_extent = mipmapLevelSizeFromTexture(dst, dst_level);
+
+ // All layers may be copied at once, if the extent completely covers both images.
+ if (src_extent == extent && dst_extent == extent) {
+ [blit copyFromTexture:src
+ sourceSlice:src_base_layer
+ sourceLevel:src_level
+ toTexture:dst
+ destinationSlice:dst_base_layer
+ destinationLevel:dst_level
+ sliceCount:region.src_subresources.layer_count
+ levelCount:1];
+ } else {
+ MTLOrigin src_origin = MTLOriginFromVector3i(region.src_offset);
+ MTLSize src_size = clampMTLSize(extent, src_origin, src_extent); // Keep the copied area inside the source mip level.
+ uint32_t layer_count = 0;
+ if ((src.textureType == MTLTextureType3D) != (dst.textureType == MTLTextureType3D)) {
+ // In this case (exactly one side is 3D), the number of layers to copy is in extent.depth. Use that value,
+ // then clamp the depth, so we don't try to copy more than Metal will allow.
+ layer_count = extent.depth;
+ src_size.depth = 1;
+ } else {
+ layer_count = region.src_subresources.layer_count;
+ }
+ MTLOrigin dst_origin = MTLOriginFromVector3i(region.dst_offset);
+
+ for (uint32_t layer = 0; layer < layer_count; layer++) {
+ // We can copy between a 3D and a 2D image easily. Just copy between
+ // one slice of the 2D image and one plane of the 3D image at a time.
+ if ((src.textureType == MTLTextureType3D) == (dst.textureType == MTLTextureType3D)) { // Same kind on both sides: advance the slice index on source and destination together.
+ [blit copyFromTexture:src
+ sourceSlice:src_base_layer + layer
+ sourceLevel:src_level
+ sourceOrigin:src_origin
+ sourceSize:src_size
+ toTexture:dst
+ destinationSlice:dst_base_layer + layer
+ destinationLevel:dst_level
+ destinationOrigin:dst_origin];
+ } else if (src.textureType == MTLTextureType3D) { // 3D source: step through source z planes, writing one destination slice per plane.
+ [blit copyFromTexture:src
+ sourceSlice:src_base_layer
+ sourceLevel:src_level
+ sourceOrigin:MTLOriginMake(src_origin.x, src_origin.y, src_origin.z + layer)
+ sourceSize:src_size
+ toTexture:dst
+ destinationSlice:dst_base_layer + layer
+ destinationLevel:dst_level
+ destinationOrigin:dst_origin];
+ } else { // 3D destination: read one source slice per iteration and write it to successive destination z planes.
+ DEV_ASSERT(dst.textureType == MTLTextureType3D);
+ [blit copyFromTexture:src
+ sourceSlice:src_base_layer + layer
+ sourceLevel:src_level
+ sourceOrigin:src_origin
+ sourceSize:src_size
+ toTexture:dst
+ destinationSlice:dst_base_layer
+ destinationLevel:dst_level
+ destinationOrigin:MTLOriginMake(dst_origin.x, dst_origin.y, dst_origin.z + layer)];
+ }
+ }
+ }
+ }
+ }
+
+ void RenderingDeviceDriverMetal::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) {
+ 	// The resolve is expressed as a render pass with a single color attachment that
+ 	// loads the source and stores with the multisample-resolve action into the destination.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	id<MTLTexture> resolve_source = rid::get(p_src_texture);
+ 	id<MTLTexture> resolve_target = rid::get(p_dst_texture);
+
+ 	MTLRenderPassDescriptor *pass_desc = [MTLRenderPassDescriptor renderPassDescriptor];
+ 	MTLRenderPassColorAttachmentDescriptor *color0 = pass_desc.colorAttachments[0];
+
+ 	// Source subresource.
+ 	color0.texture = resolve_source;
+ 	color0.level = p_src_mipmap;
+ 	color0.slice = p_src_layer;
+ 	color0.loadAction = MTLLoadActionLoad;
+
+ 	// Destination subresource.
+ 	color0.resolveTexture = resolve_target;
+ 	color0.resolveLevel = p_dst_mipmap;
+ 	color0.resolveSlice = p_dst_layer;
+ 	color0.storeAction = MTLStoreActionMultisampleResolve;
+
+ 	md_cmd->encodeRenderCommandEncoderWithDescriptor(pass_desc, @"Resolve Image");
+ }
+
+ void RenderingDeviceDriverMetal::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) { // Clears the selected mip levels and layers of a color texture to p_color by encoding render passes with a clear load action; p_texture_layout is not read here.
+ MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
+ id<MTLTexture> src_tex = rid::get(p_texture);
+
+ if (src_tex.parentTexture) {
+ // Clear via the parent texture rather than the view.
+ src_tex = src_tex.parentTexture;
+ }
+
+ PixelFormats &pf = *pixel_formats;
+
+ if (pf.isDepthFormat(src_tex.pixelFormat) || pf.isStencilFormat(src_tex.pixelFormat)) {
+ ERR_FAIL_MSG("invalid: depth or stencil texture format"); // Only color formats are accepted by this function.
+ }
+
+ MTLRenderPassDescriptor *desc = MTLRenderPassDescriptor.renderPassDescriptor;
+
+ if (p_subresources.aspect.has_flag(TEXTURE_ASPECT_COLOR_BIT)) { // Nothing is encoded when the color aspect bit is not requested.
+ MTLRenderPassColorAttachmentDescriptor *caDesc = desc.colorAttachments[0];
+ caDesc.texture = src_tex;
+ caDesc.loadAction = MTLLoadActionClear;
+ caDesc.storeAction = MTLStoreActionStore;
+ caDesc.clearColor = MTLClearColorMake(p_color.r, p_color.g, p_color.b, p_color.a);
+
+ // Extract the mipmap levels that are to be updated.
+ uint32_t mipLvlStart = p_subresources.base_mipmap;
+ uint32_t mipLvlCnt = p_subresources.mipmap_count;
+ uint32_t mipLvlEnd = mipLvlStart + mipLvlCnt;
+
+ uint32_t levelCount = src_tex.mipmapLevelCount;
+
+ // Extract the cube or array layers (slices) that are to be updated.
+ bool is3D = src_tex.textureType == MTLTextureType3D;
+ uint32_t layerStart = is3D ? 0 : p_subresources.base_layer; // 3D textures always start at depth plane 0.
+ uint32_t layerCnt = p_subresources.layer_count;
+ uint32_t layerEnd = layerStart + layerCnt;
+
+ MetalFeatures const &features = (*metal_device_properties).features;
+
+ // Iterate across mipmap levels and layers, and perform an empty render to clear each.
+ for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) {
+ ERR_FAIL_INDEX_MSG(mipLvl, levelCount, "mip level out of range"); // Aborts the whole function; passes already encoded for earlier levels are not undone here.
+
+ caDesc.level = mipLvl;
+
+ // If a 3D image, we need to get the depth for each level.
+ if (is3D) {
+ layerCnt = mipmapLevelSizeFromTexture(src_tex, mipLvl).depth;
+ layerEnd = layerStart + layerCnt;
+ }
+
+ if ((features.layeredRendering && src_tex.sampleCount == 1) || features.multisampleLayeredRendering) {
+ // We can clear all layers at once.
+ if (is3D) {
+ caDesc.depthPlane = layerStart;
+ } else {
+ caDesc.slice = layerStart;
+ }
+ desc.renderTargetArrayLength = layerCnt; // One layered pass covers layerCnt layers starting at layerStart.
+ cb->encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image");
+ } else { // No layered rendering for this sample count: encode one pass per layer or depth plane.
+ for (uint32_t layer = layerStart; layer < layerEnd; layer++) {
+ if (is3D) {
+ caDesc.depthPlane = layer;
+ } else {
+ caDesc.slice = layer;
+ }
+ cb->encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image");
+ }
+ }
+ }
+ }
+ }
+
+ API_AVAILABLE(macos(11.0), ios(14.0))
+ bool isArrayTexture(MTLTextureType p_type) {
+ 	// 3D textures are grouped with the 1D/2D array types; every other type yields false.
+ 	switch (p_type) {
+ 		case MTLTextureType3D:
+ 		case MTLTextureType2DArray:
+ 		case MTLTextureType2DMultisampleArray:
+ 		case MTLTextureType1DArray:
+ 			return true;
+ 		default:
+ 			return false;
+ 	}
+ }
+
+ void RenderingDeviceDriverMetal::_copy_texture_buffer(CommandBufferID p_cmd_buffer, // Shared implementation for buffer->texture and texture->buffer blit copies.
+ CopySource p_source, // Selects the direction: CopySource::Buffer copies buffer -> texture, anything else copies texture -> buffer.
+ TextureID p_texture,
+ BufferID p_buffer,
+ VectorView<BufferTextureCopyRegion> p_regions) {
+ MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ id<MTLBuffer> buffer = rid::get(p_buffer);
+ id<MTLTexture> texture = rid::get(p_texture);
+
+ id<MTLBlitCommandEncoder> enc = cmd->blit_command_encoder();
+
+ PixelFormats &pf = *pixel_formats;
+ MTLPixelFormat mtlPixFmt = texture.pixelFormat;
+
+ MTLBlitOption options = MTLBlitOptionNone;
+ if (pf.isPVRTCFormat(mtlPixFmt)) {
+ options |= MTLBlitOptionRowLinearPVRTC;
+ }
+
+ for (uint32_t i = 0; i < p_regions.size(); i++) {
+ BufferTextureCopyRegion region = p_regions[i];
+
+ uint32_t mip_level = region.texture_subresources.mipmap;
+ MTLOrigin txt_origin = MTLOriginMake(region.texture_offset.x, region.texture_offset.y, region.texture_offset.z);
+ MTLSize src_extent = mipmapLevelSizeFromTexture(texture, mip_level); // Extent of the texture's mip level, used in both copy directions despite the name.
+ MTLSize txt_size = clampMTLSize(MTLSizeMake(region.texture_region_size.x, region.texture_region_size.y, region.texture_region_size.z),
+ txt_origin,
+ src_extent);
+
+ uint32_t buffImgWd = region.texture_region_size.x; // Buffer-side strides are derived from the unclamped region size.
+ uint32_t buffImgHt = region.texture_region_size.y;
+
+ NSUInteger bytesPerRow = pf.getBytesPerRow(mtlPixFmt, buffImgWd);
+ NSUInteger bytesPerImg = pf.getBytesPerLayer(mtlPixFmt, bytesPerRow, buffImgHt);
+
+ MTLBlitOption blit_options = options; // Per-region copy so aspect flags do not leak into later regions.
+
+ if (pf.isDepthFormat(mtlPixFmt) && pf.isStencilFormat(mtlPixFmt)) { // Combined depth/stencil format: adjust strides when only one aspect is requested.
+ bool want_depth = flags::all(region.texture_subresources.aspect, TEXTURE_ASPECT_DEPTH_BIT);
+ bool want_stencil = flags::all(region.texture_subresources.aspect, TEXTURE_ASPECT_STENCIL_BIT);
+
+ // The stencil component is always 1 byte per pixel.
+ // Don't reduce depths of 32-bit depth/stencil formats.
+ if (want_depth && !want_stencil) {
+ if (pf.getBytesPerTexel(mtlPixFmt) != 4) {
+ bytesPerRow -= buffImgWd; // Drop the 1-byte-per-pixel stencil share from each row.
+ bytesPerImg -= buffImgWd * buffImgHt;
+ }
+ blit_options |= MTLBlitOptionDepthFromDepthStencil;
+ } else if (want_stencil && !want_depth) {
+ bytesPerRow = buffImgWd;
+ bytesPerImg = buffImgWd * buffImgHt;
+ blit_options |= MTLBlitOptionStencilFromDepthStencil;
+ }
+ }
+
+ if (!isArrayTexture(texture.textureType)) {
+ bytesPerImg = 0; // Note: this also zeroes the per-layer buffer offset step (bytesPerImg * lyrIdx) used below.
+ }
+
+ if (p_source == CopySource::Buffer) {
+ for (uint32_t lyrIdx = 0; lyrIdx < region.texture_subresources.layer_count; lyrIdx++) { // One blit per layer, buffer -> texture.
+ [enc copyFromBuffer:buffer
+ sourceOffset:region.buffer_offset + (bytesPerImg * lyrIdx)
+ sourceBytesPerRow:bytesPerRow
+ sourceBytesPerImage:bytesPerImg
+ sourceSize:txt_size
+ toTexture:texture
+ destinationSlice:region.texture_subresources.base_layer + lyrIdx
+ destinationLevel:mip_level
+ destinationOrigin:txt_origin
+ options:blit_options];
+ }
+ } else {
+ for (uint32_t lyrIdx = 0; lyrIdx < region.texture_subresources.layer_count; lyrIdx++) { // One blit per layer, texture -> buffer.
+ [enc copyFromTexture:texture
+ sourceSlice:region.texture_subresources.base_layer + lyrIdx
+ sourceLevel:mip_level
+ sourceOrigin:txt_origin
+ sourceSize:txt_size
+ toBuffer:buffer
+ destinationOffset:region.buffer_offset + (bytesPerImg * lyrIdx)
+ destinationBytesPerRow:bytesPerRow
+ destinationBytesPerImage:bytesPerImg
+ options:blit_options];
+ }
+ }
+ }
+ }
+
+ void RenderingDeviceDriverMetal::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<BufferTextureCopyRegion> p_regions) {
+ 	// The buffer is the source; the layout argument is not forwarded.
+ 	const CopySource direction = CopySource::Buffer;
+ 	_copy_texture_buffer(p_cmd_buffer, direction, p_dst_texture, p_src_buffer, p_regions);
+ }
+
+ void RenderingDeviceDriverMetal::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView<BufferTextureCopyRegion> p_regions) {
+ 	// The texture is the source; the layout argument is not forwarded.
+ 	const CopySource direction = CopySource::Texture;
+ 	_copy_texture_buffer(p_cmd_buffer, direction, p_src_texture, p_dst_buffer, p_regions);
+ }
+
+#pragma mark - Pipeline
+
+ void RenderingDeviceDriverMetal::pipeline_free(PipelineID p_pipeline_id) {
+ 	// The handle's id is reinterpreted as the MDPipeline pointer and destroyed.
+ 	delete (MDPipeline *)(p_pipeline_id.id);
+ }
+
+// ----- BINDING -----
+
+ void RenderingDeviceDriverMetal::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView<uint32_t> p_data) {
+ 	// The shader object encodes the data itself; p_dst_first_index is not read here.
+ 	MDShader *md_shader = (MDShader *)(p_shader.id);
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_shader->encode_push_constant_data(p_data, md_cmd);
+ }
+
+// ----- CACHE -----
+
+ String RenderingDeviceDriverMetal::_pipeline_get_cache_path() const {
+ 	// Layout: <user data dir>/metal/pipelines.<device tag>[.editor].cache
+ 	String cache_path = OS::get_singleton()->get_user_data_dir() + "/metal/pipelines";
+
+ 	// Device tag: the device name made filename-safe, spaces replaced by underscores, lower-cased.
+ 	const String device_tag = context_device.name.validate_filename().replace(" ", "_").to_lower();
+ 	cache_path += "." + device_tag;
+
+ 	const bool in_editor = Engine::get_singleton()->is_editor_hint();
+ 	cache_path += in_editor ? ".editor.cache" : ".cache";
+ 	return cache_path;
+ }
+
+ bool RenderingDeviceDriverMetal::pipeline_cache_create(const Vector<uint8_t> &p_data) { // As written, always reports failure; p_data is never read.
+ return false; // NOTE(review): unconditional early return - everything below is unreachable, so no binary archive is ever created. Confirm this is an intentional disable.
+ CharString path = _pipeline_get_cache_path().utf8();
+ NSString *nPath = [[NSString alloc] initWithBytesNoCopy:path.ptrw() // No-copy wrap: nPath borrows the bytes owned by `path`.
+ length:path.length()
+ encoding:NSUTF8StringEncoding
+ freeWhenDone:NO];
+ MTLBinaryArchiveDescriptor *desc = [MTLBinaryArchiveDescriptor new];
+ if ([[NSFileManager defaultManager] fileExistsAtPath:nPath]) {
+ desc.url = [NSURL fileURLWithPath:nPath]; // Point the descriptor at the existing cache file.
+ }
+ NSError *error = nil;
+ archive = [device newBinaryArchiveWithDescriptor:desc error:&error]; // NOTE(review): `error` is never inspected and true is returned regardless of the result.
+ return true;
+ }
+
+ void RenderingDeviceDriverMetal::pipeline_cache_free() { // Drops this driver's reference to the binary archive.
+ archive = nil;
+ }
+
+ size_t RenderingDeviceDriverMetal::pipeline_cache_query_size() { // Returns archive_count scaled by 1024.
+ return archive_count * 1024; // NOTE(review): presumably a rough 1 KiB-per-entry estimate rather than a measured size - confirm.
+ }
+
+ Vector<uint8_t> RenderingDeviceDriverMetal::pipeline_cache_serialize() {
+ 	// The archive is written directly to the cache file on disk, so the
+ 	// byte vector handed back to the caller is empty on every path.
+ 	Vector<uint8_t> no_data;
+ 	if (!archive) {
+ 		return no_data;
+ 	}
+
+ 	CharString utf8_path = _pipeline_get_cache_path().utf8();
+ 	// No-copy wrap: the NSString borrows the bytes owned by utf8_path.
+ 	NSString *ns_path = [[NSString alloc] initWithBytesNoCopy:utf8_path.ptrw()
+ 													   length:utf8_path.length()
+ 													 encoding:NSUTF8StringEncoding
+ 												 freeWhenDone:NO];
+ 	NSURL *destination = [NSURL fileURLWithPath:ns_path];
+
+ 	NSError *serialize_error = nil;
+ 	const bool written = [archive serializeToURL:destination error:&serialize_error];
+ 	if (!written) {
+ 		print_line(serialize_error.localizedDescription.UTF8String);
+ 	}
+ 	return no_data;
+ }
+
+#pragma mark - Rendering
+
+// ----- SUBPASS -----
+
+ RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView<Attachment> p_attachments, VectorView<Subpass> p_subpasses, VectorView<SubpassDependency> p_subpass_dependencies, uint32_t p_view_count) {
+ 	// Lookup tables from RDD attachment ops to Metal actions, indexed by the RDD enum value.
+ 	static const MTLLoadAction LOAD_ACTIONS[] = {
+ 		[ATTACHMENT_LOAD_OP_LOAD] = MTLLoadActionLoad,
+ 		[ATTACHMENT_LOAD_OP_CLEAR] = MTLLoadActionClear,
+ 		[ATTACHMENT_LOAD_OP_DONT_CARE] = MTLLoadActionDontCare,
+ 	};
+
+ 	static const MTLStoreAction STORE_ACTIONS[] = {
+ 		[ATTACHMENT_STORE_OP_STORE] = MTLStoreActionStore,
+ 		[ATTACHMENT_STORE_OP_DONT_CARE] = MTLStoreActionDontCare,
+ 	};
+
+ 	PixelFormats &formats = *pixel_formats;
+
+ 	// Mirror every subpass description into an MDSubpass.
+ 	// p_subpass_dependencies and p_view_count are not read here.
+ 	const size_t subpass_total = p_subpasses.size();
+ 	Vector<MDSubpass> md_subpasses;
+ 	md_subpasses.resize(subpass_total);
+ 	for (uint32_t sp = 0; sp < subpass_total; sp++) {
+ 		const Subpass &from = p_subpasses[sp];
+ 		MDSubpass &to = md_subpasses.write[sp];
+ 		to.subpass_index = sp;
+ 		to.input_references = from.input_references;
+ 		to.color_references = from.color_references;
+ 		to.depth_stencil_reference = from.depth_stencil_reference;
+ 		to.resolve_references = from.resolve_references;
+ 	}
+
+ 	// Translate every attachment: pixel format, sample count, load/store actions and type flags.
+ 	const uint32_t attachment_total = p_attachments.size();
+ 	Vector<MDAttachment> md_attachments;
+ 	md_attachments.resize(attachment_total);
+ 	for (uint32_t idx = 0; idx < attachment_total; idx++) {
+ 		const Attachment &from = p_attachments[idx];
+ 		MDAttachment &to = md_attachments.write[idx];
+
+ 		const MTLPixelFormat mtl_format = formats.getMTLPixelFormat(from.format);
+ 		to.format = mtl_format;
+ 		if (from.samples > TEXTURE_SAMPLES_1) {
+ 			to.samples = (*metal_device_properties).find_nearest_supported_sample_count(from.samples);
+ 		}
+ 		to.loadAction = LOAD_ACTIONS[from.load_op];
+ 		to.storeAction = STORE_ACTIONS[from.store_op];
+
+ 		const bool has_depth = formats.isDepthFormat(mtl_format);
+ 		const bool has_stencil = formats.isStencilFormat(mtl_format);
+ 		if (has_depth) {
+ 			to.type |= MDAttachmentType::Depth;
+ 		}
+ 		if (has_stencil) {
+ 			// Stencil load/store actions are only filled in for stencil-capable formats.
+ 			to.type |= MDAttachmentType::Stencil;
+ 			to.stencilLoadAction = LOAD_ACTIONS[from.stencil_load_op];
+ 			to.stencilStoreAction = STORE_ACTIONS[from.stencil_store_op];
+ 		}
+ 		if (!(has_depth || has_stencil)) {
+ 			to.type |= MDAttachmentType::Color;
+ 		}
+ 	}
+
+ 	MDRenderPass *render_pass = new MDRenderPass(md_attachments, md_subpasses);
+ 	return RenderPassID(render_pass);
+ }
+
+ void RenderingDeviceDriverMetal::render_pass_free(RenderPassID p_render_pass) {
+ 	// The handle's id is reinterpreted as the MDRenderPass pointer and destroyed.
+ 	delete (MDRenderPass *)(p_render_pass.id);
+ }
+
+// ----- COMMANDS -----
+
+ void RenderingDeviceDriverMetal::command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RenderPassClearValue> p_clear_values) {
+ 	// Recover the MDCommandBuffer from the handle and forward all arguments unchanged.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_begin_pass(p_render_pass, p_framebuffer, p_cmd_buffer_type, p_rect, p_clear_values);
+ }
+
+ void RenderingDeviceDriverMetal::command_end_render_pass(CommandBufferID p_cmd_buffer) {
+ 	// Recover the MDCommandBuffer from the handle and end its current pass.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_end_pass();
+ }
+
+ void RenderingDeviceDriverMetal::command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) {
+ 	// p_cmd_buffer_type is not forwarded; the command buffer advances on its own.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_next_subpass();
+ }
+
+ void RenderingDeviceDriverMetal::command_render_set_viewport(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_viewports) {
+ 	// Recover the MDCommandBuffer from the handle and forward the viewports.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_set_viewport(p_viewports);
+ }
+
+ void RenderingDeviceDriverMetal::command_render_set_scissor(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_scissors) {
+ 	// Recover the MDCommandBuffer from the handle and forward the scissor rects.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_set_scissor(p_scissors);
+ }
+
+ void RenderingDeviceDriverMetal::command_render_clear_attachments(CommandBufferID p_cmd_buffer, VectorView<AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) {
+ 	// Recover the MDCommandBuffer from the handle and forward the clears and rects.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_clear_attachments(p_attachment_clears, p_rects);
+ }
+
+ void RenderingDeviceDriverMetal::command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) {
+ 	// Recover the MDCommandBuffer from the handle and bind the pipeline on it.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->bind_pipeline(p_pipeline);
+ }
+
+ void RenderingDeviceDriverMetal::command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
+ 	// Recover the MDCommandBuffer from the handle and forward all arguments unchanged.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_bind_uniform_set(p_uniform_set, p_shader, p_set_index);
+ }
+
+ void RenderingDeviceDriverMetal::command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) {
+ 	// Recover the MDCommandBuffer from the handle and forward the draw parameters unchanged.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_draw(p_vertex_count, p_instance_count, p_base_vertex, p_first_instance);
+ }
+
+ void RenderingDeviceDriverMetal::command_render_draw_indexed(CommandBufferID p_cmd_buffer, uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index, int32_t p_vertex_offset, uint32_t p_first_instance) {
+ 	// Recover the MDCommandBuffer from the handle and forward the draw parameters unchanged.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_draw_indexed(p_index_count, p_instance_count, p_first_index, p_vertex_offset, p_first_instance);
+ }
+
+ void RenderingDeviceDriverMetal::command_render_draw_indexed_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
+ 	// Recover the MDCommandBuffer from the handle and forward the indirect draw parameters unchanged.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_draw_indexed_indirect(p_indirect_buffer, p_offset, p_draw_count, p_stride);
+ }
+
+ void RenderingDeviceDriverMetal::command_render_draw_indexed_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {
+ 	// Recover the MDCommandBuffer from the handle and forward the indirect draw parameters unchanged.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_draw_indexed_indirect_count(p_indirect_buffer, p_offset, p_count_buffer, p_count_buffer_offset, p_max_draw_count, p_stride);
+ }
+
+ void RenderingDeviceDriverMetal::command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
+ 	// Recover the MDCommandBuffer from the handle and forward the indirect draw parameters unchanged.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_draw_indirect(p_indirect_buffer, p_offset, p_draw_count, p_stride);
+ }
+
+ void RenderingDeviceDriverMetal::command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {
+ 	// Recover the MDCommandBuffer from the handle and forward the indirect draw parameters unchanged.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_draw_indirect_count(p_indirect_buffer, p_offset, p_count_buffer, p_count_buffer_offset, p_max_draw_count, p_stride);
+ }
+
+ void RenderingDeviceDriverMetal::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) {
+ 	// Recover the MDCommandBuffer from the handle and forward the binding arrays unchanged.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_bind_vertex_buffers(p_binding_count, p_buffers, p_offsets);
+ }
+
+ void RenderingDeviceDriverMetal::command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) {
+ 	// Recover the MDCommandBuffer from the handle and forward the index buffer binding unchanged.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_bind_index_buffer(p_buffer, p_format, p_offset);
+ }
+
+ void RenderingDeviceDriverMetal::command_render_set_blend_constants(CommandBufferID p_cmd_buffer, const Color &p_constants) {
+ 	// Recover the MDCommandBuffer from the handle and forward the blend constants.
+ 	MDCommandBuffer *md_cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
+ 	md_cmd->render_set_blend_constants(p_constants);
+ }
+
+ void RenderingDeviceDriverMetal::command_render_set_line_width(CommandBufferID p_cmd_buffer, float p_width) {
+ 	// A width of (approximately) 1.0 is accepted silently; nothing is encoded either way.
+ 	const bool is_unit_width = Math::is_equal_approx(p_width, 1.0f);
+ 	if (is_unit_width) {
+ 		return;
+ 	}
+ 	ERR_FAIL_MSG("Setting line widths other than 1.0 is not supported by the Metal rendering driver.");
+ }
+
+// ----- PIPELINE -----
+
+ // Looks up the function `p_name` in `p_library` and, when that function declares
+ // function constants, returns a variant specialized with the matching entries of
+ // `p_specialization_constants`.
+ //
+ // Returns ERR_CANT_CREATE when the library is missing, the function cannot be found,
+ // or the specialized variant cannot be built. If a matched constant has a type other
+ // than bool/float/int/uint, the unspecialized function is returned after logging an error.
+ //
+ // Both lookups use `p_name`, so the specialized variant always refers to the same
+ // function as the unspecialized one.
+ RenderingDeviceDriverMetal::Result<id<MTLFunction>> RenderingDeviceDriverMetal::_create_function(MDLibrary *p_library, NSString *p_name, VectorView<PipelineSpecializationConstant> &p_specialization_constants) {
+ 	id<MTLLibrary> library = p_library.library;
+ 	if (!library) {
+ 		ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Failed to compile Metal library");
+ 	}
+
+ 	id<MTLFunction> function = [library newFunctionWithName:p_name];
+ 	ERR_FAIL_NULL_V_MSG(function, ERR_CANT_CREATE, String("No function named ") + p_name.UTF8String);
+
+ 	// Nothing to specialize: hand back the plain function.
+ 	if (function.functionConstantsDictionary.count == 0) {
+ 		return function;
+ 	}
+
+ 	// Order the function's constants by index so they can be merged against the
+ 	// (sorted) specialization constants below. Skip the sort when already ordered.
+ 	NSArray<MTLFunctionConstant *> *constants = function.functionConstantsDictionary.allValues;
+ 	bool is_sorted = true;
+ 	for (uint32_t i = 1; i < constants.count; i++) {
+ 		if (constants[i - 1].index > constants[i].index) {
+ 			is_sorted = false;
+ 			break;
+ 		}
+ 	}
+
+ 	if (!is_sorted) {
+ 		constants = [constants sortedArrayUsingComparator:^NSComparisonResult(MTLFunctionConstant *a, MTLFunctionConstant *b) {
+ 			if (a.index < b.index) {
+ 				return NSOrderedAscending;
+ 			} else if (a.index > b.index) {
+ 				return NSOrderedDescending;
+ 			} else {
+ 				return NSOrderedSame;
+ 			}
+ 		}];
+ 	}
+
+ 	// Initialize an array of integers representing the indexes of p_specialization_constants
+ 	uint32_t *indexes = (uint32_t *)alloca(p_specialization_constants.size() * sizeof(uint32_t));
+ 	for (uint32_t i = 0; i < p_specialization_constants.size(); i++) {
+ 		indexes[i] = i;
+ 	}
+ 	// Sort the array of integers based on the values in p_specialization_constants
+ 	std::sort(indexes, &indexes[p_specialization_constants.size()], [&](uint32_t a, uint32_t b) {
+ 		return p_specialization_constants[a].constant_id < p_specialization_constants[b].constant_id;
+ 	});
+
+ 	// Merge-walk both sorted sequences, setting a value wherever the ids match.
+ 	MTLFunctionConstantValues *constantValues = [MTLFunctionConstantValues new];
+ 	uint32_t i = 0;
+ 	uint32_t j = 0;
+ 	while (i < constants.count && j < p_specialization_constants.size()) {
+ 		MTLFunctionConstant *curr = constants[i];
+ 		PipelineSpecializationConstant const &sc = p_specialization_constants[indexes[j]];
+ 		if (curr.index == sc.constant_id) {
+ 			switch (curr.type) {
+ 				case MTLDataTypeBool:
+ 				case MTLDataTypeFloat:
+ 				case MTLDataTypeInt:
+ 				case MTLDataTypeUInt: {
+ 					// The address of int_value is passed for every accepted type.
+ 					[constantValues setConstantValue:&sc.int_value
+ 												type:curr.type
+ 											 atIndex:sc.constant_id];
+ 				} break;
+ 				default:
+ 					ERR_FAIL_V_MSG(function, "Invalid specialization constant type");
+ 			}
+ 			i++;
+ 			j++;
+ 		} else if (curr.index < sc.constant_id) {
+ 			i++;
+ 		} else {
+ 			j++;
+ 		}
+ 	}
+
+ 	// A leftover function constant may be the driver's own alignment constant,
+ 	// which is not supplied through p_specialization_constants.
+ 	if (i != constants.count) {
+ 		MTLFunctionConstant *curr = constants[i];
+ 		if (curr.index == R32UI_ALIGNMENT_CONSTANT_ID) {
+ 			uint32_t alignment = 16; // TODO(sgc): is this always correct?
+ 			[constantValues setConstantValue:&alignment
+ 										type:curr.type
+ 									 atIndex:curr.index];
+ 			i++;
+ 		}
+ 	}
+
+ 	NSError *err = nil;
+ 	function = [library newFunctionWithName:p_name
+ 							 constantValues:constantValues
+ 									  error:&err];
+ 	ERR_FAIL_NULL_V_MSG(function, ERR_CANT_CREATE, String("specialized function failed: ") + err.localizedDescription.UTF8String);
+
+ 	return function;
+ }
+
+ // RDD::PolygonCullMode == MTLCullMode: compile-time checks, one per member, that the RDD enumerator matches the Metal one.
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_DISABLED, MTLCullModeNone));
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_FRONT, MTLCullModeFront));
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_BACK, MTLCullModeBack));
+
+ // RDD::StencilOperation == MTLStencilOperation: same per-member compile-time check for the stencil operations.
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_KEEP, MTLStencilOperationKeep));
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_ZERO, MTLStencilOperationZero));
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_REPLACE, MTLStencilOperationReplace));
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_CLAMP, MTLStencilOperationIncrementClamp));
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_CLAMP, MTLStencilOperationDecrementClamp));
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INVERT, MTLStencilOperationInvert));
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_WRAP, MTLStencilOperationIncrementWrap));
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_WRAP, MTLStencilOperationDecrementWrap));
+
+ // RDD::BlendOperation == MTLBlendOperation: same per-member compile-time check for the blend operations.
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_ADD, MTLBlendOperationAdd));
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_SUBTRACT, MTLBlendOperationSubtract));
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_REVERSE_SUBTRACT, MTLBlendOperationReverseSubtract));
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MINIMUM, MTLBlendOperationMin));
+ static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MAXIMUM, MTLBlendOperationMax));
+
+RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create(
+ ShaderID p_shader,
+ VertexFormatID p_vertex_format,
+ RenderPrimitive p_render_primitive,
+ PipelineRasterizationState p_rasterization_state,
+ PipelineMultisampleState p_multisample_state,
+ PipelineDepthStencilState p_depth_stencil_state,
+ PipelineColorBlendState p_blend_state,
+ VectorView<int32_t> p_color_attachments,
+ BitField<PipelineDynamicStateFlags> p_dynamic_state,
+ RenderPassID p_render_pass,
+ uint32_t p_render_subpass,
+ VectorView<PipelineSpecializationConstant> p_specialization_constants) {
+ MDRenderShader *shader = (MDRenderShader *)(p_shader.id);
+ MTLVertexDescriptor *vert_desc = rid::get(p_vertex_format);
+ MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id);
+
+ os_signpost_id_t reflect_id = os_signpost_id_make_with_pointer(LOG_INTERVALS, shader);
+ os_signpost_interval_begin(LOG_INTERVALS, reflect_id, "render_pipeline_create", "shader_name=%{public}s", shader->name.get_data());
+ DEFER([=]() {
+ os_signpost_interval_end(LOG_INTERVALS, reflect_id, "render_pipeline_create");
+ });
+
+ os_signpost_event_emit(LOG_DRIVER, OS_SIGNPOST_ID_EXCLUSIVE, "create_pipeline");
+
+ MTLRenderPipelineDescriptor *desc = [MTLRenderPipelineDescriptor new];
+
+ {
+ MDSubpass const &subpass = pass->subpasses[p_render_subpass];
+ for (uint32_t i = 0; i < subpass.color_references.size(); i++) {
+ uint32_t attachment = subpass.color_references[i].attachment;
+ if (attachment != AttachmentReference::UNUSED) {
+ MDAttachment const &a = pass->attachments[attachment];
+ desc.colorAttachments[i].pixelFormat = a.format;
+ }
+ }
+
+ if (subpass.depth_stencil_reference.attachment != AttachmentReference::UNUSED) {
+ uint32_t attachment = subpass.depth_stencil_reference.attachment;
+ MDAttachment const &a = pass->attachments[attachment];
+
+ if (a.type & MDAttachmentType::Depth) {
+ desc.depthAttachmentPixelFormat = a.format;
+ }
+
+ if (a.type & MDAttachmentType::Stencil) {
+ desc.stencilAttachmentPixelFormat = a.format;
+ }
+ }
+ }
+
+ desc.vertexDescriptor = vert_desc;
+ desc.label = [NSString stringWithUTF8String:shader->name.get_data()];
+
+ // Input assembly & tessellation.
+
+ MDRenderPipeline *pipeline = new MDRenderPipeline();
+
+ switch (p_render_primitive) {
+ case RENDER_PRIMITIVE_POINTS:
+ desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassPoint;
+ break;
+ case RENDER_PRIMITIVE_LINES:
+ case RENDER_PRIMITIVE_LINES_WITH_ADJACENCY:
+ case RENDER_PRIMITIVE_LINESTRIPS_WITH_ADJACENCY:
+ case RENDER_PRIMITIVE_LINESTRIPS:
+ desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassLine;
+ break;
+ case RENDER_PRIMITIVE_TRIANGLES:
+ case RENDER_PRIMITIVE_TRIANGLE_STRIPS:
+ case RENDER_PRIMITIVE_TRIANGLES_WITH_ADJACENCY:
+ case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_AJACENCY:
+ case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX:
+ desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassTriangle;
+ break;
+ case RENDER_PRIMITIVE_TESSELATION_PATCH:
+ desc.maxTessellationFactor = p_rasterization_state.patch_control_points;
+ desc.tessellationPartitionMode = MTLTessellationPartitionModeInteger;
+ ERR_FAIL_V_MSG(PipelineID(), "tessellation not implemented");
+ break;
+ case RENDER_PRIMITIVE_MAX:
+ default:
+ desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassUnspecified;
+ break;
+ }
+
+ switch (p_render_primitive) {
+ case RENDER_PRIMITIVE_POINTS:
+ pipeline->raster_state.render_primitive = MTLPrimitiveTypePoint;
+ break;
+ case RENDER_PRIMITIVE_LINES:
+ case RENDER_PRIMITIVE_LINES_WITH_ADJACENCY:
+ pipeline->raster_state.render_primitive = MTLPrimitiveTypeLine;
+ break;
+ case RENDER_PRIMITIVE_LINESTRIPS:
+ case RENDER_PRIMITIVE_LINESTRIPS_WITH_ADJACENCY:
+ pipeline->raster_state.render_primitive = MTLPrimitiveTypeLineStrip;
+ break;
+ case RENDER_PRIMITIVE_TRIANGLES:
+ case RENDER_PRIMITIVE_TRIANGLES_WITH_ADJACENCY:
+ pipeline->raster_state.render_primitive = MTLPrimitiveTypeTriangle;
+ break;
+ case RENDER_PRIMITIVE_TRIANGLE_STRIPS:
+ case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_AJACENCY:
+ case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX:
+ pipeline->raster_state.render_primitive = MTLPrimitiveTypeTriangleStrip;
+ break;
+ default:
+ break;
+ }
+
+ // Rasterization.
+ desc.rasterizationEnabled = !p_rasterization_state.discard_primitives;
+ pipeline->raster_state.clip_mode = p_rasterization_state.enable_depth_clamp ? MTLDepthClipModeClamp : MTLDepthClipModeClip;
+ pipeline->raster_state.fill_mode = p_rasterization_state.wireframe ? MTLTriangleFillModeLines : MTLTriangleFillModeFill;
+
+ static const MTLCullMode CULL_MODE[3] = {
+ MTLCullModeNone,
+ MTLCullModeFront,
+ MTLCullModeBack,
+ };
+ pipeline->raster_state.cull_mode = CULL_MODE[p_rasterization_state.cull_mode];
+ pipeline->raster_state.winding = (p_rasterization_state.front_face == POLYGON_FRONT_FACE_CLOCKWISE) ? MTLWindingClockwise : MTLWindingCounterClockwise;
+ pipeline->raster_state.depth_bias.enabled = p_rasterization_state.depth_bias_enabled;
+ pipeline->raster_state.depth_bias.depth_bias = p_rasterization_state.depth_bias_constant_factor;
+ pipeline->raster_state.depth_bias.slope_scale = p_rasterization_state.depth_bias_slope_factor;
+ pipeline->raster_state.depth_bias.clamp = p_rasterization_state.depth_bias_clamp;
+ // In Metal there is no line width.
+ if (!Math::is_equal_approx(p_rasterization_state.line_width, 1.0f)) {
+ WARN_PRINT("unsupported: line width");
+ }
+
+ // Multisample.
+ if (p_multisample_state.enable_sample_shading) {
+ WARN_PRINT("unsupported: multi-sample shading");
+ }
+
+ if (p_multisample_state.sample_count > TEXTURE_SAMPLES_1) {
+ pipeline->sample_count = (*metal_device_properties).find_nearest_supported_sample_count(p_multisample_state.sample_count);
+ }
+ desc.rasterSampleCount = static_cast<NSUInteger>(pipeline->sample_count);
+ desc.alphaToCoverageEnabled = p_multisample_state.enable_alpha_to_coverage;
+ desc.alphaToOneEnabled = p_multisample_state.enable_alpha_to_one;
+
+ // Depth stencil.
+ if (p_depth_stencil_state.enable_depth_test && desc.depthAttachmentPixelFormat != MTLPixelFormatInvalid) {
+ pipeline->raster_state.depth_test.enabled = true;
+ MTLDepthStencilDescriptor *ds_desc = [MTLDepthStencilDescriptor new];
+ ds_desc.depthWriteEnabled = p_depth_stencil_state.enable_depth_write;
+ ds_desc.depthCompareFunction = COMPARE_OPERATORS[p_depth_stencil_state.depth_compare_operator];
+ if (p_depth_stencil_state.enable_depth_range) {
+ WARN_PRINT("unsupported: depth range");
+ }
+
+ if (p_depth_stencil_state.enable_stencil) {
+ pipeline->raster_state.stencil.front_reference = p_depth_stencil_state.front_op.reference;
+ pipeline->raster_state.stencil.back_reference = p_depth_stencil_state.back_op.reference;
+
+ {
+ // Front.
+ MTLStencilDescriptor *sd = [MTLStencilDescriptor new];
+ sd.stencilFailureOperation = STENCIL_OPERATIONS[p_depth_stencil_state.front_op.fail];
+ sd.depthStencilPassOperation = STENCIL_OPERATIONS[p_depth_stencil_state.front_op.pass];
+ sd.depthFailureOperation = STENCIL_OPERATIONS[p_depth_stencil_state.front_op.depth_fail];
+ sd.stencilCompareFunction = COMPARE_OPERATORS[p_depth_stencil_state.front_op.compare];
+ sd.readMask = p_depth_stencil_state.front_op.compare_mask;
+ sd.writeMask = p_depth_stencil_state.front_op.write_mask;
+ ds_desc.frontFaceStencil = sd;
+ }
+ {
+ // Back.
+ MTLStencilDescriptor *sd = [MTLStencilDescriptor new];
+ sd.stencilFailureOperation = STENCIL_OPERATIONS[p_depth_stencil_state.back_op.fail];
+ sd.depthStencilPassOperation = STENCIL_OPERATIONS[p_depth_stencil_state.back_op.pass];
+ sd.depthFailureOperation = STENCIL_OPERATIONS[p_depth_stencil_state.back_op.depth_fail];
+ sd.stencilCompareFunction = COMPARE_OPERATORS[p_depth_stencil_state.back_op.compare];
+ sd.readMask = p_depth_stencil_state.back_op.compare_mask;
+ sd.writeMask = p_depth_stencil_state.back_op.write_mask;
+ ds_desc.backFaceStencil = sd;
+ }
+ }
+
+ pipeline->depth_stencil = [device newDepthStencilStateWithDescriptor:ds_desc];
+ ERR_FAIL_NULL_V_MSG(pipeline->depth_stencil, PipelineID(), "Failed to create depth stencil state");
+ } else {
+ // TODO(sgc): FB13671991 raised as Apple docs state calling setDepthStencilState:nil is valid, but currently generates an exception
+ pipeline->depth_stencil = get_resource_cache().get_depth_stencil_state(false, false);
+ }
+
+ // Blend state.
+ {
+ for (uint32_t i = 0; i < p_color_attachments.size(); i++) {
+ if (p_color_attachments[i] == ATTACHMENT_UNUSED) {
+ continue;
+ }
+
+ const PipelineColorBlendState::Attachment &bs = p_blend_state.attachments[i];
+
+ MTLRenderPipelineColorAttachmentDescriptor *ca_desc = desc.colorAttachments[p_color_attachments[i]];
+ ca_desc.blendingEnabled = bs.enable_blend;
+
+ ca_desc.sourceRGBBlendFactor = BLEND_FACTORS[bs.src_color_blend_factor];
+ ca_desc.destinationRGBBlendFactor = BLEND_FACTORS[bs.dst_color_blend_factor];
+ ca_desc.rgbBlendOperation = BLEND_OPERATIONS[bs.color_blend_op];
+
+ ca_desc.sourceAlphaBlendFactor = BLEND_FACTORS[bs.src_alpha_blend_factor];
+ ca_desc.destinationAlphaBlendFactor = BLEND_FACTORS[bs.dst_alpha_blend_factor];
+ ca_desc.alphaBlendOperation = BLEND_OPERATIONS[bs.alpha_blend_op];
+
+ ca_desc.writeMask = MTLColorWriteMaskNone;
+ if (bs.write_r) {
+ ca_desc.writeMask |= MTLColorWriteMaskRed;
+ }
+ if (bs.write_g) {
+ ca_desc.writeMask |= MTLColorWriteMaskGreen;
+ }
+ if (bs.write_b) {
+ ca_desc.writeMask |= MTLColorWriteMaskBlue;
+ }
+ if (bs.write_a) {
+ ca_desc.writeMask |= MTLColorWriteMaskAlpha;
+ }
+ }
+
+ pipeline->raster_state.blend.r = p_blend_state.blend_constant.r;
+ pipeline->raster_state.blend.g = p_blend_state.blend_constant.g;
+ pipeline->raster_state.blend.b = p_blend_state.blend_constant.b;
+ pipeline->raster_state.blend.a = p_blend_state.blend_constant.a;
+ }
+
+ // Dynamic state.
+
+ if (p_dynamic_state.has_flag(DYNAMIC_STATE_DEPTH_BIAS)) {
+ pipeline->raster_state.depth_bias.enabled = true;
+ }
+
+ if (p_dynamic_state.has_flag(DYNAMIC_STATE_BLEND_CONSTANTS)) {
+ pipeline->raster_state.blend.enabled = true;
+ }
+
+ if (p_dynamic_state.has_flag(DYNAMIC_STATE_DEPTH_BOUNDS)) {
+ // TODO(sgc): ??
+ }
+
+ if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
+ // TODO(sgc): ??
+ }
+
+ if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
+ // TODO(sgc): ??
+ }
+
+ if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_REFERENCE)) {
+ pipeline->raster_state.stencil.enabled = true;
+ }
+
+ if (shader->vert != nil) {
+ Result<id<MTLFunction>> function_or_err = _create_function(shader->vert, @"main0", p_specialization_constants);
+ ERR_FAIL_COND_V(std::holds_alternative<Error>(function_or_err), PipelineID());
+ desc.vertexFunction = std::get<id<MTLFunction>>(function_or_err);
+ }
+
+ if (shader->frag != nil) {
+ Result<id<MTLFunction>> function_or_err = _create_function(shader->frag, @"main0", p_specialization_constants);
+ ERR_FAIL_COND_V(std::holds_alternative<Error>(function_or_err), PipelineID());
+ desc.fragmentFunction = std::get<id<MTLFunction>>(function_or_err);
+ }
+
+ if (archive) {
+ desc.binaryArchives = @[ archive ];
+ }
+
+ NSError *error = nil;
+ pipeline->state = [device newRenderPipelineStateWithDescriptor:desc
+ error:&error];
+ pipeline->shader = shader;
+
+ ERR_FAIL_COND_V_MSG(error != nil, PipelineID(), ([NSString stringWithFormat:@"error creating pipeline: %@", error.localizedDescription].UTF8String));
+
+ if (archive) {
+ if ([archive addRenderPipelineFunctionsWithDescriptor:desc error:&error]) {
+ archive_count += 1;
+ } else {
+ print_error(error.localizedDescription.UTF8String);
+ }
+ }
+
+ return PipelineID(pipeline);
+}
+
+#pragma mark - Compute
+
+// ----- COMMANDS -----
+
+// Binds `p_pipeline` on the command buffer identified by `p_cmd_buffer`.
+void RenderingDeviceDriverMetal::command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) {
+	// The driver ID carries the address of the MDCommandBuffer wrapper.
+	MDCommandBuffer *command_buffer = reinterpret_cast<MDCommandBuffer *>(p_cmd_buffer.id);
+	command_buffer->bind_pipeline(p_pipeline);
+}
+
+// Forwards a compute uniform-set binding (`p_uniform_set` for `p_shader` at
+// index `p_set_index`) to the command buffer identified by `p_cmd_buffer`.
+void RenderingDeviceDriverMetal::command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
+	// The driver ID carries the address of the MDCommandBuffer wrapper.
+	MDCommandBuffer *command_buffer = reinterpret_cast<MDCommandBuffer *>(p_cmd_buffer.id);
+	command_buffer->compute_bind_uniform_set(p_uniform_set, p_shader, p_set_index);
+}
+
+// Forwards a compute dispatch of `p_x_groups` x `p_y_groups` x `p_z_groups`
+// groups to the command buffer identified by `p_cmd_buffer`.
+void RenderingDeviceDriverMetal::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
+	// The driver ID carries the address of the MDCommandBuffer wrapper.
+	MDCommandBuffer *command_buffer = reinterpret_cast<MDCommandBuffer *>(p_cmd_buffer.id);
+	command_buffer->compute_dispatch(p_x_groups, p_y_groups, p_z_groups);
+}
+
+// Forwards an indirect compute dispatch, using `p_indirect_buffer` at
+// `p_offset`, to the command buffer identified by `p_cmd_buffer`.
+void RenderingDeviceDriverMetal::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) {
+	// The driver ID carries the address of the MDCommandBuffer wrapper.
+	MDCommandBuffer *command_buffer = reinterpret_cast<MDCommandBuffer *>(p_cmd_buffer.id);
+	command_buffer->compute_dispatch_indirect(p_indirect_buffer, p_offset);
+}
+
+// ----- PIPELINE -----
+
+// Creates a compute pipeline for the compute shader `p_shader`, specialized
+// with `p_specialization_constants`.
+//
+// Returns a PipelineID wrapping a newly allocated MDComputePipeline, or an
+// empty PipelineID() when the kernel function or the pipeline state cannot be
+// created. When a binary archive is present, the pipeline functions are also
+// added to it; a failure to do so is only logged.
+RDD::PipelineID RenderingDeviceDriverMetal::compute_pipeline_create(ShaderID p_shader, VectorView<PipelineSpecializationConstant> p_specialization_constants) {
+	MDComputeShader *shader = (MDComputeShader *)(p_shader.id);
+
+	// Open a signpost interval for this call; the deferred lambda closes it.
+	os_signpost_id_t reflect_id = os_signpost_id_make_with_pointer(LOG_INTERVALS, shader);
+	os_signpost_interval_begin(LOG_INTERVALS, reflect_id, "compute_pipeline_create", "shader_name=%{public}s", shader->name.get_data());
+	DEFER([=]() {
+		os_signpost_interval_end(LOG_INTERVALS, reflect_id, "compute_pipeline_create");
+	});
+
+	os_signpost_event_emit(LOG_DRIVER, OS_SIGNPOST_ID_EXCLUSIVE, "create_pipeline");
+
+	Result<id<MTLFunction>> function_or_err = _create_function(shader->kernel, @"main0", p_specialization_constants);
+	ERR_FAIL_COND_V(std::holds_alternative<Error>(function_or_err), PipelineID());
+	id<MTLFunction> function = std::get<id<MTLFunction>>(function_or_err);
+
+	MTLComputePipelineDescriptor *desc = [MTLComputePipelineDescriptor new];
+	desc.computeFunction = function;
+	if (archive) {
+		desc.binaryArchives = @[ archive ];
+	}
+
+	NSError *error = nil;
+	id<MTLComputePipelineState> state = [device newComputePipelineStateWithDescriptor:desc
+																			  options:MTLPipelineOptionNone
+																		   reflection:nil
+																				error:&error];
+	// Cocoa convention: failure is signalled by the nil return value; the
+	// NSError out-parameter is only meaningful once the call has failed.
+	ERR_FAIL_NULL_V_MSG(state, PipelineID(), ([NSString stringWithFormat:@"error creating pipeline: %@", error.localizedDescription].UTF8String));
+
+	MDComputePipeline *pipeline = new MDComputePipeline(state);
+	pipeline->compute_state.local = shader->local;
+	pipeline->shader = shader;
+
+	if (archive) {
+		if ([archive addComputePipelineFunctionsWithDescriptor:desc error:&error]) {
+			archive_count += 1;
+		} else {
+			// Not fatal: the pipeline itself was created successfully.
+			print_error(error.localizedDescription.UTF8String);
+		}
+	}
+
+	return PipelineID(pipeline);
+}
+
+#pragma mark - Queries
+
+// ----- TIMESTAMP -----
+
+RDD::QueryPoolID RenderingDeviceDriverMetal::timestamp_query_pool_create(uint32_t p_query_count) { // Returns a fixed placeholder pool ID; p_query_count is not used.
+ return QueryPoolID(1);
+}
+
+void RenderingDeviceDriverMetal::timestamp_query_pool_free(QueryPoolID p_pool_id) { // Empty implementation: p_pool_id is ignored.
+}
+
+void RenderingDeviceDriverMetal::timestamp_query_pool_get_results(QueryPoolID p_pool_id, uint32_t p_query_count, uint64_t *r_results) { // Zero-fills the first p_query_count entries of r_results; p_pool_id is ignored.
+ // Metal doesn't support timestamp queries, so we just clear the buffer.
+ bzero(r_results, p_query_count * sizeof(uint64_t));
+}
+
+uint64_t RenderingDeviceDriverMetal::timestamp_query_result_to_time(uint64_t p_result) { // Identity conversion: returns p_result unchanged.
+ return p_result;
+}
+
+void RenderingDeviceDriverMetal::command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) { // Empty implementation: all arguments are ignored.
+}
+
+void RenderingDeviceDriverMetal::command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) { // Empty implementation: all arguments are ignored.
+}
+
+#pragma mark - Labels
+
+// Pushes a debug group named `p_label_name` on the Metal command buffer
+// behind `p_cmd_buffer`. `p_color` is not used.
+void RenderingDeviceDriverMetal::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) {
+	MDCommandBuffer *command_buffer = reinterpret_cast<MDCommandBuffer *>(p_cmd_buffer.id);
+
+	// Wrap the caller's bytes without copying them; freeWhenDone:NO leaves
+	// ownership of the buffer with the caller.
+	void *label_bytes = const_cast<char *>(p_label_name);
+	NSString *label = [[NSString alloc] initWithBytesNoCopy:label_bytes
+													 length:strlen(p_label_name)
+												   encoding:NSUTF8StringEncoding
+											   freeWhenDone:NO];
+	[command_buffer->get_command_buffer() pushDebugGroup:label];
+}
+
+// Pops the most recent debug group from the Metal command buffer behind
+// `p_cmd_buffer`.
+void RenderingDeviceDriverMetal::command_end_label(CommandBufferID p_cmd_buffer) {
+	MDCommandBuffer *command_buffer = reinterpret_cast<MDCommandBuffer *>(p_cmd_buffer.id);
+	[command_buffer->get_command_buffer() popDebugGroup];
+}
+
+#pragma mark - Debug
+
+void RenderingDeviceDriverMetal::command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) {
+ // TODO: Implement. Currently a no-op: p_data is not recorded anywhere.
+}
+
+#pragma mark - Submission
+
+void RenderingDeviceDriverMetal::begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) { // Empty implementation: both arguments are ignored.
+}
+
+void RenderingDeviceDriverMetal::end_segment() { // Empty implementation.
+}
+
+#pragma mark - Misc
+
+// Assigns the debug label `p_name` to the driver object `p_driver_id`, whose
+// kind is given by `p_type`.
+//
+// Textures and buffers are labelled directly. Shaders label their Metal
+// libraries, and uniform sets label every bound argument buffer. Samplers and
+// pipelines are left untouched because their label cannot be changed after
+// creation. Any other type trips DEV_ASSERT.
+void RenderingDeviceDriverMetal::set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) {
+	switch (p_type) {
+		case OBJECT_TYPE_TEXTURE: {
+			id<MTLTexture> tex = rid::get(p_driver_id);
+			tex.label = [NSString stringWithUTF8String:p_name.utf8().get_data()];
+		} break;
+		case OBJECT_TYPE_SAMPLER: {
+			// Can't set label after creation.
+		} break;
+		case OBJECT_TYPE_BUFFER: {
+			id<MTLBuffer> buffer = rid::get(p_driver_id);
+			buffer.label = [NSString stringWithUTF8String:p_name.utf8().get_data()];
+		} break;
+		case OBJECT_TYPE_SHADER: {
+			NSString *label = [NSString stringWithUTF8String:p_name.utf8().get_data()];
+			MDShader *shader = (MDShader *)(p_driver_id.id);
+			if (MDRenderShader *rs = dynamic_cast<MDRenderShader *>(shader); rs != nullptr) {
+				[rs->vert setLabel:label];
+				[rs->frag setLabel:label];
+			} else if (MDComputeShader *cs = dynamic_cast<MDComputeShader *>(shader); cs != nullptr) {
+				[cs->kernel setLabel:label];
+			} else {
+				DEV_ASSERT(false);
+			}
+		} break;
+		case OBJECT_TYPE_UNIFORM_SET: {
+			MDUniformSet *set = (MDUniformSet *)(p_driver_id.id);
+			// Convert the name once: it is identical for every bound buffer.
+			NSString *label = [NSString stringWithUTF8String:p_name.utf8().get_data()];
+			for (KeyValue<MDShader *, BoundUniformSet> &keyval : set->bound_uniforms) {
+				keyval.value.buffer.label = label;
+			}
+		} break;
+		case OBJECT_TYPE_PIPELINE: {
+			// Can't set label after creation.
+		} break;
+		default: {
+			DEV_ASSERT(false);
+		} break;
+	}
+}
+
+// Returns the native handle for the resource `p_driver_id` of kind `p_type`.
+// Textures, texture views, samplers and buffers report their driver ID; every
+// other resource kind reports 0.
+uint64_t RenderingDeviceDriverMetal::get_resource_native_handle(DriverResource p_type, ID p_driver_id) {
+	switch (p_type) {
+		// Kinds whose driver ID is returned as the handle.
+		case DRIVER_RESOURCE_TEXTURE:
+		case DRIVER_RESOURCE_TEXTURE_VIEW:
+		case DRIVER_RESOURCE_SAMPLER:
+		case DRIVER_RESOURCE_BUFFER:
+			return p_driver_id.id;
+
+		// Kinds for which no handle is reported.
+		case DRIVER_RESOURCE_LOGICAL_DEVICE:
+		case DRIVER_RESOURCE_PHYSICAL_DEVICE:
+		case DRIVER_RESOURCE_TOPMOST_OBJECT:
+		case DRIVER_RESOURCE_COMMAND_QUEUE:
+		case DRIVER_RESOURCE_QUEUE_FAMILY:
+		case DRIVER_RESOURCE_TEXTURE_DATA_FORMAT:
+		case DRIVER_RESOURCE_UNIFORM_SET:
+		case DRIVER_RESOURCE_COMPUTE_PIPELINE:
+		case DRIVER_RESOURCE_RENDER_PIPELINE:
+		default:
+			return 0;
+	}
+}
+
+// Returns the Metal device's `currentAllocatedSize`.
+uint64_t RenderingDeviceDriverMetal::get_total_memory_used() {
+	return [device currentAllocatedSize];
+}
+
+// Returns the numeric value of the device limit `p_limit`.
+//
+// Most limits are read from the MetalLimits held by `metal_device_properties`.
+// The VRS texel limits have no known value and report 1 << 30, with a one-time
+// warning in DEV_ENABLED builds. Any limit not listed here is reported through
+// ERR_FAIL_V and yields 0.
+uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
+	MetalDeviceProperties const &props = (*metal_device_properties);
+	MetalLimits const &limits = props.limits;
+
+// Emits the `case` for a limit with no known value. Both variants leave the
+// trailing semicolon to the call site; the macro is undefined again below.
+#if defined(DEV_ENABLED)
+#define UNKNOWN(NAME) \
+	case NAME: \
+		WARN_PRINT_ONCE("Returning maximum value for unknown limit " #NAME "."); \
+		return (uint64_t)1 << 30
+#else
+#define UNKNOWN(NAME) \
+	case NAME: \
+		return (uint64_t)1 << 30
+#endif
+
+	// clang-format off
+	switch (p_limit) {
+		case LIMIT_MAX_BOUND_UNIFORM_SETS:
+			return limits.maxBoundDescriptorSets;
+		case LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS:
+			return limits.maxColorAttachments;
+		case LIMIT_MAX_TEXTURES_PER_UNIFORM_SET:
+			return limits.maxTexturesPerArgumentBuffer;
+		case LIMIT_MAX_SAMPLERS_PER_UNIFORM_SET:
+			return limits.maxSamplersPerArgumentBuffer;
+		case LIMIT_MAX_STORAGE_BUFFERS_PER_UNIFORM_SET:
+			return limits.maxBuffersPerArgumentBuffer;
+		case LIMIT_MAX_STORAGE_IMAGES_PER_UNIFORM_SET:
+			return limits.maxTexturesPerArgumentBuffer;
+		case LIMIT_MAX_UNIFORM_BUFFERS_PER_UNIFORM_SET:
+			return limits.maxBuffersPerArgumentBuffer;
+		case LIMIT_MAX_DRAW_INDEXED_INDEX:
+			return limits.maxDrawIndexedIndexValue;
+		case LIMIT_MAX_FRAMEBUFFER_HEIGHT:
+			return limits.maxFramebufferHeight;
+		case LIMIT_MAX_FRAMEBUFFER_WIDTH:
+			return limits.maxFramebufferWidth;
+		case LIMIT_MAX_TEXTURE_ARRAY_LAYERS:
+			return limits.maxImageArrayLayers;
+		case LIMIT_MAX_TEXTURE_SIZE_1D:
+			return limits.maxImageDimension1D;
+		case LIMIT_MAX_TEXTURE_SIZE_2D:
+			return limits.maxImageDimension2D;
+		case LIMIT_MAX_TEXTURE_SIZE_3D:
+			return limits.maxImageDimension3D;
+		case LIMIT_MAX_TEXTURE_SIZE_CUBE:
+			return limits.maxImageDimensionCube;
+		case LIMIT_MAX_TEXTURES_PER_SHADER_STAGE:
+			return limits.maxTexturesPerArgumentBuffer;
+		case LIMIT_MAX_SAMPLERS_PER_SHADER_STAGE:
+			return limits.maxSamplersPerArgumentBuffer;
+		case LIMIT_MAX_STORAGE_BUFFERS_PER_SHADER_STAGE:
+			return limits.maxBuffersPerArgumentBuffer;
+		case LIMIT_MAX_STORAGE_IMAGES_PER_SHADER_STAGE:
+			return limits.maxTexturesPerArgumentBuffer;
+		case LIMIT_MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE:
+			return limits.maxBuffersPerArgumentBuffer;
+		case LIMIT_MAX_PUSH_CONSTANT_SIZE:
+			return limits.maxBufferLength;
+		case LIMIT_MAX_UNIFORM_BUFFER_SIZE:
+			return limits.maxBufferLength;
+		case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTE_OFFSET:
+			return limits.maxVertexDescriptorLayoutStride;
+		case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTES:
+			return limits.maxVertexInputAttributes;
+		case LIMIT_MAX_VERTEX_INPUT_BINDINGS:
+			return limits.maxVertexInputBindings;
+		case LIMIT_MAX_VERTEX_INPUT_BINDING_STRIDE:
+			return limits.maxVertexInputBindingStride;
+		case LIMIT_MIN_UNIFORM_BUFFER_OFFSET_ALIGNMENT:
+			return limits.minUniformBufferOffsetAlignment;
+		case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X:
+			return limits.maxComputeWorkGroupCount.width;
+		case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Y:
+			return limits.maxComputeWorkGroupCount.height;
+		case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Z:
+			return limits.maxComputeWorkGroupCount.depth;
+		case LIMIT_MAX_COMPUTE_WORKGROUP_INVOCATIONS:
+			// Reported as the largest of the three per-axis thread limits.
+			return std::max({ limits.maxThreadsPerThreadGroup.width, limits.maxThreadsPerThreadGroup.height, limits.maxThreadsPerThreadGroup.depth });
+		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X:
+			return limits.maxThreadsPerThreadGroup.width;
+		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y:
+			return limits.maxThreadsPerThreadGroup.height;
+		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
+			return limits.maxThreadsPerThreadGroup.depth;
+		case LIMIT_MAX_VIEWPORT_DIMENSIONS_X:
+			return limits.maxViewportDimensionX;
+		case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:
+			return limits.maxViewportDimensionY;
+		case LIMIT_SUBGROUP_SIZE:
+			// MoltenVK sets the subgroupSize to the same as the maxSubgroupSize.
+			return limits.maxSubgroupSize;
+		case LIMIT_SUBGROUP_MIN_SIZE:
+			return limits.minSubgroupSize;
+		case LIMIT_SUBGROUP_MAX_SIZE:
+			return limits.maxSubgroupSize;
+		case LIMIT_SUBGROUP_IN_SHADERS:
+			return (int64_t)limits.subgroupSupportedShaderStages;
+		case LIMIT_SUBGROUP_OPERATIONS:
+			return (int64_t)limits.subgroupSupportedOperations;
+		UNKNOWN(LIMIT_VRS_TEXEL_WIDTH);
+		UNKNOWN(LIMIT_VRS_TEXEL_HEIGHT);
+		default:
+			ERR_FAIL_V(0);
+	}
+	// clang-format on
+
+// Keep the helper macro local to this function.
+#undef UNKNOWN
+
+	return 0;
+}
+
+// Returns the value of the API trait `p_trait`. This driver reports 0 for
+// API_TRAIT_HONORS_PIPELINE_BARRIERS and defers to the base class for every
+// other trait.
+uint64_t RenderingDeviceDriverMetal::api_trait_get(ApiTrait p_trait) {
+	if (p_trait == API_TRAIT_HONORS_PIPELINE_BARRIERS) {
+		return 0;
+	}
+	return RenderingDeviceDriver::api_trait_get(p_trait);
+}
+
+// Reports whether this driver supports `p_feature`. Only FSR half-float and
+// fragment shaders with only side effects are reported as supported.
+bool RenderingDeviceDriverMetal::has_feature(Features p_feature) {
+	switch (p_feature) {
+		case SUPPORTS_FSR_HALF_FLOAT:
+		case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS:
+			return true;
+
+		case SUPPORTS_MULTIVIEW:
+		// TODO(sgc): Attachment VRS may be supported via https://developer.apple.com/documentation/metal/render_passes/rendering_at_different_rasterization_rates?language=objc
+		// See also:
+		//
+		// * https://forum.beyond3d.com/threads/variable-rate-shading-vs-variable-rate-rasterization.62243/post-2191363
+		//
+		case SUPPORTS_ATTACHMENT_VRS:
+		default:
+			return false;
+	}
+}
+
+const RDD::MultiviewCapabilities &RenderingDeviceDriverMetal::get_multiview_capabilities() { // Returns a reference to `multiview_capabilities`.
+ return multiview_capabilities;
+}
+
+String RenderingDeviceDriverMetal::get_api_version() const { // Formats version_major and version_minor as "<major>.<minor>".
+ return vformat("%d.%d", version_major, version_minor);
+}
+
+String RenderingDeviceDriverMetal::get_pipeline_cache_uuid() const { // Returns pipeline_cache_id, which initialize() sets to "metal-driver-" plus the API version.
+ return pipeline_cache_id;
+}
+
+const RDD::Capabilities &RenderingDeviceDriverMetal::get_capabilities() const { // Returns a reference to `capabilities`, which _check_capabilities() fills in.
+ return capabilities;
+}
+
+// Reports whether composite alpha is supported. The answer comes from the
+// OS-wide "layered allowed" setting; `p_queue` is not consulted.
+bool RenderingDeviceDriverMetal::is_composite_alpha_supported(CommandQueueID p_queue) const {
+	// The CAMetalLayer.opaque property is configured according to this global setting.
+	OS *os = OS::get_singleton();
+	return os->is_layered_allowed();
+}
+
+// Returns the device's minimum linear texture alignment for the Metal pixel
+// format that `pixel_formats` maps `p_format` to.
+size_t RenderingDeviceDriverMetal::get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const {
+	const auto mtl_format = pixel_formats->getMTLPixelFormat(p_format);
+	return [device minimumLinearTextureAlignmentForPixelFormat:mtl_format];
+}
+
+size_t RenderingDeviceDriverMetal::get_texel_buffer_alignment_for_format(MTLPixelFormat p_format) const { // Returns the device's minimum linear texture alignment for the Metal pixel format p_format.
+ return [device minimumLinearTextureAlignmentForPixelFormat:p_format];
+}
+
+/******************/
+
+// Stores the context driver and selects the shader load strategy. Setting the
+// environment variable GODOT_MTL_SHADER_LOAD_STRATEGY to "lazy" selects
+// ShaderLoadStrategy::LAZY; any other value leaves the strategy unchanged.
+RenderingDeviceDriverMetal::RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver) :
+		context_driver(p_context_driver) {
+	DEV_ASSERT(p_context_driver != nullptr);
+
+	String strategy = OS::get_singleton()->get_environment("GODOT_MTL_SHADER_LOAD_STRATEGY");
+	if (strategy == U"lazy") {
+		_shader_load_strategy = ShaderLoadStrategy::LAZY;
+	}
+}
+
+// Releases everything the driver owns: the command buffer wrappers, the
+// cached shader entries, and the helper objects allocated by initialize().
+RenderingDeviceDriverMetal::~RenderingDeviceDriverMetal() {
+	for (MDCommandBuffer *cb : command_buffers) {
+		delete cb;
+	}
+
+	for (KeyValue<SHA256Digest, ShaderCacheEntry *> &kv : _shader_cache) {
+		memdelete(kv.value);
+	}
+
+	// initialize() allocates these with memnew and nothing else in view frees
+	// them, so they leaked on every driver teardown.
+	// NOTE(review): assumes both members default to nullptr in the header and
+	// are not released elsewhere — confirm against the class declaration.
+	if (pixel_formats != nullptr) {
+		memdelete(pixel_formats);
+		pixel_formats = nullptr;
+	}
+	if (metal_device_properties != nullptr) {
+		memdelete(metal_device_properties);
+		metal_device_properties = nullptr;
+	}
+}
+
+#pragma mark - Initialization
+
+// Fetches the Metal device from the context driver, then creates the command
+// queue, the capture scope and the resource cache.
+//
+// Returns ERR_CANT_CREATE if the command queue cannot be created, OK otherwise.
+Error RenderingDeviceDriverMetal::_create_device() {
+	device = context_driver->get_metal_device();
+
+	device_queue = [device newCommandQueue];
+	ERR_FAIL_NULL_V(device_queue, ERR_CANT_CREATE);
+
+	MTLCaptureManager *capture_manager = [MTLCaptureManager sharedCaptureManager];
+	device_scope = [capture_manager newCaptureScopeWithCommandQueue:device_queue];
+	[device_scope setLabel:@"Godot Frame"];
+	// Allow Xcode to capture the first frame, if desired.
+	[device_scope beginScope];
+
+	resource_cache = std::make_unique<MDResourceCache>(this);
+
+	return OK;
+}
+
+// Derives the API version from the language version of a freshly created
+// MTLCompileOptions and records it, with the device family, in `capabilities`.
+// Always returns OK.
+Error RenderingDeviceDriverMetal::_check_capabilities() {
+	MTLCompileOptions *options = [MTLCompileOptions new];
+
+	// The major version is read from bits 16-23 and the minor from bits 0-7.
+	const NSUInteger language_version = options.languageVersion;
+	version_major = (language_version >> 16) & 0xff;
+	version_minor = language_version & 0xff;
+
+	capabilities.device_family = DEVICE_METAL;
+	capabilities.version_major = version_major;
+	capabilities.version_minor = version_minor;
+
+	return OK;
+}
+
+// Initializes the driver for the context device at `p_device_index`: creates
+// the Metal device objects, records the API version, sets the pipeline cache
+// ID and allocates the device-properties and pixel-format helpers.
+// `p_frame_count` is not used.
+//
+// Returns ERR_CANT_CREATE if device creation or the capability check fails,
+// or if the GPU lacks image cube array support (the user is alerted first).
+// Returns OK otherwise.
+Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p_frame_count) {
+	context_device = context_driver->device_get(p_device_index);
+
+	Error err = _create_device();
+	ERR_FAIL_COND_V(err, ERR_CANT_CREATE);
+
+	err = _check_capabilities();
+	ERR_FAIL_COND_V(err, ERR_CANT_CREATE);
+
+	// Set the pipeline cache ID based on the Metal version.
+	pipeline_cache_id = "metal-driver-" + get_api_version();
+
+	metal_device_properties = memnew(MetalDeviceProperties(device));
+	pixel_formats = memnew(PixelFormats(device));
+
+	// Check required features and abort if any of them is missing.
+	if (metal_device_properties->features.imageCubeArray) {
+		return OK;
+	}
+
+	// NOTE: Apple A11 (Apple4) GPUs support image cube arrays, which are devices from 2017 and newer.
+	String error_string = vformat("Your Apple GPU does not support the following features which are required to use Metal-based renderers in Godot:\n\n");
+	// One line per missing feature; image cube arrays are the only one checked.
+	error_string += "- No support for image cube arrays.\n";
+
+#if defined(IOS_ENABLED)
+	// iOS platform ports currently don't exit themselves when this method returns `ERR_CANT_CREATE`.
+	OS::get_singleton()->alert(error_string + "\nClick OK to exit (black screen will be visible).");
+#else
+	OS::get_singleton()->alert(error_string + "\nClick OK to exit.");
+#endif
+
+	return ERR_CANT_CREATE;
+}
diff --git a/drivers/unix/file_access_unix.cpp b/drivers/unix/file_access_unix.cpp
index 210507c2c6..32f2d7dd79 100644
--- a/drivers/unix/file_access_unix.cpp
+++ b/drivers/unix/file_access_unix.cpp
@@ -218,67 +218,13 @@ bool FileAccessUnix::eof_reached() const {
return last_error == ERR_FILE_EOF;
}
-uint8_t FileAccessUnix::get_8() const {
- ERR_FAIL_NULL_V_MSG(f, 0, "File must be opened before use.");
- uint8_t b;
- if (fread(&b, 1, 1, f) == 0) {
- check_errors();
- b = '\0';
- }
- return b;
-}
-
-uint16_t FileAccessUnix::get_16() const {
- ERR_FAIL_NULL_V_MSG(f, 0, "File must be opened before use.");
-
- uint16_t b = 0;
- if (fread(&b, 1, 2, f) != 2) {
- check_errors();
- }
-
- if (big_endian) {
- b = BSWAP16(b);
- }
-
- return b;
-}
-
-uint32_t FileAccessUnix::get_32() const {
- ERR_FAIL_NULL_V_MSG(f, 0, "File must be opened before use.");
-
- uint32_t b = 0;
- if (fread(&b, 1, 4, f) != 4) {
- check_errors();
- }
-
- if (big_endian) {
- b = BSWAP32(b);
- }
-
- return b;
-}
-
-uint64_t FileAccessUnix::get_64() const {
- ERR_FAIL_NULL_V_MSG(f, 0, "File must be opened before use.");
-
- uint64_t b = 0;
- if (fread(&b, 1, 8, f) != 8) {
- check_errors();
- }
-
- if (big_endian) {
- b = BSWAP64(b);
- }
-
- return b;
-}
-
uint64_t FileAccessUnix::get_buffer(uint8_t *p_dst, uint64_t p_length) const {
- ERR_FAIL_COND_V(!p_dst && p_length > 0, -1);
ERR_FAIL_NULL_V_MSG(f, -1, "File must be opened before use.");
+ ERR_FAIL_COND_V(!p_dst && p_length > 0, -1);
uint64_t read = fread(p_dst, 1, p_length, f);
check_errors();
+
return read;
}
@@ -308,41 +254,6 @@ void FileAccessUnix::flush() {
fflush(f);
}
-void FileAccessUnix::store_8(uint8_t p_dest) {
- ERR_FAIL_NULL_MSG(f, "File must be opened before use.");
- ERR_FAIL_COND(fwrite(&p_dest, 1, 1, f) != 1);
-}
-
-void FileAccessUnix::store_16(uint16_t p_dest) {
- ERR_FAIL_NULL_MSG(f, "File must be opened before use.");
-
- if (big_endian) {
- p_dest = BSWAP16(p_dest);
- }
-
- ERR_FAIL_COND(fwrite(&p_dest, 1, 2, f) != 2);
-}
-
-void FileAccessUnix::store_32(uint32_t p_dest) {
- ERR_FAIL_NULL_MSG(f, "File must be opened before use.");
-
- if (big_endian) {
- p_dest = BSWAP32(p_dest);
- }
-
- ERR_FAIL_COND(fwrite(&p_dest, 1, 4, f) != 4);
-}
-
-void FileAccessUnix::store_64(uint64_t p_dest) {
- ERR_FAIL_NULL_MSG(f, "File must be opened before use.");
-
- if (big_endian) {
- p_dest = BSWAP64(p_dest);
- }
-
- ERR_FAIL_COND(fwrite(&p_dest, 1, 8, f) != 8);
-}
-
void FileAccessUnix::store_buffer(const uint8_t *p_src, uint64_t p_length) {
ERR_FAIL_NULL_MSG(f, "File must be opened before use.");
ERR_FAIL_COND(!p_src && p_length > 0);
@@ -383,7 +294,7 @@ uint64_t FileAccessUnix::_get_modified_time(const String &p_file) {
if (!err) {
return status.st_mtime;
} else {
- print_verbose("Failed to get modified time for: " + p_file + "");
+ WARN_PRINT("Failed to get modified time for: " + p_file);
return 0;
}
}
diff --git a/drivers/unix/file_access_unix.h b/drivers/unix/file_access_unix.h
index c0286dbff3..76f629f7c2 100644
--- a/drivers/unix/file_access_unix.h
+++ b/drivers/unix/file_access_unix.h
@@ -67,20 +67,12 @@ public:
virtual bool eof_reached() const override; ///< reading passed EOF
- virtual uint8_t get_8() const override; ///< get a byte
- virtual uint16_t get_16() const override;
- virtual uint32_t get_32() const override;
- virtual uint64_t get_64() const override;
virtual uint64_t get_buffer(uint8_t *p_dst, uint64_t p_length) const override;
virtual Error get_error() const override; ///< get last error
virtual Error resize(int64_t p_length) override;
virtual void flush() override;
- virtual void store_8(uint8_t p_dest) override; ///< store a byte
- virtual void store_16(uint16_t p_dest) override;
- virtual void store_32(uint32_t p_dest) override;
- virtual void store_64(uint64_t p_dest) override;
virtual void store_buffer(const uint8_t *p_src, uint64_t p_length) override; ///< store an array of bytes
virtual bool file_exists(const String &p_path) override; ///< return true if a file exists
diff --git a/drivers/unix/file_access_unix_pipe.cpp b/drivers/unix/file_access_unix_pipe.cpp
index 5d9a27ad05..34758e8c7d 100644
--- a/drivers/unix/file_access_unix_pipe.cpp
+++ b/drivers/unix/file_access_unix_pipe.cpp
@@ -125,22 +125,9 @@ String FileAccessUnixPipe::get_path_absolute() const {
return path_src;
}
-uint8_t FileAccessUnixPipe::get_8() const {
- ERR_FAIL_COND_V_MSG(fd[0] < 0, 0, "Pipe must be opened before use.");
-
- uint8_t b;
- if (::read(fd[0], &b, 1) == 0) {
- last_error = ERR_FILE_CANT_READ;
- b = '\0';
- } else {
- last_error = OK;
- }
- return b;
-}
-
uint64_t FileAccessUnixPipe::get_buffer(uint8_t *p_dst, uint64_t p_length) const {
- ERR_FAIL_COND_V(!p_dst && p_length > 0, -1);
ERR_FAIL_COND_V_MSG(fd[0] < 0, -1, "Pipe must be opened before use.");
+ ERR_FAIL_COND_V(!p_dst && p_length > 0, -1);
uint64_t read = ::read(fd[0], p_dst, p_length);
if (read == p_length) {
@@ -155,18 +142,10 @@ Error FileAccessUnixPipe::get_error() const {
return last_error;
}
-void FileAccessUnixPipe::store_8(uint8_t p_src) {
- ERR_FAIL_COND_MSG(fd[1] < 0, "Pipe must be opened before use.");
- if (::write(fd[1], &p_src, 1) != 1) {
- last_error = ERR_FILE_CANT_WRITE;
- } else {
- last_error = OK;
- }
-}
-
void FileAccessUnixPipe::store_buffer(const uint8_t *p_src, uint64_t p_length) {
ERR_FAIL_COND_MSG(fd[1] < 0, "Pipe must be opened before use.");
ERR_FAIL_COND(!p_src && p_length > 0);
+
if (::write(fd[1], p_src, p_length) != (ssize_t)p_length) {
last_error = ERR_FILE_CANT_WRITE;
} else {
diff --git a/drivers/unix/file_access_unix_pipe.h b/drivers/unix/file_access_unix_pipe.h
index 8e7988791b..19acdb5a37 100644
--- a/drivers/unix/file_access_unix_pipe.h
+++ b/drivers/unix/file_access_unix_pipe.h
@@ -65,14 +65,12 @@ public:
virtual bool eof_reached() const override { return false; }
- virtual uint8_t get_8() const override; ///< get a byte
virtual uint64_t get_buffer(uint8_t *p_dst, uint64_t p_length) const override;
virtual Error get_error() const override; ///< get last error
virtual Error resize(int64_t p_length) override { return ERR_UNAVAILABLE; }
virtual void flush() override {}
- virtual void store_8(uint8_t p_src) override; ///< store a byte
virtual void store_buffer(const uint8_t *p_src, uint64_t p_length) override; ///< store an array of bytes
virtual bool file_exists(const String &p_path) override { return false; }
diff --git a/drivers/vulkan/SCsub b/drivers/vulkan/SCsub
index 80d5f35305..1efef5ad77 100644
--- a/drivers/vulkan/SCsub
+++ b/drivers/vulkan/SCsub
@@ -16,14 +16,14 @@ if env["use_volk"]:
if env["platform"] == "android":
env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_ANDROID_KHR"])
elif env["platform"] == "ios":
- env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_IOS_MVK"])
+ env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_IOS_MVK", "VK_USE_PLATFORM_METAL_EXT"])
elif env["platform"] == "linuxbsd":
if env["x11"]:
env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_XLIB_KHR"])
if env["wayland"]:
env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_WAYLAND_KHR"])
elif env["platform"] == "macos":
- env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_MACOS_MVK"])
+ env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_MACOS_MVK", "VK_USE_PLATFORM_METAL_EXT"])
elif env["platform"] == "windows":
env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_WIN32_KHR"])
diff --git a/drivers/vulkan/rendering_context_driver_vulkan.cpp b/drivers/vulkan/rendering_context_driver_vulkan.cpp
index 7cba820978..df9bd98624 100644
--- a/drivers/vulkan/rendering_context_driver_vulkan.cpp
+++ b/drivers/vulkan/rendering_context_driver_vulkan.cpp
@@ -40,21 +40,355 @@
#include "rendering_device_driver_vulkan.h"
#include "vulkan_hooks.h"
+#if defined(VK_TRACK_DRIVER_MEMORY)
+/*************************************************/
+// Driver memory tracking
+/*************************************************/
+// Totals maintained by the tracking allocation callbacks: bytes currently allocated and number of live allocations.
+SafeNumeric<size_t> driver_memory_total_memory;
+SafeNumeric<size_t> driver_memory_total_alloc_count;
+// Amount of driver memory, indexed by [tracked object type][system allocation scope].
+SafeNumeric<size_t> driver_memory_tracker[RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_COUNT][RenderingContextDriverVulkan::VK_TRACKED_SYSTEM_ALLOCATION_SCOPE_COUNT];
+// Amount of live allocations, indexed by [tracked object type][system allocation scope].
+SafeNumeric<uint32_t> driver_memory_allocation_count[RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_COUNT][RenderingContextDriverVulkan::VK_TRACKED_SYSTEM_ALLOCATION_SCOPE_COUNT];
+#endif
+
+#if defined(VK_TRACK_DEVICE_MEMORY)
+/*************************************************/
+// Device memory report
+/*************************************************/
+// Maps each reported memory object id to the size recorded for it by memory_report_callback().
+HashMap<uint64_t, size_t> memory_report_table; // NOTE(review): plain HashMap with no locking visible here; confirm the callback is never invoked concurrently.
+// Total memory and allocation amount.
+SafeNumeric<uint64_t> memory_report_total_memory;
+SafeNumeric<uint64_t> memory_report_total_alloc_count;
+// Amount of device memory for every object type.
+SafeNumeric<size_t> memory_report_mem_usage[RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_COUNT];
+// Amount of device memory allocations for every object type.
+SafeNumeric<size_t> memory_report_allocation_count[RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_COUNT];
+#endif
+
+// Returns a human-readable name for the tracked object type at p_type_index.
+// The table is ordered like VkTrackedObjectType. An out-of-range index reports an error and
+// yields "UNKNOWN" instead of reading past the end of the table.
+// When memory tracking is compiled out, a fixed explanatory string is returned for every index.
+const char *RenderingContextDriverVulkan::get_tracked_object_name(uint32_t p_type_index) const {
+#if defined(VK_TRACK_DRIVER_MEMORY) || defined(VK_TRACK_DEVICE_MEMORY)
+	static constexpr const char *vkTrackedObjectTypeNames[] = { "UNKNOWN",
+		"INSTANCE",
+		"PHYSICAL_DEVICE",
+		"DEVICE",
+		"QUEUE",
+		"SEMAPHORE",
+		"COMMAND_BUFFER",
+		"FENCE",
+		"DEVICE_MEMORY",
+		"BUFFER",
+		"IMAGE",
+		"EVENT",
+		"QUERY_POOL",
+		"BUFFER_VIEW",
+		"IMAGE_VIEW",
+		"SHADER_MODULE",
+		"PIPELINE_CACHE",
+		"PIPELINE_LAYOUT",
+		"RENDER_PASS",
+		"PIPELINE",
+		"DESCRIPTOR_SET_LAYOUT",
+		"SAMPLER",
+		"DESCRIPTOR_POOL",
+		"DESCRIPTOR_SET",
+		"FRAMEBUFFER",
+		"COMMAND_POOL",
+		"DESCRIPTOR_UPDATE_TEMPLATE_KHR",
+		"SURFACE_KHR",
+		"SWAPCHAIN_KHR",
+		"DEBUG_UTILS_MESSENGER_EXT",
+		"DEBUG_REPORT_CALLBACK_EXT",
+		"ACCELERATION_STRUCTURE",
+		"VMA_BUFFER_OR_IMAGE" };
+
+	// Keep the table and the enum in lockstep: adding an enum value without a name must not compile.
+	static_assert(sizeof(vkTrackedObjectTypeNames) / sizeof(vkTrackedObjectTypeNames[0]) == static_cast<size_t>(VK_TRACKED_OBJECT_TYPE_COUNT),
+			"vkTrackedObjectTypeNames must have one entry per VkTrackedObjectType value.");
+
+	// Callers pass a raw index, so guard against reading past the table.
+	ERR_FAIL_COND_V(p_type_index >= VK_TRACKED_OBJECT_TYPE_COUNT, vkTrackedObjectTypeNames[0]);
+
+	return vkTrackedObjectTypeNames[p_type_index];
+#else
+	return "VK_TRACK_*_MEMORY disabled at build time";
+#endif
+}
+
+#if defined(VK_TRACK_DRIVER_MEMORY) || defined(VK_TRACK_DEVICE_MEMORY)
+uint64_t RenderingContextDriverVulkan::get_tracked_object_type_count() const {
+ return VK_TRACKED_OBJECT_TYPE_COUNT; // Number of VkTrackedObjectType values, i.e. the valid index range for the per-type queries.
+}
+#endif
+
+#if defined(VK_TRACK_DRIVER_MEMORY) || defined(VK_TRACK_DEVICE_MEMORY)
+// Converts a VkObjectType into the compact VkTrackedObjectType used to index the tracking tables.
+// Values up to VK_OBJECT_TYPE_COMMAND_POOL (and the VMA pseudo-type) are passed through unchanged.
+// Larger values are remapped by the switch below. Anything unrecognized is reported and counted
+// under the "unknown" slot.
+RenderingContextDriverVulkan::VkTrackedObjectType vk_object_to_tracked_object(VkObjectType p_type) {
+	if (p_type > VK_OBJECT_TYPE_COMMAND_POOL && p_type != (VkObjectType)RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_VMA) {
+		switch (p_type) {
+			case VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE:
+				return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_DESCRIPTOR_UPDATE_TEMPLATE_KHR;
+			case VK_OBJECT_TYPE_SURFACE_KHR:
+				return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_SURFACE;
+			case VK_OBJECT_TYPE_SWAPCHAIN_KHR:
+				return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_SWAPCHAIN;
+			case VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT:
+				return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT;
+			case VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT:
+				return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT;
+			case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR:
+			case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV:
+				return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_ACCELERATION_STRUCTURE;
+			default:
+				// The enum value is already embedded in the message, so no extra trailing argument is
+				// passed (it would have been interpreted as an unrelated flag of _err_print_error).
+				_err_print_error(FUNCTION_STR, __FILE__, __LINE__, "Unknown VkObjectType enum value " + itos((uint32_t)p_type) + ". Please add it to VkTrackedObjectType, switch statement in "
+																													 "vk_object_to_tracked_object and get_tracked_object_name.");
+				return (RenderingContextDriverVulkan::VkTrackedObjectType)VK_OBJECT_TYPE_UNKNOWN;
+		}
+	}
+
+	return (RenderingContextDriverVulkan::VkTrackedObjectType)p_type;
+}
+#endif
+
+#if defined(VK_TRACK_DEVICE_MEMORY)
+// Total bytes currently reported as allocated by the device memory report callback.
+uint64_t RenderingContextDriverVulkan::get_device_total_memory() const {
+	return memory_report_total_memory.get();
+}
+
+// Number of memory objects currently reported as allocated.
+uint64_t RenderingContextDriverVulkan::get_device_allocation_count() const {
+	return memory_report_total_alloc_count.get();
+}
+
+// Bytes currently reported for one tracked object type. p_type must be below VK_TRACKED_OBJECT_TYPE_COUNT;
+// an out-of-range value reports an error and returns 0.
+uint64_t RenderingContextDriverVulkan::get_device_memory_by_object_type(uint32_t p_type) const {
+	ERR_FAIL_COND_V(p_type >= VK_TRACKED_OBJECT_TYPE_COUNT, 0);
+	return memory_report_mem_usage[p_type].get();
+}
+
+// Number of live reported allocations for one tracked object type. Same index contract as above.
+uint64_t RenderingContextDriverVulkan::get_device_allocs_by_object_type(uint32_t p_type) const {
+	ERR_FAIL_COND_V(p_type >= VK_TRACKED_OBJECT_TYPE_COUNT, 0);
+	return memory_report_allocation_count[p_type].get();
+}
+#endif
+
+#if defined(VK_TRACK_DRIVER_MEMORY)
+// Total bytes currently allocated through the tracking allocation callbacks.
+uint64_t RenderingContextDriverVulkan::get_driver_total_memory() const {
+	return driver_memory_total_memory.get();
+}
+
+// Number of live allocations made through the tracking allocation callbacks.
+uint64_t RenderingContextDriverVulkan::get_driver_allocation_count() const {
+	return driver_memory_total_alloc_count.get();
+}
+
+// Bytes currently allocated for one tracked object type, summed over every allocation scope.
+// p_type must be below VK_TRACKED_OBJECT_TYPE_COUNT; an out-of-range value reports an error and returns 0.
+uint64_t RenderingContextDriverVulkan::get_driver_memory_by_object_type(uint32_t p_type) const {
+	ERR_FAIL_COND_V(p_type >= VK_TRACKED_OBJECT_TYPE_COUNT, 0);
+
+	uint64_t ret = 0;
+	for (uint32_t i = 0; i < VK_TRACKED_SYSTEM_ALLOCATION_SCOPE_COUNT; i++) {
+		ret += driver_memory_tracker[p_type][i].get();
+	}
+
+	return ret;
+}
+
+// Number of live allocations for one tracked object type, summed over every allocation scope.
+// Same index contract as get_driver_memory_by_object_type().
+uint64_t RenderingContextDriverVulkan::get_driver_allocs_by_object_type(uint32_t p_type) const {
+	ERR_FAIL_COND_V(p_type >= VK_TRACKED_OBJECT_TYPE_COUNT, 0);
+
+	uint64_t ret = 0;
+	for (uint32_t i = 0; i < VK_TRACKED_SYSTEM_ALLOCATION_SCOPE_COUNT; i++) {
+		ret += driver_memory_allocation_count[p_type][i].get();
+	}
+
+	return ret;
+}
+#endif
+
+#if defined(VK_TRACK_DEVICE_MEMORY)
+// Callback registered through VkDeviceDeviceMemoryReportCreateInfoEXT. Keeps the device memory
+// counters in sync with the ALLOCATE and FREE events reported by the driver; other event types are ignored.
+//
+// p_callback_data: event description supplied by the driver (ignored when null).
+// p_user_data: unused.
+//
+// NOTE(review): memory_report_table is a plain HashMap with no locking visible here; confirm the
+// driver never invokes this callback from several threads at once.
+void RenderingContextDriverVulkan::memory_report_callback(const VkDeviceMemoryReportCallbackDataEXT *p_callback_data, void *p_user_data) {
+	if (!p_callback_data) {
+		return;
+	}
+	const RenderingContextDriverVulkan::VkTrackedObjectType obj_type = vk_object_to_tracked_object(p_callback_data->objectType);
+	uint64_t obj_id = p_callback_data->memoryObjectId;
+
+	if (p_callback_data->type == VK_DEVICE_MEMORY_REPORT_EVENT_TYPE_ALLOCATE_EXT) {
+		if (memory_report_table.has(obj_id)) {
+			// Realloc: drop the previously recorded size, the new one is added below.
+			const size_t previous_size = memory_report_table[obj_id];
+			memory_report_total_memory.sub(previous_size);
+			memory_report_mem_usage[obj_type].sub(previous_size);
+		} else {
+			memory_report_total_alloc_count.increment();
+			memory_report_allocation_count[obj_type].increment();
+		}
+
+		memory_report_table[obj_id] = p_callback_data->size;
+		memory_report_total_memory.add(p_callback_data->size);
+		memory_report_mem_usage[obj_type].add(p_callback_data->size);
+	} else if (p_callback_data->type == VK_DEVICE_MEMORY_REPORT_EVENT_TYPE_FREE_EXT) {
+		if (memory_report_table.has(obj_id)) {
+			// The size field of the callback data is only defined for allocate/import/failure events,
+			// so subtract what was recorded at allocation time instead of trusting it here.
+			const size_t tracked_size = memory_report_table[obj_id];
+
+			memory_report_total_alloc_count.decrement();
+			memory_report_allocation_count[obj_type].decrement();
+			memory_report_mem_usage[obj_type].sub(tracked_size);
+			memory_report_total_memory.sub(tracked_size);
+
+			memory_report_table.remove(memory_report_table.find(obj_id));
+		}
+	}
+}
+#endif
+
+// Returns the VkAllocationCallbacks to hand to Vulkan calls that create or destroy objects of p_type.
+//
+// Returns nullptr (i.e. "use the driver's default allocator") when VK_TRACK_DRIVER_MEMORY is not
+// compiled in or when extra GPU memory tracking is disabled in the Engine. Otherwise returns a
+// per-object-type set of callbacks that allocate through Memory::*_aligned_static and keep the
+// driver_memory_* counters up to date.
+//
+// Memory layout of a tracked allocation (A = effective alignment, at least 32 bytes):
+//   [ TrackedMemHeader | padding | size_t A ][ user memory ... ]
+//   ^ raw pointer                            ^ pointer returned to Vulkan (raw + A)
+// The size_t directly in front of the user pointer lets realloc/free find the header again.
+//
+// NOTE(review): the lazy one-time initialization of the static tables below is not synchronized.
+VkAllocationCallbacks *RenderingContextDriverVulkan::get_allocation_callbacks(VkObjectType p_type) {
+#if !defined(VK_TRACK_DRIVER_MEMORY)
+	return nullptr;
+#else
+	if (!Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) {
+		return nullptr;
+	}
+
+#ifdef _MSC_VER
+#define LAMBDA_VK_CALL_CONV
+#else
+#define LAMBDA_VK_CALL_CONV VKAPI_PTR
+#endif
+
+	// Bookkeeping stored at the start of every tracked allocation.
+	struct TrackedMemHeader {
+		size_t size;
+		VkSystemAllocationScope allocation_scope;
+		VkTrackedObjectType type;
+	};
+	VkAllocationCallbacks tracking_callbacks = {
+		// User data, filled in per object type further down.
+		nullptr,
+
+		// Allocation function
+		[](
+				void *p_user_data,
+				size_t size,
+				size_t alignment,
+				VkSystemAllocationScope allocation_scope) LAMBDA_VK_CALL_CONV -> void * {
+			static constexpr size_t tracking_data_size = 32;
+			static_assert(sizeof(TrackedMemHeader) + sizeof(size_t) <= tracking_data_size, "Tracking prefix does not fit in the reserved space.");
+			// The user data is the uint32_t tracked type index stored in object_user_data.
+			const VkTrackedObjectType type = static_cast<VkTrackedObjectType>(*static_cast<const uint32_t *>(p_user_data));
+
+			alignment = MAX(alignment, tracking_data_size);
+
+			uint8_t *ret = reinterpret_cast<uint8_t *>(Memory::alloc_aligned_static(size + alignment, alignment));
+			if (ret == nullptr) {
+				return nullptr;
+			}
+
+			// Only account for allocations that actually succeeded.
+			driver_memory_total_memory.add(size);
+			driver_memory_total_alloc_count.increment();
+			driver_memory_tracker[type][allocation_scope].add(size);
+			driver_memory_allocation_count[type][allocation_scope].increment();
+
+			// Track allocation
+			TrackedMemHeader *header = reinterpret_cast<TrackedMemHeader *>(ret);
+			header->size = size;
+			header->allocation_scope = allocation_scope;
+			header->type = type;
+			*reinterpret_cast<size_t *>(ret + alignment - sizeof(size_t)) = alignment;
+
+			// Return first available chunk of memory
+			return ret + alignment;
+		},
+
+		// Reallocation function
+		[](
+				void *p_user_data,
+				void *p_original,
+				size_t size,
+				size_t alignment,
+				VkSystemAllocationScope allocation_scope) LAMBDA_VK_CALL_CONV -> void * {
+			if (p_original == nullptr) {
+				// Must behave like a plain allocation. Every per-type entry shares the same function
+				// pointers and p_user_data already identifies the tracked type, so any entry works for
+				// the lookup. p_user_data holds a tracked type index rather than a VkObjectType, so it
+				// must not be fed back into get_allocation_callbacks(): remapped indices would be
+				// reported as unknown object types.
+				return get_allocation_callbacks(VK_OBJECT_TYPE_UNKNOWN)->pfnAllocation(p_user_data, size, alignment, allocation_scope);
+			}
+			if (size == 0) {
+				// A zero-sized reallocation must behave like a free of the original block.
+				get_allocation_callbacks(VK_OBJECT_TYPE_UNKNOWN)->pfnFree(p_user_data, p_original);
+				return nullptr;
+			}
+
+			uint8_t *mem = reinterpret_cast<uint8_t *>(p_original);
+			// Retrieve alignment
+			alignment = *reinterpret_cast<size_t *>(mem - sizeof(size_t));
+			// Retrieve allocation data
+			TrackedMemHeader *header = reinterpret_cast<TrackedMemHeader *>(mem - alignment);
+
+			// Snapshot the header: the block may move, and on failure the original must stay accounted as is.
+			const size_t previous_size = header->size;
+			const VkTrackedObjectType type = header->type;
+			const VkSystemAllocationScope original_scope = header->allocation_scope;
+
+			uint8_t *ret = reinterpret_cast<uint8_t *>(Memory::realloc_aligned_static(header, size + alignment, previous_size + alignment, alignment));
+			if (ret == nullptr) {
+				return nullptr;
+			}
+
+			// Update allocation size
+			driver_memory_total_memory.sub(previous_size);
+			driver_memory_total_memory.add(size);
+			driver_memory_tracker[type][original_scope].sub(previous_size);
+			driver_memory_tracker[type][original_scope].add(size);
+
+			// Update tracker
+			header = reinterpret_cast<TrackedMemHeader *>(ret);
+			header->size = size;
+			return ret + alignment;
+		},
+
+		// Free function
+		[](
+				void *p_user_data,
+				void *p_memory) LAMBDA_VK_CALL_CONV {
+			if (!p_memory) {
+				return;
+			}
+
+			uint8_t *mem = reinterpret_cast<uint8_t *>(p_memory);
+			size_t alignment = *reinterpret_cast<size_t *>(mem - sizeof(size_t));
+			TrackedMemHeader *header = reinterpret_cast<TrackedMemHeader *>(mem - alignment);
+
+			driver_memory_total_alloc_count.decrement();
+			driver_memory_total_memory.sub(header->size);
+			driver_memory_tracker[header->type][header->allocation_scope].sub(header->size);
+			driver_memory_allocation_count[header->type][header->allocation_scope].decrement();
+
+			Memory::free_aligned_static(header);
+		},
+		// Internal allocation / deallocation. We don't track them as they cannot really be controlled or optimized by the programmer.
+		[](
+				void *p_user_data,
+				size_t size,
+				VkInternalAllocationType allocation_type,
+				VkSystemAllocationScope allocation_scope) LAMBDA_VK_CALL_CONV {
+		},
+		[](
+				void *p_user_data,
+				size_t size,
+				VkInternalAllocationType allocation_type,
+				VkSystemAllocationScope allocation_scope) LAMBDA_VK_CALL_CONV {
+		},
+	};
+
+	// The calling-convention helper is only meaningful for the lambdas above.
+#undef LAMBDA_VK_CALL_CONV
+
+	// Create a callback per object type
+	static VkAllocationCallbacks object_callbacks[VK_TRACKED_OBJECT_TYPE_COUNT] = {};
+	static uint32_t object_user_data[VK_TRACKED_OBJECT_TYPE_COUNT] = {};
+
+	// Only build the first time
+	if (!object_callbacks[0].pfnAllocation) {
+		for (uint32_t c = 0; c < VK_TRACKED_OBJECT_TYPE_COUNT; ++c) {
+			object_callbacks[c] = tracking_callbacks;
+			// Each entry carries its own tracked type index so the callbacks know what they allocate for.
+			object_user_data[c] = c;
+			object_callbacks[c].pUserData = &object_user_data[c];
+
+			for (uint32_t i = 0; i < VK_TRACKED_SYSTEM_ALLOCATION_SCOPE_COUNT; i++) {
+				driver_memory_tracker[c][i].set(0);
+				driver_memory_allocation_count[c][i].set(0);
+			}
+		}
+	}
+
+	uint32_t type_index = vk_object_to_tracked_object(p_type);
+	return &object_callbacks[type_index];
+#endif
+}
+
RenderingContextDriverVulkan::RenderingContextDriverVulkan() {
// Empty constructor.
}
RenderingContextDriverVulkan::~RenderingContextDriverVulkan() {
if (debug_messenger != VK_NULL_HANDLE && functions.DestroyDebugUtilsMessengerEXT != nullptr) {
- functions.DestroyDebugUtilsMessengerEXT(instance, debug_messenger, nullptr);
+ functions.DestroyDebugUtilsMessengerEXT(instance, debug_messenger, get_allocation_callbacks(VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT));
}
if (debug_report != VK_NULL_HANDLE && functions.DestroyDebugReportCallbackEXT != nullptr) {
- functions.DestroyDebugReportCallbackEXT(instance, debug_report, nullptr);
+ functions.DestroyDebugReportCallbackEXT(instance, debug_report, get_allocation_callbacks(VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT));
}
if (instance != VK_NULL_HANDLE) {
- vkDestroyInstance(instance, nullptr);
+ vkDestroyInstance(instance, get_allocation_callbacks(VK_OBJECT_TYPE_INSTANCE));
}
}
@@ -102,6 +436,10 @@ Error RenderingContextDriverVulkan::_initialize_instance_extensions() {
// This extension allows us to use the properties2 features to query additional device capabilities.
_register_requested_instance_extension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, false);
+#if defined(USE_VOLK) && (defined(MACOS_ENABLED) || defined(IOS_ENABLED))
+ _register_requested_instance_extension(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, true);
+#endif
+
// Only enable debug utils in verbose mode or DEV_ENABLED.
// End users would get spammed with messages of varying verbosity due to the
// mess that thirdparty layers/extensions and drivers seem to leave in their
@@ -360,6 +698,11 @@ Error RenderingContextDriverVulkan::_initialize_instance() {
VkInstanceCreateInfo instance_info = {};
instance_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+
+#if defined(USE_VOLK) && (defined(MACOS_ENABLED) || defined(IOS_ENABLED))
+ instance_info.flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR;
+#endif
+
instance_info.pApplicationInfo = &app_info;
instance_info.enabledExtensionCount = enabled_extension_names.size();
instance_info.ppEnabledExtensionNames = enabled_extension_names.ptr();
@@ -432,7 +775,7 @@ Error RenderingContextDriverVulkan::_initialize_instance() {
ERR_FAIL_V_MSG(ERR_CANT_CREATE, "GetProcAddr: Failed to init VK_EXT_debug_utils\nGetProcAddr: Failure");
}
- VkResult res = functions.CreateDebugUtilsMessengerEXT(instance, &debug_messenger_create_info, nullptr, &debug_messenger);
+ VkResult res = functions.CreateDebugUtilsMessengerEXT(instance, &debug_messenger_create_info, get_allocation_callbacks(VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT), &debug_messenger);
switch (res) {
case VK_SUCCESS:
break;
@@ -452,7 +795,7 @@ Error RenderingContextDriverVulkan::_initialize_instance() {
ERR_FAIL_V_MSG(ERR_CANT_CREATE, "GetProcAddr: Failed to init VK_EXT_debug_report\nGetProcAddr: Failure");
}
- VkResult res = functions.CreateDebugReportCallbackEXT(instance, &debug_report_callback_create_info, nullptr, &debug_report);
+ VkResult res = functions.CreateDebugReportCallbackEXT(instance, &debug_report_callback_create_info, get_allocation_callbacks(VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT), &debug_report);
switch (res) {
case VK_SUCCESS:
break;
@@ -551,7 +894,7 @@ Error RenderingContextDriverVulkan::_create_vulkan_instance(const VkInstanceCrea
if (VulkanHooks::get_singleton() != nullptr) {
return VulkanHooks::get_singleton()->create_vulkan_instance(p_create_info, r_instance) ? OK : ERR_CANT_CREATE;
} else {
- VkResult err = vkCreateInstance(p_create_info, nullptr, r_instance);
+ VkResult err = vkCreateInstance(p_create_info, get_allocation_callbacks(VK_OBJECT_TYPE_INSTANCE), r_instance);
ERR_FAIL_COND_V_MSG(err == VK_ERROR_INCOMPATIBLE_DRIVER, ERR_CANT_CREATE,
"Cannot find a compatible Vulkan installable client driver (ICD).\n\n"
"vkCreateInstance Failure");
@@ -670,7 +1013,7 @@ bool RenderingContextDriverVulkan::surface_get_needs_resize(SurfaceID p_surface)
void RenderingContextDriverVulkan::surface_destroy(SurfaceID p_surface) {
Surface *surface = (Surface *)(p_surface);
- vkDestroySurfaceKHR(instance, surface->vk_surface, nullptr);
+ vkDestroySurfaceKHR(instance, surface->vk_surface, get_allocation_callbacks(VK_OBJECT_TYPE_SURFACE_KHR));
memdelete(surface);
}
diff --git a/drivers/vulkan/rendering_context_driver_vulkan.h b/drivers/vulkan/rendering_context_driver_vulkan.h
index f1d4021e32..4fbca012c6 100644
--- a/drivers/vulkan/rendering_context_driver_vulkan.h
+++ b/drivers/vulkan/rendering_context_driver_vulkan.h
@@ -35,6 +35,11 @@
#include "servers/rendering/rendering_context_driver.h"
+#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED)
+#define VK_TRACK_DRIVER_MEMORY
+#define VK_TRACK_DEVICE_MEMORY
+#endif
+
#ifdef USE_VOLK
#include <volk.h>
#else
@@ -77,6 +82,12 @@ public:
PFN_vkDebugReportMessageEXT DebugReportMessageEXT = nullptr;
PFN_vkDestroyDebugReportCallbackEXT DestroyDebugReportCallbackEXT = nullptr;
+ // Debug marker extensions.
+ PFN_vkCmdDebugMarkerBeginEXT CmdDebugMarkerBeginEXT = nullptr;
+ PFN_vkCmdDebugMarkerEndEXT CmdDebugMarkerEndEXT = nullptr;
+ PFN_vkCmdDebugMarkerInsertEXT CmdDebugMarkerInsertEXT = nullptr;
+ PFN_vkDebugMarkerSetObjectNameEXT DebugMarkerSetObjectNameEXT = nullptr;
+
bool debug_report_functions_available() const {
return CreateDebugReportCallbackEXT != nullptr &&
DebugReportMessageEXT != nullptr &&
@@ -110,6 +121,8 @@ private:
// Static callbacks.
static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_messenger_callback(VkDebugUtilsMessageSeverityFlagBitsEXT p_message_severity, VkDebugUtilsMessageTypeFlagsEXT p_message_type, const VkDebugUtilsMessengerCallbackDataEXT *p_callback_data, void *p_user_data);
static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_report_callback(VkDebugReportFlagsEXT p_flags, VkDebugReportObjectTypeEXT p_object_type, uint64_t p_object, size_t p_location, int32_t p_message_code, const char *p_layer_prefix, const char *p_message, void *p_user_data);
+ // Debug marker extensions.
+ VkDebugReportObjectTypeEXT _convert_to_debug_report_objectType(VkObjectType p_object_type);
protected:
Error _find_validation_layers(TightLocalVector<const char *> &r_layer_names) const;
@@ -153,6 +166,45 @@ public:
bool queue_family_supports_present(VkPhysicalDevice p_physical_device, uint32_t p_queue_family_index, SurfaceID p_surface) const;
const Functions &functions_get() const;
+ static VkAllocationCallbacks *get_allocation_callbacks(VkObjectType p_type);
+
+#if defined(VK_TRACK_DRIVER_MEMORY) || defined(VK_TRACK_DEVICE_MEMORY)
+ enum VkTrackedObjectType{ // Compact index for the tracking tables; values up to VK_OBJECT_TYPE_COMMAND_POOL reuse the VkObjectType value as is.
+ VK_TRACKED_OBJECT_DESCRIPTOR_UPDATE_TEMPLATE_KHR = VK_OBJECT_TYPE_COMMAND_POOL + 1, // First remapped type, placed right after the pass-through range.
+ VK_TRACKED_OBJECT_TYPE_SURFACE,
+ VK_TRACKED_OBJECT_TYPE_SWAPCHAIN,
+ VK_TRACKED_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT,
+ VK_TRACKED_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT,
+ VK_TRACKED_OBJECT_TYPE_ACCELERATION_STRUCTURE,
+ VK_TRACKED_OBJECT_TYPE_VMA, // Pseudo-type with no VkObjectType counterpart; named "VMA_BUFFER_OR_IMAGE" by get_tracked_object_name().
+ VK_TRACKED_OBJECT_TYPE_COUNT // Number of tracked types; used to size the tracking arrays.
+ };
+
+ enum VkTrackedSystemAllocationScope{
+ VK_TRACKED_SYSTEM_ALLOCATION_SCOPE_COUNT = VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE + 1 // One slot per VkSystemAllocationScope value up to and including INSTANCE.
+ };
+#endif
+
+ const char *get_tracked_object_name(uint32_t p_type_index) const override;
+#if defined(VK_TRACK_DRIVER_MEMORY) || defined(VK_TRACK_DEVICE_MEMORY)
+ uint64_t get_tracked_object_type_count() const override;
+#endif
+
+#if defined(VK_TRACK_DRIVER_MEMORY)
+ uint64_t get_driver_total_memory() const override;
+ uint64_t get_driver_allocation_count() const override;
+ uint64_t get_driver_memory_by_object_type(uint32_t p_type) const override;
+ uint64_t get_driver_allocs_by_object_type(uint32_t p_type) const override;
+#endif
+
+#if defined(VK_TRACK_DEVICE_MEMORY)
+ uint64_t get_device_total_memory() const override;
+ uint64_t get_device_allocation_count() const override;
+ uint64_t get_device_memory_by_object_type(uint32_t p_type) const override;
+ uint64_t get_device_allocs_by_object_type(uint32_t p_type) const override;
+ static VKAPI_ATTR void VKAPI_CALL memory_report_callback(const VkDeviceMemoryReportCallbackDataEXT *p_callback_data, void *p_user_data);
+#endif
+
RenderingContextDriverVulkan();
virtual ~RenderingContextDriverVulkan() override;
};
diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp
index 97fd156584..4ea46e8214 100644
--- a/drivers/vulkan/rendering_device_driver_vulkan.cpp
+++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp
@@ -497,11 +497,32 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() {
_register_requested_device_extension(VK_KHR_MAINTENANCE_2_EXTENSION_NAME, false);
_register_requested_device_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, false);
_register_requested_device_extension(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
+ _register_requested_device_extension(VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME, false);
if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) {
_register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true);
}
+#if defined(VK_TRACK_DEVICE_MEMORY)
+ if (Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) {
+ _register_requested_device_extension(VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME, false);
+ }
+#endif
+ _register_requested_device_extension(VK_EXT_DEVICE_FAULT_EXTENSION_NAME, false);
+
+ {
+ // Debug marker extensions.
+ // Should be last element in the array.
+#ifdef DEV_ENABLED
+ bool want_debug_markers = true;
+#else
+ bool want_debug_markers = OS::get_singleton()->is_stdout_verbose();
+#endif
+ if (want_debug_markers) {
+ _register_requested_device_extension(VK_EXT_DEBUG_MARKER_EXTENSION_NAME, false);
+ }
+ }
+
uint32_t device_extension_count = 0;
VkResult err = vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &device_extension_count, nullptr);
ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);
@@ -745,6 +766,15 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() {
if (enabled_device_extension_names.has(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME)) {
pipeline_cache_control_support = pipeline_cache_control_features.pipelineCreationCacheControl;
}
+
+ if (enabled_device_extension_names.has(VK_EXT_DEVICE_FAULT_EXTENSION_NAME)) {
+ device_fault_support = true;
+ }
+#if defined(VK_TRACK_DEVICE_MEMORY)
+ if (enabled_device_extension_names.has(VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME)) {
+ device_memory_report_support = true;
+ }
+#endif
}
if (functions.GetPhysicalDeviceProperties2 != nullptr) {
@@ -913,6 +943,26 @@ Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVector<VkDevice
create_info_next = &pipeline_cache_control_features;
}
+ VkPhysicalDeviceFaultFeaturesEXT device_fault_features = {};
+ if (device_fault_support) {
+ device_fault_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FAULT_FEATURES_EXT;
+ device_fault_features.pNext = create_info_next;
+ create_info_next = &device_fault_features;
+ }
+
+#if defined(VK_TRACK_DEVICE_MEMORY)
+ VkDeviceDeviceMemoryReportCreateInfoEXT memory_report_info = {};
+ if (device_memory_report_support) {
+ memory_report_info.sType = VK_STRUCTURE_TYPE_DEVICE_DEVICE_MEMORY_REPORT_CREATE_INFO_EXT;
+ memory_report_info.pfnUserCallback = RenderingContextDriverVulkan::memory_report_callback;
+ memory_report_info.pNext = create_info_next;
+ memory_report_info.flags = 0;
+ memory_report_info.pUserData = this;
+
+ create_info_next = &memory_report_info;
+ }
+#endif
+
VkPhysicalDeviceVulkan11Features vulkan_1_1_features = {};
VkPhysicalDevice16BitStorageFeaturesKHR storage_features = {};
VkPhysicalDeviceMultiviewFeatures multiview_features = {};
@@ -968,7 +1018,7 @@ Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVector<VkDevice
bool device_created = VulkanHooks::get_singleton()->create_vulkan_device(&create_info, &vk_device);
ERR_FAIL_COND_V(!device_created, ERR_CANT_CREATE);
} else {
- VkResult err = vkCreateDevice(physical_device, &create_info, nullptr, &vk_device);
+ VkResult err = vkCreateDevice(physical_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DEVICE), &vk_device);
ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);
}
@@ -989,6 +1039,19 @@ Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVector<VkDevice
if (enabled_device_extension_names.has(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME)) {
device_functions.CreateRenderPass2KHR = PFN_vkCreateRenderPass2KHR(functions.GetDeviceProcAddr(vk_device, "vkCreateRenderPass2KHR"));
}
+
+ // Debug marker extensions.
+ if (enabled_device_extension_names.has(VK_EXT_DEBUG_MARKER_EXTENSION_NAME)) {
+ device_functions.CmdDebugMarkerBeginEXT = (PFN_vkCmdDebugMarkerBeginEXT)functions.GetDeviceProcAddr(vk_device, "vkCmdDebugMarkerBeginEXT");
+ device_functions.CmdDebugMarkerEndEXT = (PFN_vkCmdDebugMarkerEndEXT)functions.GetDeviceProcAddr(vk_device, "vkCmdDebugMarkerEndEXT");
+ device_functions.CmdDebugMarkerInsertEXT = (PFN_vkCmdDebugMarkerInsertEXT)functions.GetDeviceProcAddr(vk_device, "vkCmdDebugMarkerInsertEXT");
+ device_functions.DebugMarkerSetObjectNameEXT = (PFN_vkDebugMarkerSetObjectNameEXT)functions.GetDeviceProcAddr(vk_device, "vkDebugMarkerSetObjectNameEXT");
+ }
+
+ // Debug device fault extension.
+ if (device_fault_support) {
+ device_functions.GetDeviceFaultInfoEXT = (PFN_vkGetDeviceFaultInfoEXT)functions.GetDeviceProcAddr(vk_device, "vkGetDeviceFaultInfoEXT");
+ }
}
return OK;
@@ -1148,17 +1211,102 @@ bool RenderingDeviceDriverVulkan::_recreate_image_semaphore(CommandQueue *p_comm
VkSemaphore semaphore;
VkSemaphoreCreateInfo create_info = {};
create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
- VkResult err = vkCreateSemaphore(vk_device, &create_info, nullptr, &semaphore);
+ VkResult err = vkCreateSemaphore(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE), &semaphore);
ERR_FAIL_COND_V(err != VK_SUCCESS, false);
// Indicate the semaphore is free again and destroy the previous one before storing the new one.
- vkDestroySemaphore(vk_device, p_command_queue->image_semaphores[p_semaphore_index], nullptr);
+ vkDestroySemaphore(vk_device, p_command_queue->image_semaphores[p_semaphore_index], VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE));
p_command_queue->image_semaphores[p_semaphore_index] = semaphore;
p_command_queue->free_image_semaphores.push_back(p_semaphore_index);
return true;
}
+// Debug marker extensions: translates a VkObjectType into the VkDebugReportObjectTypeEXT value with the matching name; used when naming objects through DebugMarkerSetObjectNameEXT.
+VkDebugReportObjectTypeEXT RenderingDeviceDriverVulkan::_convert_to_debug_report_objectType(VkObjectType p_object_type) {
+ switch (p_object_type) {
+ case VK_OBJECT_TYPE_UNKNOWN:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT;
+ case VK_OBJECT_TYPE_INSTANCE:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT;
+ case VK_OBJECT_TYPE_PHYSICAL_DEVICE:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT;
+ case VK_OBJECT_TYPE_DEVICE:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT;
+ case VK_OBJECT_TYPE_QUEUE:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT;
+ case VK_OBJECT_TYPE_SEMAPHORE:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT;
+ case VK_OBJECT_TYPE_COMMAND_BUFFER:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT;
+ case VK_OBJECT_TYPE_FENCE:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT;
+ case VK_OBJECT_TYPE_DEVICE_MEMORY:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT;
+ case VK_OBJECT_TYPE_BUFFER:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT;
+ case VK_OBJECT_TYPE_IMAGE:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT;
+ case VK_OBJECT_TYPE_EVENT:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT;
+ case VK_OBJECT_TYPE_QUERY_POOL:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT;
+ case VK_OBJECT_TYPE_BUFFER_VIEW:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT;
+ case VK_OBJECT_TYPE_IMAGE_VIEW:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT;
+ case VK_OBJECT_TYPE_SHADER_MODULE:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT;
+ case VK_OBJECT_TYPE_PIPELINE_CACHE:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT;
+ case VK_OBJECT_TYPE_PIPELINE_LAYOUT:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT;
+ case VK_OBJECT_TYPE_RENDER_PASS:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT;
+ case VK_OBJECT_TYPE_PIPELINE:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT;
+ case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT;
+ case VK_OBJECT_TYPE_SAMPLER:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT;
+ case VK_OBJECT_TYPE_DESCRIPTOR_POOL:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT;
+ case VK_OBJECT_TYPE_DESCRIPTOR_SET:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT;
+ case VK_OBJECT_TYPE_FRAMEBUFFER:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT;
+ case VK_OBJECT_TYPE_COMMAND_POOL:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT;
+ case VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_EXT;
+ case VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT;
+ case VK_OBJECT_TYPE_SURFACE_KHR:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT;
+ case VK_OBJECT_TYPE_SWAPCHAIN_KHR:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT;
+ case VK_OBJECT_TYPE_DISPLAY_KHR:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_KHR_EXT;
+ case VK_OBJECT_TYPE_DISPLAY_MODE_KHR:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_MODE_KHR_EXT;
+ case VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT;
+ case VK_OBJECT_TYPE_CU_MODULE_NVX:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_CU_MODULE_NVX_EXT;
+ case VK_OBJECT_TYPE_CU_FUNCTION_NVX:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_CU_FUNCTION_NVX_EXT;
+ case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR_EXT;
+ case VK_OBJECT_TYPE_VALIDATION_CACHE_EXT:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT;
+ case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV:
+ return VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV_EXT;
+ default: // Object types not listed above fall through to the UNKNOWN result below.
+ break;
+ }
+
+ return VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT;
+}
void RenderingDeviceDriverVulkan::_set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name) {
const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get();
@@ -1171,6 +1319,16 @@ void RenderingDeviceDriverVulkan::_set_object_name(VkObjectType p_object_type, u
name_info.objectHandle = p_object_handle;
name_info.pObjectName = obj_data.get_data();
functions.SetDebugUtilsObjectNameEXT(vk_device, &name_info);
+ } else if (functions.DebugMarkerSetObjectNameEXT != nullptr) {
+		// Debug marker extensions: fallback naming path, reached only when the branch above was not taken.
+		CharString obj_data = p_object_name.utf8();
+		// Zero-initialize so no field of the structure is ever left indeterminate.
+		VkDebugMarkerObjectNameInfoEXT name_info = {};
+		// The debug marker structure has its own structure type; it is not the debug utils one.
+		name_info.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_OBJECT_NAME_INFO_EXT;
+		name_info.pNext = nullptr;
+		// This extension identifies objects with VkDebugReportObjectTypeEXT rather than VkObjectType.
+		name_info.objectType = _convert_to_debug_report_objectType(p_object_type);
+		name_info.object = p_object_handle;
+		name_info.pObjectName = obj_data.get_data();
+		functions.DebugMarkerSetObjectNameEXT(vk_device, &name_info);
}
}
@@ -1211,6 +1369,7 @@ Error RenderingDeviceDriverVulkan::initialize(uint32_t p_device_index, uint32_t
ERR_FAIL_COND_V(err != OK, err);
max_descriptor_sets_per_pool = GLOBAL_GET("rendering/rendering_device/vulkan/max_descriptors_per_pool");
+ breadcrumb_buffer = buffer_create(sizeof(uint32_t), BufferUsageBits::BUFFER_USAGE_TRANSFER_TO_BIT, MemoryAllocationType::MEMORY_ALLOCATION_TYPE_CPU);
return OK;
}
@@ -1279,11 +1438,10 @@ RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitFie
// Looks like a readback buffer: GPU copies from VRAM, then CPU maps and reads.
alloc_create_info.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
}
- alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
alloc_create_info.requiredFlags = (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
} break;
case MEMORY_ALLOCATION_TYPE_GPU: {
- alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
+ alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
if (p_size <= SMALL_ALLOCATION_MAX_SIZE) {
uint32_t mem_type_index = 0;
vmaFindMemoryTypeIndexForBufferInfo(allocator, &create_info, &alloc_create_info, &mem_type_index);
@@ -1295,11 +1453,15 @@ RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitFie
VkBuffer vk_buffer = VK_NULL_HANDLE;
VmaAllocation allocation = nullptr;
VmaAllocationInfo alloc_info = {};
- VkResult err = vmaCreateBuffer(allocator, &create_info, &alloc_create_info, &vk_buffer, &allocation, &alloc_info);
+
+ VkResult err = vkCreateBuffer(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER), &vk_buffer);
ERR_FAIL_COND_V_MSG(err, BufferID(), "Can't create buffer of size: " + itos(p_size) + ", error " + itos(err) + ".");
+ err = vmaAllocateMemoryForBuffer(allocator, vk_buffer, &alloc_create_info, &allocation, &alloc_info);
+ ERR_FAIL_COND_V_MSG(err, BufferID(), "Can't allocate memory for buffer of size: " + itos(p_size) + ", error " + itos(err) + ".");
+ err = vmaBindBufferMemory2(allocator, allocation, 0, vk_buffer, NULL);
+ ERR_FAIL_COND_V_MSG(err, BufferID(), "Can't bind memory to buffer of size: " + itos(p_size) + ", error " + itos(err) + ".");
// Bookkeep.
-
BufferInfo *buf_info = VersatileResource::allocate<BufferInfo>(resources_allocator);
buf_info->vk_buffer = vk_buffer;
buf_info->allocation.handle = allocation;
@@ -1320,7 +1482,7 @@ bool RenderingDeviceDriverVulkan::buffer_set_texel_format(BufferID p_buffer, Dat
view_create_info.format = RD_TO_VK_FORMAT[p_format];
view_create_info.range = buf_info->allocation.size;
- VkResult res = vkCreateBufferView(vk_device, &view_create_info, nullptr, &buf_info->vk_view);
+ VkResult res = vkCreateBufferView(vk_device, &view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER_VIEW), &buf_info->vk_view);
ERR_FAIL_COND_V_MSG(res, false, "Unable to create buffer view, error " + itos(res) + ".");
return true;
@@ -1329,9 +1491,12 @@ bool RenderingDeviceDriverVulkan::buffer_set_texel_format(BufferID p_buffer, Dat
void RenderingDeviceDriverVulkan::buffer_free(BufferID p_buffer) {
BufferInfo *buf_info = (BufferInfo *)p_buffer.id;
if (buf_info->vk_view) {
- vkDestroyBufferView(vk_device, buf_info->vk_view, nullptr);
+ vkDestroyBufferView(vk_device, buf_info->vk_view, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER_VIEW)); // Same callback type buffer_set_texel_format uses when creating the view.
}
- vmaDestroyBuffer(allocator, buf_info->vk_buffer, buf_info->allocation.handle);
+
+ vkDestroyBuffer(vk_device, buf_info->vk_buffer, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER)); // Buffer handle and memory are released separately, mirroring vkCreateBuffer + vmaAllocateMemoryForBuffer in buffer_create.
+ vmaFreeMemory(allocator, buf_info->allocation.handle); // Returns the VMA allocation that buffer_create bound to this buffer.
+
VersatileResource::free(resources_allocator, buf_info);
}
@@ -1502,7 +1667,7 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create(const TextureFormat &
VmaAllocationCreateInfo alloc_create_info = {};
alloc_create_info.flags = (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT : 0;
- alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
+ alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
if (image_size <= SMALL_ALLOCATION_MAX_SIZE) {
uint32_t mem_type_index = 0;
vmaFindMemoryTypeIndexForImageInfo(allocator, &create_info, &alloc_create_info, &mem_type_index);
@@ -1514,8 +1679,13 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create(const TextureFormat &
VkImage vk_image = VK_NULL_HANDLE;
VmaAllocation allocation = nullptr;
VmaAllocationInfo alloc_info = {};
- VkResult err = vmaCreateImage(allocator, &create_info, &alloc_create_info, &vk_image, &allocation, &alloc_info);
- ERR_FAIL_COND_V_MSG(err, TextureID(), "vmaCreateImage failed with error " + itos(err) + ".");
+
+ VkResult err = vkCreateImage(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE), &vk_image);
+ ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImage failed with error " + itos(err) + ".");
+ err = vmaAllocateMemoryForImage(allocator, vk_image, &alloc_create_info, &allocation, &alloc_info);
+ ERR_FAIL_COND_V_MSG(err, TextureID(), "Can't allocate memory for image, error: " + itos(err) + ".");
+ err = vmaBindImageMemory2(allocator, allocation, 0, vk_image, NULL);
+ ERR_FAIL_COND_V_MSG(err, TextureID(), "Can't bind memory to image, error: " + itos(err) + ".");
// Create view.
@@ -1536,16 +1706,28 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create(const TextureFormat &
image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
}
+ VkImageViewASTCDecodeModeEXT decode_mode;
+ if (enabled_device_extension_names.has(VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME)) {
+ if (image_view_create_info.format >= VK_FORMAT_ASTC_4x4_UNORM_BLOCK && image_view_create_info.format <= VK_FORMAT_ASTC_12x12_SRGB_BLOCK) {
+ decode_mode.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_ASTC_DECODE_MODE_EXT;
+ decode_mode.pNext = nullptr;
+ decode_mode.decodeMode = VK_FORMAT_R8G8B8A8_UNORM;
+ image_view_create_info.pNext = &decode_mode;
+ }
+ }
+
VkImageView vk_image_view = VK_NULL_HANDLE;
- err = vkCreateImageView(vk_device, &image_view_create_info, nullptr, &vk_image_view);
+ err = vkCreateImageView(vk_device, &image_view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &vk_image_view);
if (err) {
- vmaDestroyImage(allocator, vk_image, allocation);
+ vkDestroyImage(vk_device, vk_image, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE));
+ vmaFreeMemory(allocator, allocation);
ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImageView failed with error " + itos(err) + ".");
}
// Bookkeep.
TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator);
+ tex_info->vk_image = vk_image;
tex_info->vk_view = vk_image_view;
tex_info->rd_format = p_format.format;
tex_info->vk_create_info = create_info;
@@ -1579,7 +1761,7 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_from_extension(uint64
image_view_create_info.subresourceRange.aspectMask = p_depth_stencil ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT;
VkImageView vk_image_view = VK_NULL_HANDLE;
- VkResult err = vkCreateImageView(vk_device, &image_view_create_info, nullptr, &vk_image_view);
+ VkResult err = vkCreateImageView(vk_device, &image_view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &vk_image_view);
if (err) {
ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImageView failed with error " + itos(err) + ".");
}
@@ -1634,7 +1816,7 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared(TextureID p_or
}
VkImageView new_vk_image_view = VK_NULL_HANDLE;
- VkResult err = vkCreateImageView(vk_device, &image_view_create_info, nullptr, &new_vk_image_view);
+ VkResult err = vkCreateImageView(vk_device, &image_view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &new_vk_image_view);
ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImageView failed with error " + itos(err) + ".");
// Bookkeep.
@@ -1687,7 +1869,7 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared_from_slice(Tex
image_view_create_info.subresourceRange.layerCount = p_layers;
VkImageView new_vk_image_view = VK_NULL_HANDLE;
- VkResult err = vkCreateImageView(vk_device, &image_view_create_info, nullptr, &new_vk_image_view);
+ VkResult err = vkCreateImageView(vk_device, &image_view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &new_vk_image_view);
ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImageView failed with error " + itos(err) + ".");
// Bookkeep.
@@ -1707,9 +1889,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared_from_slice(Tex
void RenderingDeviceDriverVulkan::texture_free(TextureID p_texture) {
TextureInfo *tex_info = (TextureInfo *)p_texture.id;
- vkDestroyImageView(vk_device, tex_info->vk_view, nullptr);
+ vkDestroyImageView(vk_device, tex_info->vk_view, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW)); // The view is always destroyed; the image only when this texture has an allocation.
if (tex_info->allocation.handle) {
- vmaDestroyImage(allocator, tex_info->vk_view_create_info.image, tex_info->allocation.handle);
+ vkDestroyImage(vk_device, tex_info->vk_image, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE)); // Callback type must match the VK_OBJECT_TYPE_IMAGE used by vkCreateImage in texture_create.
+ vmaFreeMemory(allocator, tex_info->allocation.handle); // Memory is released separately now that image and allocation are created separately.
}
VersatileResource::free(resources_allocator, tex_info);
}
@@ -1788,7 +1971,7 @@ uint8_t *RenderingDeviceDriverVulkan::texture_map(TextureID p_texture, const Tex
void RenderingDeviceDriverVulkan::texture_unmap(TextureID p_texture) {
const TextureInfo *tex_info = (const TextureInfo *)p_texture.id;
- vkUnmapMemory(vk_device, tex_info->allocation.info.deviceMemory);
+ vmaUnmapMemory(allocator, tex_info->allocation.handle); // Unmaps via the VMA allocation handle instead of the raw VkDeviceMemory. NOTE(review): presumably pairs with vmaMapMemory in texture_map; confirm.
}
BitField<RDD::TextureUsageBits> RenderingDeviceDriverVulkan::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) {
@@ -1869,14 +2052,14 @@ RDD::SamplerID RenderingDeviceDriverVulkan::sampler_create(const SamplerState &p
sampler_create_info.unnormalizedCoordinates = p_state.unnormalized_uvw;
VkSampler vk_sampler = VK_NULL_HANDLE;
- VkResult res = vkCreateSampler(vk_device, &sampler_create_info, nullptr, &vk_sampler);
+ VkResult res = vkCreateSampler(vk_device, &sampler_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SAMPLER), &vk_sampler);
ERR_FAIL_COND_V_MSG(res, SamplerID(), "vkCreateSampler failed with error " + itos(res) + ".");
return SamplerID(vk_sampler);
}
void RenderingDeviceDriverVulkan::sampler_free(SamplerID p_sampler) {
- vkDestroySampler(vk_device, (VkSampler)p_sampler.id, nullptr);
+ vkDestroySampler(vk_device, (VkSampler)p_sampler.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SAMPLER)); // The ID is the raw VkSampler; uses the same callback type sampler_create passes to vkCreateSampler.
}
bool RenderingDeviceDriverVulkan::sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_filter) {
@@ -2051,7 +2234,7 @@ RDD::FenceID RenderingDeviceDriverVulkan::fence_create() {
VkFence vk_fence = VK_NULL_HANDLE;
VkFenceCreateInfo create_info = {};
create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
- VkResult err = vkCreateFence(vk_device, &create_info, nullptr, &vk_fence);
+ VkResult err = vkCreateFence(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FENCE), &vk_fence);
ERR_FAIL_COND_V(err != VK_SUCCESS, FenceID());
Fence *fence = memnew(Fence);
@@ -2062,10 +2245,13 @@ RDD::FenceID RenderingDeviceDriverVulkan::fence_create() {
Error RenderingDeviceDriverVulkan::fence_wait(FenceID p_fence) {
Fence *fence = (Fence *)(p_fence.id);
- VkResult err = vkWaitForFences(vk_device, 1, &fence->vk_fence, VK_TRUE, UINT64_MAX);
- ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);
+ VkResult fence_status = vkGetFenceStatus(vk_device, fence->vk_fence);
+ if (fence_status == VK_NOT_READY) {
+ VkResult err = vkWaitForFences(vk_device, 1, &fence->vk_fence, VK_TRUE, UINT64_MAX);
+ ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);
+ }
- err = vkResetFences(vk_device, 1, &fence->vk_fence);
+ VkResult err = vkResetFences(vk_device, 1, &fence->vk_fence);
ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);
if (fence->queue_signaled_from != nullptr) {
@@ -2090,7 +2276,7 @@ Error RenderingDeviceDriverVulkan::fence_wait(FenceID p_fence) {
void RenderingDeviceDriverVulkan::fence_free(FenceID p_fence) {
Fence *fence = (Fence *)(p_fence.id);
- vkDestroyFence(vk_device, fence->vk_fence, nullptr);
+ vkDestroyFence(vk_device, fence->vk_fence, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FENCE)); // Same callback type fence_create passes to vkCreateFence; the Fence wrapper is deleted right after.
memdelete(fence);
}
@@ -2102,14 +2288,14 @@ RDD::SemaphoreID RenderingDeviceDriverVulkan::semaphore_create() {
VkSemaphore semaphore = VK_NULL_HANDLE;
VkSemaphoreCreateInfo create_info = {};
create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
- VkResult err = vkCreateSemaphore(vk_device, &create_info, nullptr, &semaphore);
+ VkResult err = vkCreateSemaphore(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE), &semaphore);
ERR_FAIL_COND_V(err != VK_SUCCESS, SemaphoreID());
return SemaphoreID(semaphore);
}
void RenderingDeviceDriverVulkan::semaphore_free(SemaphoreID p_semaphore) {
- vkDestroySemaphore(vk_device, VkSemaphore(p_semaphore.id), nullptr);
+ vkDestroySemaphore(vk_device, VkSemaphore(p_semaphore.id), VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE)); // The ID is the raw VkSemaphore; uses the same callback type semaphore_create passes to vkCreateSemaphore.
}
/******************/
@@ -2236,7 +2422,7 @@ Error RenderingDeviceDriverVulkan::command_queue_execute_and_present(CommandQueu
create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
for (uint32_t i = 0; i < frame_count; i++) {
- err = vkCreateSemaphore(vk_device, &create_info, nullptr, &semaphore);
+ err = vkCreateSemaphore(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE), &semaphore);
ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);
command_queue->present_semaphores.push_back(semaphore);
}
@@ -2263,6 +2449,11 @@ Error RenderingDeviceDriverVulkan::command_queue_execute_and_present(CommandQueu
device_queue.submit_mutex.lock();
err = vkQueueSubmit(device_queue.queue, 1, &submit_info, vk_fence);
device_queue.submit_mutex.unlock();
+
+ if (err == VK_ERROR_DEVICE_LOST) {
+ print_lost_device_info();
+ CRASH_NOW_MSG("Vulkan device was lost.");
+ }
ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);
if (fence != nullptr && !command_queue->pending_semaphores_for_fence.is_empty()) {
@@ -2354,12 +2545,12 @@ void RenderingDeviceDriverVulkan::command_queue_free(CommandQueueID p_cmd_queue)
// Erase all the semaphores used for presentation.
for (VkSemaphore semaphore : command_queue->present_semaphores) {
- vkDestroySemaphore(vk_device, semaphore, nullptr);
+ vkDestroySemaphore(vk_device, semaphore, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE));
}
// Erase all the semaphores used for image acquisition.
for (VkSemaphore semaphore : command_queue->image_semaphores) {
- vkDestroySemaphore(vk_device, semaphore, nullptr);
+ vkDestroySemaphore(vk_device, semaphore, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE));
}
// Retrieve the queue family corresponding to the virtual queue.
@@ -2387,7 +2578,7 @@ RDD::CommandPoolID RenderingDeviceDriverVulkan::command_pool_create(CommandQueue
cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
VkCommandPool vk_command_pool = VK_NULL_HANDLE;
- VkResult res = vkCreateCommandPool(vk_device, &cmd_pool_info, nullptr, &vk_command_pool);
+ VkResult res = vkCreateCommandPool(vk_device, &cmd_pool_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_COMMAND_POOL), &vk_command_pool);
ERR_FAIL_COND_V_MSG(res, CommandPoolID(), "vkCreateCommandPool failed with error " + itos(res) + ".");
CommandPool *command_pool = memnew(CommandPool);
@@ -2400,7 +2591,7 @@ void RenderingDeviceDriverVulkan::command_pool_free(CommandPoolID p_cmd_pool) {
DEV_ASSERT(p_cmd_pool);
CommandPool *command_pool = (CommandPool *)(p_cmd_pool.id);
- vkDestroyCommandPool(vk_device, command_pool->vk_command_pool, nullptr);
+ vkDestroyCommandPool(vk_device, command_pool->vk_command_pool, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_COMMAND_POOL));
memdelete(command_pool);
}
@@ -2480,7 +2671,7 @@ void RenderingDeviceDriverVulkan::_swap_chain_release(SwapChain *swap_chain) {
}
for (VkImageView view : swap_chain->image_views) {
- vkDestroyImageView(vk_device, view, nullptr);
+ vkDestroyImageView(vk_device, view, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW));
}
swap_chain->image_index = UINT_MAX;
@@ -2489,7 +2680,7 @@ void RenderingDeviceDriverVulkan::_swap_chain_release(SwapChain *swap_chain) {
swap_chain->framebuffers.clear();
if (swap_chain->vk_swapchain != VK_NULL_HANDLE) {
- device_functions.DestroySwapchainKHR(vk_device, swap_chain->vk_swapchain, nullptr);
+ device_functions.DestroySwapchainKHR(vk_device, swap_chain->vk_swapchain, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SWAPCHAIN_KHR));
swap_chain->vk_swapchain = VK_NULL_HANDLE;
}
@@ -2571,7 +2762,7 @@ RenderingDeviceDriver::SwapChainID RenderingDeviceDriverVulkan::swap_chain_creat
pass_info.pSubpasses = &subpass;
VkRenderPass render_pass = VK_NULL_HANDLE;
- err = _create_render_pass(vk_device, &pass_info, nullptr, &render_pass);
+ err = _create_render_pass(vk_device, &pass_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS), &render_pass);
ERR_FAIL_COND_V(err != VK_SUCCESS, SwapChainID());
SwapChain *swap_chain = memnew(SwapChain);
@@ -2714,7 +2905,7 @@ Error RenderingDeviceDriverVulkan::swap_chain_resize(CommandQueueID p_cmd_queue,
swap_create_info.compositeAlpha = composite_alpha;
swap_create_info.presentMode = present_mode;
swap_create_info.clipped = true;
- err = device_functions.CreateSwapchainKHR(vk_device, &swap_create_info, nullptr, &swap_chain->vk_swapchain);
+ err = device_functions.CreateSwapchainKHR(vk_device, &swap_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SWAPCHAIN_KHR), &swap_chain->vk_swapchain);
ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);
uint32_t image_count = 0;
@@ -2742,7 +2933,7 @@ Error RenderingDeviceDriverVulkan::swap_chain_resize(CommandQueueID p_cmd_queue,
VkImageView image_view;
for (uint32_t i = 0; i < image_count; i++) {
view_create_info.image = swap_chain->images[i];
- err = vkCreateImageView(vk_device, &view_create_info, nullptr, &image_view);
+ err = vkCreateImageView(vk_device, &view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &image_view);
ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);
swap_chain->image_views.push_back(image_view);
@@ -2761,7 +2952,7 @@ Error RenderingDeviceDriverVulkan::swap_chain_resize(CommandQueueID p_cmd_queue,
VkFramebuffer framebuffer;
for (uint32_t i = 0; i < image_count; i++) {
fb_create_info.pAttachments = &swap_chain->image_views[i];
- err = vkCreateFramebuffer(vk_device, &fb_create_info, nullptr, &framebuffer);
+ err = vkCreateFramebuffer(vk_device, &fb_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER), &framebuffer);
ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);
swap_chain->framebuffers.push_back(RDD::FramebufferID(framebuffer));
@@ -2792,7 +2983,7 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::swap_chain_acquire_framebuffer(C
// Add a new semaphore if none are free.
VkSemaphoreCreateInfo create_info = {};
create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
- err = vkCreateSemaphore(vk_device, &create_info, nullptr, &semaphore);
+ err = vkCreateSemaphore(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE), &semaphore);
ERR_FAIL_COND_V(err != VK_SUCCESS, FramebufferID());
semaphore_index = command_queue->image_semaphores.size();
@@ -2864,7 +3055,7 @@ void RenderingDeviceDriverVulkan::swap_chain_free(SwapChainID p_swap_chain) {
_swap_chain_release(swap_chain);
if (swap_chain->render_pass.id != 0) {
- vkDestroyRenderPass(vk_device, VkRenderPass(swap_chain->render_pass.id), nullptr);
+ vkDestroyRenderPass(vk_device, VkRenderPass(swap_chain->render_pass.id), VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS));
}
memdelete(swap_chain);
@@ -2890,7 +3081,7 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID
framebuffer_create_info.layers = 1;
VkFramebuffer vk_framebuffer = VK_NULL_HANDLE;
- VkResult err = vkCreateFramebuffer(vk_device, &framebuffer_create_info, nullptr, &vk_framebuffer);
+ VkResult err = vkCreateFramebuffer(vk_device, &framebuffer_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER), &vk_framebuffer);
ERR_FAIL_COND_V_MSG(err, FramebufferID(), "vkCreateFramebuffer failed with error " + itos(err) + ".");
#if PRINT_NATIVE_COMMANDS
@@ -2905,7 +3096,7 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID
}
void RenderingDeviceDriverVulkan::framebuffer_free(FramebufferID p_framebuffer) {
- vkDestroyFramebuffer(vk_device, (VkFramebuffer)p_framebuffer.id, nullptr);
+ vkDestroyFramebuffer(vk_device, (VkFramebuffer)p_framebuffer.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER)); // The ID is the raw VkFramebuffer; uses the same callback type framebuffer_create passes to vkCreateFramebuffer.
}
/****************/
@@ -3282,7 +3473,7 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec
shader_module_create_info.pCode = (const uint32_t *)stages_spirv[i].ptr();
VkShaderModule vk_module = VK_NULL_HANDLE;
- VkResult res = vkCreateShaderModule(vk_device, &shader_module_create_info, nullptr, &vk_module);
+ VkResult res = vkCreateShaderModule(vk_device, &shader_module_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE), &vk_module);
if (res) {
error_text = "Error (" + itos(res) + ") creating shader module for stage: " + String(SHADER_STAGE_NAMES[r_shader_desc.stages[i]]);
break;
@@ -3309,7 +3500,7 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec
layout_create_info.pBindings = vk_set_bindings[i].ptr();
VkDescriptorSetLayout layout = VK_NULL_HANDLE;
- VkResult res = vkCreateDescriptorSetLayout(vk_device, &layout_create_info, nullptr, &layout);
+ VkResult res = vkCreateDescriptorSetLayout(vk_device, &layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT), &layout);
if (res) {
error_text = "Error (" + itos(res) + ") creating descriptor set layout for set " + itos(i);
break;
@@ -3336,7 +3527,7 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec
pipeline_layout_create_info.pPushConstantRanges = push_constant_range;
}
- VkResult err = vkCreatePipelineLayout(vk_device, &pipeline_layout_create_info, nullptr, &shader_info.vk_pipeline_layout);
+ VkResult err = vkCreatePipelineLayout(vk_device, &pipeline_layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_LAYOUT), &shader_info.vk_pipeline_layout);
if (err) {
error_text = "Error (" + itos(err) + ") creating pipeline layout.";
}
@@ -3345,10 +3536,10 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec
if (!error_text.is_empty()) {
// Clean up if failed.
for (uint32_t i = 0; i < shader_info.vk_stages_create_info.size(); i++) {
- vkDestroyShaderModule(vk_device, shader_info.vk_stages_create_info[i].module, nullptr);
+ vkDestroyShaderModule(vk_device, shader_info.vk_stages_create_info[i].module, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE));
}
for (uint32_t i = 0; i < binary_data.set_count; i++) {
- vkDestroyDescriptorSetLayout(vk_device, shader_info.vk_descriptor_set_layouts[i], nullptr);
+ vkDestroyDescriptorSetLayout(vk_device, shader_info.vk_descriptor_set_layouts[i], VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT));
}
ERR_FAIL_V_MSG(ShaderID(), error_text);
@@ -3365,18 +3556,29 @@ void RenderingDeviceDriverVulkan::shader_free(ShaderID p_shader) {
ShaderInfo *shader_info = (ShaderInfo *)p_shader.id;
for (uint32_t i = 0; i < shader_info->vk_descriptor_set_layouts.size(); i++) {
- vkDestroyDescriptorSetLayout(vk_device, shader_info->vk_descriptor_set_layouts[i], nullptr);
+ vkDestroyDescriptorSetLayout(vk_device, shader_info->vk_descriptor_set_layouts[i], VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT));
}
- vkDestroyPipelineLayout(vk_device, shader_info->vk_pipeline_layout, nullptr);
+ vkDestroyPipelineLayout(vk_device, shader_info->vk_pipeline_layout, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_LAYOUT));
- for (uint32_t i = 0; i < shader_info->vk_stages_create_info.size(); i++) {
- vkDestroyShaderModule(vk_device, shader_info->vk_stages_create_info[i].module, nullptr);
- }
+ shader_destroy_modules(p_shader);
VersatileResource::free(resources_allocator, shader_info);
}
+void RenderingDeviceDriverVulkan::shader_destroy_modules(ShaderID p_shader) { // Destroys every shader module owned by p_shader and empties its stage list; shader_free calls this.
+ ShaderInfo *si = (ShaderInfo *)p_shader.id;
+
+ for (uint32_t i = 0; i < si->vk_stages_create_info.size(); i++) {
+ if (si->vk_stages_create_info[i].module) { // Skip stages whose module handle is already null.
+ vkDestroyShaderModule(vk_device, si->vk_stages_create_info[i].module,
+ VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE));
+ si->vk_stages_create_info[i].module = VK_NULL_HANDLE;
+ }
+ }
+ si->vk_stages_create_info.clear(); // Leaves the stage count at 0, which render_pipeline_create rejects with an error.
+}
+
/*********************/
/**** UNIFORM SET ****/
/*********************/
@@ -3474,7 +3676,7 @@ VkDescriptorPool RenderingDeviceDriverVulkan::_descriptor_set_pool_find_or_creat
descriptor_set_pool_create_info.pPoolSizes = vk_sizes;
VkDescriptorPool vk_pool = VK_NULL_HANDLE;
- VkResult res = vkCreateDescriptorPool(vk_device, &descriptor_set_pool_create_info, nullptr, &vk_pool);
+ VkResult res = vkCreateDescriptorPool(vk_device, &descriptor_set_pool_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_POOL), &vk_pool);
if (res) {
ERR_FAIL_COND_V_MSG(res, VK_NULL_HANDLE, "vkCreateDescriptorPool failed with error " + itos(res) + ".");
}
@@ -3494,7 +3696,7 @@ void RenderingDeviceDriverVulkan::_descriptor_set_pool_unreference(DescriptorSet
HashMap<VkDescriptorPool, uint32_t>::Iterator pool_rcs_it = p_pool_sets_it->value.find(p_vk_descriptor_pool);
pool_rcs_it->value--;
if (pool_rcs_it->value == 0) {
- vkDestroyDescriptorPool(vk_device, p_vk_descriptor_pool, nullptr);
+ vkDestroyDescriptorPool(vk_device, p_vk_descriptor_pool, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_POOL));
p_pool_sets_it->value.erase(p_vk_descriptor_pool);
if (p_pool_sets_it->value.is_empty()) {
descriptor_set_pools.remove(p_pool_sets_it);
@@ -3839,7 +4041,7 @@ void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID
/******************/
void RenderingDeviceDriverVulkan::pipeline_free(PipelineID p_pipeline) {
- vkDestroyPipeline(vk_device, (VkPipeline)p_pipeline.id, nullptr);
+ vkDestroyPipeline(vk_device, (VkPipeline)p_pipeline.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE)); // Same callback type used by both render_pipeline_create and compute_pipeline_create.
}
// ----- BINDING -----
@@ -3904,7 +4106,7 @@ bool RenderingDeviceDriverVulkan::pipeline_cache_create(const Vector<uint8_t> &p
cache_info.flags = VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
}
- VkResult err = vkCreatePipelineCache(vk_device, &cache_info, nullptr, &pipelines_cache.vk_cache);
+ VkResult err = vkCreatePipelineCache(vk_device, &cache_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_CACHE), &pipelines_cache.vk_cache);
if (err != VK_SUCCESS) {
WARN_PRINT("vkCreatePipelinecache failed with error " + itos(err) + ".");
return false;
@@ -3917,7 +4119,7 @@ bool RenderingDeviceDriverVulkan::pipeline_cache_create(const Vector<uint8_t> &p
void RenderingDeviceDriverVulkan::pipeline_cache_free() {
DEV_ASSERT(pipelines_cache.vk_cache);
- vkDestroyPipelineCache(vk_device, pipelines_cache.vk_cache, nullptr);
+ vkDestroyPipelineCache(vk_device, pipelines_cache.vk_cache, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_CACHE));
pipelines_cache.vk_cache = VK_NULL_HANDLE;
DEV_ASSERT(caching_instance_count > 0);
@@ -4101,14 +4303,14 @@ RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView<Att
}
VkRenderPass vk_render_pass = VK_NULL_HANDLE;
- VkResult res = _create_render_pass(vk_device, &create_info, nullptr, &vk_render_pass);
+ VkResult res = _create_render_pass(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS), &vk_render_pass);
ERR_FAIL_COND_V_MSG(res, RenderPassID(), "vkCreateRenderPass2KHR failed with error " + itos(res) + ".");
return RenderPassID(vk_render_pass);
}
void RenderingDeviceDriverVulkan::render_pass_free(RenderPassID p_render_pass) {
- vkDestroyRenderPass(vk_device, (VkRenderPass)p_render_pass.id, nullptr);
+ vkDestroyRenderPass(vk_device, (VkRenderPass)p_render_pass.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS)); // Same callback type render_pass_create passes to _create_render_pass.
}
// ----- COMMANDS -----
@@ -4550,6 +4752,8 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create(
pipeline_create_info.pNext = graphics_pipeline_nextptr;
pipeline_create_info.stageCount = shader_info->vk_stages_create_info.size();
+ ERR_FAIL_COND_V_MSG(pipeline_create_info.stageCount == 0, PipelineID(),
+ "Cannot create pipeline without shader module, please make sure shader modules are destroyed only after all associated pipelines are created.");
VkPipelineShaderStageCreateInfo *vk_pipeline_stages = ALLOCA_ARRAY(VkPipelineShaderStageCreateInfo, shader_info->vk_stages_create_info.size());
for (uint32_t i = 0; i < shader_info->vk_stages_create_info.size(); i++) {
@@ -4592,7 +4796,7 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create(
// ---
VkPipeline vk_pipeline = VK_NULL_HANDLE;
- VkResult err = vkCreateGraphicsPipelines(vk_device, pipelines_cache.vk_cache, 1, &pipeline_create_info, nullptr, &vk_pipeline);
+ VkResult err = vkCreateGraphicsPipelines(vk_device, pipelines_cache.vk_cache, 1, &pipeline_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE), &vk_pipeline);
ERR_FAIL_COND_V_MSG(err, PipelineID(), "vkCreateGraphicsPipelines failed with error " + itos(err) + ".");
return PipelineID(vk_pipeline);
@@ -4653,7 +4857,7 @@ RDD::PipelineID RenderingDeviceDriverVulkan::compute_pipeline_create(ShaderID p_
}
VkPipeline vk_pipeline = VK_NULL_HANDLE;
- VkResult err = vkCreateComputePipelines(vk_device, pipelines_cache.vk_cache, 1, &pipeline_create_info, nullptr, &vk_pipeline);
+ VkResult err = vkCreateComputePipelines(vk_device, pipelines_cache.vk_cache, 1, &pipeline_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE), &vk_pipeline);
ERR_FAIL_COND_V_MSG(err, PipelineID(), "vkCreateComputePipelines failed with error " + itos(err) + ".");
return PipelineID(vk_pipeline);
@@ -4672,12 +4876,12 @@ RDD::QueryPoolID RenderingDeviceDriverVulkan::timestamp_query_pool_create(uint32
query_pool_create_info.queryCount = p_query_count;
VkQueryPool vk_query_pool = VK_NULL_HANDLE;
- vkCreateQueryPool(vk_device, &query_pool_create_info, nullptr, &vk_query_pool);
+ vkCreateQueryPool(vk_device, &query_pool_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_QUERY_POOL), &vk_query_pool);
return RDD::QueryPoolID(vk_query_pool);
}
void RenderingDeviceDriverVulkan::timestamp_query_pool_free(QueryPoolID p_pool_id) {
- vkDestroyQueryPool(vk_device, (VkQueryPool)p_pool_id.id, nullptr);
+ vkDestroyQueryPool(vk_device, (VkQueryPool)p_pool_id.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_QUERY_POOL)); // Same callback type timestamp_query_pool_create passes to vkCreateQueryPool.
}
void RenderingDeviceDriverVulkan::timestamp_query_pool_get_results(QueryPoolID p_pool_id, uint32_t p_query_count, uint64_t *r_results) {
@@ -4732,6 +4936,21 @@ void RenderingDeviceDriverVulkan::command_timestamp_write(CommandBufferID p_cmd_
void RenderingDeviceDriverVulkan::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) {
const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get();
+ if (!functions.CmdBeginDebugUtilsLabelEXT) { // Debug-utils entry point not loaded: fall back to debug markers, or do nothing.
+ if (functions.CmdDebugMarkerBeginEXT) {
+ // VK_EXT_debug_marker path.
+ VkDebugMarkerMarkerInfoEXT marker;
+ marker.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT; // Must be the debug-marker sType, not the debug-utils label one.
+ marker.pNext = nullptr;
+ marker.pMarkerName = p_label_name;
+ marker.color[0] = p_color[0];
+ marker.color[1] = p_color[1];
+ marker.color[2] = p_color[2];
+ marker.color[3] = p_color[3];
+ functions.CmdDebugMarkerBeginEXT((VkCommandBuffer)p_cmd_buffer.id, &marker);
+ }
+ return; // Never reach the debug-utils call below with a null function pointer.
+ }
VkDebugUtilsLabelEXT label;
label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
label.pNext = nullptr;
@@ -4745,9 +4964,167 @@ void RenderingDeviceDriverVulkan::command_begin_label(CommandBufferID p_cmd_buff
void RenderingDeviceDriverVulkan::command_end_label(CommandBufferID p_cmd_buffer) {
const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get();
+ if (!functions.CmdEndDebugUtilsLabelEXT) { // Debug-utils entry point not loaded: fall back to debug markers, or do nothing.
+ if (functions.CmdDebugMarkerEndEXT) {
+ // VK_EXT_debug_marker path; mirrors the fallback in command_begin_label.
+ functions.CmdDebugMarkerEndEXT((VkCommandBuffer)p_cmd_buffer.id);
+ }
+ return; // Never reach the debug-utils call below with a null function pointer.
+ }
functions.CmdEndDebugUtilsLabelEXT((VkCommandBuffer)p_cmd_buffer.id);
}
+/****************/
+/**** DEBUG *****/
+/****************/
+// Records a fill command that writes p_data into the first 32-bit word of the
+// breadcrumb buffer. BreadcrumbMarker::NONE is ignored and records nothing.
+void RenderingDeviceDriverVulkan::command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) {
+	if (p_data != BreadcrumbMarker::NONE) {
+		VkCommandBuffer vk_cmd_buffer = (VkCommandBuffer)p_cmd_buffer.id;
+		BufferInfo *breadcrumb_info = (BufferInfo *)breadcrumb_buffer.id;
+		vkCmdFillBuffer(vk_cmd_buffer, breadcrumb_info->vk_buffer, 0, sizeof(uint32_t), p_data);
+	}
+}
+
+// Prints a diagnostic report after a device loss.
+//
+// Queries vkGetDeviceFaultInfoEXT twice (first for the record counts, then for
+// the records themselves), formats the vendor and address fault records into
+// a single error message, and finally prints the context driver's
+// driver/device memory report. Returns early, printing only an error, when the
+// entry point is missing or the count query fails.
+void RenderingDeviceDriverVulkan::on_device_lost() const {
+	if (device_functions.GetDeviceFaultInfoEXT == nullptr) {
+		_err_print_error(FUNCTION_STR, __FILE__, __LINE__, "VK_EXT_device_fault not available.");
+		return;
+	}
+
+	// First call: no info struct, only the counts are filled in.
+	VkDeviceFaultCountsEXT fault_counts = {};
+	fault_counts.sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_COUNTS_EXT;
+	VkResult vkres = device_functions.GetDeviceFaultInfoEXT(vk_device, &fault_counts, nullptr);
+
+	if (vkres != VK_SUCCESS) {
+		_err_print_error(FUNCTION_STR, __FILE__, __LINE__, "vkGetDeviceFaultInfoEXT returned " + itos(vkres) + " when getting fault count, skipping VK_EXT_device_fault report...");
+		return;
+	}
+
+	VkDeviceFaultInfoEXT fault_info = {};
+	fault_info.sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_EXT;
+	fault_info.pVendorInfos = fault_counts.vendorInfoCount
+			? (VkDeviceFaultVendorInfoEXT *)memalloc(fault_counts.vendorInfoCount * sizeof(VkDeviceFaultVendorInfoEXT))
+			: nullptr;
+	fault_info.pAddressInfos = fault_counts.addressInfoCount
+			? (VkDeviceFaultAddressInfoEXT *)memalloc(fault_counts.addressInfoCount * sizeof(VkDeviceFaultAddressInfoEXT))
+			: nullptr;
+
+	// If an allocation failed, zero the matching count so the loops below
+	// never index a null array.
+	if (fault_info.pVendorInfos == nullptr) {
+		fault_counts.vendorInfoCount = 0;
+	}
+	if (fault_info.pAddressInfos == nullptr) {
+		fault_counts.addressInfoCount = 0;
+	}
+
+	// The vendor binary blob is not requested.
+	fault_counts.vendorBinarySize = 0;
+
+	// Second call: fetch the actual records.
+	vkres = device_functions.GetDeviceFaultInfoEXT(vk_device, &fault_counts, &fault_info);
+	if (vkres != VK_SUCCESS) {
+		_err_print_error(FUNCTION_STR, __FILE__, __LINE__, "vkGetDeviceFaultInfoEXT returned " + itos(vkres) + " when getting fault info, skipping VK_EXT_device_fault report...");
+	} else {
+		String err_msg;
+		err_msg += "** Report from VK_EXT_device_fault **";
+		err_msg += "\nDescription: " + String(fault_info.description);
+		err_msg += "\nVendor infos:";
+		for (uint32_t vd = 0; vd < fault_counts.vendorInfoCount; ++vd) {
+			const VkDeviceFaultVendorInfoEXT *vendor_info = &fault_info.pVendorInfos[vd];
+			err_msg += "\nInfo " + itos(vd);
+			err_msg += "\n Description: " + String(vendor_info->description);
+			err_msg += "\n Fault code : " + itos(vendor_info->vendorFaultCode);
+			err_msg += "\n Fault data : " + itos(vendor_info->vendorFaultData);
+		}
+
+		// Indexed by the numeric value of VkDeviceFaultAddressTypeEXT.
+		static constexpr const char *addressTypeNames[] = {
+			"NONE",
+			"READ_INVALID",
+			"WRITE_INVALID",
+			"EXECUTE_INVALID",
+			"INSTRUCTION_POINTER_UNKNOWN",
+			"INSTRUCTION_POINTER_INVALID",
+			"INSTRUCTION_POINTER_FAULT",
+		};
+		static constexpr uint32_t address_type_name_count = sizeof(addressTypeNames) / sizeof(addressTypeNames[0]);
+
+		err_msg += "\nAddresses info:";
+		for (uint32_t ad = 0; ad < fault_counts.addressInfoCount; ++ad) {
+			const VkDeviceFaultAddressInfoEXT *addr_info = &fault_info.pAddressInfos[ad];
+			// From https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkDeviceFaultAddressInfoEXT.html
+			const VkDeviceAddress lower = (addr_info->reportedAddress & ~(addr_info->addressPrecision - 1));
+			const VkDeviceAddress upper = (addr_info->reportedAddress | (addr_info->addressPrecision - 1));
+
+			// The type value comes from the driver; never index the name table
+			// with a value it does not cover.
+			const uint32_t type_index = uint32_t(addr_info->addressType);
+			const String type_name = type_index < address_type_name_count
+					? String(addressTypeNames[type_index])
+					: "UNKNOWN(" + itos(type_index) + ")";
+
+			err_msg += "\nInfo " + itos(ad);
+			err_msg += "\n Type : " + type_name;
+			err_msg += "\n Reported address: " + itos(addr_info->reportedAddress);
+			err_msg += "\n Lower address : " + itos(lower);
+			err_msg += "\n Upper address : " + itos(upper);
+			err_msg += "\n Precision : " + itos(addr_info->addressPrecision);
+		}
+
+		// Only print the report when there is one; the failure path has
+		// already printed its own error above.
+		_err_print_error(FUNCTION_STR, __FILE__, __LINE__, err_msg);
+	}
+
+	if (fault_info.pVendorInfos) {
+		memfree(fault_info.pVendorInfos);
+	}
+	if (fault_info.pAddressInfos) {
+		memfree(fault_info.pAddressInfos);
+	}
+
+	_err_print_error(FUNCTION_STR, __FILE__, __LINE__, context_driver->get_driver_and_device_memory_report());
+}
+
+// Prints what is known about a lost device.
+//
+// In DEBUG_ENABLED / DEV_ENABLED builds the first 32-bit word of the
+// breadcrumb buffer is read back and decoded: the upper 16 bits select the
+// BreadcrumbMarker phase, the lower 16 bits are reported as user data.
+// In every build on_device_lost() is called afterwards.
+void RenderingDeviceDriverVulkan::print_lost_device_info() {
+#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED)
+	String error_msg = "Last known breadcrumb: ";
+
+	BufferInfo *breadcrumb_info = (BufferInfo *)breadcrumb_buffer.id;
+	void *breadcrumb_ptr = nullptr;
+	bool breadcrumb_read = false;
+	uint32_t last_breadcrumb = 0;
+
+	if (breadcrumb_info != nullptr) {
+		vmaFlushAllocation(allocator, breadcrumb_info->allocation.handle, 0, sizeof(uint32_t));
+		vmaInvalidateAllocation(allocator, breadcrumb_info->allocation.handle, 0, sizeof(uint32_t));
+
+		// Mapping can fail (all the more so on a lost device); only read
+		// through the pointer when the map actually succeeded.
+		if (vmaMapMemory(allocator, breadcrumb_info->allocation.handle, &breadcrumb_ptr) == VK_SUCCESS && breadcrumb_ptr != nullptr) {
+			last_breadcrumb = *(uint32_t *)breadcrumb_ptr;
+			vmaUnmapMemory(allocator, breadcrumb_info->allocation.handle);
+			breadcrumb_read = true;
+		}
+	}
+
+	if (!breadcrumb_read) {
+		error_msg += "unavailable (the breadcrumb buffer could not be mapped)";
+	} else {
+		const uint32_t phase = last_breadcrumb & uint32_t(~((1 << 16) - 1));
+		const uint32_t user_data = last_breadcrumb & ((1 << 16) - 1);
+
+		switch (phase) {
+			case BreadcrumbMarker::ALPHA_PASS:
+				error_msg += "ALPHA_PASS";
+				break;
+			case BreadcrumbMarker::BLIT_PASS:
+				error_msg += "BLIT_PASS";
+				break;
+			case BreadcrumbMarker::DEBUG_PASS:
+				error_msg += "DEBUG_PASS";
+				break;
+			case BreadcrumbMarker::LIGHTMAPPER_PASS:
+				error_msg += "LIGHTMAPPER_PASS";
+				break;
+			case BreadcrumbMarker::OPAQUE_PASS:
+				error_msg += "OPAQUE_PASS";
+				break;
+			case BreadcrumbMarker::POST_PROCESSING_PASS:
+				error_msg += "POST_PROCESSING_PASS";
+				break;
+			case BreadcrumbMarker::REFLECTION_PROBES:
+				error_msg += "REFLECTION_PROBES";
+				break;
+			case BreadcrumbMarker::SHADOW_PASS_CUBE:
+				error_msg += "SHADOW_PASS_CUBE";
+				break;
+			case BreadcrumbMarker::SHADOW_PASS_DIRECTIONAL:
+				error_msg += "SHADOW_PASS_DIRECTIONAL";
+				break;
+			case BreadcrumbMarker::SKY_PASS:
+				error_msg += "SKY_PASS";
+				break;
+			case BreadcrumbMarker::TRANSPARENT_PASS:
+				error_msg += "TRANSPARENT_PASS";
+				break;
+			case BreadcrumbMarker::UI_PASS:
+				error_msg += "UI_PASS";
+				break;
+			default:
+				error_msg += "UNKNOWN_BREADCRUMB(" + itos((uint32_t)phase) + ')';
+				break;
+		}
+
+		if (user_data != 0) {
+			error_msg += " | User data: " + itos(user_data);
+		}
+	}
+
+	_err_print_error(FUNCTION_STR, __FILE__, __LINE__, error_msg);
+#endif
+	on_device_lost();
+}
+
/********************/
/**** SUBMISSION ****/
/********************/
@@ -5010,9 +5387,12 @@ RenderingDeviceDriverVulkan::RenderingDeviceDriverVulkan(RenderingContextDriverV
DEV_ASSERT(p_context_driver != nullptr);
context_driver = p_context_driver;
+ max_descriptor_sets_per_pool = GLOBAL_GET("rendering/rendering_device/vulkan/max_descriptors_per_pool");
}
RenderingDeviceDriverVulkan::~RenderingDeviceDriverVulkan() {
+ buffer_free(breadcrumb_buffer);
+
while (small_allocs_pools.size()) {
HashMap<uint32_t, VmaPool>::Iterator E = small_allocs_pools.begin();
vmaDestroyPool(allocator, E->value);
@@ -5021,6 +5401,6 @@ RenderingDeviceDriverVulkan::~RenderingDeviceDriverVulkan() {
vmaDestroyAllocator(allocator);
if (vk_device != VK_NULL_HANDLE) {
- vkDestroyDevice(vk_device, nullptr);
+ vkDestroyDevice(vk_device, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DEVICE));
}
}
diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h
index 6847ae00be..2615d9824d 100644
--- a/drivers/vulkan/rendering_device_driver_vulkan.h
+++ b/drivers/vulkan/rendering_device_driver_vulkan.h
@@ -111,7 +111,18 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
PFN_vkAcquireNextImageKHR AcquireNextImageKHR = nullptr;
PFN_vkQueuePresentKHR QueuePresentKHR = nullptr;
PFN_vkCreateRenderPass2KHR CreateRenderPass2KHR = nullptr;
+
+ // Debug marker extensions.
+ PFN_vkCmdDebugMarkerBeginEXT CmdDebugMarkerBeginEXT = nullptr;
+ PFN_vkCmdDebugMarkerEndEXT CmdDebugMarkerEndEXT = nullptr;
+ PFN_vkCmdDebugMarkerInsertEXT CmdDebugMarkerInsertEXT = nullptr;
+ PFN_vkDebugMarkerSetObjectNameEXT DebugMarkerSetObjectNameEXT = nullptr;
+
+ // Debug device fault.
+ PFN_vkGetDeviceFaultInfoEXT GetDeviceFaultInfoEXT = nullptr;
};
+ // Debug marker extensions.
+ VkDebugReportObjectTypeEXT _convert_to_debug_report_objectType(VkObjectType p_object_type);
VkDevice vk_device = VK_NULL_HANDLE;
RenderingContextDriverVulkan *context_driver = nullptr;
@@ -132,6 +143,10 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
ShaderCapabilities shader_capabilities;
StorageBufferCapabilities storage_buffer_capabilities;
bool pipeline_cache_control_support = false;
+ bool device_fault_support = false;
+#if defined(VK_TRACK_DEVICE_MEMORY)
+ bool device_memory_report_support = false;
+#endif
DeviceFunctions device_functions;
void _register_requested_device_extension(const CharString &p_extension_name, bool p_required);
@@ -160,10 +175,13 @@ private:
VmaPool _find_or_create_small_allocs_pool(uint32_t p_mem_type_index);
+private:
+ BufferID breadcrumb_buffer;
+
+public:
/*****************/
/**** BUFFERS ****/
/*****************/
-private:
struct BufferInfo {
VkBuffer vk_buffer = VK_NULL_HANDLE;
struct {
@@ -174,7 +192,6 @@ private:
VkBufferView vk_view = VK_NULL_HANDLE; // For texel buffers.
};
-public:
virtual BufferID buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) override final;
virtual bool buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) override final;
virtual void buffer_free(BufferID p_buffer) override final;
@@ -187,6 +204,7 @@ public:
/*****************/
struct TextureInfo {
+ VkImage vk_image = VK_NULL_HANDLE;
VkImageView vk_view = VK_NULL_HANDLE;
DataFormat rd_format = DATA_FORMAT_MAX;
VkImageCreateInfo vk_create_info = {};
@@ -405,6 +423,7 @@ public:
virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name) override final;
virtual void shader_free(ShaderID p_shader) override final;
+ virtual void shader_destroy_modules(ShaderID p_shader) override final;
/*********************/
/**** UNIFORM SET ****/
/*********************/
@@ -606,6 +625,13 @@ public:
virtual void command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) override final;
virtual void command_end_label(CommandBufferID p_cmd_buffer) override final;
+ /****************/
+ /**** DEBUG *****/
+ /****************/
+ virtual void command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) override final;
+ void print_lost_device_info();
+ void on_device_lost() const;
+
/********************/
/**** SUBMISSION ****/
/********************/
@@ -620,6 +646,7 @@ public:
virtual void set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) override final;
virtual uint64_t get_resource_native_handle(DriverResource p_type, ID p_driver_id) override final;
virtual uint64_t get_total_memory_used() override final;
+
virtual uint64_t limit_get(Limit p_limit) override final;
virtual uint64_t api_trait_get(ApiTrait p_trait) override final;
virtual bool has_feature(Features p_feature) override final;
@@ -651,4 +678,6 @@ public:
virtual ~RenderingDeviceDriverVulkan();
};
+using VKC = RenderingContextDriverVulkan;
+
#endif // RENDERING_DEVICE_DRIVER_VULKAN_H
diff --git a/drivers/windows/dir_access_windows.cpp b/drivers/windows/dir_access_windows.cpp
index 63ba6a6c96..f7632842ed 100644
--- a/drivers/windows/dir_access_windows.cpp
+++ b/drivers/windows/dir_access_windows.cpp
@@ -35,6 +35,7 @@
#include "core/config/project_settings.h"
#include "core/os/memory.h"
+#include "core/os/os.h"
#include "core/string/print_string.h"
#include <stdio.h>
@@ -69,9 +70,19 @@ struct DirAccessWindowsPrivate {
};
// Converts p_path into the form handed to the Win32 calls in this file:
// resolved against current_dir when relative, simplified, backslash-separated,
// and carrying the long-path prefix unless it is a network share path.
String DirAccessWindows::fix_path(const String &p_path) const {
- String r_path = DirAccess::fix_path(p_path);
- if (r_path.is_absolute_path() && !r_path.is_network_share_path() && r_path.length() > MAX_PATH) {
- r_path = "\\\\?\\" + r_path.replace("/", "\\");
+ String r_path = DirAccess::fix_path(p_path.trim_prefix(R"(\\?\)").replace("\\", "/")); // Work on a prefix-free, forward-slash form first.
+ if (r_path.ends_with(":")) { // A path ending in a colon gets a trailing slash appended.
+ r_path += "/";
+ }
+ if (r_path.is_relative_path()) {
+ r_path = current_dir.trim_prefix(R"(\\?\)").replace("\\", "/").path_join(r_path);
+ } else if (r_path == ".") { // NOTE(review): looks unreachable if "." already counts as a relative path; confirm against String::is_relative_path().
+ r_path = current_dir.trim_prefix(R"(\\?\)").replace("\\", "/");
+ }
+ r_path = r_path.simplify_path();
+ r_path = r_path.replace("/", "\\");
+ if (!r_path.is_network_share_path() && !r_path.begins_with(R"(\\?\)")) { // The prefix is now added regardless of path length.
+ r_path = R"(\\?\)" + r_path;
}
return r_path;
}
@@ -140,28 +151,33 @@ String DirAccessWindows::get_drive(int p_drive) {
Error DirAccessWindows::change_dir(String p_dir) {
GLOBAL_LOCK_FUNCTION
- p_dir = fix_path(p_dir);
+ String dir = fix_path(p_dir);
- WCHAR real_current_dir_name[2048];
- GetCurrentDirectoryW(2048, real_current_dir_name);
- String prev_dir = String::utf16((const char16_t *)real_current_dir_name);
+ Char16String real_current_dir_name;
+ size_t str_len = GetCurrentDirectoryW(0, nullptr);
+ real_current_dir_name.resize(str_len + 1);
+ GetCurrentDirectoryW(real_current_dir_name.size(), (LPWSTR)real_current_dir_name.ptrw());
+ String prev_dir = String::utf16((const char16_t *)real_current_dir_name.get_data());
SetCurrentDirectoryW((LPCWSTR)(current_dir.utf16().get_data()));
- bool worked = (SetCurrentDirectoryW((LPCWSTR)(p_dir.utf16().get_data())) != 0);
+ bool worked = (SetCurrentDirectoryW((LPCWSTR)(dir.utf16().get_data())) != 0);
String base = _get_root_path();
if (!base.is_empty()) {
- GetCurrentDirectoryW(2048, real_current_dir_name);
- String new_dir = String::utf16((const char16_t *)real_current_dir_name).replace("\\", "/");
+ str_len = GetCurrentDirectoryW(0, nullptr);
+ real_current_dir_name.resize(str_len + 1);
+ GetCurrentDirectoryW(real_current_dir_name.size(), (LPWSTR)real_current_dir_name.ptrw());
+ String new_dir = String::utf16((const char16_t *)real_current_dir_name.get_data()).trim_prefix(R"(\\?\)").replace("\\", "/");
if (!new_dir.begins_with(base)) {
worked = false;
}
}
if (worked) {
- GetCurrentDirectoryW(2048, real_current_dir_name);
- current_dir = String::utf16((const char16_t *)real_current_dir_name);
- current_dir = current_dir.replace("\\", "/");
+ str_len = GetCurrentDirectoryW(0, nullptr);
+ real_current_dir_name.resize(str_len + 1);
+ GetCurrentDirectoryW(real_current_dir_name.size(), (LPWSTR)real_current_dir_name.ptrw());
+ current_dir = String::utf16((const char16_t *)real_current_dir_name.get_data());
}
SetCurrentDirectoryW((LPCWSTR)(prev_dir.utf16().get_data()));
@@ -172,12 +188,6 @@ Error DirAccessWindows::change_dir(String p_dir) {
Error DirAccessWindows::make_dir(String p_dir) {
GLOBAL_LOCK_FUNCTION
- p_dir = fix_path(p_dir);
- if (p_dir.is_relative_path()) {
- p_dir = current_dir.path_join(p_dir);
- p_dir = fix_path(p_dir);
- }
-
if (FileAccessWindows::is_path_invalid(p_dir)) {
#ifdef DEBUG_ENABLED
WARN_PRINT("The path :" + p_dir + " is a reserved Windows system pipe, so it can't be used for creating directories.");
@@ -185,12 +195,12 @@ Error DirAccessWindows::make_dir(String p_dir) {
return ERR_INVALID_PARAMETER;
}
- p_dir = p_dir.simplify_path().replace("/", "\\");
+ String dir = fix_path(p_dir);
bool success;
int err;
- success = CreateDirectoryW((LPCWSTR)(p_dir.utf16().get_data()), nullptr);
+ success = CreateDirectoryW((LPCWSTR)(dir.utf16().get_data()), nullptr);
err = GetLastError();
if (success) {
@@ -205,9 +215,10 @@ Error DirAccessWindows::make_dir(String p_dir) {
}
String DirAccessWindows::get_current_dir(bool p_include_drive) const {
+ String cdir = current_dir.trim_prefix(R"(\\?\)").replace("\\", "/");
String base = _get_root_path();
if (!base.is_empty()) {
- String bd = current_dir.replace("\\", "/").replace_first(base, "");
+ String bd = cdir.replace_first(base, "");
if (bd.begins_with("/")) {
return _get_root_string() + bd.substr(1, bd.length());
} else {
@@ -216,30 +227,25 @@ String DirAccessWindows::get_current_dir(bool p_include_drive) const {
}
if (p_include_drive) {
- return current_dir;
+ return cdir;
} else {
if (_get_root_string().is_empty()) {
- int pos = current_dir.find(":");
+ int pos = cdir.find(":");
if (pos != -1) {
- return current_dir.substr(pos + 1);
+ return cdir.substr(pos + 1);
}
}
- return current_dir;
+ return cdir;
}
}
bool DirAccessWindows::file_exists(String p_file) {
GLOBAL_LOCK_FUNCTION
- if (!p_file.is_absolute_path()) {
- p_file = get_current_dir().path_join(p_file);
- }
-
- p_file = fix_path(p_file);
+ String file = fix_path(p_file);
DWORD fileAttr;
-
- fileAttr = GetFileAttributesW((LPCWSTR)(p_file.utf16().get_data()));
+ fileAttr = GetFileAttributesW((LPCWSTR)(file.utf16().get_data()));
if (INVALID_FILE_ATTRIBUTES == fileAttr) {
return false;
}
@@ -250,14 +256,10 @@ bool DirAccessWindows::file_exists(String p_file) {
bool DirAccessWindows::dir_exists(String p_dir) {
GLOBAL_LOCK_FUNCTION
- if (p_dir.is_relative_path()) {
- p_dir = get_current_dir().path_join(p_dir);
- }
-
- p_dir = fix_path(p_dir);
+ String dir = fix_path(p_dir);
DWORD fileAttr;
- fileAttr = GetFileAttributesW((LPCWSTR)(p_dir.utf16().get_data()));
+ fileAttr = GetFileAttributesW((LPCWSTR)(dir.utf16().get_data()));
if (INVALID_FILE_ATTRIBUTES == fileAttr) {
return false;
}
@@ -265,66 +267,63 @@ bool DirAccessWindows::dir_exists(String p_dir) {
}
// Renames p_path to p_new_path. Both arguments are passed through fix_path() first.
// Returns OK on success and FAILED otherwise.
Error DirAccessWindows::rename(String p_path, String p_new_path) {
- if (p_path.is_relative_path()) {
- p_path = get_current_dir().path_join(p_path);
- }
-
- p_path = fix_path(p_path);
-
- if (p_new_path.is_relative_path()) {
- p_new_path = get_current_dir().path_join(p_new_path);
- }
-
- p_new_path = fix_path(p_new_path);
+ String path = fix_path(p_path);
+ String new_path = fix_path(p_new_path);
// If we're only changing file name case we need to do a little juggling
- if (p_path.to_lower() == p_new_path.to_lower()) {
- if (dir_exists(p_path)) {
+ if (path.to_lower() == new_path.to_lower()) {
+ if (dir_exists(path)) {
// The path is a dir; just rename
- return ::_wrename((LPCWSTR)(p_path.utf16().get_data()), (LPCWSTR)(p_new_path.utf16().get_data())) == 0 ? OK : FAILED;
+ return MoveFileW((LPCWSTR)(path.utf16().get_data()), (LPCWSTR)(new_path.utf16().get_data())) != 0 ? OK : FAILED;
}
// The path is a file; juggle
- WCHAR tmpfile[MAX_PATH];
-
- if (!GetTempFileNameW((LPCWSTR)(fix_path(get_current_dir()).utf16().get_data()), nullptr, 0, tmpfile)) {
- return FAILED;
+ // Note: do not use GetTempFileNameW, it's not long path aware!
+ Char16String tmpfile_utf16;
+ uint64_t id = OS::get_singleton()->get_ticks_usec(); // Starting numeric suffix for candidate temp names; incremented on every attempt.
+ while (true) {
+ tmpfile_utf16 = (path + itos(id++) + ".tmp").utf16();
+ HANDLE handle = CreateFileW((LPCWSTR)tmpfile_utf16.get_data(), GENERIC_WRITE, 0, NULL, CREATE_NEW, FILE_ATTRIBUTE_NORMAL, 0);
+ if (handle != INVALID_HANDLE_VALUE) {
+ CloseHandle(handle); // The handle was only needed to claim the name.
+ break;
+ }
+ if (GetLastError() != ERROR_FILE_EXISTS && GetLastError() != ERROR_SHARING_VIOLATION) { // Any other error aborts the rename; these two retry with the next suffix.
+ return FAILED;
+ }
}
- if (!::ReplaceFileW(tmpfile, (LPCWSTR)(p_path.utf16().get_data()), nullptr, 0, nullptr, nullptr)) {
- DeleteFileW(tmpfile);
+ if (!::ReplaceFileW((LPCWSTR)tmpfile_utf16.get_data(), (LPCWSTR)(path.utf16().get_data()), nullptr, 0, nullptr, nullptr)) {
+ DeleteFileW((LPCWSTR)tmpfile_utf16.get_data()); // Do not leave the placeholder temp file behind on failure.
return FAILED;
}
- return ::_wrename(tmpfile, (LPCWSTR)(p_new_path.utf16().get_data())) == 0 ? OK : FAILED;
+ return MoveFileW((LPCWSTR)tmpfile_utf16.get_data(), (LPCWSTR)(new_path.utf16().get_data())) != 0 ? OK : FAILED;
} else {
- if (file_exists(p_new_path)) {
- if (remove(p_new_path) != OK) {
+ if (file_exists(new_path)) { // An existing file at the destination is removed before the move.
+ if (remove(new_path) != OK) {
return FAILED;
}
}
- return ::_wrename((LPCWSTR)(p_path.utf16().get_data()), (LPCWSTR)(p_new_path.utf16().get_data())) == 0 ? OK : FAILED;
+ return MoveFileW((LPCWSTR)(path.utf16().get_data()), (LPCWSTR)(new_path.utf16().get_data())) != 0 ? OK : FAILED;
}
}
// Removes the file or directory at p_path (after fix_path()).
// Returns FAILED when the attributes cannot be read or the removal call fails, OK otherwise.
Error DirAccessWindows::remove(String p_path) {
- if (p_path.is_relative_path()) {
- p_path = get_current_dir().path_join(p_path);
- }
-
- p_path = fix_path(p_path);
+ String path = fix_path(p_path);
+ const Char16String &path_utf16 = path.utf16(); // Converted once and reused by every Win32 call below.
DWORD fileAttr;
- fileAttr = GetFileAttributesW((LPCWSTR)(p_path.utf16().get_data()));
+ fileAttr = GetFileAttributesW((LPCWSTR)(path_utf16.get_data()));
if (INVALID_FILE_ATTRIBUTES == fileAttr) {
return FAILED;
}
if ((fileAttr & FILE_ATTRIBUTE_DIRECTORY)) {
- return ::_wrmdir((LPCWSTR)(p_path.utf16().get_data())) == 0 ? OK : FAILED;
+ return RemoveDirectoryW((LPCWSTR)(path_utf16.get_data())) != 0 ? OK : FAILED; // Win32 call: non-zero means success, the opposite of the CRT call it replaces.
} else {
- return ::_wunlink((LPCWSTR)(p_path.utf16().get_data())) == 0 ? OK : FAILED;
+ return DeleteFileW((LPCWSTR)(path_utf16.get_data())) != 0 ? OK : FAILED;
}
}
@@ -339,16 +338,16 @@ uint64_t DirAccessWindows::get_space_left() {
}
String DirAccessWindows::get_filesystem_type() const {
- String path = fix_path(const_cast<DirAccessWindows *>(this)->get_current_dir());
-
- int unit_end = path.find(":");
- ERR_FAIL_COND_V(unit_end == -1, String());
- String unit = path.substr(0, unit_end + 1) + "\\";
+ String path = current_dir.trim_prefix(R"(\\?\)");
if (path.is_network_share_path()) {
return "Network Share";
}
+ int unit_end = path.find(":");
+ ERR_FAIL_COND_V(unit_end == -1, String());
+ String unit = path.substr(0, unit_end + 1) + "\\";
+
WCHAR szVolumeName[100];
WCHAR szFileSystemName[10];
DWORD dwSerialNumber = 0;
@@ -370,11 +369,7 @@ String DirAccessWindows::get_filesystem_type() const {
}
bool DirAccessWindows::is_case_sensitive(const String &p_path) const {
- String f = p_path;
- if (!f.is_absolute_path()) {
- f = get_current_dir().path_join(f);
- }
- f = fix_path(f);
+ String f = fix_path(p_path);
HANDLE h_file = ::CreateFileW((LPCWSTR)(f.utf16().get_data()), 0,
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
@@ -397,12 +392,7 @@ bool DirAccessWindows::is_case_sensitive(const String &p_path) const {
}
bool DirAccessWindows::is_link(String p_file) {
- String f = p_file;
-
- if (!f.is_absolute_path()) {
- f = get_current_dir().path_join(f);
- }
- f = fix_path(f);
+ String f = fix_path(p_file);
DWORD attr = GetFileAttributesW((LPCWSTR)(f.utf16().get_data()));
if (attr == INVALID_FILE_ATTRIBUTES) {
@@ -413,12 +403,7 @@ bool DirAccessWindows::is_link(String p_file) {
}
String DirAccessWindows::read_link(String p_file) {
- String f = p_file;
-
- if (!f.is_absolute_path()) {
- f = get_current_dir().path_join(f);
- }
- f = fix_path(f);
+ String f = fix_path(p_file);
HANDLE hfile = CreateFileW((LPCWSTR)(f.utf16().get_data()), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, nullptr);
if (hfile == INVALID_HANDLE_VALUE) {
@@ -434,22 +419,18 @@ String DirAccessWindows::read_link(String p_file) {
GetFinalPathNameByHandleW(hfile, (LPWSTR)cs.ptrw(), ret, VOLUME_NAME_DOS | FILE_NAME_NORMALIZED);
CloseHandle(hfile);
- return String::utf16((const char16_t *)cs.ptr(), ret).trim_prefix(R"(\\?\)");
+ return String::utf16((const char16_t *)cs.ptr(), ret).trim_prefix(R"(\\?\)").replace("\\", "/");
}
Error DirAccessWindows::create_link(String p_source, String p_target) {
- if (p_target.is_relative_path()) {
- p_target = get_current_dir().path_join(p_target);
- }
+ String source = fix_path(p_source);
+ String target = fix_path(p_target);
- p_source = fix_path(p_source);
- p_target = fix_path(p_target);
-
- DWORD file_attr = GetFileAttributesW((LPCWSTR)(p_source.utf16().get_data()));
+ DWORD file_attr = GetFileAttributesW((LPCWSTR)(source.utf16().get_data()));
bool is_dir = (file_attr & FILE_ATTRIBUTE_DIRECTORY);
DWORD flags = ((is_dir) ? SYMBOLIC_LINK_FLAG_DIRECTORY : 0) | SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE;
- if (CreateSymbolicLinkW((LPCWSTR)p_target.utf16().get_data(), (LPCWSTR)p_source.utf16().get_data(), flags) != 0) {
+ if (CreateSymbolicLinkW((LPCWSTR)target.utf16().get_data(), (LPCWSTR)source.utf16().get_data(), flags) != 0) {
return OK;
} else {
return FAILED;
@@ -459,7 +440,12 @@ Error DirAccessWindows::create_link(String p_source, String p_target) {
DirAccessWindows::DirAccessWindows() {
p = memnew(DirAccessWindowsPrivate);
p->h = INVALID_HANDLE_VALUE;
- current_dir = ".";
+
+ Char16String real_current_dir_name;
+ size_t str_len = GetCurrentDirectoryW(0, nullptr);
+ real_current_dir_name.resize(str_len + 1);
+ GetCurrentDirectoryW(real_current_dir_name.size(), (LPWSTR)real_current_dir_name.ptrw());
+ current_dir = String::utf16((const char16_t *)real_current_dir_name.get_data());
DWORD mask = GetLogicalDrives();
diff --git a/drivers/windows/file_access_windows.cpp b/drivers/windows/file_access_windows.cpp
index 9885d9d7ee..0243d863f8 100644
--- a/drivers/windows/file_access_windows.cpp
+++ b/drivers/windows/file_access_windows.cpp
@@ -73,8 +73,18 @@ bool FileAccessWindows::is_path_invalid(const String &p_path) {
// Converts p_path into the form handed to the Win32 calls in this file:
// resolved against the process working directory when relative, simplified,
// backslash-separated, and carrying the long-path prefix unless it is a network share path.
String FileAccessWindows::fix_path(const String &p_path) const {
String r_path = FileAccess::fix_path(p_path);
- if (r_path.is_absolute_path() && !r_path.is_network_share_path() && r_path.length() > MAX_PATH) {
- r_path = "\\\\?\\" + r_path.replace("/", "\\");
+
+ if (r_path.is_relative_path()) {
+ Char16String current_dir_name;
+ size_t str_len = GetCurrentDirectoryW(0, nullptr); // Size query: no buffer is passed on this first call.
+ current_dir_name.resize(str_len + 1);
+ GetCurrentDirectoryW(current_dir_name.size(), (LPWSTR)current_dir_name.ptrw());
+ r_path = String::utf16((const char16_t *)current_dir_name.get_data()).trim_prefix(R"(\\?\)").replace("\\", "/").path_join(r_path);
+ }
+ r_path = r_path.simplify_path();
+ r_path = r_path.replace("/", "\\");
+ if (!r_path.is_network_share_path() && !r_path.begins_with(R"(\\?\)")) { // The prefix is now added regardless of path length.
+ r_path = R"(\\?\)" + r_path;
}
return r_path;
}
@@ -108,9 +118,6 @@ Error FileAccessWindows::open_internal(const String &p_path, int p_mode_flags) {
return ERR_INVALID_PARAMETER;
}
- /* Pretty much every implementation that uses fopen as primary
- backend supports utf8 encoding. */
-
struct _stat st;
if (_wstat((LPCWSTR)(path.utf16().get_data()), &st) == 0) {
if (!S_ISREG(st.st_mode)) {
@@ -125,7 +132,7 @@ Error FileAccessWindows::open_internal(const String &p_path, int p_mode_flags) {
// platforms), we only check for relative paths, or paths in res:// or user://,
// other paths aren't likely to be portable anyway.
if (p_mode_flags == READ && (p_path.is_relative_path() || get_access_type() != ACCESS_FILESYSTEM)) {
- String base_path = path;
+ String base_path = p_path;
String working_path;
String proper_path;
@@ -144,23 +151,17 @@ Error FileAccessWindows::open_internal(const String &p_path, int p_mode_flags) {
}
proper_path = "user://";
}
+ working_path = fix_path(working_path);
WIN32_FIND_DATAW d;
- Vector<String> parts = base_path.split("/");
+ Vector<String> parts = base_path.simplify_path().split("/");
bool mismatch = false;
for (const String &part : parts) {
- working_path = working_path.path_join(part);
-
- // Skip if relative.
- if (part == "." || part == "..") {
- proper_path = proper_path.path_join(part);
- continue;
- }
+ working_path = working_path + "\\" + part;
HANDLE fnd = FindFirstFileW((LPCWSTR)(working_path.utf16().get_data()), &d);
-
if (fnd == INVALID_HANDLE_VALUE) {
mismatch = false;
break;
@@ -186,12 +187,22 @@ Error FileAccessWindows::open_internal(const String &p_path, int p_mode_flags) {
if (is_backup_save_enabled() && p_mode_flags == WRITE) {
save_path = path;
// Create a temporary file in the same directory as the target file.
- WCHAR tmpFileName[MAX_PATH];
- if (GetTempFileNameW((LPCWSTR)(path.get_base_dir().utf16().get_data()), (LPCWSTR)(path.get_file().utf16().get_data()), 0, tmpFileName) == 0) {
- last_error = ERR_FILE_CANT_OPEN;
- return last_error;
+ // Note: do not use GetTempFileNameW, it's not long path aware!
+ String tmpfile;
+ uint64_t id = OS::get_singleton()->get_ticks_usec();
+ while (true) {
+ tmpfile = path + itos(id++) + ".tmp";
+ HANDLE handle = CreateFileW((LPCWSTR)tmpfile.utf16().get_data(), GENERIC_WRITE, 0, NULL, CREATE_NEW, FILE_ATTRIBUTE_NORMAL, 0);
+ if (handle != INVALID_HANDLE_VALUE) {
+ CloseHandle(handle);
+ break;
+ }
+ if (GetLastError() != ERROR_FILE_EXISTS && GetLastError() != ERROR_SHARING_VIOLATION) {
+ last_error = ERR_FILE_CANT_WRITE;
+ return FAILED;
+ }
}
- path = tmpFileName;
+ path = tmpfile;
}
f = _wfsopen((LPCWSTR)(path.utf16().get_data()), mode_string, is_backup_save_enabled() ? _SH_SECURE : _SH_DENYNO);
@@ -235,7 +246,7 @@ void FileAccessWindows::_close() {
} else {
// Either the target exists and is locked (temporarily, hopefully)
// or it doesn't exist; let's assume the latter before re-trying.
- rename_error = _wrename((LPCWSTR)(path_utf16.get_data()), (LPCWSTR)(save_path_utf16.get_data())) != 0;
+ rename_error = MoveFileW((LPCWSTR)(path_utf16.get_data()), (LPCWSTR)(save_path_utf16.get_data())) == 0;
}
if (!rename_error) {
@@ -262,7 +273,7 @@ String FileAccessWindows::get_path() const {
}
// Returns the stored path with the long-path prefix trimmed and backslashes replaced by forward slashes.
String FileAccessWindows::get_path_absolute() const {
- return path;
+ return path.trim_prefix(R"(\\?\)").replace("\\", "/");
}
bool FileAccessWindows::is_open() const {
@@ -312,93 +323,9 @@ bool FileAccessWindows::eof_reached() const {
return last_error == ERR_FILE_EOF;
}
-uint8_t FileAccessWindows::get_8() const {
- ERR_FAIL_NULL_V(f, 0);
-
- if (flags == READ_WRITE || flags == WRITE_READ) {
- if (prev_op == WRITE) {
- fflush(f);
- }
- prev_op = READ;
- }
- uint8_t b;
- if (fread(&b, 1, 1, f) == 0) {
- check_errors();
- b = '\0';
- }
-
- return b;
-}
-
-uint16_t FileAccessWindows::get_16() const {
- ERR_FAIL_NULL_V(f, 0);
-
- if (flags == READ_WRITE || flags == WRITE_READ) {
- if (prev_op == WRITE) {
- fflush(f);
- }
- prev_op = READ;
- }
-
- uint16_t b = 0;
- if (fread(&b, 1, 2, f) != 2) {
- check_errors();
- }
-
- if (big_endian) {
- b = BSWAP16(b);
- }
-
- return b;
-}
-
-uint32_t FileAccessWindows::get_32() const {
- ERR_FAIL_NULL_V(f, 0);
-
- if (flags == READ_WRITE || flags == WRITE_READ) {
- if (prev_op == WRITE) {
- fflush(f);
- }
- prev_op = READ;
- }
-
- uint32_t b = 0;
- if (fread(&b, 1, 4, f) != 4) {
- check_errors();
- }
-
- if (big_endian) {
- b = BSWAP32(b);
- }
-
- return b;
-}
-
-uint64_t FileAccessWindows::get_64() const {
- ERR_FAIL_NULL_V(f, 0);
-
- if (flags == READ_WRITE || flags == WRITE_READ) {
- if (prev_op == WRITE) {
- fflush(f);
- }
- prev_op = READ;
- }
-
- uint64_t b = 0;
- if (fread(&b, 1, 8, f) != 8) {
- check_errors();
- }
-
- if (big_endian) {
- b = BSWAP64(b);
- }
-
- return b;
-}
-
uint64_t FileAccessWindows::get_buffer(uint8_t *p_dst, uint64_t p_length) const {
- ERR_FAIL_COND_V(!p_dst && p_length > 0, -1);
ERR_FAIL_NULL_V(f, -1);
+ ERR_FAIL_COND_V(!p_dst && p_length > 0, -1);
if (flags == READ_WRITE || flags == WRITE_READ) {
if (prev_op == WRITE) {
@@ -406,8 +333,10 @@ uint64_t FileAccessWindows::get_buffer(uint8_t *p_dst, uint64_t p_length) const
}
prev_op = READ;
}
+
uint64_t read = fread(p_dst, 1, p_length, f);
check_errors();
+
return read;
}
@@ -442,77 +371,6 @@ void FileAccessWindows::flush() {
}
}
-void FileAccessWindows::store_8(uint8_t p_dest) {
- ERR_FAIL_NULL(f);
-
- if (flags == READ_WRITE || flags == WRITE_READ) {
- if (prev_op == READ) {
- if (last_error != ERR_FILE_EOF) {
- fseek(f, 0, SEEK_CUR);
- }
- }
- prev_op = WRITE;
- }
- fwrite(&p_dest, 1, 1, f);
-}
-
-void FileAccessWindows::store_16(uint16_t p_dest) {
- ERR_FAIL_NULL(f);
-
- if (flags == READ_WRITE || flags == WRITE_READ) {
- if (prev_op == READ) {
- if (last_error != ERR_FILE_EOF) {
- fseek(f, 0, SEEK_CUR);
- }
- }
- prev_op = WRITE;
- }
-
- if (big_endian) {
- p_dest = BSWAP16(p_dest);
- }
-
- fwrite(&p_dest, 1, 2, f);
-}
-
-void FileAccessWindows::store_32(uint32_t p_dest) {
- ERR_FAIL_NULL(f);
-
- if (flags == READ_WRITE || flags == WRITE_READ) {
- if (prev_op == READ) {
- if (last_error != ERR_FILE_EOF) {
- fseek(f, 0, SEEK_CUR);
- }
- }
- prev_op = WRITE;
- }
-
- if (big_endian) {
- p_dest = BSWAP32(p_dest);
- }
-
- fwrite(&p_dest, 1, 4, f);
-}
-
-void FileAccessWindows::store_64(uint64_t p_dest) {
- ERR_FAIL_NULL(f);
-
- if (flags == READ_WRITE || flags == WRITE_READ) {
- if (prev_op == READ) {
- if (last_error != ERR_FILE_EOF) {
- fseek(f, 0, SEEK_CUR);
- }
- }
- prev_op = WRITE;
- }
-
- if (big_endian) {
- p_dest = BSWAP64(p_dest);
- }
-
- fwrite(&p_dest, 1, 8, f);
-}
-
void FileAccessWindows::store_buffer(const uint8_t *p_src, uint64_t p_length) {
ERR_FAIL_NULL(f);
ERR_FAIL_COND(!p_src && p_length > 0);
@@ -525,6 +383,7 @@ void FileAccessWindows::store_buffer(const uint8_t *p_src, uint64_t p_length) {
}
prev_op = WRITE;
}
+
ERR_FAIL_COND(fwrite(p_src, 1, p_length, f) != (size_t)p_length);
}
@@ -549,7 +408,7 @@ uint64_t FileAccessWindows::_get_modified_time(const String &p_file) {
}
String file = fix_path(p_file);
- if (file.ends_with("/") && file != "/") {
+ if (file.ends_with("\\") && file != "\\") {
file = file.substr(0, file.length() - 1);
}
@@ -582,14 +441,15 @@ bool FileAccessWindows::_get_hidden_attribute(const String &p_file) {
Error FileAccessWindows::_set_hidden_attribute(const String &p_file, bool p_hidden) {
String file = fix_path(p_file);
+ const Char16String &file_utf16 = file.utf16();
- DWORD attrib = GetFileAttributesW((LPCWSTR)file.utf16().get_data());
+ DWORD attrib = GetFileAttributesW((LPCWSTR)file_utf16.get_data());
ERR_FAIL_COND_V_MSG(attrib == INVALID_FILE_ATTRIBUTES, FAILED, "Failed to get attributes for: " + p_file);
BOOL ok;
if (p_hidden) {
- ok = SetFileAttributesW((LPCWSTR)file.utf16().get_data(), attrib | FILE_ATTRIBUTE_HIDDEN);
+ ok = SetFileAttributesW((LPCWSTR)file_utf16.get_data(), attrib | FILE_ATTRIBUTE_HIDDEN);
} else {
- ok = SetFileAttributesW((LPCWSTR)file.utf16().get_data(), attrib & ~FILE_ATTRIBUTE_HIDDEN);
+ ok = SetFileAttributesW((LPCWSTR)file_utf16.get_data(), attrib & ~FILE_ATTRIBUTE_HIDDEN);
}
ERR_FAIL_COND_V_MSG(!ok, FAILED, "Failed to set attributes for: " + p_file);
@@ -606,14 +466,15 @@ bool FileAccessWindows::_get_read_only_attribute(const String &p_file) {
Error FileAccessWindows::_set_read_only_attribute(const String &p_file, bool p_ro) {
String file = fix_path(p_file);
+ const Char16String &file_utf16 = file.utf16();
- DWORD attrib = GetFileAttributesW((LPCWSTR)file.utf16().get_data());
+ DWORD attrib = GetFileAttributesW((LPCWSTR)file_utf16.get_data());
ERR_FAIL_COND_V_MSG(attrib == INVALID_FILE_ATTRIBUTES, FAILED, "Failed to get attributes for: " + p_file);
BOOL ok;
if (p_ro) {
- ok = SetFileAttributesW((LPCWSTR)file.utf16().get_data(), attrib | FILE_ATTRIBUTE_READONLY);
+ ok = SetFileAttributesW((LPCWSTR)file_utf16.get_data(), attrib | FILE_ATTRIBUTE_READONLY);
} else {
- ok = SetFileAttributesW((LPCWSTR)file.utf16().get_data(), attrib & ~FILE_ATTRIBUTE_READONLY);
+ ok = SetFileAttributesW((LPCWSTR)file_utf16.get_data(), attrib & ~FILE_ATTRIBUTE_READONLY);
}
ERR_FAIL_COND_V_MSG(!ok, FAILED, "Failed to set attributes for: " + p_file);
diff --git a/drivers/windows/file_access_windows.h b/drivers/windows/file_access_windows.h
index a25bbcfb3a..f458ff9c6c 100644
--- a/drivers/windows/file_access_windows.h
+++ b/drivers/windows/file_access_windows.h
@@ -69,20 +69,12 @@ public:
virtual bool eof_reached() const override; ///< reading passed EOF
- virtual uint8_t get_8() const override; ///< get a byte
- virtual uint16_t get_16() const override;
- virtual uint32_t get_32() const override;
- virtual uint64_t get_64() const override;
virtual uint64_t get_buffer(uint8_t *p_dst, uint64_t p_length) const override;
virtual Error get_error() const override; ///< get last error
virtual Error resize(int64_t p_length) override;
virtual void flush() override;
- virtual void store_8(uint8_t p_dest) override; ///< store a byte
- virtual void store_16(uint16_t p_dest) override;
- virtual void store_32(uint32_t p_dest) override;
- virtual void store_64(uint64_t p_dest) override;
virtual void store_buffer(const uint8_t *p_src, uint64_t p_length) override; ///< store an array of bytes
virtual bool file_exists(const String &p_name) override; ///< return true if a file exists
diff --git a/drivers/windows/file_access_windows_pipe.cpp b/drivers/windows/file_access_windows_pipe.cpp
index 7902c8e1d8..0c953b14aa 100644
--- a/drivers/windows/file_access_windows_pipe.cpp
+++ b/drivers/windows/file_access_windows_pipe.cpp
@@ -96,22 +96,9 @@ String FileAccessWindowsPipe::get_path_absolute() const {
return path_src;
}
-uint8_t FileAccessWindowsPipe::get_8() const {
- ERR_FAIL_COND_V_MSG(fd[0] == 0, 0, "Pipe must be opened before use.");
-
- uint8_t b;
- if (!ReadFile(fd[0], &b, 1, nullptr, nullptr)) {
- last_error = ERR_FILE_CANT_READ;
- b = '\0';
- } else {
- last_error = OK;
- }
- return b;
-}
-
uint64_t FileAccessWindowsPipe::get_buffer(uint8_t *p_dst, uint64_t p_length) const {
- ERR_FAIL_COND_V(!p_dst && p_length > 0, -1);
ERR_FAIL_COND_V_MSG(fd[0] == 0, -1, "Pipe must be opened before use.");
+ ERR_FAIL_COND_V(!p_dst && p_length > 0, -1);
DWORD read = -1;
if (!ReadFile(fd[0], p_dst, p_length, &read, nullptr) || read != p_length) {
@@ -126,15 +113,6 @@ Error FileAccessWindowsPipe::get_error() const {
return last_error;
}
-void FileAccessWindowsPipe::store_8(uint8_t p_src) {
- ERR_FAIL_COND_MSG(fd[1] == 0, "Pipe must be opened before use.");
- if (!WriteFile(fd[1], &p_src, 1, nullptr, nullptr)) {
- last_error = ERR_FILE_CANT_WRITE;
- } else {
- last_error = OK;
- }
-}
-
void FileAccessWindowsPipe::store_buffer(const uint8_t *p_src, uint64_t p_length) {
ERR_FAIL_COND_MSG(fd[1] == 0, "Pipe must be opened before use.");
ERR_FAIL_COND(!p_src && p_length > 0);
diff --git a/drivers/windows/file_access_windows_pipe.h b/drivers/windows/file_access_windows_pipe.h
index b885ef78e6..4e9bd036ae 100644
--- a/drivers/windows/file_access_windows_pipe.h
+++ b/drivers/windows/file_access_windows_pipe.h
@@ -64,14 +64,12 @@ public:
virtual bool eof_reached() const override { return false; }
- virtual uint8_t get_8() const override; ///< get a byte
virtual uint64_t get_buffer(uint8_t *p_dst, uint64_t p_length) const override;
virtual Error get_error() const override; ///< get last error
virtual Error resize(int64_t p_length) override { return ERR_UNAVAILABLE; }
virtual void flush() override {}
- virtual void store_8(uint8_t p_src) override; ///< store a byte
virtual void store_buffer(const uint8_t *p_src, uint64_t p_length) override; ///< store an array of bytes
virtual bool file_exists(const String &p_name) override { return false; }
diff --git a/drivers/winmidi/midi_driver_winmidi.cpp b/drivers/winmidi/midi_driver_winmidi.cpp
index 07f0226c5d..0f37f63ccd 100644
--- a/drivers/winmidi/midi_driver_winmidi.cpp
+++ b/drivers/winmidi/midi_driver_winmidi.cpp
@@ -36,26 +36,42 @@
void MIDIDriverWinMidi::read(HMIDIIN hMidiIn, UINT wMsg, DWORD_PTR dwInstance, DWORD_PTR dwParam1, DWORD_PTR dwParam2) {
if (wMsg == MIM_DATA) {
- receive_input_packet((int)dwInstance, (uint64_t)dwParam2, (uint8_t *)&dwParam1, 3);
+ // For MIM_DATA: dwParam1 = wMidiMessage, dwParam2 = dwTimestamp.
+ // Windows implementation has already unpacked running status and dropped any SysEx,
+ // so we can just forward straight to the event.
+ const uint8_t *midi_msg = (uint8_t *)&dwParam1;
+ send_event((int)dwInstance, midi_msg[0], &midi_msg[1], 2);
}
}
Error MIDIDriverWinMidi::open() {
+ int device_index = 0;
for (UINT i = 0; i < midiInGetNumDevs(); i++) {
HMIDIIN midi_in;
+ MIDIINCAPS caps;
- MMRESULT res = midiInOpen(&midi_in, i, (DWORD_PTR)read, (DWORD_PTR)i, CALLBACK_FUNCTION);
- if (res == MMSYSERR_NOERROR) {
+ MMRESULT open_res = midiInOpen(&midi_in, i, (DWORD_PTR)read,
+ (DWORD_PTR)device_index, CALLBACK_FUNCTION);
+ MMRESULT caps_res = midiInGetDevCaps(i, &caps, sizeof(MIDIINCAPS));
+
+ if (open_res == MMSYSERR_NOERROR) {
midiInStart(midi_in);
- connected_sources.insert(i, midi_in);
+ connected_sources.push_back(midi_in);
+ if (caps_res == MMSYSERR_NOERROR) {
+ connected_input_names.push_back(caps.szPname);
+ } else {
+ // Should push something even if we don't get a name,
+ // so that the IDs line up correctly on the script side.
+ connected_input_names.push_back("ERROR");
+ }
+ // Only increment device index for successfully connected devices.
+ device_index++;
} else {
char err[256];
- midiInGetErrorText(res, err, 256);
+ midiInGetErrorText(open_res, err, 256);
ERR_PRINT("midiInOpen error: " + String(err));
- MIDIINCAPS caps;
- res = midiInGetDevCaps(i, &caps, sizeof(MIDIINCAPS));
- if (res == MMSYSERR_NOERROR) {
+ if (caps_res == MMSYSERR_NOERROR) {
ERR_PRINT("Can't open MIDI device \"" + String(caps.szPname) + "\", is it being used by another application?");
}
}
@@ -64,25 +80,6 @@ Error MIDIDriverWinMidi::open() {
return OK;
}
-PackedStringArray MIDIDriverWinMidi::get_connected_inputs() {
- PackedStringArray list;
-
- for (int i = 0; i < connected_sources.size(); i++) {
- HMIDIIN midi_in = connected_sources[i];
- UINT id = 0;
- MMRESULT res = midiInGetID(midi_in, &id);
- if (res == MMSYSERR_NOERROR) {
- MIDIINCAPS caps;
- res = midiInGetDevCaps(i, &caps, sizeof(MIDIINCAPS));
- if (res == MMSYSERR_NOERROR) {
- list.push_back(caps.szPname);
- }
- }
- }
-
- return list;
-}
-
void MIDIDriverWinMidi::close() {
for (int i = 0; i < connected_sources.size(); i++) {
HMIDIIN midi_in = connected_sources[i];
@@ -90,9 +87,7 @@ void MIDIDriverWinMidi::close() {
midiInClose(midi_in);
}
connected_sources.clear();
-}
-
-MIDIDriverWinMidi::MIDIDriverWinMidi() {
+ connected_input_names.clear();
}
MIDIDriverWinMidi::~MIDIDriverWinMidi() {
diff --git a/drivers/winmidi/midi_driver_winmidi.h b/drivers/winmidi/midi_driver_winmidi.h
index f3e016f378..7a75252233 100644
--- a/drivers/winmidi/midi_driver_winmidi.h
+++ b/drivers/winmidi/midi_driver_winmidi.h
@@ -48,12 +48,10 @@ class MIDIDriverWinMidi : public MIDIDriver {
static void CALLBACK read(HMIDIIN hMidiIn, UINT wMsg, DWORD_PTR dwInstance, DWORD_PTR dwParam1, DWORD_PTR dwParam2);
public:
- virtual Error open();
- virtual void close();
+ virtual Error open() override;
+ virtual void close() override;
- virtual PackedStringArray get_connected_inputs();
-
- MIDIDriverWinMidi();
+ MIDIDriverWinMidi() = default;
virtual ~MIDIDriverWinMidi();
};