diff options
338 files changed, 22925 insertions, 4085 deletions
diff --git a/SConstruct b/SConstruct index 63cff4fe16..785cc0b1a3 100644 --- a/SConstruct +++ b/SConstruct @@ -799,8 +799,8 @@ if env["lto"] != "none": # This needs to come after `configure`, otherwise we don't have env.msvc. if not env.msvc: # Specifying GNU extensions support explicitly, which are supported by - # both GCC and Clang. Both currently default to gnu11 and gnu++17. - env.Prepend(CFLAGS=["-std=gnu11"]) + # both GCC and Clang. Both currently default to gnu17 and gnu++17. + env.Prepend(CFLAGS=["-std=gnu17"]) env.Prepend(CXXFLAGS=["-std=gnu++17"]) else: # MSVC started offering C standard support with Visual Studio 2019 16.8, which covers all @@ -809,7 +809,7 @@ else: if cc_version_major < 16: print_warning("Visual Studio 2017 cannot specify a C-Standard.") else: - env.Prepend(CFLAGS=["/std:c11"]) + env.Prepend(CFLAGS=["/std:c17"]) # MSVC is non-conforming with the C++ standard by default, so we enable more conformance. # Note that this is still not complete conformance, as certain Windows-related headers # don't compile under complete conformance. diff --git a/core/config/project_settings.cpp b/core/config/project_settings.cpp index 01f15f9c8e..b389e5a58e 100644 --- a/core/config/project_settings.cpp +++ b/core/config/project_settings.cpp @@ -494,6 +494,7 @@ bool ProjectSettings::_load_resource_pack(const String &p_pack, bool p_replace_f } void ProjectSettings::_convert_to_last_version(int p_from_version) { +#ifndef DISABLE_DEPRECATED if (p_from_version <= 3) { // Converts the actions from array to dictionary (array of events to dictionary with deadzone + events) for (KeyValue<StringName, ProjectSettings::VariantContainer> &E : props) { @@ -507,6 +508,22 @@ void ProjectSettings::_convert_to_last_version(int p_from_version) { } } } + if (p_from_version <= 5) { + // Converts the device in events from -1 (emulated events) to -3 (all events). + for (KeyValue<StringName, ProjectSettings::VariantContainer> &E : props) { + if (String(E.key).begins_with("input/")) { + Dictionary action = E.value.variant; + Array events = action["events"]; + for (int i = 0; i < events.size(); i++) { + Ref<InputEvent> ev = events[i]; + if (ev.is_valid() && ev->get_device() == -1) { // -1 was the previous value (GH-97707). + ev->set_device(InputEvent::DEVICE_ID_ALL_DEVICES); + } + } + } + } + } +#endif // DISABLE_DEPRECATED } /* @@ -1460,6 +1477,7 @@ ProjectSettings::ProjectSettings() { GLOBAL_DEF("display/window/size/transparent", false); GLOBAL_DEF("display/window/size/extend_to_title", false); GLOBAL_DEF("display/window/size/no_focus", false); + GLOBAL_DEF("display/window/size/sharp_corners", false); GLOBAL_DEF(PropertyInfo(Variant::INT, "display/window/size/window_width_override", PROPERTY_HINT_RANGE, "0,7680,1,or_greater"), 0); // 8K resolution GLOBAL_DEF(PropertyInfo(Variant::INT, "display/window/size/window_height_override", PROPERTY_HINT_RANGE, "0,4320,1,or_greater"), 0); // 8K resolution diff --git a/core/core_bind.h b/core/core_bind.h index ce0bde3c05..d59a2c55f1 100644 --- a/core/core_bind.h +++ b/core/core_bind.h @@ -32,7 +32,6 @@ #define CORE_BIND_H #include "core/debugger/engine_profiler.h" -#include "core/io/image.h" #include "core/io/resource_loader.h" #include "core/io/resource_saver.h" #include "core/object/script_language.h" diff --git a/core/extension/gdextension_interface.cpp b/core/extension/gdextension_interface.cpp index 66b0161160..91f0b4a2de 100644 --- a/core/extension/gdextension_interface.cpp +++ b/core/extension/gdextension_interface.cpp @@ -34,6 +34,7 @@ #include "core/extension/gdextension.h" #include "core/extension/gdextension_compat_hashes.h" #include "core/io/file_access.h" +#include "core/io/image.h" #include "core/io/xml_parser.h" #include "core/object/class_db.h" #include "core/object/script_language_extension.h" diff --git a/core/input/input.cpp b/core/input/input.cpp index eba7ded267..6261a435fa 100644 --- a/core/input/input.cpp +++ b/core/input/input.cpp @@ -690,6 +690,7 @@ void Input::_parse_input_event_impl(const Ref<InputEvent> &p_event, bool p_is_em button_event->set_canceled(st->is_canceled()); button_event->set_button_index(MouseButton::LEFT); button_event->set_double_click(st->is_double_tap()); + button_event->set_window_id(st->get_window_id()); BitField<MouseButtonMask> ev_bm = mouse_button_mask; if (st->is_pressed()) { @@ -727,6 +728,7 @@ void Input::_parse_input_event_impl(const Ref<InputEvent> &p_event, bool p_is_em motion_event->set_velocity(sd->get_velocity()); motion_event->set_screen_velocity(sd->get_screen_velocity()); motion_event->set_button_mask(mouse_button_mask); + motion_event->set_window_id(sd->get_window_id()); _parse_input_event_impl(motion_event, true); } diff --git a/core/input/input_event.cpp b/core/input/input_event.cpp index 905526bbbd..d125bad252 100644 --- a/core/input/input_event.cpp +++ b/core/input/input_event.cpp @@ -35,9 +35,6 @@ #include "core/os/keyboard.h" #include "core/os/os.h" -const int InputEvent::DEVICE_ID_EMULATION = -1; -const int InputEvent::DEVICE_ID_INTERNAL = -2; - void InputEvent::set_device(int p_device) { device = p_device; emit_changed(); diff --git a/core/input/input_event.h b/core/input/input_event.h index 19176f748e..80bca28fbf 100644 --- a/core/input/input_event.h +++ b/core/input/input_event.h @@ -62,8 +62,9 @@ protected: static void _bind_methods(); public: - static const int DEVICE_ID_EMULATION; - static const int DEVICE_ID_INTERNAL; + inline static constexpr int DEVICE_ID_EMULATION = -1; + inline static constexpr int DEVICE_ID_INTERNAL = -2; + inline static constexpr int DEVICE_ID_ALL_DEVICES = -3; // Signify that a given Action can be triggered by any device. void set_device(int p_device); int get_device() const; diff --git a/core/input/input_map.cpp b/core/input/input_map.cpp index 27a50c79f6..5b9377fe59 100644 --- a/core/input/input_map.cpp +++ b/core/input/input_map.cpp @@ -39,8 +39,6 @@ InputMap *InputMap::singleton = nullptr; -int InputMap::ALL_DEVICES = -1; - void InputMap::_bind_methods() { ClassDB::bind_method(D_METHOD("has_action", "action"), &InputMap::has_action); ClassDB::bind_method(D_METHOD("get_actions"), &InputMap::_get_actions); @@ -163,7 +161,7 @@ List<Ref<InputEvent>>::Element *InputMap::_find_event(Action &p_action, const Re int i = 0; for (List<Ref<InputEvent>>::Element *E = p_action.inputs.front(); E; E = E->next()) { int device = E->get()->get_device(); - if (device == ALL_DEVICES || device == p_event->get_device()) { + if (device == InputEvent::DEVICE_ID_ALL_DEVICES || device == p_event->get_device()) { if (E->get()->action_match(p_event, p_exact_match, p_action.deadzone, r_pressed, r_strength, r_raw_strength)) { if (r_event_index) { *r_event_index = i; diff --git a/core/input/input_map.h b/core/input/input_map.h index b29687d144..45798490f7 100644 --- a/core/input/input_map.h +++ b/core/input/input_map.h @@ -43,11 +43,6 @@ class InputMap : public Object { GDCLASS(InputMap, Object); public: - /** - * A special value used to signify that a given Action can be triggered by any device - */ - static int ALL_DEVICES; - struct Action { int id; float deadzone; diff --git a/core/io/image.cpp b/core/io/image.cpp index aa391b77dd..d782af931f 100644 --- a/core/io/image.cpp +++ b/core/io/image.cpp @@ -2751,7 +2751,7 @@ Error Image::compress_from_channels(CompressMode p_mode, UsedChannels p_channels case COMPRESS_S3TC: { // BC3 is unsupported currently. - if ((p_channels == USED_CHANNELS_RGB || p_channels == USED_CHANNELS_L) && _image_compress_bc_rd_func) { + if ((p_channels == USED_CHANNELS_R || p_channels == USED_CHANNELS_RGB || p_channels == USED_CHANNELS_L) && _image_compress_bc_rd_func) { Error result = _image_compress_bc_rd_func(this, p_channels); // If the image was compressed successfully, we return here. If not, we fall back to the default compression scheme. diff --git a/core/io/json.cpp b/core/io/json.cpp index 664ff7857b..22219fca29 100644 --- a/core/io/json.cpp +++ b/core/io/json.cpp @@ -121,7 +121,7 @@ String JSON::_stringify(const Variant &p_var, const String &p_indent, int p_cur_ d.get_key_list(&keys); if (p_sort_keys) { - keys.sort(); + keys.sort_custom<StringLikeVariantOrder>(); } bool first_key = true; diff --git a/core/io/resource_format_binary.cpp b/core/io/resource_format_binary.cpp index 109999d612..ecbb9c0104 100644 --- a/core/io/resource_format_binary.cpp +++ b/core/io/resource_format_binary.cpp @@ -1268,6 +1268,11 @@ void ResourceFormatLoaderBinary::get_recognized_extensions_for_type(const String return; } + // res files not supported for GDExtension. + if (p_type == "GDExtension") { + return; + } + List<String> extensions; ClassDB::get_extensions_for_type(p_type, &extensions); diff --git a/core/io/translation_loader_po.cpp b/core/io/translation_loader_po.cpp index 578cd91c52..812fbc774e 100644 --- a/core/io/translation_loader_po.cpp +++ b/core/io/translation_loader_po.cpp @@ -31,7 +31,6 @@ #include "translation_loader_po.h" #include "core/io/file_access.h" -#include "core/string/translation.h" #include "core/string/translation_po.h" Ref<Resource> TranslationLoaderPO::load_translation(Ref<FileAccess> f, Error *r_error) { @@ -361,7 +360,7 @@ void TranslationLoaderPO::get_recognized_extensions(List<String> *p_extensions) } bool TranslationLoaderPO::handles_type(const String &p_type) const { - return (p_type == "Translation"); + return (p_type == "Translation") || (p_type == "TranslationPO"); } String TranslationLoaderPO::get_resource_type(const String &p_path) const { diff --git a/core/os/os.h b/core/os/os.h index 30d2a4266f..c42a39e0a4 100644 --- a/core/os/os.h +++ b/core/os/os.h @@ -32,7 +32,6 @@ #define OS_H #include "core/config/engine.h" -#include "core/io/image.h" #include "core/io/logger.h" #include "core/io/remote_filesystem_client.h" #include "core/os/time_enums.h" diff --git a/core/string/translation_domain.cpp b/core/string/translation_domain.cpp index 6a5e1b2af8..53b9ce8379 100644 --- a/core/string/translation_domain.cpp +++ b/core/string/translation_domain.cpp @@ -389,6 +389,10 @@ void TranslationDomain::set_pseudolocalization_suffix(const String &p_suffix) { } StringName TranslationDomain::pseudolocalize(const StringName &p_message) const { + if (p_message.is_empty()) { + return p_message; + } + String message = p_message; int length = message.length(); if (pseudolocalization.override_enabled) { diff --git a/core/string/translation_server.cpp b/core/string/translation_server.cpp index 89b37d0b8a..92b473b61f 100644 --- a/core/string/translation_server.cpp +++ b/core/string/translation_server.cpp @@ -228,32 +228,41 @@ int TranslationServer::compare_locales(const String &p_locale_a, const String &p return 10; } + const String cache_key = p_locale_a + "|" + p_locale_b; + const int *cached_result = locale_compare_cache.getptr(cache_key); + if (cached_result) { + return *cached_result; + } + String locale_a = _standardize_locale(p_locale_a, true); String locale_b = _standardize_locale(p_locale_b, true); if (locale_a == locale_b) { // Exact match. + locale_compare_cache.insert(cache_key, 10); return 10; } Vector<String> locale_a_elements = locale_a.split("_"); Vector<String> locale_b_elements = locale_b.split("_"); - if (locale_a_elements[0] == locale_b_elements[0]) { - // Matching language, both locales have extra parts. - // Return number of matching elements. - int matching_elements = 1; - for (int i = 1; i < locale_a_elements.size(); i++) { - for (int j = 1; j < locale_b_elements.size(); j++) { - if (locale_a_elements[i] == locale_b_elements[j]) { - matching_elements++; - } - } - } - return matching_elements; - } else { + if (locale_a_elements[0] != locale_b_elements[0]) { // No match. + locale_compare_cache.insert(cache_key, 0); return 0; } + + // Matching language, both locales have extra parts. + // Return number of matching elements. + int matching_elements = 1; + for (int i = 1; i < locale_a_elements.size(); i++) { + for (int j = 1; j < locale_b_elements.size(); j++) { + if (locale_a_elements[i] == locale_b_elements[j]) { + matching_elements++; + } + } + } + locale_compare_cache.insert(cache_key, matching_elements); + return matching_elements; } String TranslationServer::get_locale_name(const String &p_locale) const { diff --git a/core/string/translation_server.h b/core/string/translation_server.h index a09230c019..2438349a69 100644 --- a/core/string/translation_server.h +++ b/core/string/translation_server.h @@ -46,6 +46,8 @@ class TranslationServer : public Object { Ref<TranslationDomain> doc_domain; HashMap<StringName, Ref<TranslationDomain>> custom_domains; + mutable HashMap<String, int> locale_compare_cache; + bool enabled = true; static TranslationServer *singleton; diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index e6f7492a18..4e9eb922f6 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -1850,6 +1850,8 @@ String String::num(double p_num, int p_decimals) { } String String::num_int64(int64_t p_num, int base, bool capitalize_hex) { + ERR_FAIL_COND_V_MSG(base < 2 || base > 36, "", "Cannot convert to base " + itos(base) + ", since the value is " + (base < 2 ? "less than 2." : "greater than 36.")); + bool sign = p_num < 0; int64_t n = p_num; @@ -1888,6 +1890,8 @@ String String::num_int64(int64_t p_num, int base, bool capitalize_hex) { } String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) { + ERR_FAIL_COND_V_MSG(base < 2 || base > 36, "", "Cannot convert to base " + itos(base) + ", since the value is " + (base < 2 ? "less than 2." : "greater than 36.")); + uint64_t n = p_num; int chars = 0; diff --git a/editor/plugins/cpu_particles_3d_editor_plugin.h b/core/templates/a_hash_map.cpp index 2a1ea93ac8..04a14c261a 100644 --- a/editor/plugins/cpu_particles_3d_editor_plugin.h +++ b/core/templates/a_hash_map.cpp @@ -1,5 +1,5 @@ /**************************************************************************/ -/* cpu_particles_3d_editor_plugin.h */ +/* a_hash_map.cpp */ /**************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -28,58 +28,12 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /**************************************************************************/ -#ifndef CPU_PARTICLES_3D_EDITOR_PLUGIN_H -#define CPU_PARTICLES_3D_EDITOR_PLUGIN_H +#include "a_hash_map.h" +#include "core/variant/variant.h" -#include "editor/plugins/gpu_particles_3d_editor_plugin.h" -#include "scene/3d/cpu_particles_3d.h" - -class CPUParticles3DEditor : public GPUParticles3DEditorBase { - GDCLASS(CPUParticles3DEditor, GPUParticles3DEditorBase); - - enum Menu { - MENU_OPTION_GENERATE_AABB, - MENU_OPTION_CREATE_EMISSION_VOLUME_FROM_NODE, - MENU_OPTION_CLEAR_EMISSION_VOLUME, - MENU_OPTION_RESTART, - MENU_OPTION_CONVERT_TO_GPU_PARTICLES, - }; - - ConfirmationDialog *generate_aabb = nullptr; - SpinBox *generate_seconds = nullptr; - CPUParticles3D *node = nullptr; - - void _generate_aabb(); - - void _menu_option(int); - - friend class CPUParticles3DEditorPlugin; - - virtual void _generate_emission_points() override; - -protected: - void _notification(int p_notification); - void _node_removed(Node *p_node); - -public: - void edit(CPUParticles3D *p_particles); - CPUParticles3DEditor(); -}; - -class CPUParticles3DEditorPlugin : public EditorPlugin { - GDCLASS(CPUParticles3DEditorPlugin, EditorPlugin); - - CPUParticles3DEditor *particles_editor = nullptr; - -public: - virtual String get_name() const override { return "CPUParticles3D"; } - bool has_main_screen() const override { return false; } - virtual void edit(Object *p_object) override; - virtual bool handles(Object *p_object) const override; - virtual void make_visible(bool p_visible) override; - - CPUParticles3DEditorPlugin(); - ~CPUParticles3DEditorPlugin(); -}; - -#endif // CPU_PARTICLES_3D_EDITOR_PLUGIN_H +// Explicit instantiation. +template class AHashMap<int, int>; +template class AHashMap<String, int>; +template class AHashMap<StringName, StringName>; +template class AHashMap<StringName, Variant>; +template class AHashMap<StringName, int>; diff --git a/core/templates/a_hash_map.h b/core/templates/a_hash_map.h new file mode 100644 index 0000000000..29983ea268 --- /dev/null +++ b/core/templates/a_hash_map.h @@ -0,0 +1,732 @@ +/**************************************************************************/ +/* a_hash_map.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef A_HASH_MAP_H +#define A_HASH_MAP_H + +#include "core/templates/hash_map.h" + +struct HashMapData { + union { + struct + { + uint32_t hash; + uint32_t hash_to_key; + }; + uint64_t data; + }; +}; + +static_assert(sizeof(HashMapData) == 8); + +/** + * An array-based implementation of a hash map. It is very efficient in terms of performance and + * memory usage. Works like a dynamic array, adding elements to the end of the array, and + * allows you to access array elements by their index by using `get_by_index` method. + * Example: + * ``` + * AHashMap<int, Object *> map; + * + * int get_object_id_by_number(int p_number) { + * int id = map.get_index(p_number); + * return id; + * } + * + * Object *get_object_by_id(int p_id) { + * map.get_by_index(p_id).value; + * } + * ``` + * Still, don`t erase the elements because ID can break. + * + * When an element erase, its place is taken by the element from the end. + * + * <------------- + * | | + * 6 8 X 9 32 -1 5 -10 7 X X X + * 6 8 7 9 32 -1 5 -10 X X X X + * + * + * Use RBMap if you need to iterate over sorted elements. + * + * Use HashMap if: + * - You need to keep an iterator or const pointer to Key and you intend to add/remove elements in the meantime. + * - You need to preserve the insertion order when using erase. + * + * It is recommended to use `HashMap` if `KeyValue` size is very large. + */ +template <typename TKey, typename TValue, + typename Hasher = HashMapHasherDefault, + typename Comparator = HashMapComparatorDefault<TKey>> +class AHashMap { +public: + // Must be a power of two. + static constexpr uint32_t INITIAL_CAPACITY = 16; + static constexpr uint32_t EMPTY_HASH = 0; + static_assert(EMPTY_HASH == 0, "EMPTY_HASH must always be 0 for the memcpy() optimization."); + +private: + typedef KeyValue<TKey, TValue> MapKeyValue; + MapKeyValue *elements = nullptr; + HashMapData *map_data = nullptr; + + // Due to optimization, this is `capacity - 1`. Use + 1 to get normal capacity. + uint32_t capacity = 0; + uint32_t num_elements = 0; + + uint32_t _hash(const TKey &p_key) const { + uint32_t hash = Hasher::hash(p_key); + + if (unlikely(hash == EMPTY_HASH)) { + hash = EMPTY_HASH + 1; + } + + return hash; + } + + static _FORCE_INLINE_ uint32_t _get_resize_count(uint32_t p_capacity) { + return p_capacity ^ (p_capacity + 1) >> 2; // = get_capacity() * 0.75 - 1; Works only if p_capacity = 2^n - 1. + } + + static _FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash, uint32_t p_local_capacity) { + const uint32_t original_pos = p_hash & p_local_capacity; + return (p_pos - original_pos + p_local_capacity + 1) & p_local_capacity; + } + + bool _lookup_pos(const TKey &p_key, uint32_t &r_pos, uint32_t &r_hash_pos) const { + if (unlikely(elements == nullptr)) { + return false; // Failed lookups, no elements. + } + return _lookup_pos_with_hash(p_key, r_pos, r_hash_pos, _hash(p_key)); + } + + bool _lookup_pos_with_hash(const TKey &p_key, uint32_t &r_pos, uint32_t &r_hash_pos, uint32_t p_hash) const { + if (unlikely(elements == nullptr)) { + return false; // Failed lookups, no elements. + } + + uint32_t pos = p_hash & capacity; + HashMapData data = map_data[pos]; + if (data.hash == p_hash && Comparator::compare(elements[data.hash_to_key].key, p_key)) { + r_pos = data.hash_to_key; + r_hash_pos = pos; + return true; + } + + if (data.data == EMPTY_HASH) { + return false; + } + + // A collision occurred. + pos = (pos + 1) & capacity; + uint32_t distance = 1; + while (true) { + data = map_data[pos]; + if (data.hash == p_hash && Comparator::compare(elements[data.hash_to_key].key, p_key)) { + r_pos = data.hash_to_key; + r_hash_pos = pos; + return true; + } + + if (data.data == EMPTY_HASH) { + return false; + } + + if (distance > _get_probe_length(pos, data.hash, capacity)) { + return false; + } + + pos = (pos + 1) & capacity; + distance++; + } + } + + uint32_t _insert_with_hash(uint32_t p_hash, uint32_t p_index) { + uint32_t pos = p_hash & capacity; + + if (map_data[pos].data == EMPTY_HASH) { + uint64_t data = ((uint64_t)p_index << 32) | p_hash; + map_data[pos].data = data; + return pos; + } + + uint32_t distance = 1; + pos = (pos + 1) & capacity; + HashMapData c_data; + c_data.hash = p_hash; + c_data.hash_to_key = p_index; + + while (true) { + if (map_data[pos].data == EMPTY_HASH) { +#ifdef DEV_ENABLED + if (unlikely(distance > 12)) { + WARN_PRINT("Excessive collision count (" + + itos(distance) + "), is the right hash function being used?"); + } +#endif + map_data[pos] = c_data; + return pos; + } + + // Not an empty slot, let's check the probing length of the existing one. + uint32_t existing_probe_len = _get_probe_length(pos, map_data[pos].hash, capacity); + if (existing_probe_len < distance) { + SWAP(c_data, map_data[pos]); + distance = existing_probe_len; + } + + pos = (pos + 1) & capacity; + distance++; + } + } + + void _resize_and_rehash(uint32_t p_new_capacity) { + uint32_t real_old_capacity = capacity + 1; + // Capacity can't be 0 and must be 2^n - 1. + capacity = MAX(4u, p_new_capacity); + uint32_t real_capacity = next_power_of_2(capacity); + capacity = real_capacity - 1; + + HashMapData *old_map_data = map_data; + + map_data = reinterpret_cast<HashMapData *>(Memory::alloc_static(sizeof(HashMapData) * real_capacity)); + elements = reinterpret_cast<MapKeyValue *>(Memory::realloc_static(elements, sizeof(MapKeyValue) * (_get_resize_count(capacity) + 1))); + + memset(map_data, EMPTY_HASH, real_capacity * sizeof(HashMapData)); + + if (num_elements != 0) { + for (uint32_t i = 0; i < real_old_capacity; i++) { + HashMapData data = old_map_data[i]; + if (data.data != EMPTY_HASH) { + _insert_with_hash(data.hash, data.hash_to_key); + } + } + } + + Memory::free_static(old_map_data); + } + + int32_t _insert_element(const TKey &p_key, const TValue &p_value, uint32_t p_hash) { + if (unlikely(elements == nullptr)) { + // Allocate on demand to save memory. + + uint32_t real_capacity = capacity + 1; + map_data = reinterpret_cast<HashMapData *>(Memory::alloc_static(sizeof(HashMapData) * real_capacity)); + elements = reinterpret_cast<MapKeyValue *>(Memory::alloc_static(sizeof(MapKeyValue) * (_get_resize_count(capacity) + 1))); + + memset(map_data, EMPTY_HASH, real_capacity * sizeof(HashMapData)); + } + + if (unlikely(num_elements > _get_resize_count(capacity))) { + _resize_and_rehash(capacity * 2); + } + + memnew_placement(&elements[num_elements], MapKeyValue(p_key, p_value)); + + _insert_with_hash(p_hash, num_elements); + num_elements++; + return num_elements - 1; + } + + void _init_from(const AHashMap &p_other) { + capacity = p_other.capacity; + uint32_t real_capacity = capacity + 1; + num_elements = p_other.num_elements; + + if (p_other.num_elements == 0) { + return; + } + + map_data = reinterpret_cast<HashMapData *>(Memory::alloc_static(sizeof(HashMapData) * real_capacity)); + elements = reinterpret_cast<MapKeyValue *>(Memory::alloc_static(sizeof(MapKeyValue) * (_get_resize_count(capacity) + 1))); + + if constexpr (std::is_trivially_copyable_v<TKey> && std::is_trivially_copyable_v<TValue>) { + void *destination = elements; + const void *source = p_other.elements; + memcpy(destination, source, sizeof(MapKeyValue) * num_elements); + } else { + for (uint32_t i = 0; i < num_elements; i++) { + memnew_placement(&elements[i], MapKeyValue(p_other.elements[i])); + } + } + + memcpy(map_data, p_other.map_data, sizeof(HashMapData) * real_capacity); + } + +public: + /* Standard Godot Container API */ + + _FORCE_INLINE_ uint32_t get_capacity() const { return capacity + 1; } + _FORCE_INLINE_ uint32_t size() const { return num_elements; } + + _FORCE_INLINE_ bool is_empty() const { + return num_elements == 0; + } + + void clear() { + if (elements == nullptr || num_elements == 0) { + return; + } + + memset(map_data, EMPTY_HASH, (capacity + 1) * sizeof(HashMapData)); + if constexpr (!(std::is_trivially_destructible_v<TKey> && std::is_trivially_destructible_v<TValue>)) { + for (uint32_t i = 0; i < num_elements; i++) { + elements[i].key.~TKey(); + elements[i].value.~TValue(); + } + } + + num_elements = 0; + } + + TValue &get(const TKey &p_key) { + uint32_t pos = 0; + uint32_t hash_pos = 0; + bool exists = _lookup_pos(p_key, pos, hash_pos); + CRASH_COND_MSG(!exists, "AHashMap key not found."); + return elements[pos].value; + } + + const TValue &get(const TKey &p_key) const { + uint32_t pos = 0; + uint32_t hash_pos = 0; + bool exists = _lookup_pos(p_key, pos, hash_pos); + CRASH_COND_MSG(!exists, "AHashMap key not found."); + return elements[pos].value; + } + + const TValue *getptr(const TKey &p_key) const { + uint32_t pos = 0; + uint32_t hash_pos = 0; + bool exists = _lookup_pos(p_key, pos, hash_pos); + + if (exists) { + return &elements[pos].value; + } + return nullptr; + } + + TValue *getptr(const TKey &p_key) { + uint32_t pos = 0; + uint32_t hash_pos = 0; + bool exists = _lookup_pos(p_key, pos, hash_pos); + + if (exists) { + return &elements[pos].value; + } + return nullptr; + } + + bool has(const TKey &p_key) const { + uint32_t _pos = 0; + uint32_t h_pos = 0; + return _lookup_pos(p_key, _pos, h_pos); + } + + bool erase(const TKey &p_key) { + uint32_t pos = 0; + uint32_t element_pos = 0; + bool exists = _lookup_pos(p_key, element_pos, pos); + + if (!exists) { + return false; + } + + uint32_t next_pos = (pos + 1) & capacity; + while (map_data[next_pos].hash != EMPTY_HASH && _get_probe_length(next_pos, map_data[next_pos].hash, capacity) != 0) { + SWAP(map_data[next_pos], map_data[pos]); + + pos = next_pos; + next_pos = (next_pos + 1) & capacity; + } + + map_data[pos].data = EMPTY_HASH; + elements[element_pos].key.~TKey(); + elements[element_pos].value.~TValue(); + num_elements--; + + if (element_pos < num_elements) { + void *destination = &elements[element_pos]; + const void *source = &elements[num_elements]; + memcpy(destination, source, sizeof(MapKeyValue)); + uint32_t h_pos = 0; + _lookup_pos(elements[num_elements].key, pos, h_pos); + map_data[h_pos].hash_to_key = element_pos; + } + + return true; + } + + // Replace the key of an entry in-place, without invalidating iterators or changing the entries position during iteration. + // p_old_key must exist in the map and p_new_key must not, unless it is equal to p_old_key. + bool replace_key(const TKey &p_old_key, const TKey &p_new_key) { + if (p_old_key == p_new_key) { + return true; + } + uint32_t pos = 0; + uint32_t element_pos = 0; + ERR_FAIL_COND_V(_lookup_pos(p_new_key, element_pos, pos), false); + ERR_FAIL_COND_V(!_lookup_pos(p_old_key, element_pos, pos), false); + MapKeyValue &element = elements[element_pos]; + const_cast<TKey &>(element.key) = p_new_key; + + uint32_t next_pos = (pos + 1) & capacity; + while (map_data[next_pos].hash != EMPTY_HASH && _get_probe_length(next_pos, map_data[next_pos].hash, capacity) != 0) { + SWAP(map_data[next_pos], map_data[pos]); + + pos = next_pos; + next_pos = (next_pos + 1) & capacity; + } + + map_data[pos].data = EMPTY_HASH; + + uint32_t hash = _hash(p_new_key); + _insert_with_hash(hash, element_pos); + + return true; + } + + // Reserves space for a number of elements, useful to avoid many resizes and rehashes. + // If adding a known (possibly large) number of elements at once, must be larger than old capacity. + void reserve(uint32_t p_new_capacity) { + ERR_FAIL_COND_MSG(p_new_capacity < get_capacity(), "It is impossible to reserve less capacity than is currently available."); + if (elements == nullptr) { + capacity = MAX(4u, p_new_capacity); + capacity = next_power_of_2(capacity) - 1; + return; // Unallocated yet. + } + _resize_and_rehash(p_new_capacity); + } + + /** Iterator API **/ + + struct ConstIterator { + _FORCE_INLINE_ const MapKeyValue &operator*() const { + return *pair; + } + _FORCE_INLINE_ const MapKeyValue *operator->() const { + return pair; + } + _FORCE_INLINE_ ConstIterator &operator++() { + pair++; + return *this; + } + + _FORCE_INLINE_ ConstIterator &operator--() { + pair--; + if (pair < begin) { + pair = end; + } + return *this; + } + + _FORCE_INLINE_ bool operator==(const ConstIterator &b) const { return pair == b.pair; } + _FORCE_INLINE_ bool operator!=(const ConstIterator &b) const { return pair != b.pair; } + + _FORCE_INLINE_ explicit operator bool() const { + return pair != end; + } + + _FORCE_INLINE_ ConstIterator(MapKeyValue *p_key, MapKeyValue *p_begin, MapKeyValue *p_end) { + pair = p_key; + begin = p_begin; + end = p_end; + } + _FORCE_INLINE_ ConstIterator() {} + _FORCE_INLINE_ ConstIterator(const ConstIterator &p_it) { + pair = p_it.pair; + begin = p_it.begin; + end = p_it.end; + } + _FORCE_INLINE_ void operator=(const ConstIterator &p_it) { + pair = p_it.pair; + begin = p_it.begin; + end = p_it.end; + } + + private: + MapKeyValue *pair = nullptr; + MapKeyValue *begin = nullptr; + MapKeyValue *end = nullptr; + }; + + struct Iterator { + _FORCE_INLINE_ MapKeyValue &operator*() const { + return *pair; + } + _FORCE_INLINE_ MapKeyValue *operator->() const { + return pair; + } + _FORCE_INLINE_ Iterator &operator++() { + pair++; + return *this; + } + _FORCE_INLINE_ Iterator &operator--() { + pair--; + if (pair < begin) { + pair = end; + } + return *this; + } + + _FORCE_INLINE_ bool operator==(const Iterator &b) const { return pair == b.pair; } + _FORCE_INLINE_ bool operator!=(const Iterator &b) const { return pair != b.pair; } + + _FORCE_INLINE_ explicit operator bool() const { + return pair != end; + } + + _FORCE_INLINE_ Iterator(MapKeyValue *p_key, MapKeyValue *p_begin, MapKeyValue *p_end) { + pair = p_key; + begin = p_begin; + end = p_end; + } + _FORCE_INLINE_ Iterator() {} + _FORCE_INLINE_ Iterator(const Iterator &p_it) { + pair = p_it.pair; + begin = p_it.begin; + end = p_it.end; + } + _FORCE_INLINE_ void operator=(const Iterator &p_it) { + pair = p_it.pair; + begin = p_it.begin; + end = p_it.end; + } + + operator ConstIterator() const { + return ConstIterator(pair, begin, end); + } + + private: + MapKeyValue *pair = nullptr; + MapKeyValue *begin = nullptr; + MapKeyValue *end = nullptr; + }; + + _FORCE_INLINE_ Iterator begin() { + return Iterator(elements, elements, elements + num_elements); + } + _FORCE_INLINE_ Iterator end() { + return Iterator(elements + num_elements, elements, elements + num_elements); + } + _FORCE_INLINE_ Iterator last() { + if (unlikely(num_elements == 0)) { + return Iterator(nullptr, nullptr, nullptr); + } + return Iterator(elements + num_elements - 1, elements, elements + num_elements); + } + + Iterator find(const TKey &p_key) { + uint32_t pos = 0; + uint32_t h_pos = 0; + bool exists = _lookup_pos(p_key, pos, h_pos); + if (!exists) { + return end(); + } + return Iterator(elements + pos, elements, elements + num_elements); + } + + void remove(const Iterator &p_iter) { + if (p_iter) { + erase(p_iter->key); + } + } + + _FORCE_INLINE_ ConstIterator begin() const { + return ConstIterator(elements, elements, elements + num_elements); + } + _FORCE_INLINE_ ConstIterator end() const { + return ConstIterator(elements + num_elements, elements, elements + num_elements); + } + _FORCE_INLINE_ ConstIterator last() const { + if (unlikely(num_elements == 0)) { + return ConstIterator(nullptr, nullptr, nullptr); + } + return ConstIterator(elements + num_elements - 1, elements, elements + num_elements); + } + + ConstIterator find(const TKey &p_key) const { + uint32_t pos = 0; + uint32_t h_pos = 0; + bool exists = _lookup_pos(p_key, pos, h_pos); + if (!exists) { + return end(); + } + return ConstIterator(elements + pos, elements, elements + num_elements); + } + + /* Indexing */ + + const TValue &operator[](const TKey &p_key) const { + uint32_t pos = 0; + uint32_t h_pos = 0; + bool exists = _lookup_pos(p_key, pos, h_pos); + CRASH_COND(!exists); + return elements[pos].value; + } + + TValue &operator[](const TKey &p_key) { + uint32_t pos = 0; + uint32_t h_pos = 0; + uint32_t hash = _hash(p_key); + bool exists = _lookup_pos_with_hash(p_key, pos, h_pos, hash); + + if (exists) { + return elements[pos].value; + } else { + pos = _insert_element(p_key, TValue(), hash); + return elements[pos].value; + } + } + + /* Insert */ + + Iterator insert(const TKey &p_key, const TValue &p_value) { + uint32_t pos = 0; + uint32_t h_pos = 0; + uint32_t hash = _hash(p_key); + bool exists = _lookup_pos_with_hash(p_key, pos, h_pos, hash); + + if (!exists) { + pos = _insert_element(p_key, p_value, hash); + } else { + elements[pos].value = p_value; + } + return Iterator(elements + pos, elements, elements + num_elements); + } + + // Inserts an element without checking if it already exists. + void insert_new(const TKey &p_key, const TValue &p_value) { + DEV_ASSERT(!has(p_key)); + uint32_t hash = _hash(p_key); + _insert_element(p_key, p_value, hash); + } + + /* Array methods. */ + + // Unsafe. Changing keys and going outside the bounds of an array can lead to undefined behavior. + KeyValue<TKey, TValue> *get_elements_ptr() { + return elements; + } + + // Returns the element index. If not found, returns -1. + int get_index(const TKey &p_key) { + uint32_t pos = 0; + uint32_t h_pos = 0; + bool exists = _lookup_pos(p_key, pos, h_pos); + if (!exists) { + return -1; + } + return pos; + } + + KeyValue<TKey, TValue> &get_by_index(uint32_t p_index) { + CRASH_BAD_UNSIGNED_INDEX(p_index, num_elements); + return elements[p_index]; + } + + bool erase_by_index(uint32_t p_index) { + if (p_index >= size()) { + return false; + } + return erase(elements[p_index].key); + } + + /* Constructors */ + + AHashMap(const AHashMap &p_other) { + _init_from(p_other); + } + + AHashMap(const HashMap<TKey, TValue> &p_other) { + reserve(p_other.size()); + for (const KeyValue<TKey, TValue> &E : p_other) { + uint32_t hash = _hash(E.key); + _insert_element(E.key, E.value, hash); + } + } + + void operator=(const AHashMap &p_other) { + if (this == &p_other) { + return; // Ignore self assignment. + } + + reset(); + + _init_from(p_other); + } + + void operator=(const HashMap<TKey, TValue> &p_other) { + reset(); + if (p_other.size() > get_capacity()) { + reserve(p_other.size()); + } + for (const KeyValue<TKey, TValue> &E : p_other) { + uint32_t hash = _hash(E.key); + _insert_element(E.key, E.value, hash); + } + } + + AHashMap(uint32_t p_initial_capacity) { + // Capacity can't be 0 and must be 2^n - 1. + capacity = MAX(4u, p_initial_capacity); + capacity = next_power_of_2(capacity) - 1; + } + AHashMap() : + capacity(INITIAL_CAPACITY - 1) { + } + + void reset() { + if (elements != nullptr) { + if constexpr (!(std::is_trivially_destructible_v<TKey> && std::is_trivially_destructible_v<TValue>)) { + for (uint32_t i = 0; i < num_elements; i++) { + elements[i].key.~TKey(); + elements[i].value.~TValue(); + } + } + Memory::free_static(elements); + Memory::free_static(map_data); + elements = nullptr; + } + capacity = INITIAL_CAPACITY - 1; + num_elements = 0; + } + + ~AHashMap() { + reset(); + } +}; + +extern template class AHashMap<int, int>; +extern template class AHashMap<String, int>; +extern template class AHashMap<StringName, StringName>; +extern template class AHashMap<StringName, Variant>; +extern template class AHashMap<StringName, int>; + +#endif // A_HASH_MAP_H diff --git a/core/templates/hashfuncs.h b/core/templates/hashfuncs.h index 21eef10297..7818ed0706 100644 --- a/core/templates/hashfuncs.h +++ b/core/templates/hashfuncs.h @@ -393,6 +393,13 @@ struct HashMapHasherDefault { } }; +struct HashHasher { + static _FORCE_INLINE_ uint32_t hash(const int32_t hash) { return hash; } + static _FORCE_INLINE_ uint32_t hash(const uint32_t hash) { return hash; } + static _FORCE_INLINE_ uint64_t hash(const int64_t hash) { return hash; } + static _FORCE_INLINE_ uint64_t hash(const uint64_t hash) { return hash; } +}; + // TODO: Fold this into HashMapHasherDefault once C++20 concepts are allowed template <typename T> struct HashableHasher { diff --git a/core/variant/variant.h b/core/variant/variant.h index c76b849abd..3b1924e8ea 100644 --- a/core/variant/variant.h +++ b/core/variant/variant.h @@ -854,6 +854,19 @@ struct StringLikeVariantComparator { static bool compare(const Variant &p_lhs, const Variant &p_rhs); }; +struct StringLikeVariantOrder { + static _ALWAYS_INLINE_ bool compare(const Variant &p_lhs, const Variant &p_rhs) { + if (p_lhs.is_string() && p_rhs.is_string()) { + return p_lhs.operator String() < p_rhs.operator String(); + } + return p_lhs < p_rhs; + } + + _ALWAYS_INLINE_ bool operator()(const Variant &p_lhs, const Variant &p_rhs) const { + return compare(p_lhs, p_rhs); + } +}; + Variant::ObjData &Variant::_get_obj() { return *reinterpret_cast<ObjData *>(&_data._mem[0]); } diff --git a/core/variant/variant_op.cpp b/core/variant/variant_op.cpp index d2c1cde970..ce27fbdf67 100644 --- a/core/variant/variant_op.cpp +++ b/core/variant/variant_op.cpp @@ -980,6 +980,7 @@ void Variant::_register_variant_operators() { register_op<OperatorEvaluatorInDictionaryHas<Color>>(Variant::OP_IN, Variant::COLOR, Variant::DICTIONARY); register_op<OperatorEvaluatorInDictionaryHas<StringName>>(Variant::OP_IN, Variant::STRING_NAME, Variant::DICTIONARY); register_op<OperatorEvaluatorInDictionaryHas<NodePath>>(Variant::OP_IN, Variant::NODE_PATH, Variant::DICTIONARY); + register_op<OperatorEvaluatorInDictionaryHas<::RID>>(Variant::OP_IN, Variant::RID, Variant::DICTIONARY); register_op<OperatorEvaluatorInDictionaryHasObject>(Variant::OP_IN, Variant::OBJECT, Variant::DICTIONARY); register_op<OperatorEvaluatorInDictionaryHas<Callable>>(Variant::OP_IN, Variant::CALLABLE, Variant::DICTIONARY); register_op<OperatorEvaluatorInDictionaryHas<Signal>>(Variant::OP_IN, Variant::SIGNAL, Variant::DICTIONARY); @@ -1021,6 +1022,7 @@ void Variant::_register_variant_operators() { register_op<OperatorEvaluatorInArrayFind<Color, Array>>(Variant::OP_IN, Variant::COLOR, Variant::ARRAY); register_op<OperatorEvaluatorInArrayFind<StringName, Array>>(Variant::OP_IN, Variant::STRING_NAME, Variant::ARRAY); register_op<OperatorEvaluatorInArrayFind<NodePath, Array>>(Variant::OP_IN, Variant::NODE_PATH, Variant::ARRAY); + register_op<OperatorEvaluatorInArrayFind<::RID, Array>>(Variant::OP_IN, Variant::RID, Variant::ARRAY); register_op<OperatorEvaluatorInArrayFindObject>(Variant::OP_IN, Variant::OBJECT, Variant::ARRAY); register_op<OperatorEvaluatorInArrayFind<Callable, Array>>(Variant::OP_IN, Variant::CALLABLE, Variant::ARRAY); register_op<OperatorEvaluatorInArrayFind<Signal, Array>>(Variant::OP_IN, Variant::SIGNAL, Variant::ARRAY); diff --git a/core/variant/variant_parser.cpp b/core/variant/variant_parser.cpp index f5f96456d3..f05b9cd83a 100644 --- a/core/variant/variant_parser.cpp +++ b/core/variant/variant_parser.cpp @@ -2245,7 +2245,7 @@ Error VariantWriter::write(const Variant &p_variant, StoreStringFunc p_store_str } else { List<Variant> keys; dict.get_key_list(&keys); - keys.sort(); + keys.sort_custom<StringLikeVariantOrder>(); if (keys.is_empty()) { // Avoid unnecessary line break. diff --git a/doc/classes/@GlobalScope.xml b/doc/classes/@GlobalScope.xml index 55d00b6cf9..66f15f7494 100644 --- a/doc/classes/@GlobalScope.xml +++ b/doc/classes/@GlobalScope.xml @@ -6,7 +6,7 @@ <description> A list of global scope enumerated constants and built-in functions. This is all that resides in the globals, constants regarding error codes, keycodes, property hints, etc. Singletons are also documented here, since they can be accessed from anywhere. - For the entries related to GDScript which can be accessed in any script see [@GDScript]. + For the entries that can only be accessed from scripts written in GDScript, see [@GDScript]. </description> <tutorials> <link title="Random number generation">$DOCS_URL/tutorials/math/random_number_generation.html</link> diff --git a/doc/classes/AtlasTexture.xml b/doc/classes/AtlasTexture.xml index 45877f4003..287f2cc19a 100644 --- a/doc/classes/AtlasTexture.xml +++ b/doc/classes/AtlasTexture.xml @@ -6,7 +6,7 @@ <description> [Texture2D] resource that draws only part of its [member atlas] texture, as defined by the [member region]. An additional [member margin] can also be set, which is useful for small adjustments. Multiple [AtlasTexture] resources can be cropped from the same [member atlas]. Packing many smaller textures into a singular large texture helps to optimize video memory costs and render calls. - [b]Note:[/b] [AtlasTexture] cannot be used in an [AnimatedTexture], and may not tile properly in nodes such as [TextureRect], when inside other [AtlasTexture] resources. + [b]Note:[/b] [AtlasTexture] cannot be used in an [AnimatedTexture], and will not tile properly in nodes such as [TextureRect] or [Sprite2D]. To tile an [AtlasTexture], modify its [member region] instead. </description> <tutorials> </tutorials> diff --git a/doc/classes/AudioEffectFilter.xml b/doc/classes/AudioEffectFilter.xml index e5c1f4ccf4..18540de736 100644 --- a/doc/classes/AudioEffectFilter.xml +++ b/doc/classes/AudioEffectFilter.xml @@ -14,6 +14,7 @@ Threshold frequency for the filter, in Hz. </member> <member name="db" type="int" setter="set_db" getter="get_db" enum="AudioEffectFilter.FilterDB" default="0"> + Steepness of the cutoff curve in dB per octave, also known as the order of the filter. Higher orders have a more aggressive cutoff. </member> <member name="gain" type="float" setter="set_gain" getter="get_gain" default="1.0"> Gain amount of the frequencies after the filter. @@ -24,12 +25,16 @@ </members> <constants> <constant name="FILTER_6DB" value="0" enum="FilterDB"> + Cutting off at 6dB per octave. </constant> <constant name="FILTER_12DB" value="1" enum="FilterDB"> + Cutting off at 12dB per octave. </constant> <constant name="FILTER_18DB" value="2" enum="FilterDB"> + Cutting off at 18dB per octave. </constant> <constant name="FILTER_24DB" value="3" enum="FilterDB"> + Cutting off at 24dB per octave. </constant> </constants> </class> diff --git a/doc/classes/AudioEffectStereoEnhance.xml b/doc/classes/AudioEffectStereoEnhance.xml index f009bec5bb..459ae3ebc6 100644 --- a/doc/classes/AudioEffectStereoEnhance.xml +++ b/doc/classes/AudioEffectStereoEnhance.xml @@ -11,11 +11,13 @@ </tutorials> <members> <member name="pan_pullout" type="float" setter="set_pan_pullout" getter="get_pan_pullout" default="1.0"> - Values greater than 1.0 increase intensity of any panning on audio passing through this effect, whereas values less than 1.0 will decrease the panning intensity. A value of 0.0 will downmix audio to mono. + Amplifies the difference between stereo channels, increasing or decreasing existing panning. A value of 0.0 will downmix stereo to mono. Does not affect a mono signal. </member> <member name="surround" type="float" setter="set_surround" getter="get_surround" default="0.0"> + Widens sound stage through phase shifting in conjunction with [member time_pullout_ms]. Just pans sound to the left channel if [member time_pullout_ms] is 0. </member> <member name="time_pullout_ms" type="float" setter="set_time_pullout" getter="get_time_pullout" default="0.0"> + Widens sound stage through phase shifting in conjunction with [member surround]. Just delays the right channel if [member surround] is 0. </member> </members> </class> diff --git a/doc/classes/AudioServer.xml b/doc/classes/AudioServer.xml index 6830c632cf..4a20736164 100644 --- a/doc/classes/AudioServer.xml +++ b/doc/classes/AudioServer.xml @@ -110,6 +110,12 @@ Returns the volume of the bus at index [param bus_idx] in dB. </description> </method> + <method name="get_driver_name" qualifiers="const"> + <return type="String" /> + <description> + Returns the name of the current audio driver. The default usually depends on the operating system, but may be overridden via the [code]--audio-driver[/code] [url=$DOCS_URL/tutorials/editor/command_line_tutorial.html]command line argument[/url]. [code]--headless[/code] also automatically sets the audio driver to [code]Dummy[/code]. See also [member ProjectSettings.audio/driver/driver]. + </description> + </method> <method name="get_input_device_list"> <return type="PackedStringArray" /> <description> diff --git a/doc/classes/AudioStreamPlayback.xml b/doc/classes/AudioStreamPlayback.xml index 02f3407f79..f01406d0f1 100644 --- a/doc/classes/AudioStreamPlayback.xml +++ b/doc/classes/AudioStreamPlayback.xml @@ -79,12 +79,47 @@ Overridable method. Called whenever the audio stream is mixed if the playback is active and [method AudioServer.set_enable_tagging_used_audio_streams] has been set to [code]true[/code]. Editor plugins may use this method to "tag" the current position along the audio stream and display it in a preview. </description> </method> + <method name="get_loop_count" qualifiers="const"> + <return type="int" /> + <description> + Returns the number of times the stream has looped. + </description> + </method> + <method name="get_playback_position" qualifiers="const"> + <return type="float" /> + <description> + Returns the current position in the stream, in seconds. + </description> + </method> <method name="get_sample_playback" qualifiers="const" experimental=""> <return type="AudioSamplePlayback" /> <description> Returns the [AudioSamplePlayback] associated with this [AudioStreamPlayback] for playing back the audio sample of this stream. </description> </method> + <method name="is_playing" qualifiers="const"> + <return type="bool" /> + <description> + Returns [code]true[/code] if the stream is playing. + </description> + </method> + <method name="mix_audio"> + <return type="PackedVector2Array" /> + <param index="0" name="rate_scale" type="float" /> + <param index="1" name="frames" type="int" /> + <description> + Mixes up to [param frames] of audio from the stream from the current position, at a rate of [param rate_scale], advancing the stream. + Returns a [PackedVector2Array] where each element holds the left and right channel volume levels of each frame. + [b]Note:[/b] Can return fewer frames than requested, make sure to use the size of the return value. + </description> + </method> + <method name="seek"> + <return type="void" /> + <param index="0" name="time" type="float" default="0.0" /> + <description> + Seeks the stream at the given [param time], in seconds. + </description> + </method> <method name="set_sample_playback" experimental=""> <return type="void" /> <param index="0" name="playback_sample" type="AudioSamplePlayback" /> @@ -92,5 +127,18 @@ Associates [AudioSamplePlayback] to this [AudioStreamPlayback] for playing back the audio sample of this stream. </description> </method> + <method name="start"> + <return type="void" /> + <param index="0" name="from_pos" type="float" default="0.0" /> + <description> + Starts the stream from the given [param from_pos], in seconds. + </description> + </method> + <method name="stop"> + <return type="void" /> + <description> + Stops the stream. + </description> + </method> </methods> </class> diff --git a/doc/classes/CPUParticles3D.xml b/doc/classes/CPUParticles3D.xml index d7770f2cd5..04ee95457c 100644 --- a/doc/classes/CPUParticles3D.xml +++ b/doc/classes/CPUParticles3D.xml @@ -11,6 +11,12 @@ <link title="Particle systems (3D)">$DOCS_URL/tutorials/3d/particles/index.html</link> </tutorials> <methods> + <method name="capture_aabb" qualifiers="const"> + <return type="AABB" /> + <description> + Returns the axis-aligned bounding box that contains all the particles that are active in the current frame. + </description> + </method> <method name="convert_from_particles"> <return type="void" /> <param index="0" name="particles" type="Node" /> diff --git a/doc/classes/CanvasItem.xml b/doc/classes/CanvasItem.xml index bc17c8b008..78e9c392db 100644 --- a/doc/classes/CanvasItem.xml +++ b/doc/classes/CanvasItem.xml @@ -533,9 +533,13 @@ </method> <method name="make_canvas_position_local" qualifiers="const"> <return type="Vector2" /> - <param index="0" name="screen_point" type="Vector2" /> + <param index="0" name="viewport_point" type="Vector2" /> <description> - Assigns [param screen_point] as this node's new local transform. + Transforms [param viewport_point] from the viewport's coordinates to this node's local coordinates. + For the opposite operation, use [method get_global_transform_with_canvas]. + [codeblock] + var viewport_point = get_global_transform_with_canvas() * local_point + [/codeblock] </description> </method> <method name="make_input_local" qualifiers="const"> diff --git a/doc/classes/CanvasModulate.xml b/doc/classes/CanvasModulate.xml index 7db0361020..43505498b3 100644 --- a/doc/classes/CanvasModulate.xml +++ b/doc/classes/CanvasModulate.xml @@ -7,6 +7,7 @@ [CanvasModulate] applies a color tint to all nodes on a canvas. Only one can be used to tint a canvas, but [CanvasLayer]s can be used to render things independently. </description> <tutorials> + <link title="2D lights and shadows">$DOCS_URL/tutorials/2d/2d_lights_and_shadows.html</link> </tutorials> <members> <member name="color" type="Color" setter="set_color" getter="get_color" default="Color(1, 1, 1, 1)" keywords="colour"> diff --git a/doc/classes/Control.xml b/doc/classes/Control.xml index 9d36bc657b..516b01bd7d 100644 --- a/doc/classes/Control.xml +++ b/doc/classes/Control.xml @@ -1365,7 +1365,7 @@ <constant name="LAYOUT_DIRECTION_INHERITED" value="0" enum="LayoutDirection"> Automatic layout direction, determined from the parent control layout direction. </constant> - <constant name="LAYOUT_DIRECTION_LOCALE" value="1" enum="LayoutDirection"> + <constant name="LAYOUT_DIRECTION_APPLICATION_LOCALE" value="1" enum="LayoutDirection"> Automatic layout direction, determined from the current locale. </constant> <constant name="LAYOUT_DIRECTION_LTR" value="2" enum="LayoutDirection"> @@ -1374,6 +1374,14 @@ <constant name="LAYOUT_DIRECTION_RTL" value="3" enum="LayoutDirection"> Right-to-left layout direction. </constant> + <constant name="LAYOUT_DIRECTION_SYSTEM_LOCALE" value="4" enum="LayoutDirection"> + Automatic layout direction, determined from the system locale. + </constant> + <constant name="LAYOUT_DIRECTION_MAX" value="5" enum="LayoutDirection"> + Represents the size of the [enum LayoutDirection] enum. + </constant> + <constant name="LAYOUT_DIRECTION_LOCALE" value="1" enum="LayoutDirection" deprecated="Use [constant LAYOUT_DIRECTION_APPLICATION_LOCALE] instead."> + </constant> <constant name="TEXT_DIRECTION_INHERITED" value="3" enum="TextDirection"> Text writing direction is the same as layout direction. </constant> diff --git a/doc/classes/DisplayServer.xml b/doc/classes/DisplayServer.xml index 37bf265d20..674aa148a3 100644 --- a/doc/classes/DisplayServer.xml +++ b/doc/classes/DisplayServer.xml @@ -1683,6 +1683,7 @@ <param index="1" name="window_id" type="int" default="0" /> <description> Sets window mode for the given window to [param mode]. See [enum WindowMode] for possible values and how each mode behaves. + [b]Note:[/b] On Android, setting it to [constant WINDOW_MODE_FULLSCREEN] or [constant WINDOW_MODE_EXCLUSIVE_FULLSCREEN] will enable immersive mode. [b]Note:[/b] Setting the window to full screen forcibly sets the borderless flag to [code]true[/code], so make sure to set it back to [code]false[/code] when not wanted. </description> </method> @@ -2058,6 +2059,7 @@ <constant name="WINDOW_MODE_FULLSCREEN" value="3" enum="WindowMode"> Full screen mode with full multi-window support. Full screen window covers the entire display area of a screen and has no decorations. The display's video mode is not changed. + [b]On Android:[/b] This enables immersive mode. [b]On Windows:[/b] Multi-window full-screen mode has a 1px border of the [member ProjectSettings.rendering/environment/defaults/default_clear_color] color. [b]On macOS:[/b] A new desktop is used to display the running project. [b]Note:[/b] Regardless of the platform, enabling full screen will change the window size to match the monitor's size. Therefore, make sure your project supports [url=$DOCS_URL/tutorials/rendering/multiple_resolutions.html]multiple resolutions[/url] when enabling full screen mode. @@ -2065,6 +2067,7 @@ <constant name="WINDOW_MODE_EXCLUSIVE_FULLSCREEN" value="4" enum="WindowMode"> A single window full screen mode. This mode has less overhead, but only one window can be open on a given screen at a time (opening a child window or application switching will trigger a full screen transition). Full screen window covers the entire display area of a screen and has no border or decorations. The display's video mode is not changed. + [b]On Android:[/b] This enables immersive mode. [b]On Windows:[/b] Depending on video driver, full screen transition might cause screens to go black for a moment. [b]On macOS:[/b] A new desktop is used to display the running project. Exclusive full screen mode prevents Dock and Menu from showing up when the mouse pointer is hovering the edge of the screen. [b]On Linux (X11):[/b] Exclusive full screen mode bypasses compositor. @@ -2099,7 +2102,11 @@ <constant name="WINDOW_FLAG_MOUSE_PASSTHROUGH" value="7" enum="WindowFlags"> All mouse events are passed to the underlying window of the same application. </constant> - <constant name="WINDOW_FLAG_MAX" value="8" enum="WindowFlags"> + <constant name="WINDOW_FLAG_SHARP_CORNERS" value="8" enum="WindowFlags"> + Window style is overridden, forcing sharp corners. + [b]Note:[/b] This flag is implemented only on Windows (11). + </constant> + <constant name="WINDOW_FLAG_MAX" value="9" enum="WindowFlags"> Max value of the [enum WindowFlags]. </constant> <constant name="WINDOW_EVENT_MOUSE_ENTER" value="0" enum="WindowEvent"> diff --git a/doc/classes/FileAccess.xml b/doc/classes/FileAccess.xml index 71a1fc8e09..5888e30339 100644 --- a/doc/classes/FileAccess.xml +++ b/doc/classes/FileAccess.xml @@ -474,7 +474,7 @@ <return type="void" /> <param index="0" name="line" type="String" /> <description> - Appends [param line] to the file followed by a line return character ([code]\n[/code]), encoding the text as UTF-8. + Stores [param line] in the file followed by a newline character ([code]\n[/code]), encoding the text as UTF-8. </description> </method> <method name="store_pascal_string"> @@ -496,7 +496,7 @@ <return type="void" /> <param index="0" name="string" type="String" /> <description> - Appends [param string] to the file without a line return, encoding the text as UTF-8. + Stores [param string] in the file without a newline character ([code]\n[/code]), encoding the text as UTF-8. [b]Note:[/b] This method is intended to be used to write text files. The string is stored as a UTF-8 encoded buffer without string length or terminating zero, which means that it can't be loaded back easily. If you want to store a retrievable string in a binary file, consider using [method store_pascal_string] instead. For retrieving strings from a text file, you can use [code]get_buffer(length).get_string_from_utf8()[/code] (if you know the length) or [method get_as_text]. </description> </method> diff --git a/doc/classes/FileDialog.xml b/doc/classes/FileDialog.xml index 9529fac77e..18b8eb1d39 100644 --- a/doc/classes/FileDialog.xml +++ b/doc/classes/FileDialog.xml @@ -164,7 +164,7 @@ <member name="size" type="Vector2i" setter="set_size" getter="get_size" overrides="Window" default="Vector2i(640, 360)" /> <member name="title" type="String" setter="set_title" getter="get_title" overrides="Window" default=""Save a File"" /> <member name="use_native_dialog" type="bool" setter="set_use_native_dialog" getter="get_use_native_dialog" default="false"> - If [code]true[/code], [member access] is set to [constant ACCESS_FILESYSTEM], and it is supported by the current [DisplayServer], OS native dialog will be used instead of custom one. + If [code]true[/code], and if supported by the current [DisplayServer], OS native dialog will be used instead of custom one. [b]Note:[/b] On Linux and macOS, sandboxed apps always use native dialogs to access the host file system. [b]Note:[/b] On macOS, sandboxed apps will save security-scoped bookmarks to retain access to the opened folders across multiple sessions. Use [method OS.get_granted_permissions] to get a list of saved bookmarks. [b]Note:[/b] Native dialogs are isolated from the base process, file dialog properties can't be modified once the dialog is shown. diff --git a/doc/classes/HTTPRequest.xml b/doc/classes/HTTPRequest.xml index 6efa675a71..a4adf4d1b1 100644 --- a/doc/classes/HTTPRequest.xml +++ b/doc/classes/HTTPRequest.xml @@ -275,6 +275,7 @@ Request successful. </constant> <constant name="RESULT_CHUNKED_BODY_SIZE_MISMATCH" value="1" enum="Result"> + Request failed due to a mismatch between the expected and actual chunked body size during transfer. Possible causes include network errors, server misconfiguration, or issues with chunked encoding. </constant> <constant name="RESULT_CANT_CONNECT" value="2" enum="Result"> Request failed while connecting. @@ -295,6 +296,7 @@ Request exceeded its maximum size limit, see [member body_size_limit]. </constant> <constant name="RESULT_BODY_DECOMPRESS_FAILED" value="8" enum="Result"> + Request failed due to an error while decompressing the response body. Possible causes include unsupported or incorrect compression format, corrupted data, or incomplete transfer. </constant> <constant name="RESULT_REQUEST_FAILED" value="9" enum="Result"> Request failed (currently unused). diff --git a/doc/classes/Light3D.xml b/doc/classes/Light3D.xml index bda5fb69de..966d0fdcb4 100644 --- a/doc/classes/Light3D.xml +++ b/doc/classes/Light3D.xml @@ -115,6 +115,9 @@ <member name="shadow_blur" type="float" setter="set_param" getter="get_param" default="1.0"> Blurs the edges of the shadow. Can be used to hide pixel artifacts in low-resolution shadow maps. A high value can impact performance, make shadows appear grainy and can cause other unwanted artifacts. Try to keep as near default as possible. </member> + <member name="shadow_caster_mask" type="int" setter="set_shadow_caster_mask" getter="get_shadow_caster_mask" default="4294967295"> + The light will only cast shadows using objects in the selected layers. + </member> <member name="shadow_enabled" type="bool" setter="set_shadow" getter="has_shadow" default="false"> If [code]true[/code], the light will cast real-time shadows. This has a significant performance cost. Only enable shadow rendering when it makes a noticeable difference in the scene's appearance, and consider using [member distance_fade_enabled] to hide the light when far away from the [Camera3D]. </member> diff --git a/doc/classes/LineEdit.xml b/doc/classes/LineEdit.xml index 3e0c328dcb..91c9072f73 100644 --- a/doc/classes/LineEdit.xml +++ b/doc/classes/LineEdit.xml @@ -33,6 +33,7 @@ - [kbd]Cmd + E[/kbd]: Same as [kbd]End[/kbd], move the caret to the end of the line - [kbd]Cmd + Left Arrow[/kbd]: Same as [kbd]Home[/kbd], move the caret to the beginning of the line - [kbd]Cmd + Right Arrow[/kbd]: Same as [kbd]End[/kbd], move the caret to the end of the line + [b]Note:[/b] Caret movement shortcuts listed above are not affected by [member shortcut_keys_enabled]. </description> <tutorials> </tutorials> @@ -334,7 +335,7 @@ If [code]false[/code], it's impossible to select the text using mouse nor keyboard. </member> <member name="shortcut_keys_enabled" type="bool" setter="set_shortcut_keys_enabled" getter="is_shortcut_keys_enabled" default="true"> - If [code]false[/code], using shortcuts will be disabled. + If [code]true[/code], shortcut keys for context menu items are enabled, even if the context menu is disabled. </member> <member name="structured_text_bidi_override" type="int" setter="set_structured_text_bidi_override" getter="get_structured_text_bidi_override" enum="TextServer.StructuredTextParser" default="0"> Set BiDi algorithm override for the structured text. diff --git a/doc/classes/Material.xml b/doc/classes/Material.xml index 760773d5d9..94d12018ca 100644 --- a/doc/classes/Material.xml +++ b/doc/classes/Material.xml @@ -45,7 +45,7 @@ <method name="inspect_native_shader_code"> <return type="void" /> <description> - Only available when running in the editor. Opens a popup that visualizes the generated shader code, including all variants and internal shader code. + Only available when running in the editor. Opens a popup that visualizes the generated shader code, including all variants and internal shader code. See also [method Shader.inspect_native_shader_code]. </description> </method> </methods> diff --git a/doc/classes/Node.xml b/doc/classes/Node.xml index 42753f7071..c07948b546 100644 --- a/doc/classes/Node.xml +++ b/doc/classes/Node.xml @@ -73,6 +73,7 @@ <description> Called during the physics processing step of the main loop. Physics processing means that the frame rate is synced to the physics, i.e. the [param delta] variable should be constant. [param delta] is in seconds. It is only called if physics processing is enabled, which is done automatically if this method is overridden, and can be toggled with [method set_physics_process]. + Processing happens in order of [member process_physics_priority], lower priority values are called first. Nodes with the same priority are processed in tree order, or top to bottom as seen in the editor (also known as pre-order traversal). Corresponds to the [constant NOTIFICATION_PHYSICS_PROCESS] notification in [method Object._notification]. [b]Note:[/b] This method is only called if the node is present in the scene tree (i.e. if it's not an orphan). </description> @@ -83,6 +84,7 @@ <description> Called during the processing step of the main loop. Processing happens at every frame and as fast as possible, so the [param delta] time since the previous frame is not constant. [param delta] is in seconds. It is only called if processing is enabled, which is done automatically if this method is overridden, and can be toggled with [method set_process]. + Processing happens in order of [member process_priority], lower priority values are called first. Nodes with the same priority are processed in tree order, or top to bottom as seen in the editor (also known as pre-order traversal). Corresponds to the [constant NOTIFICATION_PROCESS] notification in [method Object._notification]. [b]Note:[/b] This method is only called if the node is present in the scene tree (i.e. if it's not an orphan). </description> @@ -1015,10 +1017,10 @@ The node's processing behavior (see [enum ProcessMode]). To check if the node can process in its current mode, use [method can_process]. </member> <member name="process_physics_priority" type="int" setter="set_physics_process_priority" getter="get_physics_process_priority" default="0"> - Similar to [member process_priority] but for [constant NOTIFICATION_PHYSICS_PROCESS], [method _physics_process] or the internal version. + Similar to [member process_priority] but for [constant NOTIFICATION_PHYSICS_PROCESS], [method _physics_process], or [constant NOTIFICATION_INTERNAL_PHYSICS_PROCESS]. </member> <member name="process_priority" type="int" setter="set_process_priority" getter="get_process_priority" default="0"> - The node's execution order of the process callbacks ([method _process], [method _physics_process], and internal processing). Nodes whose priority value is [i]lower[/i] call their process callbacks first, regardless of tree order. + The node's execution order of the process callbacks ([method _process], [constant NOTIFICATION_PROCESS], and [constant NOTIFICATION_INTERNAL_PROCESS]). Nodes whose priority value is [i]lower[/i] call their process callbacks first, regardless of tree order. </member> <member name="process_thread_group" type="int" setter="set_process_thread_group" getter="get_process_thread_group" enum="Node.ProcessThreadGroup" default="0"> Set the process thread group for this node (basically, whether it receives [constant NOTIFICATION_PROCESS], [constant NOTIFICATION_PHYSICS_PROCESS], [method _process] or [method _physics_process] (and the internal versions) on the main thread or in a sub-thread. diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index 4266bab2a1..4d41d2e94a 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -10,6 +10,7 @@ [b]Overriding:[/b] Any project setting can be overridden by creating a file named [code]override.cfg[/code] in the project's root directory. This can also be used in exported projects by placing this file in the same directory as the project binary. Overriding will still take the base project settings' [url=$DOCS_URL/tutorials/export/feature_tags.html]feature tags[/url] in account. Therefore, make sure to [i]also[/i] override the setting with the desired feature tags if you want them to override base project settings on all platforms and configurations. </description> <tutorials> + <link title="Project Settings">$DOCS_URL/tutorials/editor/project_settings.html</link> <link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/2747</link> <link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link> <link title="Operating System Testing Demo">https://godotengine.org/asset-library/asset/2789</link> @@ -376,6 +377,7 @@ <member name="audio/driver/driver" type="String" setter="" getter=""> Specifies the audio driver to use. This setting is platform-dependent as each platform supports different audio drivers. If left empty, the default audio driver will be used. The [code]Dummy[/code] audio driver disables all audio playback and recording, which is useful for non-game applications as it reduces CPU usage. It also prevents the engine from appearing as an application playing audio in the OS' audio mixer. + To query the value that is being used at run-time (which may be overridden by command-line arguments or headless mode), use [method AudioServer.get_driver_name]. [b]Note:[/b] The driver in use can be overridden at runtime via the [code]--audio-driver[/code] [url=$DOCS_URL/tutorials/editor/command_line_tutorial.html]command line argument[/url]. </member> <member name="audio/driver/enable_input" type="bool" setter="" getter="" default="false"> @@ -875,6 +877,10 @@ [b]Note:[/b] Certain window managers can be configured to ignore the non-resizable status of a window. Do not rely on this setting as a guarantee that the window will [i]never[/i] be resizable. [b]Note:[/b] This setting is ignored on iOS. </member> + <member name="display/window/size/sharp_corners" type="bool" setter="" getter="" default="false"> + If [code]true[/code], the main window uses sharp corners by default. + [b]Note:[/b] This property is implemented only on Windows (11). + </member> <member name="display/window/size/transparent" type="bool" setter="" getter="" default="false"> If [code]true[/code], enables a window manager hint that the main window background [i]can[/i] be transparent. This does not make the background actually transparent. For the background to be transparent, the root viewport must also be made transparent by enabling [member rendering/viewport/transparent_background]. [b]Note:[/b] To use a transparent splash screen, set [member application/boot_splash/bg_color] to [code]Color(0, 0, 0, 0)[/code]. @@ -2376,26 +2382,30 @@ [b]Note:[/b] It is not recommended to use this setting together with [member rendering/2d/snap/snap_2d_transforms_to_pixel], as movement may appear even less smooth. Prefer only enabling that setting instead. </member> <member name="rendering/anti_aliasing/quality/msaa_2d" type="int" setter="" getter="" default="0"> - Sets the number of MSAA samples to use for 2D/Canvas rendering (as a power of two). MSAA is used to reduce aliasing around the edges of polygons. A higher MSAA value results in smoother edges but can be significantly slower on some hardware, especially integrated graphics due to their limited memory bandwidth. This has no effect on shader-induced aliasing or texture aliasing. + Sets the number of multisample antialiasing (MSAA) samples to use for 2D/Canvas rendering (as a power of two). MSAA is used to reduce aliasing around the edges of polygons. A higher MSAA value results in smoother edges but can be significantly slower on some hardware, especially integrated graphics due to their limited memory bandwidth. This has no effect on shader-induced aliasing or texture aliasing. [b]Note:[/b] MSAA is only supported in the Forward+ and Mobile rendering methods, not Compatibility. + [b]Note:[/b] This property is only read when the project starts. To set the number of 2D MSAA samples at runtime, set [member Viewport.msaa_2d] or use [method RenderingServer.viewport_set_msaa_2d]. </member> <member name="rendering/anti_aliasing/quality/msaa_3d" type="int" setter="" getter="" default="0"> - Sets the number of MSAA samples to use for 3D rendering (as a power of two). MSAA is used to reduce aliasing around the edges of polygons. A higher MSAA value results in smoother edges but can be significantly slower on some hardware, especially integrated graphics due to their limited memory bandwidth. See also [member rendering/scaling_3d/mode] for supersampling, which provides higher quality but is much more expensive. This has no effect on shader-induced aliasing or texture aliasing. + Sets the number of multisample antialiasing (MSAA) samples to use for 3D rendering (as a power of two). MSAA is used to reduce aliasing around the edges of polygons. A higher MSAA value results in smoother edges but can be significantly slower on some hardware, especially integrated graphics due to their limited memory bandwidth. See also [member rendering/scaling_3d/mode] for supersampling, which provides higher quality but is much more expensive. This has no effect on shader-induced aliasing or texture aliasing. + [b]Note:[/b] This property is only read when the project starts. To set the number of 3D MSAA samples at runtime, set [member Viewport.msaa_3d] or use [method RenderingServer.viewport_set_msaa_3d]. </member> <member name="rendering/anti_aliasing/quality/screen_space_aa" type="int" setter="" getter="" default="0"> Sets the screen-space antialiasing mode for the default screen [Viewport]. Screen-space antialiasing works by selectively blurring edges in a post-process shader. It differs from MSAA which takes multiple coverage samples while rendering objects. Screen-space AA methods are typically faster than MSAA and will smooth out specular aliasing, but tend to make scenes appear blurry. The blurriness is partially counteracted by automatically using a negative mipmap LOD bias (see [member rendering/textures/default_filters/texture_mipmap_bias]). Another way to combat specular aliasing is to enable [member rendering/anti_aliasing/screen_space_roughness_limiter/enabled]. [b]Note:[/b] Screen-space antialiasing is only supported in the Forward+ and Mobile rendering methods, not Compatibility. + [b]Note:[/b] This property is only read when the project starts. To set the screen-space antialiasing mode at runtime, set [member Viewport.screen_space_aa] on the root [Viewport] instead, or use [method RenderingServer.viewport_set_screen_space_aa]. </member> <member name="rendering/anti_aliasing/quality/use_debanding" type="bool" setter="" getter="" default="false"> If [code]true[/code], uses a fast post-processing filter to make banding significantly less visible in 3D. 2D rendering is [i]not[/i] affected by debanding unless the [member Environment.background_mode] is [constant Environment.BG_CANVAS]. In some cases, debanding may introduce a slightly noticeable dithering pattern. It's recommended to enable debanding only when actually needed since the dithering pattern will make lossless-compressed screenshots larger. - [b]Note:[/b] This property is only read when the project starts. To set debanding at run-time, set [member Viewport.use_debanding] on the root [Viewport] instead. + [b]Note:[/b] This property is only read when the project starts. To set debanding at runtime, set [member Viewport.use_debanding] on the root [Viewport] instead, or use [method RenderingServer.viewport_set_use_debanding]. </member> <member name="rendering/anti_aliasing/quality/use_taa" type="bool" setter="" getter="" default="false"> - Enables Temporal Anti-Aliasing for the default screen [Viewport]. TAA works by jittering the camera and accumulating the images of the last rendered frames, motion vector rendering is used to account for camera and object motion. Enabling TAA can make the image blurrier, which is partially counteracted by automatically using a negative mipmap LOD bias (see [member rendering/textures/default_filters/texture_mipmap_bias]). + Enables temporal antialiasing for the default screen [Viewport]. TAA works by jittering the camera and accumulating the images of the last rendered frames, motion vector rendering is used to account for camera and object motion. Enabling TAA can make the image blurrier, which is partially counteracted by automatically using a negative mipmap LOD bias (see [member rendering/textures/default_filters/texture_mipmap_bias]). [b]Note:[/b] The implementation is not complete yet. Some visual instances such as particles and skinned meshes may show ghosting artifacts in motion. [b]Note:[/b] TAA is only supported in the Forward+ rendering method, not Mobile or Compatibility. + [b]Note:[/b] This property is only read when the project starts. To set TAA at runtime, set [member Viewport.use_taa] on the root [Viewport] instead, or use [method RenderingServer.viewport_set_use_taa]. </member> <member name="rendering/anti_aliasing/screen_space_roughness_limiter/amount" type="float" setter="" getter="" default="0.25"> [b]Note:[/b] This property is only read when the project starts. To control the screen-space roughness limiter at runtime, call [method RenderingServer.screen_space_roughness_limiter_set_active] instead. @@ -2900,8 +2910,8 @@ </member> <member name="rendering/textures/vram_compression/compress_with_gpu" type="bool" setter="" getter="" default="true"> If [code]true[/code], the texture importer will utilize the GPU for compressing textures, improving the import time of large images. - [b]Note:[/b] This setting requires either Vulkan or D3D12 available as a rendering backend. - [b]Note:[/b] Currently this only affects BC1 and BC6H compression, which are used on Desktop and Console for fully opaque and HDR images respectively. + [b]Note:[/b] This only functions on a device which supports either Vulkan, D3D12, or Metal available as a rendering backend. + [b]Note:[/b] Currently this only affects certain compressed formats (BC1, BC4, and BC6), all of which are exclusive to desktop platforms and consoles. </member> <member name="rendering/textures/vram_compression/import_etc2_astc" type="bool" setter="" getter="" default="false"> If [code]true[/code], the texture importer will import VRAM-compressed textures using the Ericsson Texture Compression 2 algorithm for lower quality textures and normal maps and Adaptable Scalable Texture Compression algorithm for high quality textures (in 4×4 block size). diff --git a/doc/classes/RenderingServer.xml b/doc/classes/RenderingServer.xml index b73315219b..16a554eded 100644 --- a/doc/classes/RenderingServer.xml +++ b/doc/classes/RenderingServer.xml @@ -2137,6 +2137,14 @@ If [code]true[/code], light will cast shadows. Equivalent to [member Light3D.shadow_enabled]. </description> </method> + <method name="light_set_shadow_caster_mask"> + <return type="void" /> + <param index="0" name="light" type="RID" /> + <param index="1" name="mask" type="int" /> + <description> + Sets the shadow caster mask for this 3D light. Shadows will only be cast using objects in the selected layers. Equivalent to [member Light3D.shadow_caster_mask]. + </description> + </method> <method name="lightmap_create"> <return type="RID" /> <description> @@ -3917,7 +3925,7 @@ <param index="0" name="viewport" type="RID" /> <param index="1" name="msaa" type="int" enum="RenderingServer.ViewportMSAA" /> <description> - Sets the multisample anti-aliasing mode for 2D/Canvas on the specified [param viewport] RID. See [enum ViewportMSAA] for options. + Sets the multisample antialiasing mode for 2D/Canvas on the specified [param viewport] RID. See [enum ViewportMSAA] for options. Equivalent to [member ProjectSettings.rendering/anti_aliasing/quality/msaa_2d] or [member Viewport.msaa_2d]. </description> </method> <method name="viewport_set_msaa_3d"> @@ -3925,7 +3933,7 @@ <param index="0" name="viewport" type="RID" /> <param index="1" name="msaa" type="int" enum="RenderingServer.ViewportMSAA" /> <description> - Sets the multisample anti-aliasing mode for 3D on the specified [param viewport] RID. See [enum ViewportMSAA] for options. + Sets the multisample antialiasing mode for 3D on the specified [param viewport] RID. See [enum ViewportMSAA] for options. Equivalent to [member ProjectSettings.rendering/anti_aliasing/quality/msaa_3d] or [member Viewport.msaa_3d]. </description> </method> <method name="viewport_set_occlusion_culling_build_quality"> @@ -4007,7 +4015,7 @@ <param index="0" name="viewport" type="RID" /> <param index="1" name="mode" type="int" enum="RenderingServer.ViewportScreenSpaceAA" /> <description> - Sets the viewport's screen-space antialiasing mode. + Sets the viewport's screen-space antialiasing mode. Equivalent to [member ProjectSettings.rendering/anti_aliasing/quality/screen_space_aa] or [member Viewport.screen_space_aa]. </description> </method> <method name="viewport_set_sdf_oversize_and_scale"> @@ -4074,7 +4082,7 @@ <param index="0" name="viewport" type="RID" /> <param index="1" name="enable" type="bool" /> <description> - If [code]true[/code], enables debanding on the specified viewport. Equivalent to [member ProjectSettings.rendering/anti_aliasing/quality/use_debanding]. + If [code]true[/code], enables debanding on the specified viewport. Equivalent to [member ProjectSettings.rendering/anti_aliasing/quality/use_debanding] or [member Viewport.use_debanding]. </description> </method> <method name="viewport_set_use_hdr_2d"> @@ -4099,7 +4107,7 @@ <param index="0" name="viewport" type="RID" /> <param index="1" name="enable" type="bool" /> <description> - If [code]true[/code], use Temporal Anti-Aliasing. Equivalent to [member ProjectSettings.rendering/anti_aliasing/quality/use_taa]. + If [code]true[/code], use temporal antialiasing. Equivalent to [member ProjectSettings.rendering/anti_aliasing/quality/use_taa] or [member Viewport.use_taa]. </description> </method> <method name="viewport_set_use_xr"> diff --git a/doc/classes/ResourceImporterOBJ.xml b/doc/classes/ResourceImporterOBJ.xml index a63dddb0e8..084638f62b 100644 --- a/doc/classes/ResourceImporterOBJ.xml +++ b/doc/classes/ResourceImporterOBJ.xml @@ -14,6 +14,19 @@ <member name="force_disable_mesh_compression" type="bool" setter="" getter="" default="false"> If [code]true[/code], mesh compression will not be used. Consider enabling if you notice blocky artifacts in your mesh normals or UVs, or if you have meshes that are larger than a few thousand meters in each direction. </member> + <member name="generate_lightmap_uv2" type="bool" setter="" getter="" default="false"> + If [code]true[/code], generates UV2 on import for [LightmapGI] baking. + </member> + <member name="generate_lightmap_uv2_texel_size" type="float" setter="" getter="" default="0.2"> + Controls the size of each texel on the baked lightmap. A smaller value results in more precise lightmaps, at the cost of larger lightmap sizes and longer bake times. + [b]Note:[/b] Only effective if [member generate_lightmap_uv2] is [code]true[/code]. + </member> + <member name="generate_lods" type="bool" setter="" getter="" default="true"> + If [code]true[/code], generates lower detail variants of the mesh which will be displayed in the distance to improve rendering performance. Not all meshes benefit from LOD, especially if they are never rendered from far away. Disabling this can reduce output file size and speed up importing. See [url=$DOCS_URL/tutorials/3d/mesh_lod.html#doc-mesh-lod]Mesh level of detail (LOD)[/url] for more information. + </member> + <member name="generate_shadow_mesh" type="bool" setter="" getter="" default="true"> + If [code]true[/code], enables the generation of shadow meshes on import. This optimizes shadow rendering without reducing quality by welding vertices together when possible. This in turn reduces the memory bandwidth required to render shadows. Shadow mesh generation currently doesn't support using a lower detail level than the source mesh (but shadow rendering will make use of LODs when relevant). + </member> <member name="generate_tangents" type="bool" setter="" getter="" default="true"> If [code]true[/code], generate vertex tangents using [url=http://www.mikktspace.com/]Mikktspace[/url] if the source mesh doesn't have tangent data. When possible, it's recommended to let the 3D modeling software generate tangents on export instead on relying on this option. Tangents are required for correct display of normal and height maps, along with any material/shader features that require tangents. If you don't need material features that require tangents, disabling this can reduce output file size and speed up importing if the source 3D file doesn't contain tangents. diff --git a/doc/classes/Shader.xml b/doc/classes/Shader.xml index 68176dea14..1e7f9e5ee5 100644 --- a/doc/classes/Shader.xml +++ b/doc/classes/Shader.xml @@ -35,6 +35,12 @@ If argument [param get_groups] is true, parameter grouping hints will be provided. </description> </method> + <method name="inspect_native_shader_code"> + <return type="void" /> + <description> + Only available when running in the editor. Opens a popup that visualizes the generated shader code, including all variants and internal shader code. See also [method Material.inspect_native_shader_code]. + </description> + </method> <method name="set_default_texture_parameter"> <return type="void" /> <param index="0" name="name" type="StringName" /> diff --git a/doc/classes/String.xml b/doc/classes/String.xml index 40f08dafe6..588d0c73f9 100644 --- a/doc/classes/String.xml +++ b/doc/classes/String.xml @@ -6,6 +6,7 @@ <description> This is the built-in string Variant type (and the one used by GDScript). Strings may contain any number of Unicode characters, and expose methods useful for manipulating and generating strings. Strings are reference-counted and use a copy-on-write approach (every modification to a string returns a new [String]), so passing them around is cheap in resources. Some string methods have corresponding variations. Variations suffixed with [code]n[/code] ([method countn], [method findn], [method replacen], etc.) are [b]case-insensitive[/b] (they make no distinction between uppercase and lowercase letters). Method variations prefixed with [code]r[/code] ([method rfind], [method rsplit], etc.) are reversed, and start from the end of the string, instead of the beginning. + To convert any Variant to or from a string, see [method @GlobalScope.str], [method @GlobalScope.str_to_var], and [method @GlobalScope.var_to_str]. [b]Note:[/b] In a boolean context, a string will evaluate to [code]false[/code] if it is empty ([code]""[/code]). Otherwise, a string will always evaluate to [code]true[/code]. </description> <tutorials> diff --git a/doc/classes/Transform2D.xml b/doc/classes/Transform2D.xml index 4158fbe710..756716433e 100644 --- a/doc/classes/Transform2D.xml +++ b/doc/classes/Transform2D.xml @@ -251,14 +251,14 @@ </member> <member name="y" type="Vector2" setter="" getter="" default="Vector2(0, 1)"> The transform basis's Y axis, and the column [code]1[/code] of the matrix. Combined with [member x], this represents the transform's rotation, scale, and skew. - On the identity transform, this vector points up ([constant Vector2.UP]). + On the identity transform, this vector points down ([constant Vector2.DOWN]). </member> </members> <constants> <constant name="IDENTITY" value="Transform2D(1, 0, 0, 1, 0, 0)"> The identity [Transform2D]. A transform with no translation, no rotation, and its scale being [code]1[/code]. When multiplied by another [Variant] such as [Rect2] or another [Transform2D], no transformation occurs. This means that: - The [member x] points right ([constant Vector2.RIGHT]); - - The [member y] points up ([constant Vector2.UP]). + - The [member y] points down ([constant Vector2.DOWN]). [codeblock] var transform = Transform2D.IDENTITY print("| X | Y | Origin") diff --git a/doc/classes/Tree.xml b/doc/classes/Tree.xml index b0cb25fafd..9510d237da 100644 --- a/doc/classes/Tree.xml +++ b/doc/classes/Tree.xml @@ -509,6 +509,12 @@ <theme_item name="font_disabled_color" data_type="color" type="Color" default="Color(0.875, 0.875, 0.875, 0.5)"> Text [Color] for a [constant TreeItem.CELL_MODE_CHECK] mode cell when it's non-editable (see [method TreeItem.set_editable]). </theme_item> + <theme_item name="font_hovered_color" data_type="color" type="Color" default="Color(0.95, 0.95, 0.95, 1)"> + Text [Color] used when the item is hovered. + </theme_item> + <theme_item name="font_hovered_dimmed_color" data_type="color" type="Color" default="Color(0.875, 0.875, 0.875, 1)"> + Text [Color] used when the item is hovered, while a button of the same item is hovered as the same time. + </theme_item> <theme_item name="font_outline_color" data_type="color" type="Color" default="Color(0, 0, 0, 1)"> The tint of text outline of the item. </theme_item> @@ -645,6 +651,9 @@ <theme_item name="updown" data_type="icon" type="Texture2D"> The updown arrow icon to display for the [constant TreeItem.CELL_MODE_RANGE] mode cell. </theme_item> + <theme_item name="button_hover" data_type="style" type="StyleBox"> + [StyleBox] used when a button in the tree is hovered. + </theme_item> <theme_item name="button_pressed" data_type="style" type="StyleBox"> [StyleBox] used when a button in the tree is pressed. </theme_item> @@ -666,6 +675,12 @@ <theme_item name="focus" data_type="style" type="StyleBox"> The focused style for the [Tree], drawn on top of everything. </theme_item> + <theme_item name="hovered" data_type="style" type="StyleBox"> + [StyleBox] for the item being hovered. + </theme_item> + <theme_item name="hovered_dimmed" data_type="style" type="StyleBox"> + [StyleBox] for the item being hovered, while a button of the same item is hovered as the same time. + </theme_item> <theme_item name="panel" data_type="style" type="StyleBox"> The background style for the [Tree]. </theme_item> diff --git a/doc/classes/Viewport.xml b/doc/classes/Viewport.xml index 95a1d0f77e..333e61d03f 100644 --- a/doc/classes/Viewport.xml +++ b/doc/classes/Viewport.xml @@ -312,10 +312,12 @@ [b]Note:[/b] [member mesh_lod_threshold] does not affect [GeometryInstance3D] visibility ranges (also known as "manual" LOD or hierarchical LOD). </member> <member name="msaa_2d" type="int" setter="set_msaa_2d" getter="get_msaa_2d" enum="Viewport.MSAA" default="0"> - The multisample anti-aliasing mode for 2D/Canvas rendering. A higher number results in smoother edges at the cost of significantly worse performance. A value of 2 or 4 is best unless targeting very high-end systems. This has no effect on shader-induced aliasing or texture aliasing. + The multisample antialiasing mode for 2D/Canvas rendering. A higher number results in smoother edges at the cost of significantly worse performance. A value of [constant Viewport.MSAA_2X] or [constant Viewport.MSAA_4X] is best unless targeting very high-end systems. This has no effect on shader-induced aliasing or texture aliasing. + See also [member ProjectSettings.rendering/anti_aliasing/quality/msaa_2d] and [method RenderingServer.viewport_set_msaa_2d]. </member> <member name="msaa_3d" type="int" setter="set_msaa_3d" getter="get_msaa_3d" enum="Viewport.MSAA" default="0"> - The multisample anti-aliasing mode for 3D rendering. A higher number results in smoother edges at the cost of significantly worse performance. A value of 2 or 4 is best unless targeting very high-end systems. See also bilinear scaling 3d [member scaling_3d_mode] for supersampling, which provides higher quality but is much more expensive. This has no effect on shader-induced aliasing or texture aliasing. + The multisample antialiasing mode for 3D rendering. A higher number results in smoother edges at the cost of significantly worse performance. A value of [constant Viewport.MSAA_2X] or [constant Viewport.MSAA_4X] is best unless targeting very high-end systems. See also bilinear scaling 3d [member scaling_3d_mode] for supersampling, which provides higher quality but is much more expensive. This has no effect on shader-induced aliasing or texture aliasing. + See also [member ProjectSettings.rendering/anti_aliasing/quality/msaa_3d] and [method RenderingServer.viewport_set_msaa_3d]. </member> <member name="own_world_3d" type="bool" setter="set_use_own_world_3d" getter="is_using_own_world_3d" default="false"> If [code]true[/code], the viewport will use a unique copy of the [World3D] defined in [member world_3d]. @@ -365,6 +367,7 @@ </member> <member name="screen_space_aa" type="int" setter="set_screen_space_aa" getter="get_screen_space_aa" enum="Viewport.ScreenSpaceAA" default="0"> Sets the screen-space antialiasing method used. Screen-space antialiasing works by selectively blurring edges in a post-process shader. It differs from MSAA which takes multiple coverage samples while rendering objects. Screen-space AA methods are typically faster than MSAA and will smooth out specular aliasing, but tend to make scenes appear blurry. + See also [member ProjectSettings.rendering/anti_aliasing/quality/screen_space_aa] and [method RenderingServer.viewport_set_screen_space_aa]. </member> <member name="sdf_oversize" type="int" setter="set_sdf_oversize" getter="get_sdf_oversize" enum="Viewport.SDFOversize" default="1"> Controls how much of the original viewport's size should be covered by the 2D signed distance field. This SDF can be sampled in [CanvasItem] shaders and is also used for [GPUParticles2D] collision. Higher values allow portions of occluders located outside the viewport to still be taken into account in the generated signed distance field, at the cost of performance. If you notice particles falling through [LightOccluder2D]s as the occluders leave the viewport, increase this setting. @@ -389,8 +392,9 @@ If [code]true[/code], the viewport should render its background as transparent. </member> <member name="use_debanding" type="bool" setter="set_use_debanding" getter="is_using_debanding" default="false"> - If [code]true[/code], uses a fast post-processing filter to make banding significantly less visible in 3D. 2D rendering is [i]not[/i] affected by debanding unless the [member Environment.background_mode] is [constant Environment.BG_CANVAS]. See also [member ProjectSettings.rendering/anti_aliasing/quality/use_debanding]. + If [code]true[/code], uses a fast post-processing filter to make banding significantly less visible in 3D. 2D rendering is [i]not[/i] affected by debanding unless the [member Environment.background_mode] is [constant Environment.BG_CANVAS]. In some cases, debanding may introduce a slightly noticeable dithering pattern. It's recommended to enable debanding only when actually needed since the dithering pattern will make lossless-compressed screenshots larger. + See also [member ProjectSettings.rendering/anti_aliasing/quality/use_debanding] and [method RenderingServer.viewport_set_use_debanding]. </member> <member name="use_hdr_2d" type="bool" setter="set_use_hdr_2d" getter="is_using_hdr_2d" default="false"> If [code]true[/code], 2D rendering will use an high dynamic range (HDR) format framebuffer matching the bit depth of the 3D framebuffer. When using the Forward+ renderer this will be an [code]RGBA16[/code] framebuffer, while when using the Mobile renderer it will be an [code]RGB10_A2[/code] framebuffer. Additionally, 2D rendering will take place in linear color space and will be converted to sRGB space immediately before blitting to the screen (if the Viewport is attached to the screen). Practically speaking, this means that the end result of the Viewport will not be clamped into the [code]0-1[/code] range and can be used in 3D rendering without color space adjustments. This allows 2D rendering to take advantage of effects requiring high dynamic range (e.g. 2D glow) as well as substantially improves the appearance of effects requiring highly detailed gradients. @@ -402,8 +406,9 @@ [b]Note:[/b] Due to memory constraints, occlusion culling is not supported by default in Web export templates. It can be enabled by compiling custom Web export templates with [code]module_raycast_enabled=yes[/code]. </member> <member name="use_taa" type="bool" setter="set_use_taa" getter="is_using_taa" default="false"> - Enables Temporal Anti-Aliasing for this viewport. TAA works by jittering the camera and accumulating the images of the last rendered frames, motion vector rendering is used to account for camera and object motion. + Enables temporal antialiasing for this viewport. TAA works by jittering the camera and accumulating the images of the last rendered frames, motion vector rendering is used to account for camera and object motion. [b]Note:[/b] The implementation is not complete yet, some visual instances such as particles and skinned meshes may show artifacts. + See also [member ProjectSettings.rendering/anti_aliasing/quality/use_taa] and [method RenderingServer.viewport_set_use_taa]. </member> <member name="use_xr" type="bool" setter="set_use_xr" getter="is_using_xr" default="false"> If [code]true[/code], the viewport will use the primary XR interface to render XR output. When applicable this can result in a stereoscopic image and the resulting render being output to a headset. diff --git a/doc/classes/Window.xml b/doc/classes/Window.xml index ca155881c8..02110f0162 100644 --- a/doc/classes/Window.xml +++ b/doc/classes/Window.xml @@ -659,6 +659,11 @@ If [member ProjectSettings.display/window/subwindows/embed_subwindows] is [code]false[/code], the position is in absolute screen coordinates. This typically applies to editor plugins. If the setting is [code]true[/code], the window's position is in the coordinates of its parent [Viewport]. [b]Note:[/b] This property only works if [member initial_position] is set to [constant WINDOW_INITIAL_POSITION_ABSOLUTE]. </member> + <member name="sharp_corners" type="bool" setter="set_flag" getter="get_flag" default="false"> + If [code]true[/code], the [Window] will override the OS window style to display sharp corners. + [b]Note:[/b] This property is implemented only on Windows (11). + [b]Note:[/b] This property only works with native windows. + </member> <member name="size" type="Vector2i" setter="set_size" getter="get_size" default="Vector2i(100, 100)"> The window's size in pixels. </member> @@ -842,7 +847,12 @@ All mouse events are passed to the underlying window of the same application. [b]Note:[/b] This flag has no effect in embedded windows. </constant> - <constant name="FLAG_MAX" value="8" enum="Flags"> + <constant name="FLAG_SHARP_CORNERS" value="8" enum="Flags"> + Window style is overridden, forcing sharp corners. + [b]Note:[/b] This flag has no effect in embedded windows. + [b]Note:[/b] This flag is implemented only on Windows (11). + </constant> + <constant name="FLAG_MAX" value="9" enum="Flags"> Max value of the [enum Flags]. </constant> <constant name="CONTENT_SCALE_MODE_DISABLED" value="0" enum="ContentScaleMode"> @@ -878,7 +888,7 @@ <constant name="LAYOUT_DIRECTION_INHERITED" value="0" enum="LayoutDirection"> Automatic layout direction, determined from the parent window layout direction. </constant> - <constant name="LAYOUT_DIRECTION_LOCALE" value="1" enum="LayoutDirection"> + <constant name="LAYOUT_DIRECTION_APPLICATION_LOCALE" value="1" enum="LayoutDirection"> Automatic layout direction, determined from the current locale. </constant> <constant name="LAYOUT_DIRECTION_LTR" value="2" enum="LayoutDirection"> @@ -887,6 +897,14 @@ <constant name="LAYOUT_DIRECTION_RTL" value="3" enum="LayoutDirection"> Right-to-left layout direction. </constant> + <constant name="LAYOUT_DIRECTION_SYSTEM_LOCALE" value="4" enum="LayoutDirection"> + Automatic layout direction, determined from the system locale. + </constant> + <constant name="LAYOUT_DIRECTION_MAX" value="5" enum="LayoutDirection"> + Represents the size of the [enum LayoutDirection] enum. + </constant> + <constant name="LAYOUT_DIRECTION_LOCALE" value="1" enum="LayoutDirection" deprecated="Use [constant LAYOUT_DIRECTION_APPLICATION_LOCALE] instead."> + </constant> <constant name="WINDOW_INITIAL_POSITION_ABSOLUTE" value="0" enum="WindowInitialPosition"> Initial window position is determined by [member position]. </constant> diff --git a/drivers/coreaudio/audio_driver_coreaudio.cpp b/drivers/coreaudio/audio_driver_coreaudio.cpp index fd0adb1fd1..433bbfb3f5 100644 --- a/drivers/coreaudio/audio_driver_coreaudio.cpp +++ b/drivers/coreaudio/audio_driver_coreaudio.cpp @@ -250,7 +250,7 @@ OSStatus AudioDriverCoreAudio::input_callback(void *inRefCon, } void AudioDriverCoreAudio::start() { - if (!active) { + if (!active && audio_unit != nullptr) { OSStatus result = AudioOutputUnitStart(audio_unit); if (result != noErr) { ERR_PRINT("AudioOutputUnitStart failed, code: " + itos(result)); diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index 0ef88e7d52..479afbba93 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -2003,6 +2003,8 @@ static D3D12_BARRIER_LAYOUT _rd_texture_layout_to_d3d12_barrier_layout(RDD::Text switch (p_texture_layout) { case RDD::TEXTURE_LAYOUT_UNDEFINED: return D3D12_BARRIER_LAYOUT_UNDEFINED; + case RDD::TEXTURE_LAYOUT_GENERAL: + return D3D12_BARRIER_LAYOUT_COMMON; case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL: return D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS; case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: @@ -6175,6 +6177,8 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) { return false; case API_TRAIT_CLEARS_WITH_COPY_ENGINE: return false; + case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES: + return true; default: return RenderingDeviceDriver::api_trait_get(p_trait); } diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp index 6e508c6ebf..843b6eac05 100644 --- a/drivers/gles3/rasterizer_gles3.cpp +++ b/drivers/gles3/rasterizer_gles3.cpp @@ -35,6 +35,7 @@ #include "core/config/project_settings.h" #include "core/io/dir_access.h" +#include "core/io/image.h" #include "core/os/os.h" #include "storage/texture_storage.h" diff --git a/drivers/gles3/storage/light_storage.cpp b/drivers/gles3/storage/light_storage.cpp index 9b976c2206..9b81430d45 100644 --- a/drivers/gles3/storage/light_storage.cpp +++ b/drivers/gles3/storage/light_storage.cpp @@ -213,6 +213,23 @@ void LightStorage::light_set_cull_mask(RID p_light, uint32_t p_mask) { light->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_LIGHT); } +void LightStorage::light_set_shadow_caster_mask(RID p_light, uint32_t p_caster_mask) { + Light *light = light_owner.get_or_null(p_light); + ERR_FAIL_NULL(light); + + light->shadow_caster_mask = p_caster_mask; + + light->version++; + light->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_LIGHT); +} + +uint32_t LightStorage::light_get_shadow_caster_mask(RID p_light) const { + Light *light = light_owner.get_or_null(p_light); + ERR_FAIL_NULL_V(light, 0); + + return light->shadow_caster_mask; +} + void LightStorage::light_set_distance_fade(RID p_light, bool p_enabled, float p_begin, float p_shadow, float p_length) { Light *light = light_owner.get_or_null(p_light); ERR_FAIL_NULL(light); diff --git a/drivers/gles3/storage/light_storage.h b/drivers/gles3/storage/light_storage.h index ed00dd235f..5391e607c3 100644 --- a/drivers/gles3/storage/light_storage.h +++ b/drivers/gles3/storage/light_storage.h @@ -59,6 +59,7 @@ struct Light { RS::LightBakeMode bake_mode = RS::LIGHT_BAKE_DYNAMIC; uint32_t max_sdfgi_cascade = 2; uint32_t cull_mask = 0xFFFFFFFF; + uint32_t shadow_caster_mask = 0xFFFFFFFF; bool distance_fade = false; real_t distance_fade_begin = 40.0; real_t distance_fade_shadow = 50.0; @@ -327,6 +328,8 @@ public: virtual void light_set_cull_mask(RID p_light, uint32_t p_mask) override; virtual void light_set_distance_fade(RID p_light, bool p_enabled, float p_begin, float p_shadow, float p_length) override; virtual void light_set_reverse_cull_face_mode(RID p_light, bool p_enabled) override; + virtual void light_set_shadow_caster_mask(RID p_light, uint32_t p_caster_mask) override; + virtual uint32_t light_get_shadow_caster_mask(RID p_light) const override; virtual void light_set_bake_mode(RID p_light, RS::LightBakeMode p_bake_mode) override; virtual void light_set_max_sdfgi_cascade(RID p_light, uint32_t p_cascade) override {} diff --git a/drivers/gles3/storage/material_storage.cpp b/drivers/gles3/storage/material_storage.cpp index 684f179492..04cbf7f2cd 100644 --- a/drivers/gles3/storage/material_storage.cpp +++ b/drivers/gles3/storage/material_storage.cpp @@ -1237,6 +1237,8 @@ MaterialStorage::MaterialStorage() { actions.renames["PI"] = _MKSTR(Math_PI); actions.renames["TAU"] = _MKSTR(Math_TAU); actions.renames["E"] = _MKSTR(Math_E); + actions.renames["OUTPUT_IS_SRGB"] = "SHADER_IS_SRGB"; + actions.renames["CLIP_SPACE_FAR"] = "SHADER_SPACE_FAR"; actions.renames["VIEWPORT_SIZE"] = "scene_data.viewport_size"; actions.renames["FRAGCOORD"] = "gl_FragCoord"; @@ -1276,8 +1278,6 @@ MaterialStorage::MaterialStorage() { actions.renames["CUSTOM1"] = "custom1_attrib"; actions.renames["CUSTOM2"] = "custom2_attrib"; actions.renames["CUSTOM3"] = "custom3_attrib"; - actions.renames["OUTPUT_IS_SRGB"] = "SHADER_IS_SRGB"; - actions.renames["CLIP_SPACE_FAR"] = "SHADER_SPACE_FAR"; actions.renames["LIGHT_VERTEX"] = "light_vertex"; actions.renames["NODE_POSITION_WORLD"] = "model_matrix[3].xyz"; diff --git a/drivers/gles3/storage/texture_storage.cpp b/drivers/gles3/storage/texture_storage.cpp index d7b4d6911d..5f49a84fe8 100644 --- a/drivers/gles3/storage/texture_storage.cpp +++ b/drivers/gles3/storage/texture_storage.cpp @@ -230,6 +230,32 @@ TextureStorage::TextureStorage() { sdf_shader.shader_version = sdf_shader.shader.version_create(); } + // Initialize texture placeholder data for the `texture_*_placeholder_initialize()` methods. + + constexpr int placeholder_size = 4; + texture_2d_placeholder = Image::create_empty(placeholder_size, placeholder_size, false, Image::FORMAT_RGBA8); + // Draw a magenta/black checkerboard pattern. + for (int i = 0; i < placeholder_size * placeholder_size; i++) { + const int x = i % placeholder_size; + const int y = i / placeholder_size; + texture_2d_placeholder->set_pixel(x, y, (x + y) % 2 == 0 ? Color(1, 0, 1) : Color(0, 0, 0)); + } + + texture_2d_array_placeholder.push_back(texture_2d_placeholder); + + for (int i = 0; i < 6; i++) { + cubemap_placeholder.push_back(texture_2d_placeholder); + } + + Ref<Image> texture_2d_placeholder_rotated; + texture_2d_placeholder_rotated.instantiate(); + texture_2d_placeholder_rotated->copy_from(texture_2d_placeholder); + texture_2d_placeholder_rotated->rotate_90(CLOCKWISE); + for (int i = 0; i < 4; i++) { + // Alternate checkerboard pattern on odd layers (by using a copy that is rotated 90 degrees). + texture_3d_placeholder.push_back(i % 2 == 0 ? texture_2d_placeholder : texture_2d_placeholder_rotated); + } + #ifdef GL_API_ENABLED if (RasterizerGLES3::is_gles_over_gl()) { glEnable(GL_PROGRAM_POINT_SIZE); @@ -1014,46 +1040,19 @@ void TextureStorage::texture_proxy_update(RID p_texture, RID p_proxy_to) { } void TextureStorage::texture_2d_placeholder_initialize(RID p_texture) { - //this could be better optimized to reuse an existing image , done this way - //for now to get it working - Ref<Image> image = Image::create_empty(4, 4, false, Image::FORMAT_RGBA8); - image->fill(Color(1, 0, 1, 1)); - - texture_2d_initialize(p_texture, image); + texture_2d_initialize(p_texture, texture_2d_placeholder); } -void TextureStorage::texture_2d_layered_placeholder_initialize(RID p_texture, RenderingServer::TextureLayeredType p_layered_type) { - //this could be better optimized to reuse an existing image , done this way - //for now to get it working - Ref<Image> image = Image::create_empty(4, 4, false, Image::FORMAT_RGBA8); - image->fill(Color(1, 0, 1, 1)); - - Vector<Ref<Image>> images; +void TextureStorage::texture_2d_layered_placeholder_initialize(RID p_texture, RS::TextureLayeredType p_layered_type) { if (p_layered_type == RS::TEXTURE_LAYERED_2D_ARRAY) { - images.push_back(image); + texture_2d_layered_initialize(p_texture, texture_2d_array_placeholder, p_layered_type); } else { - //cube - for (int i = 0; i < 6; i++) { - images.push_back(image); - } + texture_2d_layered_initialize(p_texture, cubemap_placeholder, p_layered_type); } - - texture_2d_layered_initialize(p_texture, images, p_layered_type); } void TextureStorage::texture_3d_placeholder_initialize(RID p_texture) { - //this could be better optimized to reuse an existing image , done this way - //for now to get it working - Ref<Image> image = Image::create_empty(4, 4, false, Image::FORMAT_RGBA8); - image->fill(Color(1, 0, 1, 1)); - - Vector<Ref<Image>> images; - //cube - for (int i = 0; i < 4; i++) { - images.push_back(image); - } - - texture_3d_initialize(p_texture, Image::FORMAT_RGBA8, 4, 4, 4, false, images); + texture_3d_initialize(p_texture, Image::FORMAT_RGBA8, 4, 4, 4, false, texture_3d_placeholder); } Ref<Image> TextureStorage::texture_2d_get(RID p_texture) const { diff --git a/drivers/gles3/storage/texture_storage.h b/drivers/gles3/storage/texture_storage.h index 3786c8c690..d85d10e235 100644 --- a/drivers/gles3/storage/texture_storage.h +++ b/drivers/gles3/storage/texture_storage.h @@ -36,6 +36,7 @@ #include "platform_gl.h" #include "config.h" +#include "core/io/image.h" #include "core/os/os.h" #include "core/templates/rid_owner.h" #include "servers/rendering/renderer_compositor.h" @@ -521,6 +522,11 @@ public: virtual void texture_external_update(RID p_texture, int p_width, int p_height, uint64_t p_external_buffer) override; virtual void texture_proxy_update(RID p_proxy, RID p_base) override; + Ref<Image> texture_2d_placeholder; + Vector<Ref<Image>> texture_2d_array_placeholder; + Vector<Ref<Image>> cubemap_placeholder; + Vector<Ref<Image>> texture_3d_placeholder; + //these two APIs can be used together or in combination with the others. virtual void texture_2d_placeholder_initialize(RID p_texture) override; virtual void texture_2d_layered_placeholder_initialize(RID p_texture, RenderingServer::TextureLayeredType p_layered_type) override; diff --git a/drivers/metal/metal_objects.h b/drivers/metal/metal_objects.h index 030b353ee8..38d5b53ffa 100644 --- a/drivers/metal/metal_objects.h +++ b/drivers/metal/metal_objects.h @@ -96,6 +96,22 @@ _FORCE_INLINE_ ShaderStageUsage &operator|=(ShaderStageUsage &p_a, int p_b) { return p_a; } +enum StageResourceUsage : uint32_t { + VertexRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_VERTEX * 2), + VertexWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_VERTEX * 2), + FragmentRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_FRAGMENT * 2), + FragmentWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_FRAGMENT * 2), + TesselationControlRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2), + TesselationControlWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2), + TesselationEvaluationRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2), + TesselationEvaluationWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2), + ComputeRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_COMPUTE * 2), + ComputeWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_COMPUTE * 2), +}; + +typedef LocalVector<__unsafe_unretained id<MTLResource>> ResourceVector; +typedef HashMap<StageResourceUsage, ResourceVector> ResourceUsageMap; + enum class MDCommandBufferStateType { None, Render, @@ -230,6 +246,7 @@ public: uint32_t index_offset = 0; LocalVector<id<MTLBuffer> __unsafe_unretained> vertex_buffers; LocalVector<NSUInteger> vertex_offsets; + ResourceUsageMap resource_usage; // clang-format off enum DirtyFlag: uint8_t { DIRTY_NONE = 0b0000'0000, @@ -271,8 +288,14 @@ public: blend_constants.reset(); vertex_buffers.clear(); vertex_offsets.clear(); + // Keep the keys, as they are likely to be used again. + for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) { + kv.value.clear(); + } } + void end_encoding(); + _FORCE_INLINE_ void mark_viewport_dirty() { if (viewports.is_empty()) { return; @@ -356,13 +379,20 @@ public: } render; // State specific for a compute pass. - struct { + struct ComputeState { MDComputePipeline *pipeline = nullptr; id<MTLComputeCommandEncoder> encoder = nil; + ResourceUsageMap resource_usage; _FORCE_INLINE_ void reset() { pipeline = nil; encoder = nil; + // Keep the keys, as they are likely to be used again. + for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) { + kv.value.clear(); + } } + + void end_encoding(); } compute; // State specific to a blit pass. @@ -632,19 +662,6 @@ public: MDRenderShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *p_vert, MDLibrary *p_frag); }; -enum StageResourceUsage : uint32_t { - VertexRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_VERTEX * 2), - VertexWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_VERTEX * 2), - FragmentRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_FRAGMENT * 2), - FragmentWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_FRAGMENT * 2), - TesselationControlRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2), - TesselationControlWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2), - TesselationEvaluationRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2), - TesselationEvaluationWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2), - ComputeRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_COMPUTE * 2), - ComputeWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_COMPUTE * 2), -}; - _FORCE_INLINE_ StageResourceUsage &operator|=(StageResourceUsage &p_a, uint32_t p_b) { p_a = StageResourceUsage(uint32_t(p_a) | p_b); return p_a; @@ -667,7 +684,13 @@ struct HashMapComparatorDefault<RDD::ShaderID> { struct BoundUniformSet { id<MTLBuffer> buffer; - HashMap<id<MTLResource>, StageResourceUsage> bound_resources; + ResourceUsageMap usage_to_resources; + + /// Perform a 2-way merge each key of `ResourceVector` resources from this set into the + /// destination set. + /// + /// Assumes the vectors of resources are sorted. + void merge_into(ResourceUsageMap &p_dst) const; }; class API_AVAILABLE(macos(11.0), ios(14.0)) MDUniformSet { diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm index 596728212a..c3906af159 100644 --- a/drivers/metal/metal_objects.mm +++ b/drivers/metal/metal_objects.mm @@ -58,7 +58,7 @@ void MDCommandBuffer::begin() { DEV_ASSERT(commandBuffer == nil); - commandBuffer = queue.commandBuffer; + commandBuffer = queue.commandBufferWithUnretainedReferences; } void MDCommandBuffer::end() { @@ -390,6 +390,38 @@ void MDCommandBuffer::render_set_blend_constants(const Color &p_constants) { } } +void BoundUniformSet::merge_into(ResourceUsageMap &p_dst) const { + for (KeyValue<StageResourceUsage, ResourceVector> const &keyval : usage_to_resources) { + ResourceVector *resources = p_dst.getptr(keyval.key); + if (resources == nullptr) { + resources = &p_dst.insert(keyval.key, ResourceVector())->value; + } + // Reserve space for the new resources, assuming they are all added. + resources->reserve(resources->size() + keyval.value.size()); + + uint32_t i = 0, j = 0; + __unsafe_unretained id<MTLResource> *resources_ptr = resources->ptr(); + const __unsafe_unretained id<MTLResource> *keyval_ptr = keyval.value.ptr(); + // 2-way merge. + while (i < resources->size() && j < keyval.value.size()) { + if (resources_ptr[i] < keyval_ptr[j]) { + i++; + } else if (resources_ptr[i] > keyval_ptr[j]) { + resources->insert(i, keyval_ptr[j]); + i++; + j++; + } else { + i++; + j++; + } + } + // Append the remaining resources. + for (; j < keyval.value.size(); j++) { + resources->push_back(keyval_ptr[j]); + } + } +} + void MDCommandBuffer::_render_bind_uniform_sets() { DEV_ASSERT(type == MDCommandBufferStateType::Render); if (!render.dirty.has_flag(RenderState::DIRTY_UNIFORMS)) { @@ -408,7 +440,7 @@ void MDCommandBuffer::_render_bind_uniform_sets() { // Find the index of the next set bit. int index = __builtin_ctzll(set_uniforms); // Clear the set bit. - set_uniforms &= ~(1ULL << index); + set_uniforms &= (set_uniforms - 1); MDUniformSet *set = render.uniform_sets[index]; if (set == nullptr || set->index >= (uint32_t)shader->sets.size()) { continue; @@ -416,17 +448,7 @@ void MDCommandBuffer::_render_bind_uniform_sets() { UniformSet const &set_info = shader->sets[set->index]; BoundUniformSet &bus = set->boundUniformSetForShader(shader, device); - - for (KeyValue<id<MTLResource>, StageResourceUsage> const &keyval : bus.bound_resources) { - MTLResourceUsage usage = resource_usage_for_stage(keyval.value, RDD::ShaderStage::SHADER_STAGE_VERTEX); - if (usage != 0) { - [enc useResource:keyval.key usage:usage stages:MTLRenderStageVertex]; - } - usage = resource_usage_for_stage(keyval.value, RDD::ShaderStage::SHADER_STAGE_FRAGMENT); - if (usage != 0) { - [enc useResource:keyval.key usage:usage stages:MTLRenderStageFragment]; - } - } + bus.merge_into(render.resource_usage); // Set the buffer for the vertex stage. { @@ -545,8 +567,7 @@ void MDCommandBuffer::_end_render_pass() { // see: https://github.com/KhronosGroup/MoltenVK/blob/d20d13fe2735adb845636a81522df1b9d89c0fba/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm#L407 } - [render.encoder endEncoding]; - render.encoder = nil; + render.end_encoding(); } void MDCommandBuffer::_render_clear_render_area() { @@ -792,10 +813,59 @@ void MDCommandBuffer::render_draw_indirect_count(RDD::BufferID p_indirect_buffer ERR_FAIL_MSG("not implemented"); } +void MDCommandBuffer::RenderState::end_encoding() { + if (encoder == nil) { + return; + } + + // Bind all resources. + for (KeyValue<StageResourceUsage, ResourceVector> const &keyval : resource_usage) { + if (keyval.value.is_empty()) { + continue; + } + + MTLResourceUsage vert_usage = resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_VERTEX); + MTLResourceUsage frag_usage = resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_FRAGMENT); + if (vert_usage == frag_usage) { + [encoder useResources:keyval.value.ptr() count:keyval.value.size() usage:vert_usage stages:MTLRenderStageVertex | MTLRenderStageFragment]; + } else { + if (vert_usage != 0) { + [encoder useResources:keyval.value.ptr() count:keyval.value.size() usage:vert_usage stages:MTLRenderStageVertex]; + } + if (frag_usage != 0) { + [encoder useResources:keyval.value.ptr() count:keyval.value.size() usage:frag_usage stages:MTLRenderStageFragment]; + } + } + } + + [encoder endEncoding]; + encoder = nil; +} + +void MDCommandBuffer::ComputeState::end_encoding() { + if (encoder == nil) { + return; + } + + // Bind all resources. + for (KeyValue<StageResourceUsage, ResourceVector> const &keyval : resource_usage) { + if (keyval.value.is_empty()) { + continue; + } + MTLResourceUsage usage = resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_COMPUTE); + if (usage != 0) { + [encoder useResources:keyval.value.ptr() count:keyval.value.size() usage:usage]; + } + } + + [encoder endEncoding]; + encoder = nil; +} + void MDCommandBuffer::render_end_pass() { DEV_ASSERT(type == MDCommandBufferStateType::Render); - [render.encoder endEncoding]; + render.end_encoding(); render.reset(); type = MDCommandBufferStateType::None; } @@ -813,13 +883,7 @@ void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id); BoundUniformSet &bus = set->boundUniformSetForShader(shader, device); - - for (KeyValue<id<MTLResource>, StageResourceUsage> &keyval : bus.bound_resources) { - MTLResourceUsage usage = resource_usage_for_stage(keyval.value, RDD::ShaderStage::SHADER_STAGE_COMPUTE); - if (usage != 0) { - [enc useResource:keyval.key usage:usage]; - } - } + bus.merge_into(compute.resource_usage); uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE); if (offset) { @@ -848,7 +912,7 @@ void MDCommandBuffer::compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, void MDCommandBuffer::_end_compute_dispatch() { DEV_ASSERT(type == MDCommandBufferStateType::Compute); - [compute.encoder endEncoding]; + compute.end_encoding(); compute.reset(); type = MDCommandBufferStateType::None; } @@ -1052,7 +1116,20 @@ BoundUniformSet &MDUniformSet::boundUniformSetForShader(MDShader *p_shader, id<M } } - BoundUniformSet bs = { .buffer = enc_buffer, .bound_resources = bound_resources }; + SearchArray<__unsafe_unretained id<MTLResource>> search; + ResourceUsageMap usage_to_resources; + for (KeyValue<id<MTLResource>, StageResourceUsage> const &keyval : bound_resources) { + ResourceVector *resources = usage_to_resources.getptr(keyval.value); + if (resources == nullptr) { + resources = &usage_to_resources.insert(keyval.value, ResourceVector())->value; + } + int64_t pos = search.bisect(resources->ptr(), resources->size(), keyval.key, true); + if (pos == resources->size() || (*resources)[pos] != keyval.key) { + resources->insert(pos, keyval.key); + } + } + + BoundUniformSet bs = { .buffer = enc_buffer, .usage_to_resources = usage_to_resources }; bound_uniforms.insert(p_shader, bs); return bound_uniforms.get(p_shader); } @@ -1211,8 +1288,7 @@ vertex VaryingsPos vertClear(AttributesPos attributes [[stage_in]], constant Cle varyings.layer = uint(attributes.a_position.w); return varyings; } -)", - ClearAttKey::DEPTH_INDEX]; +)", ClearAttKey::DEPTH_INDEX]; return new_func(msl, @"vertClear", nil); } diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index a4a408356a..4da11ecd21 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -2060,6 +2060,10 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec case BT::Sampler: { primary.dataType = MTLDataTypeSampler; + primary.arrayLength = 1; + for (uint32_t const &a : a_type.array) { + primary.arrayLength *= a; + } } break; default: { @@ -2067,7 +2071,7 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec } break; } - // Find array length. + // Find array length of image. if (basetype == BT::Image || basetype == BT::SampledImage) { primary.arrayLength = 1; for (uint32_t const &a : a_type.array) { diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index d20f396281..32086515da 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -266,6 +266,7 @@ static const VkFormat RD_TO_VK_FORMAT[RDD::DATA_FORMAT_MAX] = { static VkImageLayout RD_TO_VK_LAYOUT[RDD::TEXTURE_LAYOUT_MAX] = { VK_IMAGE_LAYOUT_UNDEFINED, // TEXTURE_LAYOUT_UNDEFINED + VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_GENERAL VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_STORAGE_OPTIMAL VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL @@ -2636,11 +2637,13 @@ bool RenderingDeviceDriverVulkan::command_buffer_begin(CommandBufferID p_cmd_buf bool RenderingDeviceDriverVulkan::command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) { // Reset is implicit (VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT). + Framebuffer *framebuffer = (Framebuffer *)(p_framebuffer.id); + VkCommandBufferInheritanceInfo inheritance_info = {}; inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; inheritance_info.renderPass = (VkRenderPass)p_render_pass.id; inheritance_info.subpass = p_subpass; - inheritance_info.framebuffer = (VkFramebuffer)p_framebuffer.id; + inheritance_info.framebuffer = framebuffer->vk_framebuffer; VkCommandBufferBeginInfo cmd_buf_begin_info = {}; cmd_buf_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; @@ -2950,12 +2953,16 @@ Error RenderingDeviceDriverVulkan::swap_chain_resize(CommandQueueID p_cmd_queue, fb_create_info.height = surface->height; fb_create_info.layers = 1; - VkFramebuffer framebuffer; + VkFramebuffer vk_framebuffer; for (uint32_t i = 0; i < image_count; i++) { fb_create_info.pAttachments = &swap_chain->image_views[i]; - err = vkCreateFramebuffer(vk_device, &fb_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER), &framebuffer); + err = vkCreateFramebuffer(vk_device, &fb_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER), &vk_framebuffer); ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + Framebuffer *framebuffer = memnew(Framebuffer); + framebuffer->vk_framebuffer = vk_framebuffer; + framebuffer->swap_chain_image = swap_chain->images[i]; + framebuffer->swap_chain_image_subresource_range = view_create_info.subresourceRange; swap_chain->framebuffers.push_back(RDD::FramebufferID(framebuffer)); } @@ -3024,7 +3031,10 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::swap_chain_acquire_framebuffer(C command_queue->pending_semaphores_for_fence.push_back(semaphore_index); // Return the corresponding framebuffer to the new current image. - return swap_chain->framebuffers[swap_chain->image_index]; + FramebufferID framebuffer_id = swap_chain->framebuffers[swap_chain->image_index]; + Framebuffer *framebuffer = (Framebuffer *)(framebuffer_id.id); + framebuffer->swap_chain_acquired = true; + return framebuffer_id; } RDD::RenderPassID RenderingDeviceDriverVulkan::swap_chain_get_render_pass(SwapChainID p_swap_chain) { @@ -3093,11 +3103,15 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID } #endif - return FramebufferID(vk_framebuffer); + Framebuffer *framebuffer = memnew(Framebuffer); + framebuffer->vk_framebuffer = vk_framebuffer; + return FramebufferID(framebuffer); } void RenderingDeviceDriverVulkan::framebuffer_free(FramebufferID p_framebuffer) { - vkDestroyFramebuffer(vk_device, (VkFramebuffer)p_framebuffer.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER)); + Framebuffer *framebuffer = (Framebuffer *)(p_framebuffer.id); + vkDestroyFramebuffer(vk_device, framebuffer->vk_framebuffer, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER)); + memdelete(framebuffer); } /****************/ @@ -4315,10 +4329,25 @@ void RenderingDeviceDriverVulkan::render_pass_free(RenderPassID p_render_pass) { static_assert(ARRAYS_COMPATIBLE_FIELDWISE(RDD::RenderPassClearValue, VkClearValue)); void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RenderPassClearValue> p_clear_values) { + Framebuffer *framebuffer = (Framebuffer *)(p_framebuffer.id); + if (framebuffer->swap_chain_acquired) { + // Insert a barrier to wait for the acquisition of the framebuffer before the render pass begins. + VkImageMemoryBarrier image_barrier = {}; + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + image_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.image = framebuffer->swap_chain_image; + image_barrier.subresourceRange = framebuffer->swap_chain_image_subresource_range; + vkCmdPipelineBarrier((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_barrier); + framebuffer->swap_chain_acquired = false; + } + VkRenderPassBeginInfo render_pass_begin = {}; render_pass_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; render_pass_begin.renderPass = (VkRenderPass)p_render_pass.id; - render_pass_begin.framebuffer = (VkFramebuffer)p_framebuffer.id; + render_pass_begin.framebuffer = framebuffer->vk_framebuffer; render_pass_begin.renderArea.offset.x = p_rect.position.x; render_pass_begin.renderArea.offset.y = p_rect.position.y; diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 58f7a97ec0..4d5de897cd 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -366,6 +366,15 @@ public: /**** FRAMEBUFFER ****/ /*********************/ + struct Framebuffer { + VkFramebuffer vk_framebuffer = VK_NULL_HANDLE; + + // Only filled in by a framebuffer created by a swap chain. Unused otherwise. + VkImage swap_chain_image = VK_NULL_HANDLE; + VkImageSubresourceRange swap_chain_image_subresource_range = {}; + bool swap_chain_acquired = false; + }; + virtual FramebufferID framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height) override final; virtual void framebuffer_free(FramebufferID p_framebuffer) override final; diff --git a/editor/add_metadata_dialog.cpp b/editor/add_metadata_dialog.cpp index 0a070e37b6..66a7b820f5 100644 --- a/editor/add_metadata_dialog.cpp +++ b/editor/add_metadata_dialog.cpp @@ -64,7 +64,6 @@ AddMetadataDialog::AddMetadataDialog() { } void AddMetadataDialog::_complete_init(const StringName &p_title) { - add_meta_name->grab_focus(); add_meta_name->set_text(""); validation_panel->update(); @@ -90,6 +89,7 @@ void AddMetadataDialog::open(const StringName p_title, List<StringName> &p_exist this->_existing_metas = p_existing_metas; _complete_init(p_title); popup_centered(); + add_meta_name->grab_focus(); } StringName AddMetadataDialog::get_meta_name() { diff --git a/editor/animation_bezier_editor.cpp b/editor/animation_bezier_editor.cpp index 2e8c727849..8d7c6a1f16 100644 --- a/editor/animation_bezier_editor.cpp +++ b/editor/animation_bezier_editor.cpp @@ -1501,11 +1501,6 @@ void AnimationBezierTrackEdit::gui_input(const Ref<InputEvent> &p_event) { } box_selection_to = mm->get_position(); - - if (get_local_mouse_position().y < 0) { - // Avoid cursor from going too above, so it does not lose focus with viewport. - warp_mouse(Vector2(get_local_mouse_position().x, 0)); - } queue_redraw(); } diff --git a/editor/animation_track_editor.cpp b/editor/animation_track_editor.cpp index f8d35f2112..741d127ea2 100644 --- a/editor/animation_track_editor.cpp +++ b/editor/animation_track_editor.cpp @@ -63,7 +63,7 @@ constexpr double FPS_DECIMAL = 1.0; constexpr double SECOND_DECIMAL = 0.0001; -constexpr double FACTOR = 0.0625; +constexpr double FPS_STEP_FRACTION = 0.0625; void AnimationTrackKeyEdit::_bind_methods() { ClassDB::bind_method(D_METHOD("_update_obj"), &AnimationTrackKeyEdit::_update_obj); @@ -5028,6 +5028,8 @@ void AnimationTrackEditor::_snap_mode_changed(int p_mode) { key_edit->set_use_fps(use_fps); } marker_edit->set_use_fps(use_fps); + // To ensure that the conversion results are consistent between serialization and load, the value is snapped with 0.0625 to be a rational number when FPS mode is used. + step->set_step(use_fps ? FPS_STEP_FRACTION : SECOND_DECIMAL); _update_step_spinbox(); } @@ -5155,12 +5157,12 @@ void AnimationTrackEditor::_update_step(double p_new_step) { EditorUndoRedoManager *undo_redo = EditorUndoRedoManager::get_singleton(); undo_redo->create_action(TTR("Change Animation Step")); - // To ensure that the conversion results are consistent between serialization and load, the value is snapped with 0.0625 to be a rational number. - // step_value must also be less than or equal to 1000 to ensure that no error accumulates due to interactions with retrieving values from inner range. - double step_value = MIN(1000.0, Math::snapped(p_new_step, FACTOR)); + double step_value = p_new_step; if (timeline->is_using_fps()) { if (step_value != 0.0) { - step_value = 1.0 / step_value; + // step_value must also be less than or equal to 1000 to ensure that no error accumulates due to interactions with retrieving values from inner range. + step_value = 1.0 / MIN(1000.0, p_new_step); + ; } timeline->queue_redraw(); } @@ -8761,7 +8763,7 @@ void AnimationMarkerEdit::_move_selection_commit() { void AnimationMarkerEdit::_delete_selected_markers() { if (selection.size()) { EditorUndoRedoManager *undo_redo = EditorUndoRedoManager::get_singleton(); - undo_redo->create_action(TTR("Animation Delete Keys")); + undo_redo->create_action(TTR("Animation Delete Markers")); for (const StringName &name : selection) { double time = animation->get_marker_time(name); undo_redo->add_do_method(animation.ptr(), "remove_marker", name); @@ -8965,7 +8967,7 @@ AnimationMarkerEdit::AnimationMarkerEdit() { add_child(menu); menu->connect(SceneStringName(id_pressed), callable_mp(this, &AnimationMarkerEdit::_menu_selected)); menu->add_shortcut(ED_SHORTCUT("animation_marker_edit/rename_marker", TTR("Rename Marker"), Key::R), MENU_KEY_RENAME); - menu->add_shortcut(ED_SHORTCUT("animation_marker_edit/delete_selection", TTR("Delete Markers (s)"), Key::KEY_DELETE), MENU_KEY_DELETE); + menu->add_shortcut(ED_SHORTCUT("animation_marker_edit/delete_selection", TTR("Delete Marker(s)"), Key::KEY_DELETE), MENU_KEY_DELETE); menu->add_shortcut(ED_SHORTCUT("animation_marker_edit/toggle_marker_names", TTR("Show All Marker Names"), Key::M), MENU_KEY_TOGGLE_MARKER_NAMES); marker_insert_confirm = memnew(ConfirmationDialog); diff --git a/editor/debugger/editor_debugger_tree.cpp b/editor/debugger/editor_debugger_tree.cpp index c4d7899b2d..a900842651 100644 --- a/editor/debugger/editor_debugger_tree.cpp +++ b/editor/debugger/editor_debugger_tree.cpp @@ -277,11 +277,14 @@ Variant EditorDebuggerTree::get_drag_data(const Point2 &p_point) { } String path = selected->get_text(0); + const int icon_size = get_theme_constant(SNAME("class_icon_size"), EditorStringName(Editor)); HBoxContainer *hb = memnew(HBoxContainer); TextureRect *tf = memnew(TextureRect); tf->set_texture(selected->get_icon(0)); - tf->set_stretch_mode(TextureRect::STRETCH_KEEP_CENTERED); + tf->set_custom_minimum_size(Size2(icon_size, icon_size)); + tf->set_stretch_mode(TextureRect::STRETCH_KEEP_ASPECT_CENTERED); + tf->set_expand_mode(TextureRect::EXPAND_IGNORE_SIZE); hb->add_child(tf); Label *label = memnew(Label(path)); hb->add_child(label); diff --git a/editor/debugger/editor_profiler.cpp b/editor/debugger/editor_profiler.cpp index d244b6b4cd..8b253f36e4 100644 --- a/editor/debugger/editor_profiler.cpp +++ b/editor/debugger/editor_profiler.cpp @@ -30,6 +30,7 @@ #include "editor_profiler.h" +#include "core/io/image.h" #include "core/os/os.h" #include "editor/editor_settings.h" #include "editor/editor_string_names.h" diff --git a/editor/debugger/editor_visual_profiler.cpp b/editor/debugger/editor_visual_profiler.cpp index d4859fbe4d..b949df4518 100644 --- a/editor/debugger/editor_visual_profiler.cpp +++ b/editor/debugger/editor_visual_profiler.cpp @@ -30,6 +30,7 @@ #include "editor_visual_profiler.h" +#include "core/io/image.h" #include "core/os/os.h" #include "editor/editor_settings.h" #include "editor/editor_string_names.h" @@ -437,11 +438,7 @@ void EditorVisualProfiler::_notification(int p_what) { case NOTIFICATION_LAYOUT_DIRECTION_CHANGED: case NOTIFICATION_THEME_CHANGED: case NOTIFICATION_TRANSLATION_CHANGED: { - if (is_layout_rtl()) { - activate->set_icon(get_editor_theme_icon(SNAME("PlayBackwards"))); - } else { - activate->set_icon(get_editor_theme_icon(SNAME("Play"))); - } + activate->set_icon(get_editor_theme_icon(SNAME("Play"))); clear_button->set_icon(get_editor_theme_icon(SNAME("Clear"))); } break; } diff --git a/editor/editor_data.cpp b/editor/editor_data.cpp index ee16c61c89..bb02172b1a 100644 --- a/editor/editor_data.cpp +++ b/editor/editor_data.cpp @@ -547,6 +547,7 @@ Variant EditorData::instantiate_custom_type(const String &p_type, const String & if (n) { n->set_name(p_type); } + n->set_meta(SceneStringName(_custom_type_script), script); ((Object *)ob)->set_script(script); return ob; } @@ -1008,6 +1009,7 @@ Variant EditorData::script_class_instance(const String &p_class) { // Store in a variant to initialize the refcount if needed. Variant obj = ClassDB::instantiate(script->get_instance_base_type()); if (obj) { + Object::cast_to<Object>(obj)->set_meta(SceneStringName(_custom_type_script), script); obj.operator Object *()->set_script(script); } return obj; diff --git a/editor/editor_file_system.cpp b/editor/editor_file_system.cpp index 2d1a914120..c6ed310a9a 100644 --- a/editor/editor_file_system.cpp +++ b/editor/editor_file_system.cpp @@ -248,11 +248,16 @@ void EditorFileSystem::_first_scan_filesystem() { ep.step(TTR("Scanning file structure..."), 0, true); nb_files_total = _scan_new_dir(first_scan_root_dir, d); + // Preloading GDExtensions file extensions to prevent looping on all the resource loaders + // for each files in _first_scan_process_scripts. + List<String> gdextension_extensions; + ResourceLoader::get_recognized_extensions_for_type("GDExtension", &gdextension_extensions); + // This loads the global class names from the scripts and ensures that even if the // global_script_class_cache.cfg was missing or invalid, the global class names are valid in ScriptServer. // At the same time, to prevent looping multiple times in all files, it looks for extensions. ep.step(TTR("Loading global class names..."), 1, true); - _first_scan_process_scripts(first_scan_root_dir, existing_class_names, extensions); + _first_scan_process_scripts(first_scan_root_dir, gdextension_extensions, existing_class_names, extensions); // Removing invalid global class to prevent having invalid paths in ScriptServer. _remove_invalid_global_class_names(existing_class_names); @@ -276,16 +281,16 @@ void EditorFileSystem::_first_scan_filesystem() { ep.step(TTR("Starting file scan..."), 5, true); } -void EditorFileSystem::_first_scan_process_scripts(const ScannedDirectory *p_scan_dir, HashSet<String> &p_existing_class_names, HashSet<String> &p_extensions) { +void EditorFileSystem::_first_scan_process_scripts(const ScannedDirectory *p_scan_dir, List<String> &p_gdextension_extensions, HashSet<String> &p_existing_class_names, HashSet<String> &p_extensions) { for (ScannedDirectory *scan_sub_dir : p_scan_dir->subdirs) { - _first_scan_process_scripts(scan_sub_dir, p_existing_class_names, p_extensions); + _first_scan_process_scripts(scan_sub_dir, p_gdextension_extensions, p_existing_class_names, p_extensions); } for (const String &scan_file : p_scan_dir->files) { // Optimization to skip the ResourceLoader::get_resource_type for files // that are not scripts. Some loader get_resource_type methods read the file // which can be very slow on large projects. - String ext = scan_file.get_extension().to_lower(); + const String ext = scan_file.get_extension().to_lower(); bool is_script = false; for (int i = 0; i < ScriptServer::get_language_count(); i++) { if (ScriptServer::get_language(i)->get_extension() == ext) { @@ -293,24 +298,29 @@ void EditorFileSystem::_first_scan_process_scripts(const ScannedDirectory *p_sca break; } } - if (!is_script) { - continue; // Not a script. - } + if (is_script) { + const String path = p_scan_dir->full_path.path_join(scan_file); + const String type = ResourceLoader::get_resource_type(path); - String path = p_scan_dir->full_path.path_join(scan_file); - String type = ResourceLoader::get_resource_type(path); + if (ClassDB::is_parent_class(type, SNAME("Script"))) { + String script_class_extends; + String script_class_icon_path; + String script_class_name = _get_global_script_class(type, path, &script_class_extends, &script_class_icon_path); + _register_global_class_script(path, path, type, script_class_name, script_class_extends, script_class_icon_path); - if (ClassDB::is_parent_class(type, SNAME("Script"))) { - String script_class_extends; - String script_class_icon_path; - String script_class_name = _get_global_script_class(type, path, &script_class_extends, &script_class_icon_path); - _register_global_class_script(path, path, type, script_class_name, script_class_extends, script_class_icon_path); + if (!script_class_name.is_empty()) { + p_existing_class_names.insert(script_class_name); + } + } + } - if (!script_class_name.is_empty()) { - p_existing_class_names.insert(script_class_name); + // Check for GDExtensions. + if (p_gdextension_extensions.find(ext)) { + const String path = p_scan_dir->full_path.path_join(scan_file); + const String type = ResourceLoader::get_resource_type(path); + if (type == SNAME("GDExtension")) { + p_extensions.insert(path); } - } else if (type == SNAME("GDExtension")) { - p_extensions.insert(path); } } } diff --git a/editor/editor_file_system.h b/editor/editor_file_system.h index 7aa0137f4e..7120a68b39 100644 --- a/editor/editor_file_system.h +++ b/editor/editor_file_system.h @@ -191,7 +191,7 @@ class EditorFileSystem : public Node { void _scan_filesystem(); void _first_scan_filesystem(); - void _first_scan_process_scripts(const ScannedDirectory *p_scan_dir, HashSet<String> &p_existing_class_names, HashSet<String> &p_extensions); + void _first_scan_process_scripts(const ScannedDirectory *p_scan_dir, List<String> &p_gdextension_extensions, HashSet<String> &p_existing_class_names, HashSet<String> &p_extensions); HashSet<String> late_update_files; diff --git a/editor/editor_node.cpp b/editor/editor_node.cpp index d88fb134f1..c69f443a22 100644 --- a/editor/editor_node.cpp +++ b/editor/editor_node.cpp @@ -35,6 +35,7 @@ #include "core/input/input.h" #include "core/io/config_file.h" #include "core/io/file_access.h" +#include "core/io/image.h" #include "core/io/resource_loader.h" #include "core/io/resource_saver.h" #include "core/object/class_db.h" @@ -3378,6 +3379,8 @@ void EditorNode::unload_editor_addons() { remove_editor_plugin(E.value, false); memdelete(E.value); } + + addon_name_to_plugin.clear(); } void EditorNode::_discard_changes(const String &p_str) { @@ -4672,6 +4675,11 @@ void EditorNode::stop_child_process(OS::ProcessID p_pid) { Ref<Script> EditorNode::get_object_custom_type_base(const Object *p_object) const { ERR_FAIL_NULL_V(p_object, nullptr); + const Node *node = Object::cast_to<const Node>(p_object); + if (node && node->has_meta(SceneStringName(_custom_type_script))) { + return node->get_meta(SceneStringName(_custom_type_script)); + } + Ref<Script> scr = p_object->get_script(); if (scr.is_valid()) { @@ -4771,7 +4779,13 @@ Ref<Texture2D> EditorNode::_get_class_or_script_icon(const String &p_class, cons // Look for the native base type in the editor theme. This is relevant for // scripts extending other scripts and for built-in classes. String script_class_name = p_script->get_language()->get_global_class_name(p_script->get_path()); - String base_type = ScriptServer::get_global_class_native_base(script_class_name); + String base_type; + if (script_class_name.is_empty()) { + base_type = p_script->get_instance_base_type(); + } else { + base_type = ScriptServer::get_global_class_native_base(script_class_name); + } + if (theme.is_valid() && theme->has_icon(base_type, EditorStringName(EditorIcons))) { return theme->get_icon(base_type, EditorStringName(EditorIcons)); } @@ -4836,6 +4850,8 @@ Ref<Texture2D> EditorNode::get_class_icon(const String &p_class, const String &p Ref<Script> scr; if (ScriptServer::is_global_class(p_class)) { scr = EditorNode::get_editor_data().script_class_load_script(p_class); + } else if (ResourceLoader::exists(p_class)) { // If the script is not a class_name we check if the script resource exists. + scr = ResourceLoader::load(p_class); } return _get_class_or_script_icon(p_class, scr, p_fallback, true); @@ -6737,10 +6753,6 @@ EditorNode::EditorNode() { ED_SHORTCUT("editor/group_selected_nodes", TTR("Group Selected Node(s)"), KeyModifierMask::CMD_OR_CTRL | Key::G); ED_SHORTCUT("editor/ungroup_selected_nodes", TTR("Ungroup Selected Node(s)"), KeyModifierMask::CMD_OR_CTRL | KeyModifierMask::SHIFT | Key::G); - // Used in the GPUParticles/CPUParticles 2D/3D editor plugins. - // The shortcut is Ctrl + R even on macOS, as Cmd + R is used to run the current scene on macOS. - ED_SHORTCUT("particles/restart_emission", TTR("Restart Emission"), KeyModifierMask::CTRL | Key::R); - FileAccess::set_backup_save(EDITOR_GET("filesystem/on_save/safe_save_on_backup_then_rename")); _update_vsync_mode(); diff --git a/editor/editor_properties.cpp b/editor/editor_properties.cpp index c5a35e466c..2e2a18b9d6 100644 --- a/editor/editor_properties.cpp +++ b/editor/editor_properties.cpp @@ -3221,6 +3221,7 @@ void EditorPropertyResource::setup(Object *p_object, const String &p_path, const } resource_picker->set_base_type(p_base_type); + resource_picker->set_resource_owner(p_object); resource_picker->set_editable(true); resource_picker->set_h_size_flags(SIZE_EXPAND_FILL); add_child(resource_picker); diff --git a/editor/editor_resource_picker.cpp b/editor/editor_resource_picker.cpp index 0f0287718c..025d019f45 100644 --- a/editor/editor_resource_picker.cpp +++ b/editor/editor_resource_picker.cpp @@ -224,7 +224,9 @@ void EditorResourcePicker::_update_menu_items() { } if (is_editable()) { - edit_menu->add_icon_item(get_editor_theme_icon(SNAME("Clear")), TTR("Clear"), OBJ_MENU_CLEAR); + if (!_is_custom_type_script()) { + edit_menu->add_icon_item(get_editor_theme_icon(SNAME("Clear")), TTR("Clear"), OBJ_MENU_CLEAR); + } edit_menu->add_icon_item(get_editor_theme_icon(SNAME("Duplicate")), TTR("Make Unique"), OBJ_MENU_MAKE_UNIQUE); // Check whether the resource has subresources. @@ -694,6 +696,16 @@ bool EditorResourcePicker::_is_type_valid(const String &p_type_name, const HashS return false; } +bool EditorResourcePicker::_is_custom_type_script() const { + Ref<Script> resource_as_script = edited_resource; + + if (resource_as_script.is_valid() && resource_owner && resource_owner->has_meta(SceneStringName(_custom_type_script))) { + return true; + } + + return false; +} + Variant EditorResourcePicker::get_drag_data_fw(const Point2 &p_point, Control *p_from) { if (edited_resource.is_valid()) { Dictionary drag_data = EditorNode::get_singleton()->drag_resource(edited_resource, p_from); @@ -953,6 +965,10 @@ bool EditorResourcePicker::is_toggle_pressed() const { return assign_button->is_pressed(); } +void EditorResourcePicker::set_resource_owner(Object *p_object) { + resource_owner = p_object; +} + void EditorResourcePicker::set_editable(bool p_editable) { editable = p_editable; assign_button->set_disabled(!editable && !edited_resource.is_valid()); @@ -1098,7 +1114,10 @@ void EditorScriptPicker::set_create_options(Object *p_menu_node) { return; } - menu_node->add_icon_item(get_editor_theme_icon(SNAME("ScriptCreate")), TTR("New Script..."), OBJ_MENU_NEW_SCRIPT); + if (!(script_owner && script_owner->has_meta(SceneStringName(_custom_type_script)))) { + menu_node->add_icon_item(get_editor_theme_icon(SNAME("ScriptCreate")), TTR("New Script..."), OBJ_MENU_NEW_SCRIPT); + } + if (script_owner) { Ref<Script> scr = script_owner->get_script(); if (scr.is_valid()) { diff --git a/editor/editor_resource_picker.h b/editor/editor_resource_picker.h index 0a32dea3ed..8fb774a2cb 100644 --- a/editor/editor_resource_picker.h +++ b/editor/editor_resource_picker.h @@ -81,6 +81,8 @@ class EditorResourcePicker : public HBoxContainer { CONVERT_BASE_ID = 1000, }; + Object *resource_owner = nullptr; + PopupMenu *edit_menu = nullptr; void _update_resource_preview(const String &p_path, const Ref<Texture2D> &p_preview, const Ref<Texture2D> &p_small_preview, ObjectID p_obj); @@ -102,6 +104,7 @@ class EditorResourcePicker : public HBoxContainer { void _ensure_allowed_types() const; bool _is_drop_valid(const Dictionary &p_drag_data) const; bool _is_type_valid(const String &p_type_name, const HashSet<StringName> &p_allowed_types) const; + bool _is_custom_type_script() const; Variant get_drag_data_fw(const Point2 &p_point, Control *p_from); bool can_drop_data_fw(const Point2 &p_point, const Variant &p_data, Control *p_from) const; @@ -137,6 +140,8 @@ public: void set_toggle_pressed(bool p_pressed); bool is_toggle_pressed() const; + void set_resource_owner(Object *p_object); + void set_editable(bool p_editable); bool is_editable() const; diff --git a/editor/event_listener_line_edit.cpp b/editor/event_listener_line_edit.cpp index a6b30233fc..8fde728027 100644 --- a/editor/event_listener_line_edit.cpp +++ b/editor/event_listener_line_edit.cpp @@ -121,7 +121,7 @@ String EventListenerLineEdit::get_event_text(const Ref<InputEvent> &p_event, boo } String EventListenerLineEdit::get_device_string(int p_device) { - if (p_device == InputMap::ALL_DEVICES) { + if (p_device == InputEvent::DEVICE_ID_ALL_DEVICES) { return TTR("All Devices"); } return TTR("Device") + " " + itos(p_device); diff --git a/editor/export/editor_export_platform.cpp b/editor/export/editor_export_platform.cpp index 58737c53ed..50fa49dc52 100644 --- a/editor/export/editor_export_platform.cpp +++ b/editor/export/editor_export_platform.cpp @@ -326,11 +326,7 @@ Error EditorExportPlatform::_save_zip_patch_file(void *p_userdata, const String Ref<ImageTexture> EditorExportPlatform::get_option_icon(int p_index) const { Ref<Theme> theme = EditorNode::get_singleton()->get_editor_theme(); ERR_FAIL_COND_V(theme.is_null(), Ref<ImageTexture>()); - if (EditorNode::get_singleton()->get_gui_base()->is_layout_rtl()) { - return theme->get_icon(SNAME("PlayBackwards"), EditorStringName(EditorIcons)); - } else { - return theme->get_icon(SNAME("Play"), EditorStringName(EditorIcons)); - } + return theme->get_icon(SNAME("Play"), EditorStringName(EditorIcons)); } String EditorExportPlatform::find_export_template(const String &template_file_name, String *err) const { diff --git a/editor/filesystem_dock.cpp b/editor/filesystem_dock.cpp index 53982b37b9..afe191eb3e 100644 --- a/editor/filesystem_dock.cpp +++ b/editor/filesystem_dock.cpp @@ -3431,7 +3431,7 @@ void FileSystemDock::_file_and_folders_fill_popup(PopupMenu *p_popup, const Vect const bool is_directory = fpath.ends_with("/"); p_popup->add_icon_shortcut(get_editor_theme_icon(SNAME("Terminal")), ED_GET_SHORTCUT("filesystem_dock/open_in_terminal"), FILE_OPEN_IN_TERMINAL); - p_popup->set_item_text(p_popup->get_item_index(FILE_OPEN_IN_TERMINAL), is_directory ? TTR("Open in Terminal") : TTR("Open Containing Folder in Terminal")); + p_popup->set_item_text(p_popup->get_item_index(FILE_OPEN_IN_TERMINAL), is_directory ? TTR("Open in Terminal") : TTR("Open Folder in Terminal")); if (!is_directory) { p_popup->add_icon_shortcut(get_editor_theme_icon(SNAME("ExternalLink")), ED_GET_SHORTCUT("filesystem_dock/open_in_external_program"), FILE_OPEN_EXTERNAL); @@ -4060,17 +4060,17 @@ FileSystemDock::FileSystemDock() { // `KeyModifierMask::CMD_OR_CTRL | Key::C` conflicts with other editor shortcuts. ED_SHORTCUT("filesystem_dock/copy_path", TTR("Copy Path"), KeyModifierMask::CMD_OR_CTRL | KeyModifierMask::SHIFT | Key::C); - ED_SHORTCUT("filesystem_dock/copy_absolute_path", TTR("Copy Absolute Path")); - ED_SHORTCUT("filesystem_dock/copy_uid", TTR("Copy UID")); + ED_SHORTCUT("filesystem_dock/copy_absolute_path", TTR("Copy Absolute Path"), KeyModifierMask::CMD_OR_CTRL | KeyModifierMask::ALT | Key::C); + ED_SHORTCUT("filesystem_dock/copy_uid", TTR("Copy UID"), KeyModifierMask::CMD_OR_CTRL | KeyModifierMask::ALT | KeyModifierMask::SHIFT | Key::C); ED_SHORTCUT("filesystem_dock/duplicate", TTR("Duplicate..."), KeyModifierMask::CMD_OR_CTRL | Key::D); ED_SHORTCUT("filesystem_dock/delete", TTR("Delete"), Key::KEY_DELETE); ED_SHORTCUT("filesystem_dock/rename", TTR("Rename..."), Key::F2); ED_SHORTCUT_OVERRIDE("filesystem_dock/rename", "macos", Key::ENTER); #if !defined(ANDROID_ENABLED) && !defined(WEB_ENABLED) // Opening the system file manager or opening in an external program is not supported on the Android and web editors. - ED_SHORTCUT("filesystem_dock/show_in_explorer", TTR("Open in File Manager")); - ED_SHORTCUT("filesystem_dock/open_in_external_program", TTR("Open in External Program")); - ED_SHORTCUT("filesystem_dock/open_in_terminal", TTR("Open in Terminal")); + ED_SHORTCUT("filesystem_dock/show_in_explorer", TTR("Open in File Manager"), KeyModifierMask::CMD_OR_CTRL | KeyModifierMask::ALT | Key::R); + ED_SHORTCUT("filesystem_dock/open_in_external_program", TTR("Open in External Program"), KeyModifierMask::CMD_OR_CTRL | KeyModifierMask::ALT | Key::E); + ED_SHORTCUT("filesystem_dock/open_in_terminal", TTR("Open in Terminal"), KeyModifierMask::CMD_OR_CTRL | KeyModifierMask::ALT | Key::T); #endif // Properly translating color names would require a separate HashMap, so for simplicity they are provided as comments. diff --git a/editor/gui/editor_file_dialog.cpp b/editor/gui/editor_file_dialog.cpp index 77d0ba7a60..7600748685 100644 --- a/editor/gui/editor_file_dialog.cpp +++ b/editor/gui/editor_file_dialog.cpp @@ -1355,6 +1355,13 @@ EditorFileDialog::Access EditorFileDialog::get_access() const { void EditorFileDialog::_make_dir_confirm() { const String stripped_dirname = makedirname->get_text().strip_edges(); + if (stripped_dirname.is_empty()) { + error_dialog->set_text(TTR("The path specified is invalid.")); + error_dialog->popup_centered(Size2(250, 50) * EDSCALE); + makedirname->set_text(""); // Reset label. + return; + } + if (dir_access->dir_exists(stripped_dirname)) { error_dialog->set_text(TTR("Could not create folder. File with that name already exists.")); error_dialog->popup_centered(Size2(250, 50) * EDSCALE); @@ -1638,6 +1645,7 @@ void EditorFileDialog::_update_favorites() { for (int i = 0; i < favorited_paths.size(); i++) { favorites->add_item(favorited_names[i], theme_cache.folder); + favorites->set_item_tooltip(-1, favorited_paths[i]); favorites->set_item_metadata(-1, favorited_paths[i]); favorites->set_item_icon_modulate(-1, get_dir_icon_color(favorited_paths[i])); @@ -1719,6 +1727,7 @@ void EditorFileDialog::_update_recent() { for (int i = 0; i < recentd_paths.size(); i++) { recent->add_item(recentd_names[i], theme_cache.folder); + recent->set_item_tooltip(-1, recentd_paths[i]); recent->set_item_metadata(-1, recentd_paths[i]); recent->set_item_icon_modulate(-1, get_dir_icon_color(recentd_paths[i])); } diff --git a/editor/gui/editor_object_selector.cpp b/editor/gui/editor_object_selector.cpp index b73cd3b65c..ec5caa8c2a 100644 --- a/editor/gui/editor_object_selector.cpp +++ b/editor/gui/editor_object_selector.cpp @@ -30,6 +30,7 @@ #include "editor_object_selector.h" +#include "editor/debugger/editor_debugger_inspector.h" #include "editor/editor_data.h" #include "editor/editor_node.h" #include "editor/editor_string_names.h" @@ -131,6 +132,19 @@ void EditorObjectSelector::update_path() { Ref<Texture2D> obj_icon; if (Object::cast_to<MultiNodeEdit>(obj)) { obj_icon = EditorNode::get_singleton()->get_class_icon(Object::cast_to<MultiNodeEdit>(obj)->get_edited_class_name()); + } else if (Object::cast_to<EditorDebuggerRemoteObject>(obj)) { + String class_name; + Ref<Script> base_script = obj->get_script(); + if (base_script.is_valid()) { + class_name = base_script->get_global_name(); + + if (class_name.is_empty()) { + // If there is no class_name in this script we just take the script path. + class_name = base_script->get_path(); + } + } + + obj_icon = EditorNode::get_singleton()->get_class_icon(class_name.is_empty() ? Object::cast_to<EditorDebuggerRemoteObject>(obj)->type_name : class_name); } else { obj_icon = EditorNode::get_singleton()->get_object_icon(obj); } diff --git a/editor/gui/editor_spin_slider.cpp b/editor/gui/editor_spin_slider.cpp index a073a2338b..27b6bbafb7 100644 --- a/editor/gui/editor_spin_slider.cpp +++ b/editor/gui/editor_spin_slider.cpp @@ -37,10 +37,6 @@ #include "editor/themes/editor_scale.h" #include "scene/theme/theme_db.h" -bool EditorSpinSlider::is_text_field() const { - return true; -} - String EditorSpinSlider::get_tooltip(const Point2 &p_pos) const { if (!read_only && grabber->is_visible()) { Key key = (OS::get_singleton()->has_feature("macos") || OS::get_singleton()->has_feature("web_macos") || OS::get_singleton()->has_feature("web_ios")) ? Key::META : Key::CTRL; diff --git a/editor/gui/editor_spin_slider.h b/editor/gui/editor_spin_slider.h index 2476c2f71b..dfc50878dd 100644 --- a/editor/gui/editor_spin_slider.h +++ b/editor/gui/editor_spin_slider.h @@ -101,8 +101,6 @@ protected: void _focus_entered(); public: - virtual bool is_text_field() const override; - String get_tooltip(const Point2 &p_pos) const override; String get_text_value() const; diff --git a/editor/icons/GuiToggleOffMirrored.svg b/editor/icons/GuiToggleOffMirrored.svg index 4e7f63d573..138a856a1d 100644 --- a/editor/icons/GuiToggleOffMirrored.svg +++ b/editor/icons/GuiToggleOffMirrored.svg @@ -1 +1 @@ -<svg xmlns="http://www.w3.org/2000/svg" width="38" height="16"><rect width="36" height="14" x="1" y="1" fill="gray" rx="7"/><circle cx="8" cy="8" r="5" fill="#b3b3b3"/></svg>
\ No newline at end of file +<svg xmlns="http://www.w3.org/2000/svg" width="38" height="16"><g fill="#e0e0e0"><rect width="36" height="14" x="1" y="1" fill-opacity=".2" rx="7"/><circle cx="30" cy="8" r="5"/></g></svg>
\ No newline at end of file diff --git a/editor/import/3d/resource_importer_obj.cpp b/editor/import/3d/resource_importer_obj.cpp index a579224ecd..5cec366d69 100644 --- a/editor/import/3d/resource_importer_obj.cpp +++ b/editor/import/3d/resource_importer_obj.cpp @@ -202,12 +202,12 @@ static Error _parse_material_library(const String &p_path, HashMap<String, Ref<S return OK; } -static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes, bool p_single_mesh, bool p_generate_tangents, Vector3 p_scale_mesh, Vector3 p_offset_mesh, bool p_disable_compression, List<String> *r_missing_deps) { +static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes, bool p_single_mesh, bool p_generate_tangents, bool p_generate_lods, bool p_generate_shadow_mesh, bool p_generate_lightmap_uv2, float p_generate_lightmap_uv2_texel_size, const PackedByteArray &p_src_lightmap_cache, Vector3 p_scale_mesh, Vector3 p_offset_mesh, bool p_disable_compression, Vector<Vector<uint8_t>> &r_lightmap_caches, List<String> *r_missing_deps) { Ref<FileAccess> f = FileAccess::open(p_path, FileAccess::READ); ERR_FAIL_COND_V_MSG(f.is_null(), ERR_CANT_OPEN, vformat("Couldn't open OBJ file '%s', it may not exist or not be readable.", p_path)); - // Avoid trying to load/interpret potential build artifacts from Visual Studio (e.g. when compiling native plugins inside the project tree) - // This should only match, if it's indeed a COFF file header + // Avoid trying to load/interpret potential build artifacts from Visual Studio (e.g. when compiling native plugins inside the project tree). + // This should only match if it's indeed a COFF file header. // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types const int first_bytes = f->get_16(); static const Vector<int> coff_header_machines{ @@ -445,6 +445,7 @@ static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes, } mesh->add_surface(Mesh::PRIMITIVE_TRIANGLES, array, TypedArray<Array>(), Dictionary(), material, name, mesh_flags); + print_verbose("OBJ: Added surface :" + mesh->get_surface_name(mesh->get_surface_count() - 1)); if (!current_material.is_empty()) { @@ -508,6 +509,43 @@ static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes, } } + if (p_generate_lightmap_uv2) { + Vector<uint8_t> lightmap_cache; + mesh->lightmap_unwrap_cached(Transform3D(), p_generate_lightmap_uv2_texel_size, p_src_lightmap_cache, lightmap_cache); + + if (!lightmap_cache.is_empty()) { + if (r_lightmap_caches.is_empty()) { + r_lightmap_caches.push_back(lightmap_cache); + } else { + // MD5 is stored at the beginning of the cache data. + const String new_md5 = String::md5(lightmap_cache.ptr()); + + for (int i = 0; i < r_lightmap_caches.size(); i++) { + const String md5 = String::md5(r_lightmap_caches[i].ptr()); + if (new_md5 < md5) { + r_lightmap_caches.insert(i, lightmap_cache); + break; + } + + if (new_md5 == md5) { + break; + } + } + } + } + } + + mesh->optimize_indices_for_cache(); + + if (p_generate_lods) { + // Use normal merge/split angles that match the defaults used for 3D scene importing. + mesh->generate_lods(60.0f, 25.0f, {}); + } + + if (p_generate_shadow_mesh) { + mesh->create_shadow_mesh(); + } + if (p_single_mesh && mesh->get_surface_count() > 0) { r_meshes.push_back(mesh); } @@ -518,7 +556,10 @@ static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes, Node *EditorOBJImporter::import_scene(const String &p_path, uint32_t p_flags, const HashMap<StringName, Variant> &p_options, List<String> *r_missing_deps, Error *r_err) { List<Ref<ImporterMesh>> meshes; - Error err = _parse_obj(p_path, meshes, false, p_flags & IMPORT_GENERATE_TANGENT_ARRAYS, Vector3(1, 1, 1), Vector3(0, 0, 0), p_flags & IMPORT_FORCE_DISABLE_MESH_COMPRESSION, r_missing_deps); + // LOD, shadow mesh and lightmap UV2 generation are handled by ResourceImporterScene in this case, + // so disable it within the OBJ mesh import. + Vector<Vector<uint8_t>> mesh_lightmap_caches; + Error err = _parse_obj(p_path, meshes, false, p_flags & IMPORT_GENERATE_TANGENT_ARRAYS, false, false, false, 0.2, PackedByteArray(), Vector3(1, 1, 1), Vector3(0, 0, 0), p_flags & IMPORT_FORCE_DISABLE_MESH_COMPRESSION, mesh_lightmap_caches, r_missing_deps); if (err != OK) { if (r_err) { @@ -587,19 +628,51 @@ String ResourceImporterOBJ::get_preset_name(int p_idx) const { void ResourceImporterOBJ::get_import_options(const String &p_path, List<ImportOption> *r_options, int p_preset) const { r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "generate_tangents"), true)); + r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "generate_lods"), true)); + r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "generate_shadow_mesh"), true)); + r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "generate_lightmap_uv2", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_DEFAULT | PROPERTY_USAGE_UPDATE_ALL_IF_MODIFIED), false)); + r_options->push_back(ImportOption(PropertyInfo(Variant::FLOAT, "generate_lightmap_uv2_texel_size", PROPERTY_HINT_RANGE, "0.001,100,0.001"), 0.2)); r_options->push_back(ImportOption(PropertyInfo(Variant::VECTOR3, "scale_mesh"), Vector3(1, 1, 1))); r_options->push_back(ImportOption(PropertyInfo(Variant::VECTOR3, "offset_mesh"), Vector3(0, 0, 0))); r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "force_disable_mesh_compression"), false)); } bool ResourceImporterOBJ::get_option_visibility(const String &p_path, const String &p_option, const HashMap<StringName, Variant> &p_options) const { + if (p_option == "generate_lightmap_uv2_texel_size" && !p_options["generate_lightmap_uv2"]) { + // Only display the lightmap texel size import option when lightmap UV2 generation is enabled. + return false; + } + return true; } Error ResourceImporterOBJ::import(const String &p_source_file, const String &p_save_path, const HashMap<StringName, Variant> &p_options, List<String> *r_platform_variants, List<String> *r_gen_files, Variant *r_metadata) { List<Ref<ImporterMesh>> meshes; - Error err = _parse_obj(p_source_file, meshes, true, p_options["generate_tangents"], p_options["scale_mesh"], p_options["offset_mesh"], p_options["force_disable_mesh_compression"], nullptr); + Vector<uint8_t> src_lightmap_cache; + Vector<Vector<uint8_t>> mesh_lightmap_caches; + + Error err; + { + src_lightmap_cache = FileAccess::get_file_as_bytes(p_source_file + ".unwrap_cache", &err); + if (err != OK) { + src_lightmap_cache.clear(); + } + } + + err = _parse_obj(p_source_file, meshes, true, p_options["generate_tangents"], p_options["generate_lods"], p_options["generate_shadow_mesh"], p_options["generate_lightmap_uv2"], p_options["generate_lightmap_uv2_texel_size"], src_lightmap_cache, p_options["scale_mesh"], p_options["offset_mesh"], p_options["force_disable_mesh_compression"], mesh_lightmap_caches, nullptr); + + if (mesh_lightmap_caches.size()) { + Ref<FileAccess> f = FileAccess::open(p_source_file + ".unwrap_cache", FileAccess::WRITE); + if (f.is_valid()) { + f->store_32(mesh_lightmap_caches.size()); + for (int i = 0; i < mesh_lightmap_caches.size(); i++) { + String md5 = String::md5(mesh_lightmap_caches[i].ptr()); + f->store_buffer(mesh_lightmap_caches[i].ptr(), mesh_lightmap_caches[i].size()); + } + } + } + err = OK; ERR_FAIL_COND_V(err != OK, err); ERR_FAIL_COND_V(meshes.size() != 1, ERR_BUG); diff --git a/editor/import/dynamic_font_import_settings.cpp b/editor/import/dynamic_font_import_settings.cpp index c526ca0b0c..e124697b20 100644 --- a/editor/import/dynamic_font_import_settings.cpp +++ b/editor/import/dynamic_font_import_settings.cpp @@ -96,7 +96,7 @@ struct UniRange { }; // Unicode Character Blocks -// Source: https://www.unicode.org/Public/14.0.0/ucd/Blocks.txt +// Source: https://www.unicode.org/Public/16.0.0/ucd/Blocks.txt static UniRange unicode_ranges[] = { { 0x0000, 0x007F, U"Basic Latin" }, { 0x0080, 0x00FF, U"Latin-1 Supplement" }, @@ -283,6 +283,7 @@ static UniRange unicode_ranges[] = { { 0x10500, 0x1052F, U"Elbasan" }, { 0x10530, 0x1056F, U"Caucasian Albanian" }, { 0x10570, 0x105BF, U"Vithkuqi" }, + { 0x105C0, 0x105FF, U"Todhri" }, { 0x10600, 0x1077F, U"Linear A" }, { 0x10780, 0x107BF, U"Latin Extended-F" }, { 0x10800, 0x1083F, U"Cypriot Syllabary" }, @@ -305,6 +306,7 @@ static UniRange unicode_ranges[] = { { 0x10C00, 0x10C4F, U"Old Turkic" }, { 0x10C80, 0x10CFF, U"Old Hungarian" }, { 0x10D00, 0x10D3F, U"Hanifi Rohingya" }, + { 0x10D40, 0x10D8F, U"Garay" }, { 0x10E60, 0x10E7F, U"Rumi Numeral Symbols" }, { 0x10E80, 0x10EBF, U"Yezidi" }, { 0x10EC0, 0x10EFF, U"Arabic Extended-C" }, @@ -324,12 +326,14 @@ static UniRange unicode_ranges[] = { { 0x11280, 0x112AF, U"Multani" }, { 0x112B0, 0x112FF, U"Khudawadi" }, { 0x11300, 0x1137F, U"Grantha" }, + { 0x11380, 0x113FF, U"Tulu-Tigalari" }, { 0x11400, 0x1147F, U"Newa" }, { 0x11480, 0x114DF, U"Tirhuta" }, { 0x11580, 0x115FF, U"Siddham" }, { 0x11600, 0x1165F, U"Modi" }, { 0x11660, 0x1167F, U"Mongolian Supplement" }, { 0x11680, 0x116CF, U"Takri" }, + { 0x116D0, 0x116FF, U"Myanmar Extended-C" }, { 0x11700, 0x1174F, U"Ahom" }, { 0x11800, 0x1184F, U"Dogra" }, { 0x118A0, 0x118FF, U"Warang Citi" }, @@ -340,6 +344,7 @@ static UniRange unicode_ranges[] = { { 0x11AB0, 0x11ABF, U"Unified Canadian Aboriginal Syllabics Extended-A" }, { 0x11AC0, 0x11AFF, U"Pau Cin Hau" }, { 0x11B00, 0x11B5F, U"Devanagari Extended-A" }, + { 0x11BC0, 0x11BFF, U"Sunuwar" }, { 0x11C00, 0x11C6F, U"Bhaiksuki" }, { 0x11C70, 0x11CBF, U"Marchen" }, { 0x11D00, 0x11D5F, U"Masaram Gondi" }, @@ -354,12 +359,15 @@ static UniRange unicode_ranges[] = { { 0x12F90, 0x12FFF, U"Cypro-Minoan" }, { 0x13000, 0x1342F, U"Egyptian Hieroglyphs" }, { 0x13430, 0x1343F, U"Egyptian Hieroglyph Format Controls" }, + { 0x13460, 0x143FF, U"Egyptian Hieroglyphs Extended-A" }, { 0x14400, 0x1467F, U"Anatolian Hieroglyphs" }, + { 0x16100, 0x1613F, U"Gurung Khema" }, { 0x16800, 0x16A3F, U"Bamum Supplement" }, { 0x16A40, 0x16A6F, U"Mro" }, { 0x16A70, 0x16ACF, U"Tangsa" }, { 0x16AD0, 0x16AFF, U"Bassa Vah" }, { 0x16B00, 0x16B8F, U"Pahawh Hmong" }, + { 0x16D40, 0x16D7F, U"Kirat Rai" }, { 0x16E40, 0x16E9F, U"Medefaidrin" }, { 0x16F00, 0x16F9F, U"Miao" }, { 0x16FE0, 0x16FFF, U"Ideographic Symbols and Punctuation" }, @@ -374,6 +382,7 @@ static UniRange unicode_ranges[] = { { 0x1B170, 0x1B2FF, U"Nushu" }, { 0x1BC00, 0x1BC9F, U"Duployan" }, { 0x1BCA0, 0x1BCAF, U"Shorthand Format Controls" }, + { 0x1CC00, 0x1CEBF, U"Symbols for Legacy Computing Supplement" }, { 0x1CF00, 0x1CFCF, U"Znamenny Musical Notation" }, { 0x1D000, 0x1D0FF, U"Byzantine Musical Symbols" }, { 0x1D100, 0x1D1FF, U"Musical Symbols" }, @@ -391,6 +400,7 @@ static UniRange unicode_ranges[] = { { 0x1E290, 0x1E2BF, U"Toto" }, { 0x1E2C0, 0x1E2FF, U"Wancho" }, { 0x1E4D0, 0x1E4FF, U"Nag Mundari" }, + { 0x1E5D0, 0x1E5FF, U"Ol Onal" }, { 0x1E7E0, 0x1E7FF, U"Ethiopic Extended-B" }, { 0x1E800, 0x1E8DF, U"Mende Kikakui" }, { 0x1E900, 0x1E95F, U"Adlam" }, @@ -418,6 +428,7 @@ static UniRange unicode_ranges[] = { { 0x2B740, 0x2B81F, U"CJK Unified Ideographs Extension D" }, { 0x2B820, 0x2CEAF, U"CJK Unified Ideographs Extension E" }, { 0x2CEB0, 0x2EBEF, U"CJK Unified Ideographs Extension F" }, + { 0x2EBF0, 0x2EE5F, U"CJK Unified Ideographs Extension I" }, { 0x2F800, 0x2FA1F, U"CJK Compatibility Ideographs Supplement" }, { 0x30000, 0x3134F, U"CJK Unified Ideographs Extension G" }, { 0x31350, 0x323AF, U"CJK Unified Ideographs Extension H" }, diff --git a/editor/import/resource_importer_layered_texture.cpp b/editor/import/resource_importer_layered_texture.cpp index 72d715ac2d..312195fcd7 100644 --- a/editor/import/resource_importer_layered_texture.cpp +++ b/editor/import/resource_importer_layered_texture.cpp @@ -276,6 +276,10 @@ void ResourceImporterLayeredTexture::_save_tex(Vector<Ref<Image>> p_images, cons f->store_32(0); f->store_32(0); + if ((p_compress_mode == COMPRESS_LOSSLESS || p_compress_mode == COMPRESS_LOSSY) && p_images[0]->get_format() >= Image::FORMAT_RF) { + p_compress_mode = COMPRESS_VRAM_UNCOMPRESSED; // These can't go as lossy. + } + for (int i = 0; i < p_images.size(); i++) { ResourceImporterTexture::save_to_ctex_format(f, p_images[i], ResourceImporterTexture::CompressMode(p_compress_mode), used_channels, p_vram_compression, p_lossy); } @@ -335,11 +339,6 @@ Error ResourceImporterLayeredTexture::import(const String &p_source_file, const return err; } - if (compress_mode == COMPRESS_BASIS_UNIVERSAL && image->get_format() >= Image::FORMAT_RF) { - //basis universal does not support float formats, fall back - compress_mode = COMPRESS_VRAM_COMPRESSED; - } - if (compress_mode == COMPRESS_VRAM_COMPRESSED) { //if using video ram, optimize if (channel_pack == 0) { diff --git a/editor/import/resource_importer_texture.cpp b/editor/import/resource_importer_texture.cpp index a205123df1..24a14c60ad 100644 --- a/editor/import/resource_importer_texture.cpp +++ b/editor/import/resource_importer_texture.cpp @@ -380,7 +380,7 @@ void ResourceImporterTexture::_save_ctex(const Ref<Image> &p_image, const String f->store_32(0); f->store_32(0); - if ((p_compress_mode == COMPRESS_LOSSLESS || p_compress_mode == COMPRESS_LOSSY) && p_image->get_format() > Image::FORMAT_RGBA8) { + if ((p_compress_mode == COMPRESS_LOSSLESS || p_compress_mode == COMPRESS_LOSSY) && p_image->get_format() >= Image::FORMAT_RF) { p_compress_mode = COMPRESS_VRAM_UNCOMPRESSED; //these can't go as lossy } @@ -593,11 +593,6 @@ Error ResourceImporterTexture::import(const String &p_source_file, const String } } - if (compress_mode == COMPRESS_BASIS_UNIVERSAL && image->get_format() >= Image::FORMAT_RF) { - // Basis universal does not support float formats, fallback. - compress_mode = COMPRESS_VRAM_COMPRESSED; - } - bool detect_3d = int(p_options["detect_3d/compress_to"]) > 0; bool detect_roughness = roughness == 0; bool detect_normal = normal == 0; diff --git a/editor/import_dock.cpp b/editor/import_dock.cpp index 14065abf73..6c22a965ae 100644 --- a/editor/import_dock.cpp +++ b/editor/import_dock.cpp @@ -790,23 +790,14 @@ ImportDock::ImportDock() { import->set_text(TTR("Reimport")); import->set_disabled(true); import->connect(SceneStringName(pressed), callable_mp(this, &ImportDock::_reimport_pressed)); - if (!DisplayServer::get_singleton()->get_swap_cancel_ok()) { - advanced_spacer = hb->add_spacer(); - advanced = memnew(Button); - advanced->set_text(TTR("Advanced...")); - hb->add_child(advanced); - } + advanced_spacer = hb->add_spacer(); + advanced = memnew(Button); + advanced->set_text(TTR("Advanced...")); + hb->add_child(advanced); hb->add_spacer(); hb->add_child(import); hb->add_spacer(); - if (DisplayServer::get_singleton()->get_swap_cancel_ok()) { - advanced = memnew(Button); - advanced->set_text(TTR("Advanced...")); - hb->add_child(advanced); - advanced_spacer = hb->add_spacer(); - } - advanced->hide(); advanced_spacer->hide(); advanced->connect(SceneStringName(pressed), callable_mp(this, &ImportDock::_advanced_options)); diff --git a/editor/input_event_configuration_dialog.cpp b/editor/input_event_configuration_dialog.cpp index c60197b96b..a2aeeb11bd 100644 --- a/editor/input_event_configuration_dialog.cpp +++ b/editor/input_event_configuration_dialog.cpp @@ -551,18 +551,18 @@ void InputEventConfigurationDialog::_input_list_item_selected() { } void InputEventConfigurationDialog::_device_selection_changed(int p_option_button_index) { - // Subtract 1 as option index 0 corresponds to "All Devices" (value of -1) - // and option index 1 corresponds to device 0, etc... - event->set_device(p_option_button_index - 1); + // Option index 0 corresponds to "All Devices" (value of -3). + // Otherwise subtract 1 as option index 1 corresponds to device 0, etc... + event->set_device(p_option_button_index == 0 ? InputEvent::DEVICE_ID_ALL_DEVICES : p_option_button_index - 1); event_as_text->set_text(EventListenerLineEdit::get_event_text(event, true)); } void InputEventConfigurationDialog::_set_current_device(int p_device) { - device_id_option->select(p_device + 1); + device_id_option->select(p_device == InputEvent::DEVICE_ID_ALL_DEVICES ? 0 : p_device + 1); } int InputEventConfigurationDialog::_get_current_device() const { - return device_id_option->get_selected() - 1; + return device_id_option->get_selected() == 0 ? InputEvent::DEVICE_ID_ALL_DEVICES : device_id_option->get_selected() - 1; } void InputEventConfigurationDialog::_notification(int p_what) { @@ -705,11 +705,12 @@ InputEventConfigurationDialog::InputEventConfigurationDialog() { device_id_option = memnew(OptionButton); device_id_option->set_h_size_flags(Control::SIZE_EXPAND_FILL); - for (int i = -1; i < 8; i++) { + device_id_option->add_item(EventListenerLineEdit::get_device_string(InputEvent::DEVICE_ID_ALL_DEVICES)); + for (int i = 0; i < 8; i++) { device_id_option->add_item(EventListenerLineEdit::get_device_string(i)); } device_id_option->connect(SceneStringName(item_selected), callable_mp(this, &InputEventConfigurationDialog::_device_selection_changed)); - _set_current_device(InputMap::ALL_DEVICES); + _set_current_device(InputEvent::DEVICE_ID_ALL_DEVICES); device_container->add_child(device_id_option); device_container->hide(); diff --git a/editor/plugins/animation_library_editor.cpp b/editor/plugins/animation_library_editor.cpp index 38f8b16b34..147b92c094 100644 --- a/editor/plugins/animation_library_editor.cpp +++ b/editor/plugins/animation_library_editor.cpp @@ -598,7 +598,7 @@ void AnimationLibraryEditor::_button_pressed(TreeItem *p_item, int p_column, int file_popup->add_separator(); file_popup->add_item(TTR("Open in Inspector"), FILE_MENU_EDIT_LIBRARY); Rect2 pos = tree->get_item_rect(p_item, 1, 0); - Vector2 popup_pos = tree->get_screen_transform().xform(pos.position + Vector2(0, pos.size.height)); + Vector2 popup_pos = tree->get_screen_transform().xform(pos.position + Vector2(0, pos.size.height)) - tree->get_scroll(); file_popup->popup(Rect2(popup_pos, Size2())); file_dialog_animation = StringName(); @@ -638,7 +638,7 @@ void AnimationLibraryEditor::_button_pressed(TreeItem *p_item, int p_column, int file_popup->add_separator(); file_popup->add_item(TTR("Open in Inspector"), FILE_MENU_EDIT_ANIMATION); Rect2 pos = tree->get_item_rect(p_item, 1, 0); - Vector2 popup_pos = tree->get_screen_transform().xform(pos.position + Vector2(0, pos.size.height)); + Vector2 popup_pos = tree->get_screen_transform().xform(pos.position + Vector2(0, pos.size.height)) - tree->get_scroll(); file_popup->popup(Rect2(popup_pos, Size2())); file_dialog_animation = anim_name; diff --git a/editor/plugins/animation_player_editor_plugin.cpp b/editor/plugins/animation_player_editor_plugin.cpp index e6afc85e9e..1581e7cc66 100644 --- a/editor/plugins/animation_player_editor_plugin.cpp +++ b/editor/plugins/animation_player_editor_plugin.cpp @@ -2006,30 +2006,34 @@ AnimationPlayerEditor::AnimationPlayerEditor(AnimationPlayerEditorPlugin *p_plug HBoxContainer *hb = memnew(HBoxContainer); add_child(hb); + HBoxContainer *playback_container = memnew(HBoxContainer); + playback_container->set_layout_direction(LAYOUT_DIRECTION_LTR); + hb->add_child(playback_container); + play_bw_from = memnew(Button); play_bw_from->set_theme_type_variation("FlatButton"); play_bw_from->set_tooltip_text(TTR("Play Animation Backwards")); - hb->add_child(play_bw_from); + playback_container->add_child(play_bw_from); play_bw = memnew(Button); play_bw->set_theme_type_variation("FlatButton"); play_bw->set_tooltip_text(TTR("Play Animation Backwards from End")); - hb->add_child(play_bw); + playback_container->add_child(play_bw); stop = memnew(Button); stop->set_theme_type_variation("FlatButton"); stop->set_tooltip_text(TTR("Pause/Stop Animation")); - hb->add_child(stop); + playback_container->add_child(stop); play = memnew(Button); play->set_theme_type_variation("FlatButton"); play->set_tooltip_text(TTR("Play Animation from Start")); - hb->add_child(play); + playback_container->add_child(play); play_from = memnew(Button); play_from->set_theme_type_variation("FlatButton"); play_from->set_tooltip_text(TTR("Play Animation")); - hb->add_child(play_from); + playback_container->add_child(play_from); frame = memnew(SpinBox); hb->add_child(frame); diff --git a/editor/plugins/control_editor_plugin.cpp b/editor/plugins/control_editor_plugin.cpp index 5c5f236ff3..24943af60f 100644 --- a/editor/plugins/control_editor_plugin.cpp +++ b/editor/plugins/control_editor_plugin.cpp @@ -31,7 +31,6 @@ #include "control_editor_plugin.h" #include "editor/editor_node.h" -#include "editor/editor_settings.h" #include "editor/editor_string_names.h" #include "editor/editor_undo_redo_manager.h" #include "editor/plugins/canvas_item_editor_plugin.h" @@ -511,6 +510,9 @@ void ControlEditorPopupButton::_notification(int p_what) { case NOTIFICATION_DRAW: { if (arrow_icon.is_valid()) { Vector2 arrow_pos = Point2(26, 0) * EDSCALE; + if (is_layout_rtl()) { + arrow_pos.x = get_size().x - arrow_pos.x - arrow_icon->get_width(); + } arrow_pos.y = get_size().y / 2 - arrow_icon->get_height() / 2; draw_texture(arrow_icon, arrow_pos); } @@ -534,7 +536,6 @@ ControlEditorPopupButton::ControlEditorPopupButton() { set_focus_mode(FOCUS_NONE); popup_panel = memnew(PopupPanel); - popup_panel->set_theme_type_variation("ControlEditorPopupPanel"); add_child(popup_panel); popup_panel->connect("about_to_popup", callable_mp(this, &ControlEditorPopupButton::_popup_visibility_changed).bind(true)); popup_panel->connect("popup_hide", callable_mp(this, &ControlEditorPopupButton::_popup_visibility_changed).bind(false)); diff --git a/editor/plugins/cpu_particles_2d_editor_plugin.cpp b/editor/plugins/cpu_particles_2d_editor_plugin.cpp deleted file mode 100644 index 4869a202d7..0000000000 --- a/editor/plugins/cpu_particles_2d_editor_plugin.cpp +++ /dev/null @@ -1,309 +0,0 @@ -/**************************************************************************/ -/* cpu_particles_2d_editor_plugin.cpp */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/**************************************************************************/ - -#include "cpu_particles_2d_editor_plugin.h" - -#include "canvas_item_editor_plugin.h" -#include "core/io/image_loader.h" -#include "editor/editor_node.h" -#include "editor/editor_settings.h" -#include "editor/editor_undo_redo_manager.h" -#include "editor/gui/editor_file_dialog.h" -#include "editor/scene_tree_dock.h" -#include "scene/2d/cpu_particles_2d.h" -#include "scene/2d/gpu_particles_2d.h" -#include "scene/gui/check_box.h" -#include "scene/gui/menu_button.h" -#include "scene/gui/option_button.h" -#include "scene/gui/separator.h" -#include "scene/gui/spin_box.h" -#include "scene/resources/particle_process_material.h" - -void CPUParticles2DEditorPlugin::edit(Object *p_object) { - particles = Object::cast_to<CPUParticles2D>(p_object); -} - -bool CPUParticles2DEditorPlugin::handles(Object *p_object) const { - return p_object->is_class("CPUParticles2D"); -} - -void CPUParticles2DEditorPlugin::make_visible(bool p_visible) { - if (p_visible) { - toolbar->show(); - } else { - toolbar->hide(); - } -} - -void CPUParticles2DEditorPlugin::_file_selected(const String &p_file) { - source_emission_file = p_file; - emission_mask->popup_centered(); -} - -void CPUParticles2DEditorPlugin::_menu_callback(int p_idx) { - switch (p_idx) { - case MENU_LOAD_EMISSION_MASK: { - file->popup_file_dialog(); - } break; - case MENU_CLEAR_EMISSION_MASK: { - emission_mask->popup_centered(); - } break; - case MENU_RESTART: { - particles->restart(); - } break; - case MENU_CONVERT_TO_GPU_PARTICLES: { - GPUParticles2D *gpu_particles = memnew(GPUParticles2D); - gpu_particles->convert_from_particles(particles); - gpu_particles->set_name(particles->get_name()); - gpu_particles->set_transform(particles->get_transform()); - gpu_particles->set_visible(particles->is_visible()); - gpu_particles->set_process_mode(particles->get_process_mode()); - - EditorUndoRedoManager *ur = EditorUndoRedoManager::get_singleton(); - ur->create_action(TTR("Convert to GPUParticles3D"), UndoRedo::MERGE_DISABLE, particles); - SceneTreeDock::get_singleton()->replace_node(particles, gpu_particles); - ur->commit_action(false); - } break; - } -} - -void CPUParticles2DEditorPlugin::_generate_emission_mask() { - Ref<Image> img; - img.instantiate(); - Error err = ImageLoader::load_image(source_emission_file, img); - ERR_FAIL_COND_MSG(err != OK, "Error loading image '" + source_emission_file + "'."); - - if (img->is_compressed()) { - img->decompress(); - } - img->convert(Image::FORMAT_RGBA8); - ERR_FAIL_COND(img->get_format() != Image::FORMAT_RGBA8); - Size2i s = img->get_size(); - ERR_FAIL_COND(s.width == 0 || s.height == 0); - - Vector<Point2> valid_positions; - Vector<Point2> valid_normals; - Vector<uint8_t> valid_colors; - - valid_positions.resize(s.width * s.height); - - EmissionMode emode = (EmissionMode)emission_mask_mode->get_selected(); - - if (emode == EMISSION_MODE_BORDER_DIRECTED) { - valid_normals.resize(s.width * s.height); - } - - bool capture_colors = emission_colors->is_pressed(); - - if (capture_colors) { - valid_colors.resize(s.width * s.height * 4); - } - - int vpc = 0; - - { - Vector<uint8_t> img_data = img->get_data(); - const uint8_t *r = img_data.ptr(); - - for (int i = 0; i < s.width; i++) { - for (int j = 0; j < s.height; j++) { - uint8_t a = r[(j * s.width + i) * 4 + 3]; - - if (a > 128) { - if (emode == EMISSION_MODE_SOLID) { - if (capture_colors) { - valid_colors.write[vpc * 4 + 0] = r[(j * s.width + i) * 4 + 0]; - valid_colors.write[vpc * 4 + 1] = r[(j * s.width + i) * 4 + 1]; - valid_colors.write[vpc * 4 + 2] = r[(j * s.width + i) * 4 + 2]; - valid_colors.write[vpc * 4 + 3] = r[(j * s.width + i) * 4 + 3]; - } - valid_positions.write[vpc++] = Point2(i, j); - - } else { - bool on_border = false; - for (int x = i - 1; x <= i + 1; x++) { - for (int y = j - 1; y <= j + 1; y++) { - if (x < 0 || y < 0 || x >= s.width || y >= s.height || r[(y * s.width + x) * 4 + 3] <= 128) { - on_border = true; - break; - } - } - - if (on_border) { - break; - } - } - - if (on_border) { - valid_positions.write[vpc] = Point2(i, j); - - if (emode == EMISSION_MODE_BORDER_DIRECTED) { - Vector2 normal; - for (int x = i - 2; x <= i + 2; x++) { - for (int y = j - 2; y <= j + 2; y++) { - if (x == i && y == j) { - continue; - } - - if (x < 0 || y < 0 || x >= s.width || y >= s.height || r[(y * s.width + x) * 4 + 3] <= 128) { - normal += Vector2(x - i, y - j).normalized(); - } - } - } - - normal.normalize(); - valid_normals.write[vpc] = normal; - } - - if (capture_colors) { - valid_colors.write[vpc * 4 + 0] = r[(j * s.width + i) * 4 + 0]; - valid_colors.write[vpc * 4 + 1] = r[(j * s.width + i) * 4 + 1]; - valid_colors.write[vpc * 4 + 2] = r[(j * s.width + i) * 4 + 2]; - valid_colors.write[vpc * 4 + 3] = r[(j * s.width + i) * 4 + 3]; - } - - vpc++; - } - } - } - } - } - } - - valid_positions.resize(vpc); - if (valid_normals.size()) { - valid_normals.resize(vpc); - } - - ERR_FAIL_COND_MSG(valid_positions.is_empty(), "No pixels with transparency > 128 in image..."); - - if (capture_colors) { - PackedColorArray pca; - pca.resize(vpc); - Color *pcaw = pca.ptrw(); - for (int i = 0; i < vpc; i += 1) { - Color color; - color.r = valid_colors[i * 4 + 0] / 255.0f; - color.g = valid_colors[i * 4 + 1] / 255.0f; - color.b = valid_colors[i * 4 + 2] / 255.0f; - color.a = valid_colors[i * 4 + 3] / 255.0f; - pcaw[i] = color; - } - particles->set_emission_colors(pca); - } - - if (valid_normals.size()) { - particles->set_emission_shape(CPUParticles2D::EMISSION_SHAPE_DIRECTED_POINTS); - PackedVector2Array norms; - norms.resize(valid_normals.size()); - Vector2 *normsw = norms.ptrw(); - for (int i = 0; i < valid_normals.size(); i += 1) { - normsw[i] = valid_normals[i]; - } - particles->set_emission_normals(norms); - } else { - particles->set_emission_shape(CPUParticles2D::EMISSION_SHAPE_POINTS); - } - - { - Vector2 offset; - if (emission_mask_centered->is_pressed()) { - offset = Vector2(-s.width * 0.5, -s.height * 0.5); - } - - PackedVector2Array points; - points.resize(valid_positions.size()); - Vector2 *pointsw = points.ptrw(); - for (int i = 0; i < valid_positions.size(); i += 1) { - pointsw[i] = valid_positions[i] + offset; - } - particles->set_emission_points(points); - } -} - -void CPUParticles2DEditorPlugin::_notification(int p_what) { - switch (p_what) { - case NOTIFICATION_ENTER_TREE: { - menu->get_popup()->connect(SceneStringName(id_pressed), callable_mp(this, &CPUParticles2DEditorPlugin::_menu_callback)); - menu->set_icon(file->get_editor_theme_icon(SNAME("CPUParticles2D"))); - file->connect("file_selected", callable_mp(this, &CPUParticles2DEditorPlugin::_file_selected)); - } break; - } -} - -CPUParticles2DEditorPlugin::CPUParticles2DEditorPlugin() { - particles = nullptr; - - toolbar = memnew(HBoxContainer); - add_control_to_container(CONTAINER_CANVAS_EDITOR_MENU, toolbar); - toolbar->hide(); - - menu = memnew(MenuButton); - menu->get_popup()->add_shortcut(ED_GET_SHORTCUT("particles/restart_emission"), MENU_RESTART); - menu->get_popup()->add_item(TTR("Load Emission Mask"), MENU_LOAD_EMISSION_MASK); - menu->get_popup()->add_item(TTR("Convert to GPUParticles2D"), MENU_CONVERT_TO_GPU_PARTICLES); - menu->set_text(TTR("CPUParticles2D")); - menu->set_switch_on_hover(true); - toolbar->add_child(menu); - - file = memnew(EditorFileDialog); - List<String> ext; - ImageLoader::get_recognized_extensions(&ext); - for (const String &E : ext) { - file->add_filter("*." + E, E.to_upper()); - } - file->set_file_mode(EditorFileDialog::FILE_MODE_OPEN_FILE); - toolbar->add_child(file); - - emission_mask = memnew(ConfirmationDialog); - emission_mask->set_title(TTR("Load Emission Mask")); - VBoxContainer *emvb = memnew(VBoxContainer); - emission_mask->add_child(emvb); - emission_mask_mode = memnew(OptionButton); - emvb->add_margin_child(TTR("Emission Mask"), emission_mask_mode); - emission_mask_mode->add_item(TTR("Solid Pixels"), EMISSION_MODE_SOLID); - emission_mask_mode->add_item(TTR("Border Pixels"), EMISSION_MODE_BORDER); - emission_mask_mode->add_item(TTR("Directed Border Pixels"), EMISSION_MODE_BORDER_DIRECTED); - VBoxContainer *optionsvb = memnew(VBoxContainer); - emvb->add_margin_child(TTR("Options"), optionsvb); - emission_mask_centered = memnew(CheckBox); - emission_mask_centered->set_text(TTR("Centered")); - optionsvb->add_child(emission_mask_centered); - emission_colors = memnew(CheckBox); - emission_colors->set_text(TTR("Capture Colors from Pixel")); - optionsvb->add_child(emission_colors); - - toolbar->add_child(emission_mask); - - emission_mask->connect(SceneStringName(confirmed), callable_mp(this, &CPUParticles2DEditorPlugin::_generate_emission_mask)); -} - -CPUParticles2DEditorPlugin::~CPUParticles2DEditorPlugin() { -} diff --git a/editor/plugins/cpu_particles_2d_editor_plugin.h b/editor/plugins/cpu_particles_2d_editor_plugin.h deleted file mode 100644 index 645e6d8345..0000000000 --- a/editor/plugins/cpu_particles_2d_editor_plugin.h +++ /dev/null @@ -1,94 +0,0 @@ -/**************************************************************************/ -/* cpu_particles_2d_editor_plugin.h */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/**************************************************************************/ - -#ifndef CPU_PARTICLES_2D_EDITOR_PLUGIN_H -#define CPU_PARTICLES_2D_EDITOR_PLUGIN_H - -#include "editor/plugins/editor_plugin.h" -#include "scene/2d/cpu_particles_2d.h" -#include "scene/2d/physics/collision_polygon_2d.h" -#include "scene/gui/box_container.h" - -class CheckBox; -class ConfirmationDialog; -class SpinBox; -class EditorFileDialog; -class MenuButton; -class OptionButton; - -class CPUParticles2DEditorPlugin : public EditorPlugin { - GDCLASS(CPUParticles2DEditorPlugin, EditorPlugin); - - enum { - MENU_LOAD_EMISSION_MASK, - MENU_CLEAR_EMISSION_MASK, - MENU_RESTART, - MENU_CONVERT_TO_GPU_PARTICLES, - }; - - enum EmissionMode { - EMISSION_MODE_SOLID, - EMISSION_MODE_BORDER, - EMISSION_MODE_BORDER_DIRECTED - }; - - CPUParticles2D *particles = nullptr; - - EditorFileDialog *file = nullptr; - - HBoxContainer *toolbar = nullptr; - MenuButton *menu = nullptr; - - ConfirmationDialog *emission_mask = nullptr; - OptionButton *emission_mask_mode = nullptr; - CheckBox *emission_mask_centered = nullptr; - CheckBox *emission_colors = nullptr; - - String source_emission_file; - - void _file_selected(const String &p_file); - void _menu_callback(int p_idx); - void _generate_emission_mask(); - -protected: - void _notification(int p_what); - -public: - virtual String get_name() const override { return "CPUParticles2D"; } - bool has_main_screen() const override { return false; } - virtual void edit(Object *p_object) override; - virtual bool handles(Object *p_object) const override; - virtual void make_visible(bool p_visible) override; - - CPUParticles2DEditorPlugin(); - ~CPUParticles2DEditorPlugin(); -}; - -#endif // CPU_PARTICLES_2D_EDITOR_PLUGIN_H diff --git a/editor/plugins/cpu_particles_3d_editor_plugin.cpp b/editor/plugins/cpu_particles_3d_editor_plugin.cpp deleted file mode 100644 index e0ce330813..0000000000 --- a/editor/plugins/cpu_particles_3d_editor_plugin.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/**************************************************************************/ -/* cpu_particles_3d_editor_plugin.cpp */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/**************************************************************************/ - -#include "cpu_particles_3d_editor_plugin.h" - -#include "editor/editor_node.h" -#include "editor/editor_settings.h" -#include "editor/editor_undo_redo_manager.h" -#include "editor/gui/scene_tree_editor.h" -#include "editor/plugins/node_3d_editor_plugin.h" -#include "editor/scene_tree_dock.h" -#include "scene/gui/menu_button.h" - -void CPUParticles3DEditor::_node_removed(Node *p_node) { - if (p_node == node) { - node = nullptr; - hide(); - } -} - -void CPUParticles3DEditor::_notification(int p_notification) { - switch (p_notification) { - case NOTIFICATION_ENTER_TREE: { - options->set_icon(get_editor_theme_icon(SNAME("CPUParticles3D"))); - } break; - } -} - -void CPUParticles3DEditor::_menu_option(int p_option) { - switch (p_option) { - case MENU_OPTION_CREATE_EMISSION_VOLUME_FROM_NODE: { - emission_tree_dialog->popup_scenetree_dialog(); - } break; - - case MENU_OPTION_RESTART: { - node->restart(); - } break; - - case MENU_OPTION_CONVERT_TO_GPU_PARTICLES: { - GPUParticles3D *gpu_particles = memnew(GPUParticles3D); - gpu_particles->convert_from_particles(node); - gpu_particles->set_name(node->get_name()); - gpu_particles->set_transform(node->get_transform()); - gpu_particles->set_visible(node->is_visible()); - gpu_particles->set_process_mode(node->get_process_mode()); - - EditorUndoRedoManager *ur = EditorUndoRedoManager::get_singleton(); - ur->create_action(TTR("Convert to GPUParticles3D"), UndoRedo::MERGE_DISABLE, node); - SceneTreeDock::get_singleton()->replace_node(node, gpu_particles); - ur->commit_action(false); - - } break; - case MENU_OPTION_GENERATE_AABB: { - // Add one second to the default generation lifetime, since the progress is updated every second. - generate_seconds->set_value(MAX(1.0, trunc(node->get_lifetime()) + 1.0)); - - if (generate_seconds->get_value() >= 11.0 + CMP_EPSILON) { - // Only pop up the time dialog if the particle's lifetime is long enough to warrant shortening it. - generate_aabb->popup_centered(); - } else { - // Generate the visibility AABB immediately. - _generate_aabb(); - } - } break; - } -} - -void CPUParticles3DEditor::_generate_aabb() { - double time = generate_seconds->get_value(); - - double running = 0.0; - - EditorProgress ep("gen_aabb", TTR("Generating Visibility AABB (Waiting for Particle Simulation)"), int(time)); - - bool was_emitting = node->is_emitting(); - if (!was_emitting) { - node->set_emitting(true); - OS::get_singleton()->delay_usec(1000); - } - - AABB rect; - - while (running < time) { - uint64_t ticks = OS::get_singleton()->get_ticks_usec(); - ep.step(TTR("Generating..."), int(running), true); - OS::get_singleton()->delay_usec(1000); - - AABB capture = node->capture_aabb(); - if (rect == AABB()) { - rect = capture; - } else { - rect.merge_with(capture); - } - - running += (OS::get_singleton()->get_ticks_usec() - ticks) / 1000000.0; - } - - if (!was_emitting) { - node->set_emitting(false); - } - - EditorUndoRedoManager *ur = EditorUndoRedoManager::get_singleton(); - ur->create_action(TTR("Generate Visibility AABB")); - ur->add_do_method(node, "set_visibility_aabb", rect); - ur->add_undo_method(node, "set_visibility_aabb", node->get_visibility_aabb()); - ur->commit_action(); -} - -void CPUParticles3DEditor::edit(CPUParticles3D *p_particles) { - base_node = p_particles; - node = p_particles; -} - -void CPUParticles3DEditor::_generate_emission_points() { - /// hacer codigo aca - Vector<Vector3> points; - Vector<Vector3> normals; - - if (!_generate(points, normals)) { - return; - } - - if (normals.size() == 0) { - node->set_emission_shape(CPUParticles3D::EMISSION_SHAPE_POINTS); - node->set_emission_points(points); - } else { - node->set_emission_shape(CPUParticles3D::EMISSION_SHAPE_DIRECTED_POINTS); - node->set_emission_points(points); - node->set_emission_normals(normals); - } -} - -CPUParticles3DEditor::CPUParticles3DEditor() { - particles_editor_hb = memnew(HBoxContainer); - Node3DEditor::get_singleton()->add_control_to_menu_panel(particles_editor_hb); - options = memnew(MenuButton); - options->set_switch_on_hover(true); - particles_editor_hb->add_child(options); - particles_editor_hb->hide(); - - options->set_text(TTR("CPUParticles3D")); - options->get_popup()->add_shortcut(ED_GET_SHORTCUT("particles/restart_emission"), MENU_OPTION_RESTART); - options->get_popup()->add_item(TTR("Generate AABB"), MENU_OPTION_GENERATE_AABB); - options->get_popup()->add_item(TTR("Create Emission Points From Node"), MENU_OPTION_CREATE_EMISSION_VOLUME_FROM_NODE); - options->get_popup()->add_item(TTR("Convert to GPUParticles3D"), MENU_OPTION_CONVERT_TO_GPU_PARTICLES); - options->get_popup()->connect(SceneStringName(id_pressed), callable_mp(this, &CPUParticles3DEditor::_menu_option)); - - generate_aabb = memnew(ConfirmationDialog); - generate_aabb->set_title(TTR("Generate Visibility AABB")); - VBoxContainer *genvb = memnew(VBoxContainer); - generate_aabb->add_child(genvb); - generate_seconds = memnew(SpinBox); - genvb->add_margin_child(TTR("Generation Time (sec):"), generate_seconds); - generate_seconds->set_min(0.1); - generate_seconds->set_max(25); - generate_seconds->set_value(2); - - add_child(generate_aabb); - - generate_aabb->connect(SceneStringName(confirmed), callable_mp(this, &CPUParticles3DEditor::_generate_aabb)); -} - -void CPUParticles3DEditorPlugin::edit(Object *p_object) { - particles_editor->edit(Object::cast_to<CPUParticles3D>(p_object)); -} - -bool CPUParticles3DEditorPlugin::handles(Object *p_object) const { - return p_object->is_class("CPUParticles3D"); -} - -void CPUParticles3DEditorPlugin::make_visible(bool p_visible) { - if (p_visible) { - particles_editor->show(); - particles_editor->particles_editor_hb->show(); - } else { - particles_editor->particles_editor_hb->hide(); - particles_editor->hide(); - particles_editor->edit(nullptr); - } -} - -CPUParticles3DEditorPlugin::CPUParticles3DEditorPlugin() { - particles_editor = memnew(CPUParticles3DEditor); - EditorNode::get_singleton()->get_gui_base()->add_child(particles_editor); - - particles_editor->hide(); -} - -CPUParticles3DEditorPlugin::~CPUParticles3DEditorPlugin() { -} diff --git a/editor/plugins/editor_plugin.cpp b/editor/plugins/editor_plugin.cpp index c8426bce73..29e4adb45a 100644 --- a/editor/plugins/editor_plugin.cpp +++ b/editor/plugins/editor_plugin.cpp @@ -153,7 +153,6 @@ void EditorPlugin::add_control_to_container(CustomControlContainer p_location, C } break; case CONTAINER_PROJECT_SETTING_TAB_RIGHT: { ProjectSettingsEditor::get_singleton()->get_tabs()->add_child(p_control); - ProjectSettingsEditor::get_singleton()->get_tabs()->move_child(p_control, 1); } break; } diff --git a/editor/plugins/editor_preview_plugins.cpp b/editor/plugins/editor_preview_plugins.cpp index 9a53f07a3f..3618c0e6d3 100644 --- a/editor/plugins/editor_preview_plugins.cpp +++ b/editor/plugins/editor_preview_plugins.cpp @@ -32,6 +32,7 @@ #include "core/config/project_settings.h" #include "core/io/file_access_memory.h" +#include "core/io/image.h" #include "core/io/resource_loader.h" #include "core/object/script_language.h" #include "core/os/os.h" diff --git a/editor/plugins/gizmos/lightmap_gi_gizmo_plugin.cpp b/editor/plugins/gizmos/lightmap_gi_gizmo_plugin.cpp index 748f770d4d..007cc0636a 100644 --- a/editor/plugins/gizmos/lightmap_gi_gizmo_plugin.cpp +++ b/editor/plugins/gizmos/lightmap_gi_gizmo_plugin.cpp @@ -44,7 +44,10 @@ LightmapGIGizmoPlugin::LightmapGIGizmoPlugin() { Ref<StandardMaterial3D> mat = memnew(StandardMaterial3D); mat->set_shading_mode(StandardMaterial3D::SHADING_MODE_UNSHADED); - mat->set_cull_mode(StandardMaterial3D::CULL_DISABLED); + // Fade out probes when camera gets too close to them. + mat->set_distance_fade(StandardMaterial3D::DISTANCE_FADE_PIXEL_DITHER); + mat->set_distance_fade_min_distance(0.5); + mat->set_distance_fade_max_distance(1.5); mat->set_flag(StandardMaterial3D::FLAG_ALBEDO_FROM_VERTEX_COLOR, true); mat->set_flag(StandardMaterial3D::FLAG_SRGB_VERTEX_COLOR, false); mat->set_flag(StandardMaterial3D::FLAG_DISABLE_FOG, true); diff --git a/editor/plugins/gpu_particles_2d_editor_plugin.cpp b/editor/plugins/gpu_particles_2d_editor_plugin.cpp deleted file mode 100644 index 1b68b55ff6..0000000000 --- a/editor/plugins/gpu_particles_2d_editor_plugin.cpp +++ /dev/null @@ -1,427 +0,0 @@ -/**************************************************************************/ -/* gpu_particles_2d_editor_plugin.cpp */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/**************************************************************************/ - -#include "gpu_particles_2d_editor_plugin.h" - -#include "canvas_item_editor_plugin.h" -#include "core/io/image_loader.h" -#include "editor/editor_node.h" -#include "editor/editor_settings.h" -#include "editor/editor_undo_redo_manager.h" -#include "editor/gui/editor_file_dialog.h" -#include "editor/scene_tree_dock.h" -#include "scene/2d/cpu_particles_2d.h" -#include "scene/gui/menu_button.h" -#include "scene/gui/separator.h" -#include "scene/resources/image_texture.h" -#include "scene/resources/particle_process_material.h" - -void GPUParticles2DEditorPlugin::edit(Object *p_object) { - particles = Object::cast_to<GPUParticles2D>(p_object); -} - -bool GPUParticles2DEditorPlugin::handles(Object *p_object) const { - return p_object->is_class("GPUParticles2D"); -} - -void GPUParticles2DEditorPlugin::make_visible(bool p_visible) { - if (p_visible) { - toolbar->show(); - } else { - toolbar->hide(); - } -} - -void GPUParticles2DEditorPlugin::_file_selected(const String &p_file) { - source_emission_file = p_file; - emission_mask->popup_centered(); -} - -void GPUParticles2DEditorPlugin::_selection_changed() { - List<Node *> selected_nodes = EditorNode::get_singleton()->get_editor_selection()->get_selected_node_list(); - - if (selected_particles.is_empty() && selected_nodes.is_empty()) { - return; - } - - for (GPUParticles2D *SP : selected_particles) { - SP->set_show_visibility_rect(false); - } - selected_particles.clear(); - - for (Node *P : selected_nodes) { - GPUParticles2D *selected_particle = Object::cast_to<GPUParticles2D>(P); - if (selected_particle != nullptr) { - selected_particle->set_show_visibility_rect(true); - selected_particles.push_back(selected_particle); - } - } -} - -void GPUParticles2DEditorPlugin::_menu_callback(int p_idx) { - switch (p_idx) { - case MENU_GENERATE_VISIBILITY_RECT: { - // Add one second to the default generation lifetime, since the progress is updated every second. - generate_seconds->set_value(MAX(1.0, trunc(particles->get_lifetime()) + 1.0)); - - if (generate_seconds->get_value() >= 11.0 + CMP_EPSILON) { - // Only pop up the time dialog if the particle's lifetime is long enough to warrant shortening it. - generate_visibility_rect->popup_centered(); - } else { - // Generate the visibility rect immediately. - _generate_visibility_rect(); - } - } break; - case MENU_LOAD_EMISSION_MASK: { - file->popup_file_dialog(); - - } break; - case MENU_CLEAR_EMISSION_MASK: { - emission_mask->popup_centered(); - } break; - case MENU_OPTION_CONVERT_TO_CPU_PARTICLES: { - CPUParticles2D *cpu_particles = memnew(CPUParticles2D); - cpu_particles->convert_from_particles(particles); - cpu_particles->set_name(particles->get_name()); - cpu_particles->set_transform(particles->get_transform()); - cpu_particles->set_visible(particles->is_visible()); - cpu_particles->set_process_mode(particles->get_process_mode()); - cpu_particles->set_z_index(particles->get_z_index()); - - EditorUndoRedoManager *ur = EditorUndoRedoManager::get_singleton(); - ur->create_action(TTR("Convert to CPUParticles2D"), UndoRedo::MERGE_DISABLE, particles); - SceneTreeDock::get_singleton()->replace_node(particles, cpu_particles); - ur->commit_action(false); - - } break; - case MENU_RESTART: { - particles->restart(); - } - } -} - -void GPUParticles2DEditorPlugin::_generate_visibility_rect() { - double time = generate_seconds->get_value(); - - float running = 0.0; - - EditorProgress ep("gen_vrect", TTR("Generating Visibility Rect (Waiting for Particle Simulation)"), int(time)); - - bool was_emitting = particles->is_emitting(); - if (!was_emitting) { - particles->set_emitting(true); - OS::get_singleton()->delay_usec(1000); - } - - Rect2 rect; - while (running < time) { - uint64_t ticks = OS::get_singleton()->get_ticks_usec(); - ep.step(TTR("Generating..."), int(running), true); - OS::get_singleton()->delay_usec(1000); - - Rect2 capture = particles->capture_rect(); - if (rect == Rect2()) { - rect = capture; - } else { - rect = rect.merge(capture); - } - - running += (OS::get_singleton()->get_ticks_usec() - ticks) / 1000000.0; - } - - if (!was_emitting) { - particles->set_emitting(false); - } - - EditorUndoRedoManager *undo_redo = EditorUndoRedoManager::get_singleton(); - undo_redo->create_action(TTR("Generate Visibility Rect")); - undo_redo->add_do_method(particles, "set_visibility_rect", rect); - undo_redo->add_undo_method(particles, "set_visibility_rect", particles->get_visibility_rect()); - undo_redo->commit_action(); -} - -void GPUParticles2DEditorPlugin::_generate_emission_mask() { - Ref<ParticleProcessMaterial> pm = particles->get_process_material(); - if (!pm.is_valid()) { - EditorNode::get_singleton()->show_warning(TTR("Can only set point into a ParticleProcessMaterial process material")); - return; - } - - Ref<Image> img; - img.instantiate(); - Error err = ImageLoader::load_image(source_emission_file, img); - ERR_FAIL_COND_MSG(err != OK, "Error loading image '" + source_emission_file + "'."); - - if (img->is_compressed()) { - img->decompress(); - } - img->convert(Image::FORMAT_RGBA8); - ERR_FAIL_COND(img->get_format() != Image::FORMAT_RGBA8); - Size2i s = img->get_size(); - ERR_FAIL_COND(s.width == 0 || s.height == 0); - - Vector<Point2> valid_positions; - Vector<Point2> valid_normals; - Vector<uint8_t> valid_colors; - - valid_positions.resize(s.width * s.height); - - EmissionMode emode = (EmissionMode)emission_mask_mode->get_selected(); - - if (emode == EMISSION_MODE_BORDER_DIRECTED) { - valid_normals.resize(s.width * s.height); - } - - bool capture_colors = emission_colors->is_pressed(); - - if (capture_colors) { - valid_colors.resize(s.width * s.height * 4); - } - - int vpc = 0; - - { - Vector<uint8_t> img_data = img->get_data(); - const uint8_t *r = img_data.ptr(); - - for (int i = 0; i < s.width; i++) { - for (int j = 0; j < s.height; j++) { - uint8_t a = r[(j * s.width + i) * 4 + 3]; - - if (a > 128) { - if (emode == EMISSION_MODE_SOLID) { - if (capture_colors) { - valid_colors.write[vpc * 4 + 0] = r[(j * s.width + i) * 4 + 0]; - valid_colors.write[vpc * 4 + 1] = r[(j * s.width + i) * 4 + 1]; - valid_colors.write[vpc * 4 + 2] = r[(j * s.width + i) * 4 + 2]; - valid_colors.write[vpc * 4 + 3] = r[(j * s.width + i) * 4 + 3]; - } - valid_positions.write[vpc++] = Point2(i, j); - - } else { - bool on_border = false; - for (int x = i - 1; x <= i + 1; x++) { - for (int y = j - 1; y <= j + 1; y++) { - if (x < 0 || y < 0 || x >= s.width || y >= s.height || r[(y * s.width + x) * 4 + 3] <= 128) { - on_border = true; - break; - } - } - - if (on_border) { - break; - } - } - - if (on_border) { - valid_positions.write[vpc] = Point2(i, j); - - if (emode == EMISSION_MODE_BORDER_DIRECTED) { - Vector2 normal; - for (int x = i - 2; x <= i + 2; x++) { - for (int y = j - 2; y <= j + 2; y++) { - if (x == i && y == j) { - continue; - } - - if (x < 0 || y < 0 || x >= s.width || y >= s.height || r[(y * s.width + x) * 4 + 3] <= 128) { - normal += Vector2(x - i, y - j).normalized(); - } - } - } - - normal.normalize(); - valid_normals.write[vpc] = normal; - } - - if (capture_colors) { - valid_colors.write[vpc * 4 + 0] = r[(j * s.width + i) * 4 + 0]; - valid_colors.write[vpc * 4 + 1] = r[(j * s.width + i) * 4 + 1]; - valid_colors.write[vpc * 4 + 2] = r[(j * s.width + i) * 4 + 2]; - valid_colors.write[vpc * 4 + 3] = r[(j * s.width + i) * 4 + 3]; - } - - vpc++; - } - } - } - } - } - } - - valid_positions.resize(vpc); - if (valid_normals.size()) { - valid_normals.resize(vpc); - } - - ERR_FAIL_COND_MSG(valid_positions.is_empty(), "No pixels with transparency > 128 in image..."); - - Vector<uint8_t> texdata; - - int w = 2048; - int h = (vpc / 2048) + 1; - - texdata.resize(w * h * 2 * sizeof(float)); - - { - Vector2 offset; - if (emission_mask_centered->is_pressed()) { - offset = Vector2(-s.width * 0.5, -s.height * 0.5); - } - - uint8_t *tw = texdata.ptrw(); - float *twf = reinterpret_cast<float *>(tw); - for (int i = 0; i < vpc; i++) { - twf[i * 2 + 0] = valid_positions[i].x + offset.x; - twf[i * 2 + 1] = valid_positions[i].y + offset.y; - } - } - - img.instantiate(); - img->set_data(w, h, false, Image::FORMAT_RGF, texdata); - pm->set_emission_point_texture(ImageTexture::create_from_image(img)); - pm->set_emission_point_count(vpc); - - if (capture_colors) { - Vector<uint8_t> colordata; - colordata.resize(w * h * 4); //use RG texture - - { - uint8_t *tw = colordata.ptrw(); - for (int i = 0; i < vpc * 4; i++) { - tw[i] = valid_colors[i]; - } - } - - img.instantiate(); - img->set_data(w, h, false, Image::FORMAT_RGBA8, colordata); - pm->set_emission_color_texture(ImageTexture::create_from_image(img)); - } - - if (valid_normals.size()) { - pm->set_emission_shape(ParticleProcessMaterial::EMISSION_SHAPE_DIRECTED_POINTS); - - Vector<uint8_t> normdata; - normdata.resize(w * h * 2 * sizeof(float)); //use RG texture - - { - uint8_t *tw = normdata.ptrw(); - float *twf = reinterpret_cast<float *>(tw); - for (int i = 0; i < vpc; i++) { - twf[i * 2 + 0] = valid_normals[i].x; - twf[i * 2 + 1] = valid_normals[i].y; - } - } - - img.instantiate(); - img->set_data(w, h, false, Image::FORMAT_RGF, normdata); - pm->set_emission_normal_texture(ImageTexture::create_from_image(img)); - - } else { - pm->set_emission_shape(ParticleProcessMaterial::EMISSION_SHAPE_POINTS); - } -} - -void GPUParticles2DEditorPlugin::_notification(int p_what) { - switch (p_what) { - case NOTIFICATION_ENTER_TREE: { - menu->get_popup()->connect(SceneStringName(id_pressed), callable_mp(this, &GPUParticles2DEditorPlugin::_menu_callback)); - menu->set_icon(menu->get_editor_theme_icon(SNAME("GPUParticles2D"))); - file->connect("file_selected", callable_mp(this, &GPUParticles2DEditorPlugin::_file_selected)); - EditorNode::get_singleton()->get_editor_selection()->connect("selection_changed", callable_mp(this, &GPUParticles2DEditorPlugin::_selection_changed)); - } break; - } -} - -GPUParticles2DEditorPlugin::GPUParticles2DEditorPlugin() { - particles = nullptr; - - toolbar = memnew(HBoxContainer); - add_control_to_container(CONTAINER_CANVAS_EDITOR_MENU, toolbar); - toolbar->hide(); - - menu = memnew(MenuButton); - menu->get_popup()->add_shortcut(ED_GET_SHORTCUT("particles/restart_emission"), MENU_RESTART); - menu->get_popup()->add_item(TTR("Generate Visibility Rect"), MENU_GENERATE_VISIBILITY_RECT); - menu->get_popup()->add_item(TTR("Load Emission Mask"), MENU_LOAD_EMISSION_MASK); - // menu->get_popup()->add_item(TTR("Clear Emission Mask"), MENU_CLEAR_EMISSION_MASK); - menu->get_popup()->add_item(TTR("Convert to CPUParticles2D"), MENU_OPTION_CONVERT_TO_CPU_PARTICLES); - menu->set_text(TTR("GPUParticles2D")); - menu->set_switch_on_hover(true); - toolbar->add_child(menu); - - file = memnew(EditorFileDialog); - List<String> ext; - ImageLoader::get_recognized_extensions(&ext); - for (const String &E : ext) { - file->add_filter("*." + E, E.to_upper()); - } - file->set_file_mode(EditorFileDialog::FILE_MODE_OPEN_FILE); - toolbar->add_child(file); - - generate_visibility_rect = memnew(ConfirmationDialog); - generate_visibility_rect->set_title(TTR("Generate Visibility Rect")); - VBoxContainer *genvb = memnew(VBoxContainer); - generate_visibility_rect->add_child(genvb); - generate_seconds = memnew(SpinBox); - genvb->add_margin_child(TTR("Generation Time (sec):"), generate_seconds); - generate_seconds->set_min(0.1); - generate_seconds->set_max(25); - generate_seconds->set_value(2); - - toolbar->add_child(generate_visibility_rect); - - generate_visibility_rect->connect(SceneStringName(confirmed), callable_mp(this, &GPUParticles2DEditorPlugin::_generate_visibility_rect)); - - emission_mask = memnew(ConfirmationDialog); - emission_mask->set_title(TTR("Load Emission Mask")); - VBoxContainer *emvb = memnew(VBoxContainer); - emission_mask->add_child(emvb); - emission_mask_mode = memnew(OptionButton); - emvb->add_margin_child(TTR("Emission Mask"), emission_mask_mode); - emission_mask_mode->add_item(TTR("Solid Pixels"), EMISSION_MODE_SOLID); - emission_mask_mode->add_item(TTR("Border Pixels"), EMISSION_MODE_BORDER); - emission_mask_mode->add_item(TTR("Directed Border Pixels"), EMISSION_MODE_BORDER_DIRECTED); - VBoxContainer *optionsvb = memnew(VBoxContainer); - emvb->add_margin_child(TTR("Options"), optionsvb); - emission_mask_centered = memnew(CheckBox); - emission_mask_centered->set_text(TTR("Centered")); - optionsvb->add_child(emission_mask_centered); - emission_colors = memnew(CheckBox); - emission_colors->set_text(TTR("Capture Colors from Pixel")); - optionsvb->add_child(emission_colors); - - toolbar->add_child(emission_mask); - - emission_mask->connect(SceneStringName(confirmed), callable_mp(this, &GPUParticles2DEditorPlugin::_generate_emission_mask)); -} - -GPUParticles2DEditorPlugin::~GPUParticles2DEditorPlugin() { -} diff --git a/editor/plugins/gpu_particles_2d_editor_plugin.h b/editor/plugins/gpu_particles_2d_editor_plugin.h deleted file mode 100644 index 658e4d87e5..0000000000 --- a/editor/plugins/gpu_particles_2d_editor_plugin.h +++ /dev/null @@ -1,101 +0,0 @@ -/**************************************************************************/ -/* gpu_particles_2d_editor_plugin.h */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/**************************************************************************/ - -#ifndef GPU_PARTICLES_2D_EDITOR_PLUGIN_H -#define GPU_PARTICLES_2D_EDITOR_PLUGIN_H - -#include "editor/plugins/editor_plugin.h" -#include "scene/2d/gpu_particles_2d.h" -#include "scene/2d/physics/collision_polygon_2d.h" -#include "scene/gui/box_container.h" -#include "scene/gui/spin_box.h" - -class CheckBox; -class ConfirmationDialog; -class EditorFileDialog; -class MenuButton; -class OptionButton; - -class GPUParticles2DEditorPlugin : public EditorPlugin { - GDCLASS(GPUParticles2DEditorPlugin, EditorPlugin); - - enum { - MENU_GENERATE_VISIBILITY_RECT, - MENU_LOAD_EMISSION_MASK, - MENU_CLEAR_EMISSION_MASK, - MENU_OPTION_CONVERT_TO_CPU_PARTICLES, - MENU_RESTART - }; - - enum EmissionMode { - EMISSION_MODE_SOLID, - EMISSION_MODE_BORDER, - EMISSION_MODE_BORDER_DIRECTED - }; - - GPUParticles2D *particles = nullptr; - List<GPUParticles2D *> selected_particles; - - EditorFileDialog *file = nullptr; - - HBoxContainer *toolbar = nullptr; - MenuButton *menu = nullptr; - - ConfirmationDialog *generate_visibility_rect = nullptr; - SpinBox *generate_seconds = nullptr; - - ConfirmationDialog *emission_mask = nullptr; - OptionButton *emission_mask_mode = nullptr; - CheckBox *emission_mask_centered = nullptr; - CheckBox *emission_colors = nullptr; - - String source_emission_file; - - void _file_selected(const String &p_file); - void _menu_callback(int p_idx); - void _generate_visibility_rect(); - void _generate_emission_mask(); - void _selection_changed(); - -protected: - void _notification(int p_what); - -public: - virtual String get_name() const override { return "GPUParticles2D"; } - bool has_main_screen() const override { return false; } - virtual void edit(Object *p_object) override; - virtual bool handles(Object *p_object) const override; - virtual void make_visible(bool p_visible) override; - - GPUParticles2DEditorPlugin(); - ~GPUParticles2DEditorPlugin(); -}; - -#endif // GPU_PARTICLES_2D_EDITOR_PLUGIN_H diff --git a/editor/plugins/gpu_particles_3d_editor_plugin.cpp b/editor/plugins/gpu_particles_3d_editor_plugin.cpp deleted file mode 100644 index e711f44ccf..0000000000 --- a/editor/plugins/gpu_particles_3d_editor_plugin.cpp +++ /dev/null @@ -1,461 +0,0 @@ -/**************************************************************************/ -/* gpu_particles_3d_editor_plugin.cpp */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/**************************************************************************/ - -#include "gpu_particles_3d_editor_plugin.h" - -#include "core/io/resource_loader.h" -#include "editor/editor_node.h" -#include "editor/editor_settings.h" -#include "editor/editor_undo_redo_manager.h" -#include "editor/plugins/node_3d_editor_plugin.h" -#include "editor/scene_tree_dock.h" -#include "scene/3d/cpu_particles_3d.h" -#include "scene/3d/mesh_instance_3d.h" -#include "scene/gui/menu_button.h" -#include "scene/resources/image_texture.h" -#include "scene/resources/particle_process_material.h" - -bool GPUParticles3DEditorBase::_generate(Vector<Vector3> &points, Vector<Vector3> &normals) { - bool use_normals = emission_fill->get_selected() == 1; - - if (emission_fill->get_selected() < 2) { - float area_accum = 0; - RBMap<float, int> triangle_area_map; - - for (int i = 0; i < geometry.size(); i++) { - float area = geometry[i].get_area(); - if (area < CMP_EPSILON) { - continue; - } - triangle_area_map[area_accum] = i; - area_accum += area; - } - - if (!triangle_area_map.size() || area_accum == 0) { - EditorNode::get_singleton()->show_warning(TTR("The geometry's faces don't contain any area.")); - return false; - } - - int emissor_count = emission_amount->get_value(); - - for (int i = 0; i < emissor_count; i++) { - float areapos = Math::random(0.0f, area_accum); - - RBMap<float, int>::Iterator E = triangle_area_map.find_closest(areapos); - ERR_FAIL_COND_V(!E, false); - int index = E->value; - ERR_FAIL_INDEX_V(index, geometry.size(), false); - - // ok FINALLY get face - Face3 face = geometry[index]; - //now compute some position inside the face... - - Vector3 pos = face.get_random_point_inside(); - - points.push_back(pos); - - if (use_normals) { - Vector3 normal = face.get_plane().normal; - normals.push_back(normal); - } - } - } else { - int gcount = geometry.size(); - - if (gcount == 0) { - EditorNode::get_singleton()->show_warning(TTR("The geometry doesn't contain any faces.")); - return false; - } - - const Face3 *r = geometry.ptr(); - - AABB aabb; - - for (int i = 0; i < gcount; i++) { - for (int j = 0; j < 3; j++) { - if (i == 0 && j == 0) { - aabb.position = r[i].vertex[j]; - } else { - aabb.expand_to(r[i].vertex[j]); - } - } - } - - int emissor_count = emission_amount->get_value(); - - for (int i = 0; i < emissor_count; i++) { - int attempts = 5; - - for (int j = 0; j < attempts; j++) { - Vector3 dir; - dir[Math::rand() % 3] = 1.0; - Vector3 ofs = (Vector3(1, 1, 1) - dir) * Vector3(Math::randf(), Math::randf(), Math::randf()) * aabb.size + aabb.position; - - Vector3 ofsv = ofs + aabb.size * dir; - - //space it a little - ofs -= dir; - ofsv += dir; - - float max = -1e7, min = 1e7; - - for (int k = 0; k < gcount; k++) { - const Face3 &f3 = r[k]; - - Vector3 res; - if (f3.intersects_segment(ofs, ofsv, &res)) { - res -= ofs; - float d = dir.dot(res); - - if (d < min) { - min = d; - } - if (d > max) { - max = d; - } - } - } - - if (max < min) { - continue; //lost attempt - } - - float val = min + (max - min) * Math::randf(); - - Vector3 point = ofs + dir * val; - - points.push_back(point); - break; - } - } - } - - return true; -} - -void GPUParticles3DEditorBase::_node_selected(const NodePath &p_path) { - Node *sel = get_node(p_path); - if (!sel) { - return; - } - - if (!sel->is_class("Node3D")) { - EditorNode::get_singleton()->show_warning(vformat(TTR("\"%s\" doesn't inherit from Node3D."), sel->get_name())); - return; - } - - MeshInstance3D *mi = Object::cast_to<MeshInstance3D>(sel); - if (!mi || mi->get_mesh().is_null()) { - EditorNode::get_singleton()->show_warning(vformat(TTR("\"%s\" doesn't contain geometry."), sel->get_name())); - return; - } - - geometry = mi->get_mesh()->get_faces(); - - if (geometry.size() == 0) { - EditorNode::get_singleton()->show_warning(vformat(TTR("\"%s\" doesn't contain face geometry."), sel->get_name())); - return; - } - - Transform3D geom_xform = base_node->get_global_transform().affine_inverse() * mi->get_global_transform(); - - int gc = geometry.size(); - Face3 *w = geometry.ptrw(); - - for (int i = 0; i < gc; i++) { - for (int j = 0; j < 3; j++) { - w[i].vertex[j] = geom_xform.xform(w[i].vertex[j]); - } - } - - emission_dialog->popup_centered(Size2(300, 130)); -} - -GPUParticles3DEditorBase::GPUParticles3DEditorBase() { - emission_dialog = memnew(ConfirmationDialog); - emission_dialog->set_title(TTR("Create Emitter")); - add_child(emission_dialog); - VBoxContainer *emd_vb = memnew(VBoxContainer); - emission_dialog->add_child(emd_vb); - - emission_amount = memnew(SpinBox); - emission_amount->set_min(1); - emission_amount->set_max(100000); - emission_amount->set_value(512); - emd_vb->add_margin_child(TTR("Emission Points:"), emission_amount); - - emission_fill = memnew(OptionButton); - emission_fill->add_item(TTR("Surface Points")); - emission_fill->add_item(TTR("Surface Points+Normal (Directed)")); - emission_fill->add_item(TTR("Volume")); - emd_vb->add_margin_child(TTR("Emission Source:"), emission_fill); - - emission_dialog->set_ok_button_text(TTR("Create")); - emission_dialog->connect(SceneStringName(confirmed), callable_mp(this, &GPUParticles3DEditorBase::_generate_emission_points)); - - emission_tree_dialog = memnew(SceneTreeDialog); - Vector<StringName> valid_types; - valid_types.push_back("MeshInstance3D"); - emission_tree_dialog->set_valid_types(valid_types); - add_child(emission_tree_dialog); - emission_tree_dialog->connect("selected", callable_mp(this, &GPUParticles3DEditorBase::_node_selected)); -} - -void GPUParticles3DEditor::_node_removed(Node *p_node) { - if (p_node == node) { - node = nullptr; - hide(); - } -} - -void GPUParticles3DEditor::_notification(int p_notification) { - switch (p_notification) { - case NOTIFICATION_ENTER_TREE: { - options->set_icon(options->get_popup()->get_editor_theme_icon(SNAME("GPUParticles3D"))); - get_tree()->connect("node_removed", callable_mp(this, &GPUParticles3DEditor::_node_removed)); - } break; - } -} - -void GPUParticles3DEditor::_menu_option(int p_option) { - switch (p_option) { - case MENU_OPTION_GENERATE_AABB: { - // Add one second to the default generation lifetime, since the progress is updated every second. - generate_seconds->set_value(MAX(1.0, trunc(node->get_lifetime()) + 1.0)); - - if (generate_seconds->get_value() >= 11.0 + CMP_EPSILON) { - // Only pop up the time dialog if the particle's lifetime is long enough to warrant shortening it. - generate_aabb->popup_centered(); - } else { - // Generate the visibility AABB immediately. - _generate_aabb(); - } - } break; - case MENU_OPTION_CREATE_EMISSION_VOLUME_FROM_NODE: { - Ref<ParticleProcessMaterial> mat = node->get_process_material(); - if (mat.is_null()) { - EditorNode::get_singleton()->show_warning(TTR("A processor material of type 'ParticleProcessMaterial' is required.")); - return; - } - - emission_tree_dialog->popup_scenetree_dialog(); - - } break; - case MENU_OPTION_CONVERT_TO_CPU_PARTICLES: { - CPUParticles3D *cpu_particles = memnew(CPUParticles3D); - cpu_particles->convert_from_particles(node); - cpu_particles->set_name(node->get_name()); - cpu_particles->set_transform(node->get_transform()); - cpu_particles->set_visible(node->is_visible()); - cpu_particles->set_process_mode(node->get_process_mode()); - - EditorUndoRedoManager *ur = EditorUndoRedoManager::get_singleton(); - ur->create_action(TTR("Convert to CPUParticles3D"), UndoRedo::MERGE_DISABLE, node); - SceneTreeDock::get_singleton()->replace_node(node, cpu_particles); - ur->commit_action(false); - - } break; - case MENU_OPTION_RESTART: { - node->restart(); - - } break; - } -} - -void GPUParticles3DEditor::_generate_aabb() { - double time = generate_seconds->get_value(); - - double running = 0.0; - - EditorProgress ep("gen_aabb", TTR("Generating Visibility AABB (Waiting for Particle Simulation)"), int(time)); - - bool was_emitting = node->is_emitting(); - if (!was_emitting) { - node->set_emitting(true); - OS::get_singleton()->delay_usec(1000); - } - - AABB rect; - - while (running < time) { - uint64_t ticks = OS::get_singleton()->get_ticks_usec(); - ep.step(TTR("Generating..."), int(running), true); - OS::get_singleton()->delay_usec(1000); - - AABB capture = node->capture_aabb(); - if (rect == AABB()) { - rect = capture; - } else { - rect.merge_with(capture); - } - - running += (OS::get_singleton()->get_ticks_usec() - ticks) / 1000000.0; - } - - if (!was_emitting) { - node->set_emitting(false); - } - - EditorUndoRedoManager *ur = EditorUndoRedoManager::get_singleton(); - ur->create_action(TTR("Generate Visibility AABB")); - ur->add_do_method(node, "set_visibility_aabb", rect); - ur->add_undo_method(node, "set_visibility_aabb", node->get_visibility_aabb()); - ur->commit_action(); -} - -void GPUParticles3DEditor::edit(GPUParticles3D *p_particles) { - base_node = p_particles; - node = p_particles; -} - -void GPUParticles3DEditor::_generate_emission_points() { - /// hacer codigo aca - Vector<Vector3> points; - Vector<Vector3> normals; - - if (!_generate(points, normals)) { - return; - } - - int point_count = points.size(); - - int w = 2048; - int h = (point_count / 2048) + 1; - - Vector<uint8_t> point_img; - point_img.resize(w * h * 3 * sizeof(float)); - - { - uint8_t *iw = point_img.ptrw(); - memset(iw, 0, w * h * 3 * sizeof(float)); - const Vector3 *r = points.ptr(); - float *wf = reinterpret_cast<float *>(iw); - for (int i = 0; i < point_count; i++) { - wf[i * 3 + 0] = r[i].x; - wf[i * 3 + 1] = r[i].y; - wf[i * 3 + 2] = r[i].z; - } - } - - Ref<Image> image = memnew(Image(w, h, false, Image::FORMAT_RGBF, point_img)); - Ref<ImageTexture> tex = ImageTexture::create_from_image(image); - - Ref<ParticleProcessMaterial> mat = node->get_process_material(); - ERR_FAIL_COND(mat.is_null()); - - if (normals.size() > 0) { - mat->set_emission_shape(ParticleProcessMaterial::EMISSION_SHAPE_DIRECTED_POINTS); - mat->set_emission_point_count(point_count); - mat->set_emission_point_texture(tex); - - Vector<uint8_t> point_img2; - point_img2.resize(w * h * 3 * sizeof(float)); - - { - uint8_t *iw = point_img2.ptrw(); - memset(iw, 0, w * h * 3 * sizeof(float)); - const Vector3 *r = normals.ptr(); - float *wf = reinterpret_cast<float *>(iw); - for (int i = 0; i < point_count; i++) { - wf[i * 3 + 0] = r[i].x; - wf[i * 3 + 1] = r[i].y; - wf[i * 3 + 2] = r[i].z; - } - } - - Ref<Image> image2 = memnew(Image(w, h, false, Image::FORMAT_RGBF, point_img2)); - mat->set_emission_normal_texture(ImageTexture::create_from_image(image2)); - } else { - mat->set_emission_shape(ParticleProcessMaterial::EMISSION_SHAPE_POINTS); - mat->set_emission_point_count(point_count); - mat->set_emission_point_texture(tex); - } -} - -GPUParticles3DEditor::GPUParticles3DEditor() { - node = nullptr; - particles_editor_hb = memnew(HBoxContainer); - Node3DEditor::get_singleton()->add_control_to_menu_panel(particles_editor_hb); - options = memnew(MenuButton); - options->set_switch_on_hover(true); - particles_editor_hb->add_child(options); - particles_editor_hb->hide(); - - options->set_text(TTR("GPUParticles3D")); - options->get_popup()->add_shortcut(ED_GET_SHORTCUT("particles/restart_emission"), MENU_OPTION_RESTART); - options->get_popup()->add_item(TTR("Generate AABB"), MENU_OPTION_GENERATE_AABB); - options->get_popup()->add_item(TTR("Create Emission Points From Node"), MENU_OPTION_CREATE_EMISSION_VOLUME_FROM_NODE); - options->get_popup()->add_item(TTR("Convert to CPUParticles3D"), MENU_OPTION_CONVERT_TO_CPU_PARTICLES); - - options->get_popup()->connect(SceneStringName(id_pressed), callable_mp(this, &GPUParticles3DEditor::_menu_option)); - - generate_aabb = memnew(ConfirmationDialog); - generate_aabb->set_title(TTR("Generate Visibility AABB")); - VBoxContainer *genvb = memnew(VBoxContainer); - generate_aabb->add_child(genvb); - generate_seconds = memnew(SpinBox); - genvb->add_margin_child(TTR("Generation Time (sec):"), generate_seconds); - generate_seconds->set_min(0.1); - generate_seconds->set_max(25); - generate_seconds->set_value(2); - - add_child(generate_aabb); - - generate_aabb->connect(SceneStringName(confirmed), callable_mp(this, &GPUParticles3DEditor::_generate_aabb)); -} - -void GPUParticles3DEditorPlugin::edit(Object *p_object) { - particles_editor->edit(Object::cast_to<GPUParticles3D>(p_object)); -} - -bool GPUParticles3DEditorPlugin::handles(Object *p_object) const { - return p_object->is_class("GPUParticles3D"); -} - -void GPUParticles3DEditorPlugin::make_visible(bool p_visible) { - if (p_visible) { - particles_editor->show(); - particles_editor->particles_editor_hb->show(); - } else { - particles_editor->particles_editor_hb->hide(); - particles_editor->hide(); - particles_editor->edit(nullptr); - } -} - -GPUParticles3DEditorPlugin::GPUParticles3DEditorPlugin() { - particles_editor = memnew(GPUParticles3DEditor); - EditorNode::get_singleton()->get_gui_base()->add_child(particles_editor); - - particles_editor->hide(); -} - -GPUParticles3DEditorPlugin::~GPUParticles3DEditorPlugin() { -} diff --git a/editor/plugins/gpu_particles_3d_editor_plugin.h b/editor/plugins/gpu_particles_3d_editor_plugin.h deleted file mode 100644 index 1295836b5f..0000000000 --- a/editor/plugins/gpu_particles_3d_editor_plugin.h +++ /dev/null @@ -1,118 +0,0 @@ -/**************************************************************************/ -/* gpu_particles_3d_editor_plugin.h */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/**************************************************************************/ - -#ifndef GPU_PARTICLES_3D_EDITOR_PLUGIN_H -#define GPU_PARTICLES_3D_EDITOR_PLUGIN_H - -#include "editor/plugins/editor_plugin.h" -#include "scene/3d/gpu_particles_3d.h" -#include "scene/gui/spin_box.h" - -class ConfirmationDialog; -class HBoxContainer; -class MenuButton; -class OptionButton; -class SceneTreeDialog; - -class GPUParticles3DEditorBase : public Control { - GDCLASS(GPUParticles3DEditorBase, Control); - -protected: - Node3D *base_node = nullptr; - Panel *panel = nullptr; - MenuButton *options = nullptr; - HBoxContainer *particles_editor_hb = nullptr; - - SceneTreeDialog *emission_tree_dialog = nullptr; - - ConfirmationDialog *emission_dialog = nullptr; - SpinBox *emission_amount = nullptr; - OptionButton *emission_fill = nullptr; - - Vector<Face3> geometry; - - bool _generate(Vector<Vector3> &points, Vector<Vector3> &normals); - virtual void _generate_emission_points() {} - void _node_selected(const NodePath &p_path); - -public: - GPUParticles3DEditorBase(); -}; - -class GPUParticles3DEditor : public GPUParticles3DEditorBase { - GDCLASS(GPUParticles3DEditor, GPUParticles3DEditorBase); - - ConfirmationDialog *generate_aabb = nullptr; - SpinBox *generate_seconds = nullptr; - GPUParticles3D *node = nullptr; - - enum Menu { - MENU_OPTION_GENERATE_AABB, - MENU_OPTION_CREATE_EMISSION_VOLUME_FROM_NODE, - MENU_OPTION_CLEAR_EMISSION_VOLUME, - MENU_OPTION_CONVERT_TO_CPU_PARTICLES, - MENU_OPTION_RESTART, - - }; - - void _generate_aabb(); - - void _menu_option(int); - - friend class GPUParticles3DEditorPlugin; - - virtual void _generate_emission_points() override; - -protected: - void _notification(int p_notification); - void _node_removed(Node *p_node); - -public: - void edit(GPUParticles3D *p_particles); - GPUParticles3DEditor(); -}; - -class GPUParticles3DEditorPlugin : public EditorPlugin { - GDCLASS(GPUParticles3DEditorPlugin, EditorPlugin); - - GPUParticles3DEditor *particles_editor = nullptr; - -public: - virtual String get_name() const override { return "GPUParticles3D"; } - bool has_main_screen() const override { return false; } - virtual void edit(Object *p_object) override; - virtual bool handles(Object *p_object) const override; - virtual void make_visible(bool p_visible) override; - - GPUParticles3DEditorPlugin(); - ~GPUParticles3DEditorPlugin(); -}; - -#endif // GPU_PARTICLES_3D_EDITOR_PLUGIN_H diff --git a/editor/plugins/node_3d_editor_plugin.cpp b/editor/plugins/node_3d_editor_plugin.cpp index 0cf194b7fe..0df0274495 100644 --- a/editor/plugins/node_3d_editor_plugin.cpp +++ b/editor/plugins/node_3d_editor_plugin.cpp @@ -673,6 +673,7 @@ void Node3DEditorViewport::cancel_transform() { sp->set_global_transform(se->original); } + collision_reposition = false; finish_transform(); set_message(TTR("Transform Aborted."), 3); } @@ -1802,7 +1803,7 @@ void Node3DEditorViewport::_sinput(const Ref<InputEvent> &p_event) { if (b->is_pressed()) { clicked_wants_append = b->is_shift_pressed(); - if (_edit.mode != TRANSFORM_NONE && _edit.instant) { + if (_edit.mode != TRANSFORM_NONE && (_edit.instant || collision_reposition)) { commit_transform(); break; // just commit the edit, stop processing the event so we don't deselect the object } @@ -2398,10 +2399,10 @@ void Node3DEditorViewport::_sinput(const Ref<InputEvent> &p_event) { cancel_transform(); } if (!is_freelook_active() && !k->is_echo()) { - if (ED_IS_SHORTCUT("spatial_editor/instant_translate", p_event) && _edit.mode != TRANSFORM_TRANSLATE) { + if (ED_IS_SHORTCUT("spatial_editor/instant_translate", p_event) && (_edit.mode != TRANSFORM_TRANSLATE || collision_reposition)) { if (_edit.mode == TRANSFORM_NONE) { begin_transform(TRANSFORM_TRANSLATE, true); - } else if (_edit.instant) { + } else if (_edit.instant || collision_reposition) { commit_transform(); begin_transform(TRANSFORM_TRANSLATE, true); } @@ -2409,7 +2410,7 @@ void Node3DEditorViewport::_sinput(const Ref<InputEvent> &p_event) { if (ED_IS_SHORTCUT("spatial_editor/instant_rotate", p_event) && _edit.mode != TRANSFORM_ROTATE) { if (_edit.mode == TRANSFORM_NONE) { begin_transform(TRANSFORM_ROTATE, true); - } else if (_edit.instant) { + } else if (_edit.instant || collision_reposition) { commit_transform(); begin_transform(TRANSFORM_ROTATE, true); } @@ -2417,11 +2418,23 @@ void Node3DEditorViewport::_sinput(const Ref<InputEvent> &p_event) { if (ED_IS_SHORTCUT("spatial_editor/instant_scale", p_event) && _edit.mode != TRANSFORM_SCALE) { if (_edit.mode == TRANSFORM_NONE) { begin_transform(TRANSFORM_SCALE, true); - } else if (_edit.instant) { + } else if (_edit.instant || collision_reposition) { commit_transform(); begin_transform(TRANSFORM_SCALE, true); } } + if (ED_IS_SHORTCUT("spatial_editor/collision_reposition", p_event) && editor_selection->get_selected_node_list().size() == 1 && !collision_reposition) { + if (_edit.mode == TRANSFORM_NONE || _edit.instant) { + if (_edit.mode == TRANSFORM_NONE) { + _compute_edit(_edit.mouse_pos); + } else { + commit_transform(); + _compute_edit(_edit.mouse_pos); + } + _edit.mode = TRANSFORM_TRANSLATE; + collision_reposition = true; + } + } } // Freelook doesn't work in orthogonal mode. @@ -3072,11 +3085,24 @@ void Node3DEditorViewport::_notification(int p_what) { } break; case NOTIFICATION_PHYSICS_PROCESS: { + if (collision_reposition) { + List<Node *> &selection = editor_selection->get_selected_node_list(); + + if (selection.size() == 1) { + Node3D *first_selected_node = Object::cast_to<Node3D>(selection.front()->get()); + double snap = EDITOR_GET("interface/inspector/default_float_step"); + int snap_step_decimals = Math::range_step_decimals(snap); + set_message(TTR("Translating:") + " (" + String::num(first_selected_node->get_global_position().x, snap_step_decimals) + ", " + + String::num(first_selected_node->get_global_position().y, snap_step_decimals) + ", " + String::num(first_selected_node->get_global_position().z, snap_step_decimals) + ")"); + first_selected_node->set_global_position(spatial_editor->snap_point(_get_instance_position(_edit.mouse_pos, first_selected_node))); + } + } + if (!update_preview_node) { return; } if (preview_node->is_inside_tree()) { - preview_node_pos = spatial_editor->snap_point(_get_instance_position(preview_node_viewport_pos)); + preview_node_pos = spatial_editor->snap_point(_get_instance_position(preview_node_viewport_pos, preview_node)); double snap = EDITOR_GET("interface/inspector/default_float_step"); int snap_step_decimals = Math::range_step_decimals(snap); set_message(TTR("Instantiating:") + " (" + String::num(preview_node_pos.x, snap_step_decimals) + ", " + @@ -3966,7 +3992,7 @@ void Node3DEditorViewport::update_transform_gizmo_view() { return; } - bool show_gizmo = spatial_editor->is_gizmo_visible() && !_edit.instant && transform_gizmo_visible; + bool show_gizmo = spatial_editor->is_gizmo_visible() && !_edit.instant && transform_gizmo_visible && !collision_reposition; for (int i = 0; i < 3; i++) { Transform3D axis_angle; if (xform.basis.get_column(i).normalized().dot(xform.basis.get_column((i + 1) % 3).normalized()) < 1.0) { @@ -3997,7 +4023,7 @@ void Node3DEditorViewport::update_transform_gizmo_view() { xform.orthonormalize(); xform.basis.scale(scale); RenderingServer::get_singleton()->instance_set_transform(rotate_gizmo_instance[3], xform); - RenderingServer::get_singleton()->instance_set_visible(rotate_gizmo_instance[3], spatial_editor->is_gizmo_visible() && !_edit.instant && transform_gizmo_visible && (spatial_editor->get_tool_mode() == Node3DEditor::TOOL_MODE_SELECT || spatial_editor->get_tool_mode() == Node3DEditor::TOOL_MODE_ROTATE)); + RenderingServer::get_singleton()->instance_set_visible(rotate_gizmo_instance[3], spatial_editor->is_gizmo_visible() && !_edit.instant && transform_gizmo_visible && !collision_reposition && (spatial_editor->get_tool_mode() == Node3DEditor::TOOL_MODE_SELECT || spatial_editor->get_tool_mode() == Node3DEditor::TOOL_MODE_ROTATE)); } void Node3DEditorViewport::set_state(const Dictionary &p_state) { @@ -4241,7 +4267,19 @@ void Node3DEditorViewport::assign_pending_data_pointers(Node3D *p_preview_node, accept = p_accept; } -Vector3 Node3DEditorViewport::_get_instance_position(const Point2 &p_pos) const { +void _insert_rid_recursive(Node *node, HashSet<RID> &rids) { + CollisionObject3D *co = Object::cast_to<CollisionObject3D>(node); + if (co) { + rids.insert(co->get_rid()); + } + + for (int i = 0; i < node->get_child_count(); i++) { + Node *child = node->get_child(i); + _insert_rid_recursive(child, rids); + } +} + +Vector3 Node3DEditorViewport::_get_instance_position(const Point2 &p_pos, Node3D *p_node) const { const float MAX_DISTANCE = 50.0; const float FALLBACK_DISTANCE = 5.0; @@ -4250,13 +4288,28 @@ Vector3 Node3DEditorViewport::_get_instance_position(const Point2 &p_pos) const PhysicsDirectSpaceState3D *ss = get_tree()->get_root()->get_world_3d()->get_direct_space_state(); + HashSet<RID> rids; + + if (!preview_node->is_inside_tree()) { + List<Node *> &selection = editor_selection->get_selected_node_list(); + + Node3D *first_selected_node = Object::cast_to<Node3D>(selection.front()->get()); + + Array children = first_selected_node->get_children(); + + if (first_selected_node) { + _insert_rid_recursive(first_selected_node, rids); + } + } + PhysicsDirectSpaceState3D::RayParameters ray_params; + ray_params.exclude = rids; ray_params.from = world_pos; ray_params.to = world_pos + world_ray * camera->get_far(); PhysicsDirectSpaceState3D::RayResult result; - if (ss->intersect_ray(ray_params, result) && preview_node->get_child_count() > 0) { - // Calculate an offset for the `preview_node` such that the its bounding box is on top of and touching the contact surface's plane. + if (ss->intersect_ray(ray_params, result) && (preview_node->get_child_count() > 0 || !preview_node->is_inside_tree())) { + // Calculate an offset for the `p_node` such that the its bounding box is on top of and touching the contact surface's plane. // Use the Gram-Schmidt process to get an orthonormal Basis aligned with the surface normal. const Vector3 bb_basis_x = result.normal; @@ -4271,10 +4324,10 @@ Vector3 Node3DEditorViewport::_get_instance_position(const Point2 &p_pos) const const Basis bb_basis = Basis(bb_basis_x, bb_basis_y, bb_basis_z); // This normal-aligned Basis allows us to create an AABB that can fit on the surface plane as snugly as possible. - const Transform3D bb_transform = Transform3D(bb_basis, preview_node->get_transform().origin); - const AABB preview_node_bb = _calculate_spatial_bounds(preview_node, true, &bb_transform); - // The x-axis's alignment with the surface normal also makes it trivial to get the distance from `preview_node`'s origin at (0, 0, 0) to the correct AABB face. - const float offset_distance = -preview_node_bb.position.x; + const Transform3D bb_transform = Transform3D(bb_basis, p_node->get_transform().origin); + const AABB p_node_bb = _calculate_spatial_bounds(p_node, true, &bb_transform); + // The x-axis's alignment with the surface normal also makes it trivial to get the distance from `p_node`'s origin at (0, 0, 0) to the correct AABB face. + const float offset_distance = -p_node_bb.position.x; // `result_offset` is in global space. const Vector3 result_offset = result.position + result.normal * offset_distance; @@ -4912,6 +4965,7 @@ void Node3DEditorViewport::commit_transform() { } undo_redo->commit_action(); + collision_reposition = false; finish_transform(); set_message(""); } @@ -5509,6 +5563,7 @@ Node3DEditorViewport::Node3DEditorViewport(Node3DEditor *p_spatial_editor, int p ED_SHORTCUT("spatial_editor/instant_translate", TTR("Begin Translate Transformation")); ED_SHORTCUT("spatial_editor/instant_rotate", TTR("Begin Rotate Transformation")); ED_SHORTCUT("spatial_editor/instant_scale", TTR("Begin Scale Transformation")); + ED_SHORTCUT("spatial_editor/collision_reposition", TTR("Reposition Using Collisions"), KeyModifierMask::SHIFT | Key::G); preview_camera = memnew(CheckBox); preview_camera->set_text(TTR("Preview")); diff --git a/editor/plugins/node_3d_editor_plugin.h b/editor/plugins/node_3d_editor_plugin.h index a8cade36fd..1b03362606 100644 --- a/editor/plugins/node_3d_editor_plugin.h +++ b/editor/plugins/node_3d_editor_plugin.h @@ -245,6 +245,7 @@ private: bool auto_orthogonal; bool lock_rotation; bool transform_gizmo_visible = true; + bool collision_reposition = false; real_t gizmo_scale; bool freelook_active; @@ -338,7 +339,6 @@ private: TRANSFORM_ROTATE, TRANSFORM_TRANSLATE, TRANSFORM_SCALE - }; enum TransformPlane { TRANSFORM_VIEW, @@ -471,7 +471,7 @@ private: void _list_select(Ref<InputEventMouseButton> b); Point2 _get_warped_mouse_motion(const Ref<InputEventMouseMotion> &p_ev_mouse_motion) const; - Vector3 _get_instance_position(const Point2 &p_pos) const; + Vector3 _get_instance_position(const Point2 &p_pos, Node3D *p_node) const; static AABB _calculate_spatial_bounds(const Node3D *p_parent, bool p_omit_top_level = false, const Transform3D *p_bounds_orientation = nullptr); Node *_sanitize_preview_node(Node *p_node) const; diff --git a/editor/plugins/particles_editor_plugin.cpp b/editor/plugins/particles_editor_plugin.cpp new file mode 100644 index 0000000000..34f5dcf963 --- /dev/null +++ b/editor/plugins/particles_editor_plugin.cpp @@ -0,0 +1,968 @@ +/**************************************************************************/ +/* particles_editor_plugin.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "particles_editor_plugin.h" + +#include "canvas_item_editor_plugin.h" +#include "core/io/image_loader.h" +#include "editor/editor_node.h" +#include "editor/editor_settings.h" +#include "editor/editor_undo_redo_manager.h" +#include "editor/gui/editor_file_dialog.h" +#include "editor/scene_tree_dock.h" +#include "scene/2d/cpu_particles_2d.h" +#include "scene/2d/gpu_particles_2d.h" +#include "scene/3d/cpu_particles_3d.h" +#include "scene/3d/gpu_particles_3d.h" +#include "scene/3d/mesh_instance_3d.h" +#include "scene/gui/box_container.h" +#include "scene/gui/menu_button.h" +#include "scene/gui/separator.h" +#include "scene/gui/spin_box.h" +#include "scene/resources/image_texture.h" +#include "scene/resources/particle_process_material.h" + +void ParticlesEditorPlugin::_notification(int p_what) { + switch (p_what) { + case NOTIFICATION_ENTER_TREE: { + if (handled_type.ends_with("2D")) { + add_control_to_container(CONTAINER_CANVAS_EDITOR_MENU, toolbar); + } else if (handled_type.ends_with("3D")) { + add_control_to_container(CONTAINER_SPATIAL_EDITOR_MENU, toolbar); + } else { + DEV_ASSERT(false); + } + + menu->set_icon(menu->get_editor_theme_icon(handled_type)); + menu->set_text(handled_type); + + PopupMenu *popup = menu->get_popup(); + popup->add_shortcut(ED_SHORTCUT("particles/restart_emission", TTR("Restart Emission"), KeyModifierMask::CTRL | Key::R), MENU_RESTART); + _add_menu_options(popup); + popup->add_item(conversion_option_name, MENU_OPTION_CONVERT); + } break; + } +} + +bool ParticlesEditorPlugin::need_show_lifetime_dialog(SpinBox *p_seconds) { + // Add one second to the default generation lifetime, since the progress is updated every second. + p_seconds->set_value(MAX(1.0, trunc(edited_node->get("lifetime").operator double()) + 1.0)); + + if (p_seconds->get_value() >= 11.0 + CMP_EPSILON) { + // Only pop up the time dialog if the particle's lifetime is long enough to warrant shortening it. + return true; + } else { + // Generate the visibility rect/AABB immediately. + return false; + } +} + +void ParticlesEditorPlugin::_menu_callback(int p_idx) { + switch (p_idx) { + case MENU_OPTION_CONVERT: { + Node *converted_node = _convert_particles(); + + EditorUndoRedoManager *ur = EditorUndoRedoManager::get_singleton(); + ur->create_action(conversion_option_name, UndoRedo::MERGE_DISABLE, edited_node); + SceneTreeDock::get_singleton()->replace_node(edited_node, converted_node); + ur->commit_action(false); + } break; + + case MENU_RESTART: { + edited_node->call("restart"); + } + } +} + +void ParticlesEditorPlugin::edit(Object *p_object) { + edited_node = Object::cast_to<Node>(p_object); +} + +bool ParticlesEditorPlugin::handles(Object *p_object) const { + return p_object->is_class(handled_type); +} + +void ParticlesEditorPlugin::make_visible(bool p_visible) { + toolbar->set_visible(p_visible); +} + +ParticlesEditorPlugin::ParticlesEditorPlugin() { + toolbar = memnew(HBoxContainer); + toolbar->hide(); + + menu = memnew(MenuButton); + menu->set_switch_on_hover(true); + toolbar->add_child(menu); + menu->get_popup()->connect(SceneStringName(id_pressed), callable_mp(this, &ParticlesEditorPlugin::_menu_callback)); +} + +// 2D ///////////////////////////////////////////// + +void GPUParticles2DEditorPlugin::_menu_callback(int p_idx) { + if (p_idx == MENU_GENERATE_VISIBILITY_RECT) { + if (need_show_lifetime_dialog(generate_seconds)) { + generate_visibility_rect->popup_centered(); + } else { + _generate_visibility_rect(); + } + } else { + Particles2DEditorPlugin::_menu_callback(p_idx); + } +} + +void GPUParticles2DEditorPlugin::_add_menu_options(PopupMenu *p_menu) { + Particles2DEditorPlugin::_add_menu_options(p_menu); + p_menu->add_item(TTR("Generate Visibility Rect"), MENU_GENERATE_VISIBILITY_RECT); +} + +void Particles2DEditorPlugin::_file_selected(const String &p_file) { + source_emission_file = p_file; + emission_mask->popup_centered(); +} + +void Particles2DEditorPlugin::_get_base_emission_mask(PackedVector2Array &r_valid_positions, PackedVector2Array &r_valid_normals, PackedByteArray &r_valid_colors, Vector2i &r_image_size) { + Ref<Image> img; + img.instantiate(); + Error err = ImageLoader::load_image(source_emission_file, img); + ERR_FAIL_COND_MSG(err != OK, "Error loading image '" + source_emission_file + "'."); + + if (img->is_compressed()) { + img->decompress(); + } + img->convert(Image::FORMAT_RGBA8); + ERR_FAIL_COND(img->get_format() != Image::FORMAT_RGBA8); + Size2i s = img->get_size(); + ERR_FAIL_COND(s.width == 0 || s.height == 0); + + r_image_size = s; + + r_valid_positions.resize(s.width * s.height); + + EmissionMode emode = (EmissionMode)emission_mask_mode->get_selected(); + + if (emode == EMISSION_MODE_BORDER_DIRECTED) { + r_valid_normals.resize(s.width * s.height); + } + + bool capture_colors = emission_colors->is_pressed(); + + if (capture_colors) { + r_valid_colors.resize(s.width * s.height * 4); + } + + int vpc = 0; + + { + Vector<uint8_t> img_data = img->get_data(); + const uint8_t *r = img_data.ptr(); + + for (int i = 0; i < s.width; i++) { + for (int j = 0; j < s.height; j++) { + uint8_t a = r[(j * s.width + i) * 4 + 3]; + + if (a > 128) { + if (emode == EMISSION_MODE_SOLID) { + if (capture_colors) { + r_valid_colors.write[vpc * 4 + 0] = r[(j * s.width + i) * 4 + 0]; + r_valid_colors.write[vpc * 4 + 1] = r[(j * s.width + i) * 4 + 1]; + r_valid_colors.write[vpc * 4 + 2] = r[(j * s.width + i) * 4 + 2]; + r_valid_colors.write[vpc * 4 + 3] = r[(j * s.width + i) * 4 + 3]; + } + r_valid_positions.write[vpc++] = Point2(i, j); + + } else { + bool on_border = false; + for (int x = i - 1; x <= i + 1; x++) { + for (int y = j - 1; y <= j + 1; y++) { + if (x < 0 || y < 0 || x >= s.width || y >= s.height || r[(y * s.width + x) * 4 + 3] <= 128) { + on_border = true; + break; + } + } + + if (on_border) { + break; + } + } + + if (on_border) { + r_valid_positions.write[vpc] = Point2(i, j); + + if (emode == EMISSION_MODE_BORDER_DIRECTED) { + Vector2 normal; + for (int x = i - 2; x <= i + 2; x++) { + for (int y = j - 2; y <= j + 2; y++) { + if (x == i && y == j) { + continue; + } + + if (x < 0 || y < 0 || x >= s.width || y >= s.height || r[(y * s.width + x) * 4 + 3] <= 128) { + normal += Vector2(x - i, y - j).normalized(); + } + } + } + + normal.normalize(); + r_valid_normals.write[vpc] = normal; + } + + if (capture_colors) { + r_valid_colors.write[vpc * 4 + 0] = r[(j * s.width + i) * 4 + 0]; + r_valid_colors.write[vpc * 4 + 1] = r[(j * s.width + i) * 4 + 1]; + r_valid_colors.write[vpc * 4 + 2] = r[(j * s.width + i) * 4 + 2]; + r_valid_colors.write[vpc * 4 + 3] = r[(j * s.width + i) * 4 + 3]; + } + + vpc++; + } + } + } + } + } + } + + r_valid_positions.resize(vpc); + if (!r_valid_normals.is_empty()) { + r_valid_normals.resize(vpc); + } +} + +Particles2DEditorPlugin::Particles2DEditorPlugin() { + file = memnew(EditorFileDialog); + + List<String> ext; + ImageLoader::get_recognized_extensions(&ext); + for (const String &E : ext) { + file->add_filter("*." + E, E.to_upper()); + } + + file->set_file_mode(EditorFileDialog::FILE_MODE_OPEN_FILE); + EditorNode::get_singleton()->get_gui_base()->add_child(file); + file->connect("file_selected", callable_mp(this, &Particles2DEditorPlugin::_file_selected)); + + emission_mask = memnew(ConfirmationDialog); + emission_mask->set_title(TTR("Load Emission Mask")); + + VBoxContainer *emvb = memnew(VBoxContainer); + emission_mask->add_child(emvb); + + emission_mask_mode = memnew(OptionButton); + emission_mask_mode->add_item(TTR("Solid Pixels"), EMISSION_MODE_SOLID); + emission_mask_mode->add_item(TTR("Border Pixels"), EMISSION_MODE_BORDER); + emission_mask_mode->add_item(TTR("Directed Border Pixels"), EMISSION_MODE_BORDER_DIRECTED); + emvb->add_margin_child(TTR("Emission Mask"), emission_mask_mode); + + VBoxContainer *optionsvb = memnew(VBoxContainer); + emvb->add_margin_child(TTR("Options"), optionsvb); + + emission_mask_centered = memnew(CheckBox(TTR("Centered"))); + optionsvb->add_child(emission_mask_centered); + emission_colors = memnew(CheckBox(TTR("Capture Colors from Pixel"))); + optionsvb->add_child(emission_colors); + + EditorNode::get_singleton()->get_gui_base()->add_child(emission_mask); + + emission_mask->connect(SceneStringName(confirmed), callable_mp(this, &Particles2DEditorPlugin::_generate_emission_mask)); +} + +void GPUParticles2DEditorPlugin::_selection_changed() { + List<Node *> selected_nodes = EditorNode::get_singleton()->get_editor_selection()->get_selected_node_list(); + if (selected_particles.is_empty() && selected_nodes.is_empty()) { + return; + } + + for (GPUParticles2D *particles : selected_particles) { + particles->set_show_visibility_rect(false); + } + selected_particles.clear(); + + for (Node *node : selected_nodes) { + GPUParticles2D *selected_particle = Object::cast_to<GPUParticles2D>(node); + if (selected_particle) { + selected_particle->set_show_visibility_rect(true); + selected_particles.push_back(selected_particle); + } + } +} + +void GPUParticles2DEditorPlugin::_generate_visibility_rect() { + GPUParticles2D *particles = Object::cast_to<GPUParticles2D>(edited_node); + + double time = generate_seconds->get_value(); + + float running = 0.0; + + EditorProgress ep("gen_vrect", TTR("Generating Visibility Rect (Waiting for Particle Simulation)"), int(time)); + + bool was_emitting = particles->is_emitting(); + if (!was_emitting) { + particles->set_emitting(true); + OS::get_singleton()->delay_usec(1000); + } + + Rect2 rect; + while (running < time) { + uint64_t ticks = OS::get_singleton()->get_ticks_usec(); + ep.step(TTR("Generating..."), int(running), true); + OS::get_singleton()->delay_usec(1000); + + Rect2 capture = particles->capture_rect(); + if (rect == Rect2()) { + rect = capture; + } else { + rect = rect.merge(capture); + } + + running += (OS::get_singleton()->get_ticks_usec() - ticks) / 1000000.0; + } + + if (!was_emitting) { + particles->set_emitting(false); + } + + EditorUndoRedoManager *undo_redo = EditorUndoRedoManager::get_singleton(); + undo_redo->create_action(TTR("Generate Visibility Rect")); + undo_redo->add_do_method(particles, "set_visibility_rect", rect); + undo_redo->add_undo_method(particles, "set_visibility_rect", particles->get_visibility_rect()); + undo_redo->commit_action(); +} + +void GPUParticles2DEditorPlugin::_notification(int p_what) { + switch (p_what) { + case NOTIFICATION_ENTER_TREE: { + EditorNode::get_singleton()->get_editor_selection()->connect("selection_changed", callable_mp(this, &GPUParticles2DEditorPlugin::_selection_changed)); + } break; + } +} + +void Particles2DEditorPlugin::_menu_callback(int p_idx) { + if (p_idx == MENU_LOAD_EMISSION_MASK) { + file->popup_file_dialog(); + } else { + ParticlesEditorPlugin::_menu_callback(p_idx); + } +} + +void Particles2DEditorPlugin::_add_menu_options(PopupMenu *p_menu) { + p_menu->add_item(TTR("Load Emission Mask"), MENU_LOAD_EMISSION_MASK); +} + +Node *GPUParticles2DEditorPlugin::_convert_particles() { + GPUParticles2D *particles = Object::cast_to<GPUParticles2D>(edited_node); + + CPUParticles2D *cpu_particles = memnew(CPUParticles2D); + cpu_particles->convert_from_particles(particles); + cpu_particles->set_name(particles->get_name()); + cpu_particles->set_transform(particles->get_transform()); + cpu_particles->set_visible(particles->is_visible()); + cpu_particles->set_process_mode(particles->get_process_mode()); + cpu_particles->set_z_index(particles->get_z_index()); + return cpu_particles; +} + +void GPUParticles2DEditorPlugin::_generate_emission_mask() { + GPUParticles2D *particles = Object::cast_to<GPUParticles2D>(edited_node); + Ref<ParticleProcessMaterial> pm = particles->get_process_material(); + if (pm.is_null()) { + EditorNode::get_singleton()->show_warning(TTR("Can only set point into a ParticleProcessMaterial process material")); + return; + } + + PackedVector2Array valid_positions; + PackedVector2Array valid_normals; + PackedByteArray valid_colors; + Vector2i image_size; + _get_base_emission_mask(valid_positions, valid_normals, valid_colors, image_size); + + ERR_FAIL_COND_MSG(valid_positions.is_empty(), "No pixels with transparency > 128 in image..."); + + Vector<uint8_t> texdata; + + int vpc = valid_positions.size(); + int w = 2048; + int h = (vpc / 2048) + 1; + + texdata.resize(w * h * 2 * sizeof(float)); + + { + Vector2 offset; + if (emission_mask_centered->is_pressed()) { + offset = Vector2(-image_size.width * 0.5, -image_size.height * 0.5); + } + + uint8_t *tw = texdata.ptrw(); + float *twf = reinterpret_cast<float *>(tw); + for (int i = 0; i < vpc; i++) { + twf[i * 2 + 0] = valid_positions[i].x + offset.x; + twf[i * 2 + 1] = valid_positions[i].y + offset.y; + } + } + + Ref<Image> img; + img.instantiate(); + img->set_data(w, h, false, Image::FORMAT_RGF, texdata); + pm->set_emission_point_texture(ImageTexture::create_from_image(img)); + pm->set_emission_point_count(vpc); + + if (emission_colors->is_pressed()) { + Vector<uint8_t> colordata; + colordata.resize(w * h * 4); //use RG texture + + { + uint8_t *tw = colordata.ptrw(); + for (int i = 0; i < vpc * 4; i++) { + tw[i] = valid_colors[i]; + } + } + + img.instantiate(); + img->set_data(w, h, false, Image::FORMAT_RGBA8, colordata); + pm->set_emission_color_texture(ImageTexture::create_from_image(img)); + } + + if (valid_normals.size()) { + pm->set_emission_shape(ParticleProcessMaterial::EMISSION_SHAPE_DIRECTED_POINTS); + + Vector<uint8_t> normdata; + normdata.resize(w * h * 2 * sizeof(float)); //use RG texture + + { + uint8_t *tw = normdata.ptrw(); + float *twf = reinterpret_cast<float *>(tw); + for (int i = 0; i < vpc; i++) { + twf[i * 2 + 0] = valid_normals[i].x; + twf[i * 2 + 1] = valid_normals[i].y; + } + } + + img.instantiate(); + img->set_data(w, h, false, Image::FORMAT_RGF, normdata); + pm->set_emission_normal_texture(ImageTexture::create_from_image(img)); + + } else { + pm->set_emission_shape(ParticleProcessMaterial::EMISSION_SHAPE_POINTS); + } +} + +GPUParticles2DEditorPlugin::GPUParticles2DEditorPlugin() { + handled_type = "GPUParticles2D"; // TTR("GPUParticles2D") + conversion_option_name = TTR("Convert to CPUParticles2D"); + + generate_visibility_rect = memnew(ConfirmationDialog); + generate_visibility_rect->set_title(TTR("Generate Visibility Rect")); + + VBoxContainer *genvb = memnew(VBoxContainer); + generate_visibility_rect->add_child(genvb); + + generate_seconds = memnew(SpinBox); + generate_seconds->set_min(0.1); + generate_seconds->set_max(25); + generate_seconds->set_value(2); + genvb->add_margin_child(TTR("Generation Time (sec):"), generate_seconds); + + EditorNode::get_singleton()->get_gui_base()->add_child(generate_visibility_rect); + + generate_visibility_rect->connect(SceneStringName(confirmed), callable_mp(this, &GPUParticles2DEditorPlugin::_generate_visibility_rect)); +} + +Node *CPUParticles2DEditorPlugin::_convert_particles() { + CPUParticles2D *particles = Object::cast_to<CPUParticles2D>(edited_node); + + GPUParticles2D *gpu_particles = memnew(GPUParticles2D); + gpu_particles->convert_from_particles(particles); + gpu_particles->set_name(particles->get_name()); + gpu_particles->set_transform(particles->get_transform()); + gpu_particles->set_visible(particles->is_visible()); + gpu_particles->set_process_mode(particles->get_process_mode()); + return gpu_particles; +} + +void CPUParticles2DEditorPlugin::_generate_emission_mask() { + CPUParticles2D *particles = Object::cast_to<CPUParticles2D>(edited_node); + + PackedVector2Array valid_positions; + PackedVector2Array valid_normals; + PackedByteArray valid_colors; + Vector2i image_size; + _get_base_emission_mask(valid_positions, valid_normals, valid_colors, image_size); + + ERR_FAIL_COND_MSG(valid_positions.is_empty(), "No pixels with transparency > 128 in image..."); + + int vpc = valid_positions.size(); + if (emission_colors->is_pressed()) { + PackedColorArray pca; + pca.resize(vpc); + Color *pcaw = pca.ptrw(); + for (int i = 0; i < vpc; i += 1) { + Color color; + color.r = valid_colors[i * 4 + 0] / 255.0f; + color.g = valid_colors[i * 4 + 1] / 255.0f; + color.b = valid_colors[i * 4 + 2] / 255.0f; + color.a = valid_colors[i * 4 + 3] / 255.0f; + pcaw[i] = color; + } + particles->set_emission_colors(pca); + } + + if (valid_normals.size()) { + particles->set_emission_shape(CPUParticles2D::EMISSION_SHAPE_DIRECTED_POINTS); + PackedVector2Array norms; + norms.resize(valid_normals.size()); + Vector2 *normsw = norms.ptrw(); + for (int i = 0; i < valid_normals.size(); i += 1) { + normsw[i] = valid_normals[i]; + } + particles->set_emission_normals(norms); + } else { + particles->set_emission_shape(CPUParticles2D::EMISSION_SHAPE_POINTS); + } + + { + Vector2 offset; + if (emission_mask_centered->is_pressed()) { + offset = Vector2(-image_size.width * 0.5, -image_size.height * 0.5); + } + + PackedVector2Array points; + points.resize(valid_positions.size()); + Vector2 *pointsw = points.ptrw(); + for (int i = 0; i < valid_positions.size(); i += 1) { + pointsw[i] = valid_positions[i] + offset; + } + particles->set_emission_points(points); + } +} + +CPUParticles2DEditorPlugin::CPUParticles2DEditorPlugin() { + handled_type = "CPUParticles2D"; // TTR("CPUParticles2D") + conversion_option_name = TTR("Convert to GPUParticles2D"); +} + +// 3D ///////////////////////////////////////////// + +void Particles3DEditorPlugin::_generate_aabb() { + double time = generate_seconds->get_value(); + + double running = 0.0; + + EditorProgress ep("gen_aabb", TTR("Generating Visibility AABB (Waiting for Particle Simulation)"), int(time)); + + bool was_emitting = edited_node->get("emitting"); + if (!was_emitting) { + edited_node->set("emitting", true); + OS::get_singleton()->delay_usec(1000); + } + + AABB rect; + Callable capture_aabb = Callable(edited_node, "capture_aabb"); + + while (running < time) { + uint64_t ticks = OS::get_singleton()->get_ticks_usec(); + ep.step(TTR("Generating..."), int(running), true); + OS::get_singleton()->delay_usec(1000); + + AABB capture = capture_aabb.call(); + if (rect == AABB()) { + rect = capture; + } else { + rect.merge_with(capture); + } + + running += (OS::get_singleton()->get_ticks_usec() - ticks) / 1000000.0; + } + + if (!was_emitting) { + edited_node->set("emitting", false); + } + + EditorUndoRedoManager *ur = EditorUndoRedoManager::get_singleton(); + ur->create_action(TTR("Generate Visibility AABB")); + ur->add_do_property(edited_node, "visibility_aabb", rect); + ur->add_undo_property(edited_node, "visibility_aabb", edited_node->get("visibility_aabb")); + ur->commit_action(); +} + +void Particles3DEditorPlugin::_node_selected(const NodePath &p_path) { + Node *sel = get_node(p_path); + if (!sel) { + return; + } + + if (!sel->is_class("Node3D")) { + EditorNode::get_singleton()->show_warning(vformat(TTR("\"%s\" doesn't inherit from Node3D."), sel->get_name())); + return; + } + + MeshInstance3D *mi = Object::cast_to<MeshInstance3D>(sel); + if (!mi || mi->get_mesh().is_null()) { + EditorNode::get_singleton()->show_warning(vformat(TTR("\"%s\" doesn't contain geometry."), sel->get_name())); + return; + } + + geometry = mi->get_mesh()->get_faces(); + if (geometry.size() == 0) { + EditorNode::get_singleton()->show_warning(vformat(TTR("\"%s\" doesn't contain face geometry."), sel->get_name())); + return; + } + + Transform3D geom_xform = edited_node->get("global_transform"); + geom_xform = geom_xform.affine_inverse() * mi->get_global_transform(); + int gc = geometry.size(); + Face3 *w = geometry.ptrw(); + + for (int i = 0; i < gc; i++) { + for (int j = 0; j < 3; j++) { + w[i].vertex[j] = geom_xform.xform(w[i].vertex[j]); + } + } + emission_dialog->popup_centered(Size2(300, 130)); +} + +void Particles3DEditorPlugin::_menu_callback(int p_idx) { + switch (p_idx) { + case MENU_OPTION_GENERATE_AABB: { + if (need_show_lifetime_dialog(generate_seconds)) { + generate_aabb->popup_centered(); + } else { + _generate_aabb(); + } + } break; + + case MENU_OPTION_CREATE_EMISSION_VOLUME_FROM_NODE: { + if (_can_generate_points()) { + emission_tree_dialog->popup_scenetree_dialog(); + } + } break; + + default: { + ParticlesEditorPlugin::_menu_callback(p_idx); + } + } +} + +void Particles3DEditorPlugin::_add_menu_options(PopupMenu *p_menu) { + p_menu->add_item(TTR("Generate AABB"), MENU_OPTION_GENERATE_AABB); + p_menu->add_item(TTR("Create Emission Points From Node"), MENU_OPTION_CREATE_EMISSION_VOLUME_FROM_NODE); +} + +bool Particles3DEditorPlugin::_generate(Vector<Vector3> &r_points, Vector<Vector3> &r_normals) { + bool use_normals = emission_fill->get_selected() == 1; + + if (emission_fill->get_selected() < 2) { + float area_accum = 0; + RBMap<float, int> triangle_area_map; + + for (int i = 0; i < geometry.size(); i++) { + float area = geometry[i].get_area(); + if (area < CMP_EPSILON) { + continue; + } + triangle_area_map[area_accum] = i; + area_accum += area; + } + + if (!triangle_area_map.size() || area_accum == 0) { + EditorNode::get_singleton()->show_warning(TTR("The geometry's faces don't contain any area.")); + return false; + } + + int emissor_count = emission_amount->get_value(); + + for (int i = 0; i < emissor_count; i++) { + float areapos = Math::random(0.0f, area_accum); + + RBMap<float, int>::Iterator E = triangle_area_map.find_closest(areapos); + ERR_FAIL_COND_V(!E, false); + int index = E->value; + ERR_FAIL_INDEX_V(index, geometry.size(), false); + + // ok FINALLY get face + Face3 face = geometry[index]; + //now compute some position inside the face... + + Vector3 pos = face.get_random_point_inside(); + + r_points.push_back(pos); + + if (use_normals) { + Vector3 normal = face.get_plane().normal; + r_normals.push_back(normal); + } + } + } else { + int gcount = geometry.size(); + + if (gcount == 0) { + EditorNode::get_singleton()->show_warning(TTR("The geometry doesn't contain any faces.")); + return false; + } + + const Face3 *r = geometry.ptr(); + + AABB aabb; + + for (int i = 0; i < gcount; i++) { + for (int j = 0; j < 3; j++) { + if (i == 0 && j == 0) { + aabb.position = r[i].vertex[j]; + } else { + aabb.expand_to(r[i].vertex[j]); + } + } + } + + int emissor_count = emission_amount->get_value(); + + for (int i = 0; i < emissor_count; i++) { + int attempts = 5; + + for (int j = 0; j < attempts; j++) { + Vector3 dir; + dir[Math::rand() % 3] = 1.0; + Vector3 ofs = (Vector3(1, 1, 1) - dir) * Vector3(Math::randf(), Math::randf(), Math::randf()) * aabb.size + aabb.position; + + Vector3 ofsv = ofs + aabb.size * dir; + + //space it a little + ofs -= dir; + ofsv += dir; + + float max = -1e7, min = 1e7; + + for (int k = 0; k < gcount; k++) { + const Face3 &f3 = r[k]; + + Vector3 res; + if (f3.intersects_segment(ofs, ofsv, &res)) { + res -= ofs; + float d = dir.dot(res); + + if (d < min) { + min = d; + } + if (d > max) { + max = d; + } + } + } + + if (max < min) { + continue; //lost attempt + } + + float val = min + (max - min) * Math::randf(); + + Vector3 point = ofs + dir * val; + + r_points.push_back(point); + break; + } + } + } + return true; +} + +Particles3DEditorPlugin::Particles3DEditorPlugin() { + generate_aabb = memnew(ConfirmationDialog); + generate_aabb->set_title(TTR("Generate Visibility AABB")); + + VBoxContainer *genvb = memnew(VBoxContainer); + generate_aabb->add_child(genvb); + + generate_seconds = memnew(SpinBox); + generate_seconds->set_min(0.1); + generate_seconds->set_max(25); + generate_seconds->set_value(2); + genvb->add_margin_child(TTR("Generation Time (sec):"), generate_seconds); + + EditorNode::get_singleton()->get_gui_base()->add_child(generate_aabb); + + generate_aabb->connect(SceneStringName(confirmed), callable_mp(this, &Particles3DEditorPlugin::_generate_aabb)); + + emission_tree_dialog = memnew(SceneTreeDialog); + Vector<StringName> valid_types; + valid_types.push_back("MeshInstance3D"); + emission_tree_dialog->set_valid_types(valid_types); + EditorNode::get_singleton()->get_gui_base()->add_child(emission_tree_dialog); + emission_tree_dialog->connect("selected", callable_mp(this, &Particles3DEditorPlugin::_node_selected)); + + emission_dialog = memnew(ConfirmationDialog); + emission_dialog->set_title(TTR("Create Emitter")); + EditorNode::get_singleton()->get_gui_base()->add_child(emission_dialog); + + VBoxContainer *emd_vb = memnew(VBoxContainer); + emission_dialog->add_child(emd_vb); + + emission_amount = memnew(SpinBox); + emission_amount->set_min(1); + emission_amount->set_max(100000); + emission_amount->set_value(512); + emd_vb->add_margin_child(TTR("Emission Points:"), emission_amount); + + emission_fill = memnew(OptionButton); + emission_fill->add_item(TTR("Surface Points")); + emission_fill->add_item(TTR("Surface Points+Normal (Directed)")); + emission_fill->add_item(TTR("Volume")); + emd_vb->add_margin_child(TTR("Emission Source:"), emission_fill); + + emission_dialog->set_ok_button_text(TTR("Create")); + emission_dialog->connect(SceneStringName(confirmed), callable_mp(this, &Particles3DEditorPlugin::_generate_emission_points)); +} + +Node *GPUParticles3DEditorPlugin::_convert_particles() { + GPUParticles3D *particles = Object::cast_to<GPUParticles3D>(edited_node); + + CPUParticles3D *cpu_particles = memnew(CPUParticles3D); + cpu_particles->convert_from_particles(particles); + cpu_particles->set_name(particles->get_name()); + cpu_particles->set_transform(particles->get_transform()); + cpu_particles->set_visible(particles->is_visible()); + cpu_particles->set_process_mode(particles->get_process_mode()); + return cpu_particles; +} + +bool GPUParticles3DEditorPlugin::_can_generate_points() const { + GPUParticles3D *particles = Object::cast_to<GPUParticles3D>(edited_node); + Ref<ParticleProcessMaterial> mat = particles->get_process_material(); + if (mat.is_null()) { + EditorNode::get_singleton()->show_warning(TTR("A processor material of type 'ParticleProcessMaterial' is required.")); + return false; + } + return true; +} + +void GPUParticles3DEditorPlugin::_generate_emission_points() { + GPUParticles3D *particles = Object::cast_to<GPUParticles3D>(edited_node); + + /// hacer codigo aca + Vector<Vector3> points; + Vector<Vector3> normals; + + if (!_generate(points, normals)) { + return; + } + + int point_count = points.size(); + + int w = 2048; + int h = (point_count / 2048) + 1; + + Vector<uint8_t> point_img; + point_img.resize(w * h * 3 * sizeof(float)); + + { + uint8_t *iw = point_img.ptrw(); + memset(iw, 0, w * h * 3 * sizeof(float)); + const Vector3 *r = points.ptr(); + float *wf = reinterpret_cast<float *>(iw); + for (int i = 0; i < point_count; i++) { + wf[i * 3 + 0] = r[i].x; + wf[i * 3 + 1] = r[i].y; + wf[i * 3 + 2] = r[i].z; + } + } + + Ref<Image> image = memnew(Image(w, h, false, Image::FORMAT_RGBF, point_img)); + Ref<ImageTexture> tex = ImageTexture::create_from_image(image); + + Ref<ParticleProcessMaterial> mat = particles->get_process_material(); + ERR_FAIL_COND(mat.is_null()); + + if (normals.size() > 0) { + mat->set_emission_shape(ParticleProcessMaterial::EMISSION_SHAPE_DIRECTED_POINTS); + mat->set_emission_point_count(point_count); + mat->set_emission_point_texture(tex); + + Vector<uint8_t> point_img2; + point_img2.resize(w * h * 3 * sizeof(float)); + + { + uint8_t *iw = point_img2.ptrw(); + memset(iw, 0, w * h * 3 * sizeof(float)); + const Vector3 *r = normals.ptr(); + float *wf = reinterpret_cast<float *>(iw); + for (int i = 0; i < point_count; i++) { + wf[i * 3 + 0] = r[i].x; + wf[i * 3 + 1] = r[i].y; + wf[i * 3 + 2] = r[i].z; + } + } + + Ref<Image> image2 = memnew(Image(w, h, false, Image::FORMAT_RGBF, point_img2)); + mat->set_emission_normal_texture(ImageTexture::create_from_image(image2)); + } else { + mat->set_emission_shape(ParticleProcessMaterial::EMISSION_SHAPE_POINTS); + mat->set_emission_point_count(point_count); + mat->set_emission_point_texture(tex); + } +} + +GPUParticles3DEditorPlugin::GPUParticles3DEditorPlugin() { + handled_type = "GPUParticles3D"; // TTR("GPUParticles3D") + conversion_option_name = TTR("Convert to CPUParticles3D"); +} + +Node *CPUParticles3DEditorPlugin::_convert_particles() { + CPUParticles3D *particles = Object::cast_to<CPUParticles3D>(edited_node); + + GPUParticles3D *gpu_particles = memnew(GPUParticles3D); + gpu_particles->convert_from_particles(particles); + gpu_particles->set_name(particles->get_name()); + gpu_particles->set_transform(particles->get_transform()); + gpu_particles->set_visible(particles->is_visible()); + gpu_particles->set_process_mode(particles->get_process_mode()); + return gpu_particles; +} + +void CPUParticles3DEditorPlugin::_generate_emission_points() { + CPUParticles3D *particles = Object::cast_to<CPUParticles3D>(edited_node); + + /// hacer codigo aca + Vector<Vector3> points; + Vector<Vector3> normals; + + if (!_generate(points, normals)) { + return; + } + + if (normals.is_empty()) { + particles->set_emission_shape(CPUParticles3D::EMISSION_SHAPE_POINTS); + particles->set_emission_points(points); + } else { + particles->set_emission_shape(CPUParticles3D::EMISSION_SHAPE_DIRECTED_POINTS); + particles->set_emission_points(points); + particles->set_emission_normals(normals); + } +} + +CPUParticles3DEditorPlugin::CPUParticles3DEditorPlugin() { + handled_type = "CPUParticles3D"; // TTR("CPUParticles3D") + conversion_option_name = TTR("Convert to GPUParticles3D"); +} diff --git a/editor/plugins/particles_editor_plugin.h b/editor/plugins/particles_editor_plugin.h new file mode 100644 index 0000000000..d4459765c4 --- /dev/null +++ b/editor/plugins/particles_editor_plugin.h @@ -0,0 +1,216 @@ +/**************************************************************************/ +/* particles_editor_plugin.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef PARTICLES_EDITOR_PLUGIN_H +#define PARTICLES_EDITOR_PLUGIN_H + +#include "editor/plugins/editor_plugin.h" + +class CheckBox; +class ConfirmationDialog; +class EditorFileDialog; +class GPUParticles2D; +class HBoxContainer; +class MenuButton; +class OptionButton; +class SceneTreeDialog; +class SpinBox; + +class ParticlesEditorPlugin : public EditorPlugin { + GDCLASS(ParticlesEditorPlugin, EditorPlugin); + +private: + enum { + MENU_OPTION_CONVERT, + MENU_RESTART + }; + + HBoxContainer *toolbar = nullptr; + MenuButton *menu = nullptr; + +protected: + String handled_type; + String conversion_option_name; + + Node *edited_node = nullptr; + + void _notification(int p_what); + + bool need_show_lifetime_dialog(SpinBox *p_seconds); + virtual void _menu_callback(int p_idx); + + virtual void _add_menu_options(PopupMenu *p_menu) {} + virtual Node *_convert_particles() = 0; + +public: + virtual void edit(Object *p_object) override; + virtual bool handles(Object *p_object) const override; + virtual void make_visible(bool p_visible) override; + + ParticlesEditorPlugin(); +}; + +// 2D ///////////////////////////////////////////// + +class Particles2DEditorPlugin : public ParticlesEditorPlugin { + GDCLASS(Particles2DEditorPlugin, ParticlesEditorPlugin); + +protected: + enum { + MENU_LOAD_EMISSION_MASK = 100, + }; + + enum EmissionMode { + EMISSION_MODE_SOLID, + EMISSION_MODE_BORDER, + EMISSION_MODE_BORDER_DIRECTED + }; + + EditorFileDialog *file = nullptr; + ConfirmationDialog *emission_mask = nullptr; + OptionButton *emission_mask_mode = nullptr; + CheckBox *emission_mask_centered = nullptr; + CheckBox *emission_colors = nullptr; + String source_emission_file; + + virtual void _menu_callback(int p_idx) override; + virtual void _add_menu_options(PopupMenu *p_menu) override; + + void _file_selected(const String &p_file); + void _get_base_emission_mask(PackedVector2Array &r_valid_positions, PackedVector2Array &r_valid_normals, PackedByteArray &r_valid_colors, Vector2i &r_image_size); + virtual void _generate_emission_mask() = 0; + +public: + Particles2DEditorPlugin(); +}; + +class GPUParticles2DEditorPlugin : public Particles2DEditorPlugin { + GDCLASS(GPUParticles2DEditorPlugin, Particles2DEditorPlugin); + + enum { + MENU_GENERATE_VISIBILITY_RECT = 200, + }; + + List<GPUParticles2D *> selected_particles; + + ConfirmationDialog *generate_visibility_rect = nullptr; + SpinBox *generate_seconds = nullptr; + + void _selection_changed(); + void _generate_visibility_rect(); + +protected: + void _notification(int p_what); + + void _menu_callback(int p_idx) override; + void _add_menu_options(PopupMenu *p_menu) override; + + Node *_convert_particles() override; + + void _generate_emission_mask() override; + +public: + GPUParticles2DEditorPlugin(); +}; + +class CPUParticles2DEditorPlugin : public Particles2DEditorPlugin { + GDCLASS(CPUParticles2DEditorPlugin, Particles2DEditorPlugin); + +protected: + Node *_convert_particles() override; + + void _generate_emission_mask() override; + +public: + CPUParticles2DEditorPlugin(); +}; + +// 3D ///////////////////////////////////////////// + +class Particles3DEditorPlugin : public ParticlesEditorPlugin { + GDCLASS(Particles3DEditorPlugin, ParticlesEditorPlugin); + + enum { + MENU_OPTION_GENERATE_AABB = 300, + MENU_OPTION_CREATE_EMISSION_VOLUME_FROM_NODE, + }; + + ConfirmationDialog *generate_aabb = nullptr; + SpinBox *generate_seconds = nullptr; + + SceneTreeDialog *emission_tree_dialog = nullptr; + ConfirmationDialog *emission_dialog = nullptr; + SpinBox *emission_amount = nullptr; + OptionButton *emission_fill = nullptr; + + void _generate_aabb(); + void _node_selected(const NodePath &p_path); + +protected: + Vector<Face3> geometry; + + virtual void _menu_callback(int p_idx) override; + virtual void _add_menu_options(PopupMenu *p_menu) override; + + bool _generate(Vector<Vector3> &r_points, Vector<Vector3> &r_normals); + virtual bool _can_generate_points() const = 0; + virtual void _generate_emission_points() = 0; + +public: + Particles3DEditorPlugin(); +}; + +class GPUParticles3DEditorPlugin : public Particles3DEditorPlugin { + GDCLASS(GPUParticles3DEditorPlugin, Particles3DEditorPlugin); + +protected: + Node *_convert_particles() override; + + bool _can_generate_points() const override; + void _generate_emission_points() override; + +public: + GPUParticles3DEditorPlugin(); +}; + +class CPUParticles3DEditorPlugin : public Particles3DEditorPlugin { + GDCLASS(CPUParticles3DEditorPlugin, Particles3DEditorPlugin); + +protected: + Node *_convert_particles() override; + + bool _can_generate_points() const override { return true; } + void _generate_emission_points() override; + +public: + CPUParticles3DEditorPlugin(); +}; + +#endif // PARTICLES_EDITOR_PLUGIN_H diff --git a/editor/plugins/script_editor_plugin.cpp b/editor/plugins/script_editor_plugin.cpp index 7e0331d15c..96a9dd7a80 100644 --- a/editor/plugins/script_editor_plugin.cpp +++ b/editor/plugins/script_editor_plugin.cpp @@ -261,6 +261,52 @@ Ref<EditorSyntaxHighlighter> EditorJSONSyntaxHighlighter::_create() const { return syntax_highlighter; } +//// + +void EditorMarkdownSyntaxHighlighter::_update_cache() { + highlighter->set_text_edit(text_edit); + highlighter->clear_keyword_colors(); + highlighter->clear_member_keyword_colors(); + highlighter->clear_color_regions(); + + // Disable automatic symbolic highlights, as these don't make sense for prose. + highlighter->set_symbol_color(EDITOR_GET("text_editor/theme/highlighting/text_color")); + highlighter->set_number_color(EDITOR_GET("text_editor/theme/highlighting/text_color")); + highlighter->set_member_variable_color(EDITOR_GET("text_editor/theme/highlighting/text_color")); + highlighter->set_function_color(EDITOR_GET("text_editor/theme/highlighting/text_color")); + + // Headings (any level). + const Color function_color = EDITOR_GET("text_editor/theme/highlighting/function_color"); + highlighter->add_color_region("#", "", function_color); + + // Bold. + highlighter->add_color_region("**", "**", function_color); + // `__bold__` syntax is not supported as color regions must begin with a symbol, + // not a character that is valid in an identifier. + + // Code (both inline code and triple-backticks code blocks). + const Color code_color = EDITOR_GET("text_editor/theme/highlighting/engine_type_color"); + highlighter->add_color_region("`", "`", code_color); + + // Link (both references and inline links with URLs). The URL is not highlighted. + const Color link_color = EDITOR_GET("text_editor/theme/highlighting/keyword_color"); + highlighter->add_color_region("[", "]", link_color); + + // Quote. + const Color quote_color = EDITOR_GET("text_editor/theme/highlighting/string_color"); + highlighter->add_color_region(">", "", quote_color, true); + + // HTML comment, which is also supported in Markdown. + const Color comment_color = EDITOR_GET("text_editor/theme/highlighting/comment_color"); + highlighter->add_color_region("<!--", "-->", comment_color); +} + +Ref<EditorSyntaxHighlighter> EditorMarkdownSyntaxHighlighter::_create() const { + Ref<EditorMarkdownSyntaxHighlighter> syntax_highlighter; + syntax_highlighter.instantiate(); + return syntax_highlighter; +} + //////////////////////////////////////////////////////////////////////////////// /*** SCRIPT EDITOR ****/ @@ -4414,6 +4460,10 @@ ScriptEditor::ScriptEditor(WindowWrapper *p_wrapper) { json_syntax_highlighter.instantiate(); register_syntax_highlighter(json_syntax_highlighter); + Ref<EditorMarkdownSyntaxHighlighter> markdown_syntax_highlighter; + markdown_syntax_highlighter.instantiate(); + register_syntax_highlighter(markdown_syntax_highlighter); + _update_online_doc(); } diff --git a/editor/plugins/script_editor_plugin.h b/editor/plugins/script_editor_plugin.h index 8e82d60605..5de0aaa1e9 100644 --- a/editor/plugins/script_editor_plugin.h +++ b/editor/plugins/script_editor_plugin.h @@ -120,6 +120,24 @@ public: EditorJSONSyntaxHighlighter() { highlighter.instantiate(); } }; +class EditorMarkdownSyntaxHighlighter : public EditorSyntaxHighlighter { + GDCLASS(EditorMarkdownSyntaxHighlighter, EditorSyntaxHighlighter) + +private: + Ref<CodeHighlighter> highlighter; + +public: + virtual void _update_cache() override; + virtual Dictionary _get_line_syntax_highlighting_impl(int p_line) override { return highlighter->get_line_syntax_highlighting(p_line); } + + virtual PackedStringArray _get_supported_languages() const override { return PackedStringArray{ "md", "markdown" }; } + virtual String _get_name() const override { return TTR("Markdown"); } + + virtual Ref<EditorSyntaxHighlighter> _create() const override; + + EditorMarkdownSyntaxHighlighter() { highlighter.instantiate(); } +}; + /////////////////////////////////////////////////////////////////////////////// class ScriptEditorQuickOpen : public ConfirmationDialog { diff --git a/editor/plugins/shader_editor_plugin.cpp b/editor/plugins/shader_editor_plugin.cpp index 6b00a2c9c5..5166619f90 100644 --- a/editor/plugins/shader_editor_plugin.cpp +++ b/editor/plugins/shader_editor_plugin.cpp @@ -396,6 +396,7 @@ void ShaderEditorPlugin::_setup_popup_menu(PopupMenuType p_type, PopupMenu *p_me if (p_type == FILE) { p_menu->add_separator(); p_menu->add_item(TTR("Open File in Inspector"), FILE_INSPECT); + p_menu->add_item(TTR("Inspect Native Shader Code..."), FILE_INSPECT_NATIVE_SHADER_CODE); p_menu->add_separator(); p_menu->add_shortcut(ED_SHORTCUT("shader_editor/close_file", TTR("Close File"), KeyModifierMask::CMD_OR_CTRL | Key::W), FILE_CLOSE); } else { @@ -554,6 +555,12 @@ void ShaderEditorPlugin::_menu_item_pressed(int p_index) { EditorNode::get_singleton()->push_item(edited_shaders[index].shader_inc.ptr()); } } break; + case FILE_INSPECT_NATIVE_SHADER_CODE: { + int index = shader_tabs->get_current_tab(); + if (edited_shaders[index].shader.is_valid()) { + edited_shaders[index].shader->inspect_native_shader_code(); + } + } break; case FILE_CLOSE: { _close_shader(shader_tabs->get_current_tab()); } break; @@ -754,6 +761,7 @@ void ShaderEditorPlugin::_set_file_specific_items_disabled(bool p_disabled) { file_popup_menu->set_item_disabled(file_popup_menu->get_item_index(FILE_SAVE), p_disabled); file_popup_menu->set_item_disabled(file_popup_menu->get_item_index(FILE_SAVE_AS), p_disabled); file_popup_menu->set_item_disabled(file_popup_menu->get_item_index(FILE_INSPECT), p_disabled); + file_popup_menu->set_item_disabled(file_popup_menu->get_item_index(FILE_INSPECT_NATIVE_SHADER_CODE), p_disabled); file_popup_menu->set_item_disabled(file_popup_menu->get_item_index(FILE_CLOSE), p_disabled); } diff --git a/editor/plugins/shader_editor_plugin.h b/editor/plugins/shader_editor_plugin.h index 43e6af79fa..19e43921c3 100644 --- a/editor/plugins/shader_editor_plugin.h +++ b/editor/plugins/shader_editor_plugin.h @@ -69,6 +69,7 @@ class ShaderEditorPlugin : public EditorPlugin { FILE_SAVE, FILE_SAVE_AS, FILE_INSPECT, + FILE_INSPECT_NATIVE_SHADER_CODE, FILE_CLOSE, CLOSE_ALL, CLOSE_OTHER_TABS, diff --git a/editor/plugins/sprite_2d_editor_plugin.cpp b/editor/plugins/sprite_2d_editor_plugin.cpp index c7db243662..5333e10b56 100644 --- a/editor/plugins/sprite_2d_editor_plugin.cpp +++ b/editor/plugins/sprite_2d_editor_plugin.cpp @@ -593,12 +593,12 @@ Sprite2DEditor::Sprite2DEditor() { add_child(err_dialog); debug_uv_dialog = memnew(ConfirmationDialog); + debug_uv_dialog->set_size(Size2(960, 540) * EDSCALE); VBoxContainer *vb = memnew(VBoxContainer); debug_uv_dialog->add_child(vb); debug_uv = memnew(Panel); debug_uv->connect(SceneStringName(gui_input), callable_mp(this, &Sprite2DEditor::_debug_uv_input)); debug_uv->connect(SceneStringName(draw), callable_mp(this, &Sprite2DEditor::_debug_uv_draw)); - debug_uv->set_custom_minimum_size(Size2(800, 500) * EDSCALE); debug_uv->set_clip_contents(true); vb->add_margin_child(TTR("Preview:"), debug_uv, true); diff --git a/editor/plugins/sprite_frames_editor_plugin.cpp b/editor/plugins/sprite_frames_editor_plugin.cpp index 37d5b787eb..168a3b3ac2 100644 --- a/editor/plugins/sprite_frames_editor_plugin.cpp +++ b/editor/plugins/sprite_frames_editor_plugin.cpp @@ -1986,6 +1986,7 @@ SpriteFramesEditor::SpriteFramesEditor() { sub_vb->add_child(hfc); playback_container = memnew(HBoxContainer); + playback_container->set_layout_direction(LAYOUT_DIRECTION_LTR); hfc->add_child(playback_container); play_bw_from = memnew(Button); @@ -2013,7 +2014,7 @@ SpriteFramesEditor::SpriteFramesEditor() { play_from->set_tooltip_text(TTR("Play selected animation from current pos. (D)")); playback_container->add_child(play_from); - playback_container->add_child(memnew(VSeparator)); + hfc->add_child(memnew(VSeparator)); autoplay->connect(SceneStringName(pressed), callable_mp(this, &SpriteFramesEditor::_autoplay_pressed)); autoplay->set_toggle_mode(true); diff --git a/editor/plugins/visual_shader_editor_plugin.cpp b/editor/plugins/visual_shader_editor_plugin.cpp index ede8351e41..a5df9edcf0 100644 --- a/editor/plugins/visual_shader_editor_plugin.cpp +++ b/editor/plugins/visual_shader_editor_plugin.cpp @@ -2128,12 +2128,11 @@ void VisualShaderEditor::_update_nodes() { } } - Array keys = added.keys(); - keys.sort(); - - for (int i = 0; i < keys.size(); i++) { - const Variant &key = keys.get(i); + List<Variant> keys; + added.get_key_list(&keys); + keys.sort_custom<StringLikeVariantOrder>(); + for (const Variant &key : keys) { const Dictionary &value = (Dictionary)added[key]; add_custom_type(value["name"], value["type"], value["script"], value["description"], value["return_icon_type"], value["category"], value["highend"]); diff --git a/editor/project_manager/project_list.cpp b/editor/project_manager/project_list.cpp index 541ab01e62..39c1a78c4a 100644 --- a/editor/project_manager/project_list.cpp +++ b/editor/project_manager/project_list.cpp @@ -88,7 +88,7 @@ void ProjectListItemControl::_notification(int p_what) { draw_style_box(get_theme_stylebox(SNAME("selected"), SNAME("Tree")), Rect2(Point2(), get_size())); } if (is_hovering) { - draw_style_box(get_theme_stylebox(SNAME("hover"), SNAME("Tree")), Rect2(Point2(), get_size())); + draw_style_box(get_theme_stylebox(SNAME("hovered"), SNAME("Tree")), Rect2(Point2(), get_size())); } draw_line(Point2(0, get_size().y + 1), Point2(get_size().x, get_size().y + 1), get_theme_color(SNAME("guide_color"), SNAME("Tree"))); diff --git a/editor/register_editor_types.cpp b/editor/register_editor_types.cpp index 19aeeb0612..c4ebca7308 100644 --- a/editor/register_editor_types.cpp +++ b/editor/register_editor_types.cpp @@ -76,15 +76,11 @@ #include "editor/plugins/collision_polygon_2d_editor_plugin.h" #include "editor/plugins/collision_shape_2d_editor_plugin.h" #include "editor/plugins/control_editor_plugin.h" -#include "editor/plugins/cpu_particles_2d_editor_plugin.h" -#include "editor/plugins/cpu_particles_3d_editor_plugin.h" #include "editor/plugins/curve_editor_plugin.h" #include "editor/plugins/editor_context_menu_plugin.h" #include "editor/plugins/editor_debugger_plugin.h" #include "editor/plugins/editor_resource_tooltip_plugins.h" #include "editor/plugins/font_config_plugin.h" -#include "editor/plugins/gpu_particles_2d_editor_plugin.h" -#include "editor/plugins/gpu_particles_3d_editor_plugin.h" #include "editor/plugins/gpu_particles_collision_sdf_editor_plugin.h" #include "editor/plugins/gradient_editor_plugin.h" #include "editor/plugins/gradient_texture_2d_editor_plugin.h" @@ -105,6 +101,7 @@ #include "editor/plugins/occluder_instance_3d_editor_plugin.h" #include "editor/plugins/packed_scene_editor_plugin.h" #include "editor/plugins/parallax_background_editor_plugin.h" +#include "editor/plugins/particles_editor_plugin.h" #include "editor/plugins/path_2d_editor_plugin.h" #include "editor/plugins/path_3d_editor_plugin.h" #include "editor/plugins/physical_bone_3d_editor_plugin.h" diff --git a/editor/scene_tree_dock.cpp b/editor/scene_tree_dock.cpp index bcab0c2883..0f359a3af8 100644 --- a/editor/scene_tree_dock.cpp +++ b/editor/scene_tree_dock.cpp @@ -156,11 +156,12 @@ void SceneTreeDock::shortcut_input(const Ref<InputEvent> &p_event) { } if (ED_IS_SHORTCUT("scene_tree/rename", p_event)) { - // Prevent renaming if a button is focused - // to avoid conflict with Enter shortcut on macOS - if (!focus_owner || !Object::cast_to<BaseButton>(focus_owner)) { - _tool_selected(TOOL_RENAME); + // Prevent renaming if a button or a range is focused + // to avoid conflict with Enter shortcut on macOS. + if (focus_owner && (Object::cast_to<BaseButton>(focus_owner) || Object::cast_to<Range>(focus_owner))) { + return; } + _tool_selected(TOOL_RENAME); #ifdef MODULE_REGEX_ENABLED } else if (ED_IS_SHORTCUT("scene_tree/batch_rename", p_event)) { _tool_selected(TOOL_BATCH_RENAME); @@ -1667,6 +1668,7 @@ void SceneTreeDock::_notification(int p_what) { button_instance->set_icon(get_editor_theme_icon(SNAME("Instance"))); button_create_script->set_icon(get_editor_theme_icon(SNAME("ScriptCreate"))); button_detach_script->set_icon(get_editor_theme_icon(SNAME("ScriptRemove"))); + button_extend_script->set_icon(get_editor_theme_icon(SNAME("ScriptExtend"))); button_tree_menu->set_icon(get_editor_theme_icon(SNAME("GuiTabMenuHl"))); filter->set_right_icon(get_editor_theme_icon(SNAME("Search"))); @@ -2784,33 +2786,49 @@ void SceneTreeDock::_delete_confirm(bool p_cut) { } void SceneTreeDock::_update_script_button() { - if (!profile_allow_script_editing) { - button_create_script->hide(); - button_detach_script->hide(); - } else if (editor_selection->get_selection().size() == 0) { - button_create_script->hide(); - button_detach_script->hide(); - } else if (editor_selection->get_selection().size() == 1) { - Node *n = editor_selection->get_selected_node_list().front()->get(); - if (n->get_script().is_null()) { - button_create_script->show(); - button_detach_script->hide(); - } else { - button_create_script->hide(); - button_detach_script->show(); - } - } else { - button_create_script->hide(); + bool can_create_script = false; + bool can_detach_script = false; + bool can_extend_script = false; + + if (profile_allow_script_editing) { Array selection = editor_selection->get_selected_nodes(); + for (int i = 0; i < selection.size(); i++) { Node *n = Object::cast_to<Node>(selection[i]); - if (!n->get_script().is_null()) { - button_detach_script->show(); - return; + Ref<Script> s = n->get_script(); + Ref<Script> cts; + + if (n->has_meta(SceneStringName(_custom_type_script))) { + cts = n->get_meta(SceneStringName(_custom_type_script)); + } + + if (selection.size() == 1) { + if (s.is_valid()) { + if (cts.is_valid() && s == cts) { + can_extend_script = true; + } + } else { + can_create_script = true; + } + } + + if (s.is_valid()) { + if (cts.is_valid()) { + if (s != cts) { + can_detach_script = true; + break; + } + } else { + can_detach_script = true; + break; + } } } - button_detach_script->hide(); } + + button_create_script->set_visible(can_create_script); + button_detach_script->set_visible(can_detach_script); + button_extend_script->set_visible(can_extend_script); } void SceneTreeDock::_selection_changed() { @@ -3057,7 +3075,28 @@ void SceneTreeDock::_replace_node(Node *p_node, Node *p_by_node, bool p_keep_pro Node *newnode = p_by_node; if (p_keep_properties) { - Node *default_oldnode = Object::cast_to<Node>(ClassDB::instantiate(oldnode->get_class())); + Node *default_oldnode = nullptr; + + // If we're dealing with a custom node type, we need to create a default instance of the custom type instead of the native type for property comparison. + if (oldnode->has_meta(SceneStringName(_custom_type_script))) { + Ref<Script> cts = oldnode->get_meta(SceneStringName(_custom_type_script)); + default_oldnode = Object::cast_to<Node>(get_editor_data()->script_class_instance(cts->get_global_name())); + if (default_oldnode) { + default_oldnode->set_name(cts->get_global_name()); + get_editor_data()->instantiate_object_properties(default_oldnode); + } else { + // Legacy custom type, registered with "add_custom_type()". + // TODO: Should probably be deprecated in 4.x. + const EditorData::CustomType *custom_type = get_editor_data()->get_custom_type_by_path(cts->get_path()); + if (custom_type) { + default_oldnode = Object::cast_to<Node>(get_editor_data()->instantiate_custom_type(custom_type->name, cts->get_instance_base_type())); + } + } + } + + if (!default_oldnode) { + default_oldnode = Object::cast_to<Node>(ClassDB::instantiate(oldnode->get_class())); + } List<PropertyInfo> pinfo; oldnode->get_property_list(&pinfo); @@ -3542,6 +3581,27 @@ void SceneTreeDock::_script_dropped(const String &p_file, NodePath p_to) { undo_redo->add_undo_method(ed, "live_debug_remove_node", NodePath(String(edited_scene->get_path_to(n)).path_join(new_node->get_name()))); undo_redo->commit_action(); } else { + // Check if dropped script is compatible. + if (n->has_meta(SceneStringName(_custom_type_script))) { + Ref<Script> ct_scr = n->get_meta(SceneStringName(_custom_type_script)); + if (!scr->inherits_script(ct_scr)) { + String custom_type_name = ct_scr->get_global_name(); + + // Legacy custom type, registered with "add_custom_type()". + if (custom_type_name.is_empty()) { + const EditorData::CustomType *custom_type = get_editor_data()->get_custom_type_by_path(ct_scr->get_path()); + if (custom_type) { + custom_type_name = custom_type->name; + } else { + custom_type_name = TTR("<unknown>"); + } + } + + WARN_PRINT_ED(vformat("Script does not extend type: '%s'.", custom_type_name)); + return; + } + } + undo_redo->create_action(TTR("Attach Script"), UndoRedo::MERGE_DISABLE, n); undo_redo->add_do_method(InspectorDock::get_singleton(), "store_script_properties", n); undo_redo->add_undo_method(InspectorDock::get_singleton(), "store_script_properties", n); @@ -3649,6 +3709,7 @@ void SceneTreeDock::_tree_rmb(const Vector2 &p_menu_pos) { Ref<Script> existing_script; bool existing_script_removable = true; + bool allow_attach_new_script = true; if (selection.size() == 1) { Node *selected = selection.front()->get(); @@ -3672,6 +3733,10 @@ void SceneTreeDock::_tree_rmb(const Vector2 &p_menu_pos) { if (EditorNode::get_singleton()->get_object_custom_type_base(selected) == existing_script) { existing_script_removable = false; } + + if (selected->has_meta(SceneStringName(_custom_type_script))) { + allow_attach_new_script = false; + } } if (profile_allow_editing) { @@ -3692,7 +3757,10 @@ void SceneTreeDock::_tree_rmb(const Vector2 &p_menu_pos) { if (full_selection.size() == 1) { add_separator = true; - menu->add_icon_shortcut(get_editor_theme_icon(SNAME("ScriptCreate")), ED_GET_SHORTCUT("scene_tree/attach_script"), TOOL_ATTACH_SCRIPT); + if (allow_attach_new_script) { + menu->add_icon_shortcut(get_editor_theme_icon(SNAME("ScriptCreate")), ED_GET_SHORTCUT("scene_tree/attach_script"), TOOL_ATTACH_SCRIPT); + } + if (existing_script.is_valid()) { menu->add_icon_shortcut(get_editor_theme_icon(SNAME("ScriptExtend")), ED_GET_SHORTCUT("scene_tree/extend_script"), TOOL_EXTEND_SCRIPT); } @@ -4601,6 +4669,14 @@ SceneTreeDock::SceneTreeDock(Node *p_scene_root, EditorSelection *p_editor_selec filter_hbc->add_child(button_detach_script); button_detach_script->hide(); + button_extend_script = memnew(Button); + button_extend_script->set_flat(true); + button_extend_script->connect(SceneStringName(pressed), callable_mp(this, &SceneTreeDock::_tool_selected).bind(TOOL_EXTEND_SCRIPT, false)); + button_extend_script->set_tooltip_text(TTR("Extend the script of the selected node.")); + button_extend_script->set_shortcut(ED_GET_SHORTCUT("scene_tree/extend_script")); + filter_hbc->add_child(button_extend_script); + button_extend_script->hide(); + button_tree_menu = memnew(MenuButton); button_tree_menu->set_flat(false); button_tree_menu->set_theme_type_variation("FlatMenuButton"); diff --git a/editor/scene_tree_dock.h b/editor/scene_tree_dock.h index 05ad0f36e4..8cee2870f6 100644 --- a/editor/scene_tree_dock.h +++ b/editor/scene_tree_dock.h @@ -115,6 +115,7 @@ class SceneTreeDock : public VBoxContainer { Button *button_instance = nullptr; Button *button_create_script = nullptr; Button *button_detach_script = nullptr; + Button *button_extend_script = nullptr; MenuButton *button_tree_menu = nullptr; Button *node_shortcuts_toggle = nullptr; diff --git a/editor/themes/editor_theme_manager.cpp b/editor/themes/editor_theme_manager.cpp index 17bcbacfc2..4db43f0703 100644 --- a/editor/themes/editor_theme_manager.cpp +++ b/editor/themes/editor_theme_manager.cpp @@ -633,6 +633,16 @@ void EditorThemeManager::_create_shared_styles(const Ref<EditorTheme> &p_theme, // in 4.0, and even if it was, it may not always work in practice (e.g. running with compositing disabled). p_config.popup_style->set_corner_radius_all(0); + p_config.popup_border_style = p_config.popup_style->duplicate(); + p_config.popup_border_style->set_content_margin_all(MAX(Math::round(EDSCALE), p_config.border_width) + 2 + (p_config.base_margin * 1.5) * EDSCALE); + // Always display a border for popups like PopupMenus so they can be distinguished from their background. + p_config.popup_border_style->set_border_width_all(MAX(Math::round(EDSCALE), p_config.border_width)); + if (p_config.draw_extra_borders) { + p_config.popup_border_style->set_border_color(p_config.extra_border_color_2); + } else { + p_config.popup_border_style->set_border_color(p_config.dark_color_2); + } + p_config.window_style = p_config.popup_style->duplicate(); p_config.window_style->set_border_color(p_config.base_color); p_config.window_style->set_border_width(SIDE_TOP, 24 * EDSCALE); @@ -707,7 +717,7 @@ void EditorThemeManager::_populate_standard_styles(const Ref<EditorTheme> &p_the } // PopupPanel - p_theme->set_stylebox(SceneStringName(panel), "PopupPanel", p_config.popup_style); + p_theme->set_stylebox(SceneStringName(panel), "PopupPanel", p_config.popup_border_style); } // Buttons. @@ -945,6 +955,8 @@ void EditorThemeManager::_populate_standard_styles(const Ref<EditorTheme> &p_the p_theme->set_color("custom_button_font_highlight", "Tree", p_config.font_hover_color); p_theme->set_color(SceneStringName(font_color), "Tree", p_config.font_color); + p_theme->set_color("font_hovered_color", "Tree", p_config.mono_color); + p_theme->set_color("font_hovered_dimmed_color", "Tree", p_config.font_color); p_theme->set_color("font_selected_color", "Tree", p_config.mono_color); p_theme->set_color("font_disabled_color", "Tree", p_config.font_disabled_color); p_theme->set_color("font_outline_color", "Tree", p_config.font_outline_color); @@ -997,7 +1009,13 @@ void EditorThemeManager::_populate_standard_styles(const Ref<EditorTheme> &p_the Ref<StyleBoxFlat> style_tree_hover = p_config.base_style->duplicate(); style_tree_hover->set_bg_color(p_config.highlight_color * Color(1, 1, 1, 0.4)); style_tree_hover->set_border_width_all(0); - p_theme->set_stylebox("hover", "Tree", style_tree_hover); + p_theme->set_stylebox("hovered", "Tree", style_tree_hover); + p_theme->set_stylebox("button_hover", "Tree", style_tree_hover); + + Ref<StyleBoxFlat> style_tree_hover_dimmed = p_config.base_style->duplicate(); + style_tree_hover_dimmed->set_bg_color(p_config.highlight_color * Color(1, 1, 1, 0.2)); + style_tree_hover_dimmed->set_border_width_all(0); + p_theme->set_stylebox("hovered_dimmed", "Tree", style_tree_hover_dimmed); p_theme->set_stylebox("selected_focus", "Tree", style_tree_focus); p_theme->set_stylebox("selected", "Tree", style_tree_selected); @@ -1306,18 +1324,11 @@ void EditorThemeManager::_populate_standard_styles(const Ref<EditorTheme> &p_the // PopupMenu. { - Ref<StyleBoxFlat> style_popup_menu = p_config.popup_style->duplicate(); + Ref<StyleBoxFlat> style_popup_menu = p_config.popup_border_style->duplicate(); // Use 1 pixel for the sides, since if 0 is used, the highlight of hovered items is drawn // on top of the popup border. This causes a 'gap' in the panel border when an item is highlighted, // and it looks weird. 1px solves this. - style_popup_menu->set_content_margin_individual(EDSCALE, 2 * EDSCALE, EDSCALE, 2 * EDSCALE); - // Always display a border for PopupMenus so they can be distinguished from their background. - style_popup_menu->set_border_width_all(EDSCALE); - if (p_config.draw_extra_borders) { - style_popup_menu->set_border_color(p_config.extra_border_color_2); - } else { - style_popup_menu->set_border_color(p_config.dark_color_2); - } + style_popup_menu->set_content_margin_individual(Math::round(EDSCALE), 2 * EDSCALE, Math::round(EDSCALE), 2 * EDSCALE); p_theme->set_stylebox(SceneStringName(panel), "PopupMenu", style_popup_menu); Ref<StyleBoxFlat> style_menu_hover = p_config.button_style_hover->duplicate(); @@ -1327,17 +1338,17 @@ void EditorThemeManager::_populate_standard_styles(const Ref<EditorTheme> &p_the Ref<StyleBoxLine> style_popup_separator(memnew(StyleBoxLine)); style_popup_separator->set_color(p_config.separator_color); - style_popup_separator->set_grow_begin(p_config.popup_margin - MAX(Math::round(EDSCALE), p_config.border_width)); - style_popup_separator->set_grow_end(p_config.popup_margin - MAX(Math::round(EDSCALE), p_config.border_width)); + style_popup_separator->set_grow_begin(Math::round(EDSCALE) - MAX(Math::round(EDSCALE), p_config.border_width)); + style_popup_separator->set_grow_end(Math::round(EDSCALE) - MAX(Math::round(EDSCALE), p_config.border_width)); style_popup_separator->set_thickness(MAX(Math::round(EDSCALE), p_config.border_width)); Ref<StyleBoxLine> style_popup_labeled_separator_left(memnew(StyleBoxLine)); - style_popup_labeled_separator_left->set_grow_begin(p_config.popup_margin - MAX(Math::round(EDSCALE), p_config.border_width)); + style_popup_labeled_separator_left->set_grow_begin(Math::round(EDSCALE) - MAX(Math::round(EDSCALE), p_config.border_width)); style_popup_labeled_separator_left->set_color(p_config.separator_color); style_popup_labeled_separator_left->set_thickness(MAX(Math::round(EDSCALE), p_config.border_width)); Ref<StyleBoxLine> style_popup_labeled_separator_right(memnew(StyleBoxLine)); - style_popup_labeled_separator_right->set_grow_end(p_config.popup_margin - MAX(Math::round(EDSCALE), p_config.border_width)); + style_popup_labeled_separator_right->set_grow_end(Math::round(EDSCALE) - MAX(Math::round(EDSCALE), p_config.border_width)); style_popup_labeled_separator_right->set_color(p_config.separator_color); style_popup_labeled_separator_right->set_thickness(MAX(Math::round(EDSCALE), p_config.border_width)); @@ -2115,21 +2126,6 @@ void EditorThemeManager::_populate_editor_styles(const Ref<EditorTheme> &p_theme // EditorValidationPanel. p_theme->set_stylebox(SceneStringName(panel), "EditorValidationPanel", p_config.tree_panel_style); - - // ControlEditor. - { - p_theme->set_type_variation("ControlEditorPopupPanel", "PopupPanel"); - - Ref<StyleBoxFlat> control_editor_popup_style = p_config.popup_style->duplicate(); - control_editor_popup_style->set_shadow_size(0); - control_editor_popup_style->set_content_margin(SIDE_LEFT, p_config.base_margin * EDSCALE); - control_editor_popup_style->set_content_margin(SIDE_TOP, p_config.base_margin * EDSCALE); - control_editor_popup_style->set_content_margin(SIDE_RIGHT, p_config.base_margin * EDSCALE); - control_editor_popup_style->set_content_margin(SIDE_BOTTOM, p_config.base_margin * EDSCALE); - control_editor_popup_style->set_border_width_all(0); - - p_theme->set_stylebox(SceneStringName(panel), "ControlEditorPopupPanel", control_editor_popup_style); - } } // Editor inspector. diff --git a/editor/themes/editor_theme_manager.h b/editor/themes/editor_theme_manager.h index 5e7bd00083..ca5e1a4e2d 100644 --- a/editor/themes/editor_theme_manager.h +++ b/editor/themes/editor_theme_manager.h @@ -135,6 +135,7 @@ class EditorThemeManager { Ref<StyleBoxFlat> button_style_hover; Ref<StyleBoxFlat> popup_style; + Ref<StyleBoxFlat> popup_border_style; Ref<StyleBoxFlat> window_style; Ref<StyleBoxFlat> dialog_style; Ref<StyleBoxFlat> panel_container_style; diff --git a/editor/window_wrapper.cpp b/editor/window_wrapper.cpp index 9496ba016c..7f61623980 100644 --- a/editor/window_wrapper.cpp +++ b/editor/window_wrapper.cpp @@ -391,7 +391,6 @@ void ScreenSelect::_notification(int p_what) { } break; case NOTIFICATION_THEME_CHANGED: { set_icon(get_editor_theme_icon("MakeFloating")); - popup_background->add_theme_style_override(SceneStringName(panel), get_theme_stylebox("PanelForeground", EditorStringName(EditorStyles))); const real_t popup_height = real_t(get_theme_font_size(SceneStringName(font_size))) * 2.0; popup->set_min_size(Size2(0, popup_height * 3)); @@ -454,14 +453,10 @@ ScreenSelect::ScreenSelect() { // Create the popup. const Size2 borders = Size2(4, 4) * EDSCALE; - popup = memnew(Popup); + popup = memnew(PopupPanel); popup->connect("popup_hide", callable_mp(static_cast<BaseButton *>(this), &ScreenSelect::set_pressed).bind(false)); add_child(popup); - popup_background = memnew(Panel); - popup_background->set_anchors_and_offsets_preset(PRESET_FULL_RECT); - popup->add_child(popup_background); - MarginContainer *popup_root = memnew(MarginContainer); popup_root->add_theme_constant_override("margin_right", borders.width); popup_root->add_theme_constant_override("margin_top", borders.height); diff --git a/editor/window_wrapper.h b/editor/window_wrapper.h index a07e95f09e..3597276de9 100644 --- a/editor/window_wrapper.h +++ b/editor/window_wrapper.h @@ -88,7 +88,6 @@ class ScreenSelect : public Button { GDCLASS(ScreenSelect, Button); Popup *popup = nullptr; - Panel *popup_background = nullptr; HBoxContainer *screen_list = nullptr; void _build_advanced_menu(); diff --git a/main/main.cpp b/main/main.cpp index 5206e9b84c..743b67f89b 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -42,6 +42,7 @@ #include "core/io/dir_access.h" #include "core/io/file_access_pack.h" #include "core/io/file_access_zip.h" +#include "core/io/image.h" #include "core/io/image_loader.h" #include "core/io/ip.h" #include "core/io/resource_loader.h" @@ -2412,6 +2413,9 @@ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_ph if (bool(GLOBAL_GET("display/window/size/no_focus"))) { window_flags |= DisplayServer::WINDOW_FLAG_NO_FOCUS_BIT; } + if (bool(GLOBAL_GET("display/window/size/sharp_corners"))) { + window_flags |= DisplayServer::WINDOW_FLAG_SHARP_CORNERS_BIT; + } window_mode = (DisplayServer::WindowMode)(GLOBAL_GET("display/window/size/mode").operator int()); int initial_position_type = GLOBAL_GET("display/window/size/initial_position_type").operator int(); if (initial_position_type == 0) { // Absolute. @@ -3197,6 +3201,10 @@ Error Main::setup2(bool p_show_boot_logo) { } id->set_emulate_mouse_from_touch(bool(GLOBAL_DEF_BASIC("input_devices/pointing/emulate_mouse_from_touch", true))); + + if (editor) { + id->set_emulate_mouse_from_touch(true); + } } OS::get_singleton()->benchmark_end_measure("Startup", "Setup Window and Boot"); diff --git a/modules/basis_universal/SCsub b/modules/basis_universal/SCsub index 9bea0a0ca9..e201452385 100644 --- a/modules/basis_universal/SCsub +++ b/modules/basis_universal/SCsub @@ -14,6 +14,8 @@ thirdparty_obj = [] thirdparty_dir = "#thirdparty/basis_universal/" # Sync list with upstream CMakeLists.txt encoder_sources = [ + "3rdparty/android_astc_decomp.cpp", + "basisu_astc_hdr_enc.cpp", "basisu_backend.cpp", "basisu_basis_file.cpp", "basisu_bc7enc.cpp", @@ -45,6 +47,8 @@ else: if env["builtin_zstd"]: env_basisu.Prepend(CPPPATH=["#thirdparty/zstd"]) +env_basisu.Prepend(CPPPATH=["#thirdparty/tinyexr"]) + if env.dev_build: env_basisu.Append(CPPDEFINES=[("BASISU_DEVEL_MESSAGES", 1), ("BASISD_ENABLE_DEBUG_FLAGS", 1)]) diff --git a/modules/basis_universal/image_compress_basisu.cpp b/modules/basis_universal/image_compress_basisu.cpp index ab20d00b5b..8ca5dba225 100644 --- a/modules/basis_universal/image_compress_basisu.cpp +++ b/modules/basis_universal/image_compress_basisu.cpp @@ -30,6 +30,9 @@ #include "image_compress_basisu.h" +#include "core/io/image.h" +#include "core/os/os.h" +#include "core/string/print_string.h" #include "servers/rendering_server.h" #include <transcoder/basisu_transcoder.h> @@ -46,9 +49,48 @@ void basis_universal_init() { } #ifdef TOOLS_ENABLED +template <typename T> +inline void _basisu_pad_mipmap(const uint8_t *p_image_mip_data, Vector<uint8_t> &r_mip_data_padded, int p_next_width, int p_next_height, int p_width, int p_height, int64_t p_size) { + // Source mip's data interpreted as 32-bit RGBA blocks to help with copying pixel data. + const T *mip_src_data = reinterpret_cast<const T *>(p_image_mip_data); + + // Reserve space in the padded buffer. + r_mip_data_padded.resize(p_next_width * p_next_height * sizeof(T)); + T *data_padded_ptr = reinterpret_cast<T *>(r_mip_data_padded.ptrw()); + + // Pad mipmap to the nearest block by smearing. + int x = 0, y = 0; + for (y = 0; y < p_height; y++) { + for (x = 0; x < p_width; x++) { + data_padded_ptr[p_next_width * y + x] = mip_src_data[p_width * y + x]; + } + + // First, smear in x. + for (; x < p_next_width; x++) { + data_padded_ptr[p_next_width * y + x] = data_padded_ptr[p_next_width * y + x - 1]; + } + } + + // Then, smear in y. + for (; y < p_next_height; y++) { + for (x = 0; x < p_next_width; x++) { + data_padded_ptr[p_next_width * y + x] = data_padded_ptr[p_next_width * y + x - p_next_width]; + } + } +} + Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedChannels p_channels) { + uint64_t start_time = OS::get_singleton()->get_ticks_msec(); + Ref<Image> image = p_image->duplicate(); - image->convert(Image::FORMAT_RGBA8); + bool is_hdr = false; + + if (image->get_format() <= Image::FORMAT_RGB565) { + image->convert(Image::FORMAT_RGBA8); + } else if (image->get_format() <= Image::FORMAT_RGBE9995) { + image->convert(Image::FORMAT_RGBAF); + is_hdr = true; + } basisu::basis_compressor_params params; @@ -74,32 +116,42 @@ Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedCha basisu::job_pool job_pool(OS::get_singleton()->get_processor_count()); params.m_pJob_pool = &job_pool; - BasisDecompressFormat decompress_format = BASIS_DECOMPRESS_RG; - switch (p_channels) { - case Image::USED_CHANNELS_L: { - decompress_format = BASIS_DECOMPRESS_RGB; - } break; - case Image::USED_CHANNELS_LA: { - params.m_force_alpha = true; - decompress_format = BASIS_DECOMPRESS_RGBA; - } break; - case Image::USED_CHANNELS_R: { - decompress_format = BASIS_DECOMPRESS_R; - } break; - case Image::USED_CHANNELS_RG: { - params.m_force_alpha = true; - image->convert_rg_to_ra_rgba8(); - decompress_format = BASIS_DECOMPRESS_RG; - } break; - case Image::USED_CHANNELS_RGB: { - decompress_format = BASIS_DECOMPRESS_RGB; - } break; - case Image::USED_CHANNELS_RGBA: { - params.m_force_alpha = true; - decompress_format = BASIS_DECOMPRESS_RGBA; - } break; + BasisDecompressFormat decompress_format = BASIS_DECOMPRESS_MAX; + + if (is_hdr) { + decompress_format = BASIS_DECOMPRESS_HDR_RGB; + params.m_hdr = true; + params.m_uastc_hdr_options.set_quality_level(0); + + } else { + switch (p_channels) { + case Image::USED_CHANNELS_L: { + decompress_format = BASIS_DECOMPRESS_RGB; + } break; + case Image::USED_CHANNELS_LA: { + params.m_force_alpha = true; + decompress_format = BASIS_DECOMPRESS_RGBA; + } break; + case Image::USED_CHANNELS_R: { + decompress_format = BASIS_DECOMPRESS_R; + } break; + case Image::USED_CHANNELS_RG: { + params.m_force_alpha = true; + image->convert_rg_to_ra_rgba8(); + decompress_format = BASIS_DECOMPRESS_RG; + } break; + case Image::USED_CHANNELS_RGB: { + decompress_format = BASIS_DECOMPRESS_RGB; + } break; + case Image::USED_CHANNELS_RGBA: { + params.m_force_alpha = true; + decompress_format = BASIS_DECOMPRESS_RGBA; + } break; + } } + ERR_FAIL_COND_V(decompress_format == BASIS_DECOMPRESS_MAX, Vector<uint8_t>()); + // Copy the source image data with mipmaps into BasisU. { const int orig_width = image->get_width(); @@ -113,9 +165,10 @@ Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedCha Vector<uint8_t> image_data = image->get_data(); basisu::vector<basisu::image> basisu_mipmaps; + basisu::vector<basisu::imagef> basisu_mipmaps_hdr; // Buffer for storing padded mipmap data. - Vector<uint32_t> mip_data_padded; + Vector<uint8_t> mip_data_padded; for (int32_t i = 0; i <= image->get_mipmap_count(); i++) { int64_t ofs, size; @@ -126,31 +179,10 @@ Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedCha // Pad the mipmap's data if its resolution isn't divisible by 4. if (image->has_mipmaps() && !is_res_div_4 && (width > 2 && height > 2) && (width != next_width || height != next_height)) { - // Source mip's data interpreted as 32-bit RGBA blocks to help with copying pixel data. - const uint32_t *mip_src_data = reinterpret_cast<const uint32_t *>(image_mip_data); - - // Reserve space in the padded buffer. - mip_data_padded.resize(next_width * next_height); - uint32_t *data_padded_ptr = mip_data_padded.ptrw(); - - // Pad mipmap to the nearest block by smearing. - int x = 0, y = 0; - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - data_padded_ptr[next_width * y + x] = mip_src_data[width * y + x]; - } - - // First, smear in x. - for (; x < next_width; x++) { - data_padded_ptr[next_width * y + x] = data_padded_ptr[next_width * y + x - 1]; - } - } - - // Then, smear in y. - for (; y < next_height; y++) { - for (x = 0; x < next_width; x++) { - data_padded_ptr[next_width * y + x] = data_padded_ptr[next_width * y + x - next_width]; - } + if (is_hdr) { + _basisu_pad_mipmap<BasisRGBAF>(image_mip_data, mip_data_padded, next_width, next_height, width, height, size); + } else { + _basisu_pad_mipmap<uint32_t>(image_mip_data, mip_data_padded, next_width, next_height, width, height, size); } // Override the image_mip_data pointer with our temporary Vector. @@ -159,7 +191,7 @@ Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedCha // Override the mipmap's properties. width = next_width; height = next_height; - size = mip_data_padded.size() * 4; + size = mip_data_padded.size(); } // Get the next mipmap's resolution. @@ -167,44 +199,61 @@ Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedCha next_height /= 2; // Copy the source mipmap's data to a BasisU image. - basisu::image basisu_image(width, height); - memcpy(basisu_image.get_ptr(), image_mip_data, size); + if (is_hdr) { + basisu::imagef basisu_image(width, height); + memcpy(reinterpret_cast<uint8_t *>(basisu_image.get_ptr()), image_mip_data, size); + + if (i == 0) { + params.m_source_images_hdr.push_back(basisu_image); + } else { + basisu_mipmaps_hdr.push_back(basisu_image); + } - if (i == 0) { - params.m_source_images.push_back(basisu_image); } else { - basisu_mipmaps.push_back(basisu_image); + basisu::image basisu_image(width, height); + memcpy(basisu_image.get_ptr(), image_mip_data, size); + + if (i == 0) { + params.m_source_images.push_back(basisu_image); + } else { + basisu_mipmaps.push_back(basisu_image); + } } } - params.m_source_mipmap_images.push_back(basisu_mipmaps); + if (is_hdr) { + params.m_source_mipmap_images_hdr.push_back(basisu_mipmaps_hdr); + } else { + params.m_source_mipmap_images.push_back(basisu_mipmaps); + } } // Encode the image data. - Vector<uint8_t> basisu_data; - basisu::basis_compressor compressor; compressor.init(params); int basisu_err = compressor.process(); - ERR_FAIL_COND_V(basisu_err != basisu::basis_compressor::cECSuccess, basisu_data); + ERR_FAIL_COND_V(basisu_err != basisu::basis_compressor::cECSuccess, Vector<uint8_t>()); - const basisu::uint8_vec &basisu_out = compressor.get_output_basis_file(); - basisu_data.resize(basisu_out.size() + 4); + const basisu::uint8_vec &basisu_encoded = compressor.get_output_basis_file(); - // Copy the encoded data to the buffer. - { - uint8_t *wb = basisu_data.ptrw(); - *(uint32_t *)wb = decompress_format; + Vector<uint8_t> basisu_data; + basisu_data.resize(basisu_encoded.size() + 4); + uint8_t *basisu_data_ptr = basisu_data.ptrw(); - memcpy(wb + 4, basisu_out.get_ptr(), basisu_out.size()); - } + // Copy the encoded BasisU data into the output buffer. + *(uint32_t *)basisu_data_ptr = decompress_format; + memcpy(basisu_data_ptr + 4, basisu_encoded.get_ptr(), basisu_encoded.size()); + + print_verbose(vformat("BasisU: Encoding a %dx%d image with %d mipmaps took %d ms.", p_image->get_width(), p_image->get_height(), p_image->get_mipmap_count(), OS::get_singleton()->get_ticks_msec() - start_time)); return basisu_data; } #endif // TOOLS_ENABLED Ref<Image> basis_universal_unpacker_ptr(const uint8_t *p_data, int p_size) { + uint64_t start_time = OS::get_singleton()->get_ticks_msec(); + Ref<Image> image; ERR_FAIL_NULL_V_MSG(p_data, image, "Cannot unpack invalid BasisUniversal data."); @@ -320,6 +369,23 @@ Ref<Image> basis_universal_unpacker_ptr(const uint8_t *p_data, int p_size) { } } break; + case BASIS_DECOMPRESS_HDR_RGB: { + if (bptc_supported) { + basisu_format = basist::transcoder_texture_format::cTFBC6H; + image_format = Image::FORMAT_BPTC_RGBFU; + } else if (astc_supported) { + basisu_format = basist::transcoder_texture_format::cTFASTC_HDR_4x4_RGBA; + image_format = Image::FORMAT_ASTC_4x4_HDR; + } else { + // No supported VRAM compression formats, decompress. + basisu_format = basist::transcoder_texture_format::cTFRGB_9E5; + image_format = Image::FORMAT_RGBE9995; + } + + } break; + default: { + ERR_FAIL_V(image); + } break; } src_ptr += 4; @@ -371,6 +437,9 @@ Ref<Image> basis_universal_unpacker_ptr(const uint8_t *p_data, int p_size) { } } + print_verbose(vformat("BasisU: Transcoding a %dx%d image with %d mipmaps into %s took %d ms.", + image->get_width(), image->get_height(), image->get_mipmap_count(), Image::get_format_name(image_format), OS::get_singleton()->get_ticks_msec() - start_time)); + return image; } diff --git a/modules/basis_universal/image_compress_basisu.h b/modules/basis_universal/image_compress_basisu.h index 5e36d448f6..81c8511f60 100644 --- a/modules/basis_universal/image_compress_basisu.h +++ b/modules/basis_universal/image_compress_basisu.h @@ -39,11 +39,20 @@ enum BasisDecompressFormat { BASIS_DECOMPRESS_RGBA, BASIS_DECOMPRESS_RG_AS_RA, BASIS_DECOMPRESS_R, + BASIS_DECOMPRESS_HDR_RGB, + BASIS_DECOMPRESS_MAX }; void basis_universal_init(); #ifdef TOOLS_ENABLED +struct BasisRGBAF { + uint32_t r; + uint32_t g; + uint32_t b; + uint32_t a; +}; + Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedChannels p_channels); #endif diff --git a/modules/betsy/SCsub b/modules/betsy/SCsub index 2735116cc3..621ba58ae3 100644 --- a/modules/betsy/SCsub +++ b/modules/betsy/SCsub @@ -7,6 +7,7 @@ Import("env_modules") env_betsy = env_modules.Clone() env_betsy.GLSL_HEADER("bc6h.glsl") env_betsy.GLSL_HEADER("bc1.glsl") +env_betsy.GLSL_HEADER("bc4.glsl") env_betsy.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"]) # Thirdparty source files diff --git a/modules/betsy/bc4.glsl b/modules/betsy/bc4.glsl new file mode 100644 index 0000000000..b7a5f6a686 --- /dev/null +++ b/modules/betsy/bc4.glsl @@ -0,0 +1,151 @@ +#[versions] + +unsigned = ""; +signed = "#define SNORM"; + +#[compute] +#version 450 + +#include "CrossPlatformSettings_piece_all.glsl" +#include "UavCrossPlatform_piece_all.glsl" + +#VERSION_DEFINES + +shared float2 g_minMaxValues[4u * 4u * 4u]; +shared uint2 g_mask[4u * 4u]; + +layout(binding = 0) uniform sampler2D srcTex; +layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture; + +layout(push_constant, std430) uniform Params { + uint p_channelIdx; + uint p_padding[3]; +} +params; + +layout(local_size_x = 4, // + local_size_y = 4, // + local_size_z = 4) in; + +/// Each block is 16 pixels +/// Each thread works on 4 pixels +/// Therefore each block needs 4 threads, generating 8 masks +/// At the end these 8 masks get merged into 2 and results written to output +/// +/// **Q: Why 4 pixels per thread? Why not 1 pixel per thread? Why not 2? Why not 16?** +/// +/// A: It's a sweetspot. +/// - Very short threads cannot fill expensive GPUs with enough work (dispatch bound) +/// - Lots of threads means lots of synchronization (e.g. evaluating min/max, merging masks) +/// overhead, and also more LDS usage which reduces occupancy. +/// - Long threads (e.g. 1 thread per block) misses parallelism opportunities +void main() { + float minVal, maxVal; + float4 srcPixel; + + const uint blockThreadId = gl_LocalInvocationID.x; + + const uint2 pixelsToLoadBase = gl_GlobalInvocationID.yz << 2u; + + for (uint i = 0u; i < 4u; ++i) { + const uint2 pixelsToLoad = pixelsToLoadBase + uint2(i, blockThreadId); + + const float4 value = OGRE_Load2D(srcTex, int2(pixelsToLoad), 0).xyzw; + srcPixel[i] = params.p_channelIdx == 0 ? value.x : (params.p_channelIdx == 1 ? value.y : value.w); + srcPixel[i] *= 255.0f; + } + + minVal = min3(srcPixel.x, srcPixel.y, srcPixel.z); + maxVal = max3(srcPixel.x, srcPixel.y, srcPixel.z); + minVal = min(minVal, srcPixel.w); + maxVal = max(maxVal, srcPixel.w); + + const uint minMaxIdxBase = (gl_LocalInvocationID.z << 4u) + (gl_LocalInvocationID.y << 2u); + const uint maskIdxBase = (gl_LocalInvocationID.z << 2u) + gl_LocalInvocationID.y; + + g_minMaxValues[minMaxIdxBase + blockThreadId] = float2(minVal, maxVal); + g_mask[maskIdxBase] = uint2(0u, 0u); + + memoryBarrierShared(); + barrier(); + + // Have all 4 threads in the block grab the min/max value by comparing what all 4 threads uploaded + for (uint i = 0u; i < 4u; ++i) { + minVal = min(g_minMaxValues[minMaxIdxBase + i].x, minVal); + maxVal = max(g_minMaxValues[minMaxIdxBase + i].y, maxVal); + } + + // determine bias and emit color indices + // given the choice of maxVal/minVal, these indices are optimal: + // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/ + float dist = maxVal - minVal; + float dist4 = dist * 4.0f; + float dist2 = dist * 2.0f; + float bias = (dist < 8) ? (dist - 1) : (trunc(dist * 0.5f) + 2); + bias -= minVal * 7; + + uint mask0 = 0u, mask1 = 0u; + + for (uint i = 0u; i < 4u; ++i) { + float a = srcPixel[i] * 7.0f + bias; + + int ind = 0; + + // select index. this is a "linear scale" lerp factor between 0 (val=min) and 7 (val=max). + if (a >= dist4) { + ind = 4; + a -= dist4; + } + + if (a >= dist2) { + ind += 2; + a -= dist2; + } + + if (a >= dist) + ind += 1; + + // turn linear scale into DXT index (0/1 are extremal pts) + ind = -ind & 7; + ind ^= (2 > ind) ? 1 : 0; + + // write index + const uint bits = 16u + ((blockThreadId << 2u) + i) * 3u; + if (bits < 32u) { + mask0 |= uint(ind) << bits; + if (bits + 3u > 32u) { + mask1 |= uint(ind) >> (32u - bits); + } + } else { + mask1 |= uint(ind) << (bits - 32u); + } + } + + if (mask0 != 0u) + atomicOr(g_mask[maskIdxBase].x, mask0); + if (mask1 != 0u) + atomicOr(g_mask[maskIdxBase].y, mask1); + + memoryBarrierShared(); + barrier(); + + if (blockThreadId == 0u) { + // Save data + uint2 outputBytes; + +#ifdef SNORM + outputBytes.x = + packSnorm4x8(float4(maxVal * (1.0f / 255.0f) * 2.0f - 1.0f, + minVal * (1.0f / 255.0f) * 2.0f - 1.0f, 0.0f, 0.0f)); +#else + outputBytes.x = packUnorm4x8( + float4(maxVal * (1.0f / 255.0f), minVal * (1.0f / 255.0f), 0.0f, 0.0f)); +#endif + + outputBytes.x |= g_mask[maskIdxBase].x; + outputBytes.y = g_mask[maskIdxBase].y; + + uint2 dstUV = gl_GlobalInvocationID.yz; + imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u)); + } +} diff --git a/modules/betsy/image_compress_betsy.cpp b/modules/betsy/image_compress_betsy.cpp index 7b4d8b3dfb..1ad9bed721 100644 --- a/modules/betsy/image_compress_betsy.cpp +++ b/modules/betsy/image_compress_betsy.cpp @@ -35,12 +35,18 @@ #include "betsy_bc1.h" #include "bc1.glsl.gen.h" +#include "bc4.glsl.gen.h" #include "bc6h.glsl.gen.h" +#include "servers/display_server.h" static Mutex betsy_mutex; static BetsyCompressor *betsy = nullptr; void BetsyCompressor::_init() { + if (!DisplayServer::can_create_rendering_device()) { + return; + } + // Create local RD. RenderingContextDriver *rcd = nullptr; RenderingDevice *rd = RenderingServer::get_singleton()->create_local_rendering_device(); @@ -165,6 +171,10 @@ static String get_shader_name(BetsyFormat p_format) { case BETSY_FORMAT_BC3: return "BC3"; + case BETSY_FORMAT_BC4_SIGNED: + case BETSY_FORMAT_BC4_UNSIGNED: + return "BC4"; + case BETSY_FORMAT_BC6_SIGNED: case BETSY_FORMAT_BC6_UNSIGNED: return "BC6"; @@ -177,6 +187,11 @@ static String get_shader_name(BetsyFormat p_format) { Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) { uint64_t start_time = OS::get_singleton()->get_ticks_msec(); + // Return an error so that the compression can fall back to cpu compression + if (compress_rd == nullptr) { + return ERR_CANT_CREATE; + } + if (r_img->is_compressed()) { return ERR_INVALID_DATA; } @@ -202,6 +217,12 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) { dest_format = Image::FORMAT_DXT1; break; + case BETSY_FORMAT_BC4_UNSIGNED: + version = "unsigned"; + dst_rd_format = RD::DATA_FORMAT_R32G32_UINT; + dest_format = Image::FORMAT_RGTC_R; + break; + case BETSY_FORMAT_BC6_SIGNED: version = "signed"; dst_rd_format = RD::DATA_FORMAT_R32G32B32A32_UINT; @@ -235,8 +256,13 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) { err = source->parse_versions_from_text(bc1_shader_glsl); break; - case BETSY_FORMAT_BC6_UNSIGNED: + case BETSY_FORMAT_BC4_SIGNED: + case BETSY_FORMAT_BC4_UNSIGNED: + err = source->parse_versions_from_text(bc4_shader_glsl); + break; + case BETSY_FORMAT_BC6_SIGNED: + case BETSY_FORMAT_BC6_UNSIGNED: err = source->parse_versions_from_text(bc6h_shader_glsl); break; @@ -430,26 +456,45 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) { compress_rd->compute_list_bind_compute_pipeline(compute_list, shader.pipeline); compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0); - if (dest_format == Image::FORMAT_BPTC_RGBFU || dest_format == Image::FORMAT_BPTC_RGBF) { - BC6PushConstant push_constant; - push_constant.sizeX = 1.0f / width; - push_constant.sizeY = 1.0f / height; - push_constant.padding[0] = 0; - push_constant.padding[1] = 0; - - compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant)); - - } else { - BC1PushConstant push_constant; - push_constant.num_refines = 2; - push_constant.padding[0] = 0; - push_constant.padding[1] = 0; - push_constant.padding[2] = 0; - - compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC1PushConstant)); + switch (dest_format) { + case Image::FORMAT_BPTC_RGBFU: + case Image::FORMAT_BPTC_RGBF: { + BC6PushConstant push_constant; + push_constant.sizeX = 1.0f / width; + push_constant.sizeY = 1.0f / height; + push_constant.padding[0] = 0; + push_constant.padding[1] = 0; + + compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant)); + compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1); + } break; + + case Image::FORMAT_DXT1: { + BC1PushConstant push_constant; + push_constant.num_refines = 2; + push_constant.padding[0] = 0; + push_constant.padding[1] = 0; + push_constant.padding[2] = 0; + + compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC1PushConstant)); + compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1); + } break; + + case Image::FORMAT_RGTC_R: { + BC4PushConstant push_constant; + push_constant.channel_idx = 0; + push_constant.padding[0] = 0; + push_constant.padding[1] = 0; + push_constant.padding[2] = 0; + + compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC4PushConstant)); + compress_rd->compute_list_dispatch(compute_list, 1, get_next_multiple(width, 16) / 16, get_next_multiple(height, 16) / 16); + } break; + + default: { + } break; } - compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1); compress_rd->compute_list_end(); compress_rd->submit(); @@ -511,13 +556,14 @@ Error _betsy_compress_s3tc(Image *r_img, Image::UsedChannels p_channels) { switch (p_channels) { case Image::USED_CHANNELS_RGB: - result = betsy->compress(BETSY_FORMAT_BC1_DITHER, r_img); - break; - case Image::USED_CHANNELS_L: result = betsy->compress(BETSY_FORMAT_BC1, r_img); break; + case Image::USED_CHANNELS_R: + result = betsy->compress(BETSY_FORMAT_BC4_UNSIGNED, r_img); + break; + default: break; } diff --git a/modules/betsy/image_compress_betsy.h b/modules/betsy/image_compress_betsy.h index 70e4ae85ed..4e0bf0538f 100644 --- a/modules/betsy/image_compress_betsy.h +++ b/modules/betsy/image_compress_betsy.h @@ -50,6 +50,8 @@ enum BetsyFormat { BETSY_FORMAT_BC1, BETSY_FORMAT_BC1_DITHER, BETSY_FORMAT_BC3, + BETSY_FORMAT_BC4_SIGNED, + BETSY_FORMAT_BC4_UNSIGNED, BETSY_FORMAT_BC6_SIGNED, BETSY_FORMAT_BC6_UNSIGNED, }; @@ -65,6 +67,11 @@ struct BC1PushConstant { uint32_t padding[3]; }; +struct BC4PushConstant { + uint32_t channel_idx; + uint32_t padding[3]; +}; + void free_device(); Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels); diff --git a/modules/gdscript/doc_classes/@GDScript.xml b/modules/gdscript/doc_classes/@GDScript.xml index 5fe47d69df..ede4ce6617 100644 --- a/modules/gdscript/doc_classes/@GDScript.xml +++ b/modules/gdscript/doc_classes/@GDScript.xml @@ -4,8 +4,8 @@ Built-in GDScript constants, functions, and annotations. </brief_description> <description> - A list of GDScript-specific utility functions and annotations accessible from any script. - For the list of the global functions and constants see [@GlobalScope]. + A list of utility functions and annotations accessible from any script written in GDScript. + For the list of global functions and constants that can be accessed in any scripting language, see [@GlobalScope]. </description> <tutorials> <link title="GDScript exports">$DOCS_URL/tutorials/scripting/gdscript/gdscript_exports.html</link> @@ -61,7 +61,7 @@ <method name="convert" deprecated="Use [method @GlobalScope.type_convert] instead."> <return type="Variant" /> <param index="0" name="what" type="Variant" /> - <param index="1" name="type" type="int" /> + <param index="1" name="type" type="int" enum="Variant.Type" /> <description> Converts [param what] to [param type] in the best way possible. The [param type] uses the [enum Variant.Type] values. [codeblock] diff --git a/modules/gdscript/editor/gdscript_docgen.cpp b/modules/gdscript/editor/gdscript_docgen.cpp index 32ef429b0d..758887a723 100644 --- a/modules/gdscript/editor/gdscript_docgen.cpp +++ b/modules/gdscript/editor/gdscript_docgen.cpp @@ -217,7 +217,7 @@ String GDScriptDocGen::_docvalue_from_variant(const Variant &p_variant, int p_re List<Variant> keys; dict.get_key_list(&keys); - keys.sort(); + keys.sort_custom<StringLikeVariantOrder>(); for (List<Variant>::Element *E = keys.front(); E; E = E->next()) { if (E->prev()) { diff --git a/modules/gdscript/editor/gdscript_highlighter.cpp b/modules/gdscript/editor/gdscript_highlighter.cpp index d765cfa1ea..0b12f2ff76 100644 --- a/modules/gdscript/editor/gdscript_highlighter.cpp +++ b/modules/gdscript/editor/gdscript_highlighter.cpp @@ -701,7 +701,9 @@ void GDScriptSyntaxHighlighter::_update_cache() { List<StringName> types; ClassDB::get_class_list(&types); for (const StringName &E : types) { - class_names[E] = types_color; + if (ClassDB::is_class_exposed(E)) { + class_names[E] = types_color; + } } /* User types. */ diff --git a/modules/gdscript/gdscript.cpp b/modules/gdscript/gdscript.cpp index 7b9aa70686..64287cce99 100644 --- a/modules/gdscript/gdscript.cpp +++ b/modules/gdscript/gdscript.cpp @@ -76,9 +76,16 @@ bool GDScriptNativeClass::_get(const StringName &p_name, Variant &r_ret) const { if (ok) { r_ret = v; return true; - } else { - return false; } + + MethodBind *method = ClassDB::get_method(name, p_name); + if (method && method->is_static()) { + // Native static method. + r_ret = Callable(this, p_name); + return true; + } + + return false; } void GDScriptNativeClass::_bind_methods() { @@ -693,10 +700,16 @@ void GDScript::_static_default_init() { continue; } if (type.builtin_type == Variant::ARRAY && type.has_container_element_type(0)) { + const GDScriptDataType element_type = type.get_container_element_type(0); Array default_value; - const GDScriptDataType &element_type = type.get_container_element_type(0); default_value.set_typed(element_type.builtin_type, element_type.native_type, element_type.script_type); static_variables.write[E.value.index] = default_value; + } else if (type.builtin_type == Variant::DICTIONARY && type.has_container_element_types()) { + const GDScriptDataType key_type = type.get_container_element_type_or_variant(0); + const GDScriptDataType value_type = type.get_container_element_type_or_variant(1); + Dictionary default_value; + default_value.set_typed(key_type.builtin_type, key_type.native_type, key_type.script_type, value_type.builtin_type, value_type.native_type, value_type.script_type); + static_variables.write[E.value.index] = default_value; } else { Variant default_value; Callable::CallError err; @@ -2540,11 +2553,11 @@ void GDScriptLanguage::reload_all_scripts() { } } } -#endif +#endif // TOOLS_ENABLED } reload_scripts(scripts, true); -#endif +#endif // DEBUG_ENABLED } void GDScriptLanguage::reload_scripts(const Array &p_scripts, bool p_soft_reload) { @@ -2614,7 +2627,7 @@ void GDScriptLanguage::reload_scripts(const Array &p_scripts, bool p_soft_reload } } -#endif +#endif // TOOLS_ENABLED for (const KeyValue<ObjectID, List<Pair<StringName, Variant>>> &F : scr->pending_reload_state) { map[F.key] = F.value; //pending to reload, use this one instead @@ -2682,7 +2695,7 @@ void GDScriptLanguage::reload_scripts(const Array &p_scripts, bool p_soft_reload //if instance states were saved, set them! } -#endif +#endif // DEBUG_ENABLED } void GDScriptLanguage::reload_tool_script(const Ref<Script> &p_script, bool p_soft_reload) { diff --git a/modules/gdscript/gdscript_analyzer.cpp b/modules/gdscript/gdscript_analyzer.cpp index 4a3a3a4b61..93d4a512a9 100644 --- a/modules/gdscript/gdscript_analyzer.cpp +++ b/modules/gdscript/gdscript_analyzer.cpp @@ -940,8 +940,8 @@ void GDScriptAnalyzer::resolve_class_member(GDScriptParser::ClassNode *p_class, Finally finally([&]() { ensure_cached_external_parser_for_class(member.get_datatype().class_type, p_class, "Trying to resolve datatype of class member", p_source); GDScriptParser::DataType member_type = member.get_datatype(); - if (member_type.has_container_element_type(0)) { - ensure_cached_external_parser_for_class(member_type.get_container_element_type(0).class_type, p_class, "Trying to resolve datatype of class member", p_source); + for (int i = 0; i < member_type.get_container_element_type_count(); ++i) { + ensure_cached_external_parser_for_class(member_type.get_container_element_type(i).class_type, p_class, "Trying to resolve datatype of class member", p_source); } }); @@ -3816,6 +3816,12 @@ GDScriptParser::DataType GDScriptAnalyzer::make_global_class_meta_type(const Str } Ref<GDScriptParserRef> GDScriptAnalyzer::ensure_cached_external_parser_for_class(const GDScriptParser::ClassNode *p_class, const GDScriptParser::ClassNode *p_from_class, const char *p_context, const GDScriptParser::Node *p_source) { + // Delicate piece of code that intentionally doesn't use the GDScript cache or `get_depended_parser_for`. + // Search dependencies for the parser that owns `p_class` and make a cache entry for it. + // Required for how we store pointers to classes owned by other parser trees and need to call `resolve_class_member` and such on the same parser tree. + // Since https://github.com/godotengine/godot/pull/94871 there can technically be multiple parsers for the same script in the same parser tree. + // Even if unlikely, getting the wrong parser could lead to strange undefined behavior without errors. + if (p_class == nullptr) { return nullptr; } @@ -3832,8 +3838,6 @@ Ref<GDScriptParserRef> GDScriptAnalyzer::ensure_cached_external_parser_for_class p_from_class = parser->head; } - String script_path = p_class->get_datatype().script_path; - Ref<GDScriptParserRef> parser_ref; for (const GDScriptParser::ClassNode *look_class = p_from_class; look_class != nullptr; look_class = look_class->base_type.class_type) { if (parser->has_class(look_class)) { @@ -5867,7 +5871,7 @@ void GDScriptAnalyzer::is_shadowing(GDScriptParser::IdentifierNode *p_identifier parent = ClassDB::get_parent_class(parent); } } -#endif +#endif // DEBUG_ENABLED GDScriptParser::DataType GDScriptAnalyzer::get_operation_type(Variant::Operator p_operation, const GDScriptParser::DataType &p_a, bool &r_valid, const GDScriptParser::Node *p_source) { // Unary version. diff --git a/modules/gdscript/gdscript_editor.cpp b/modules/gdscript/gdscript_editor.cpp index 5689b151f0..951ae6ce99 100644 --- a/modules/gdscript/gdscript_editor.cpp +++ b/modules/gdscript/gdscript_editor.cpp @@ -3536,13 +3536,13 @@ static void _find_call_arguments(GDScriptParser::CompletionContext &p_context, c return OK; } -#else +#else // !TOOLS_ENABLED Error GDScriptLanguage::complete_code(const String &p_code, const String &p_path, Object *p_owner, List<ScriptLanguage::CodeCompletionOption> *r_options, bool &r_forced, String &r_call_hint) { return OK; } -#endif +#endif // TOOLS_ENABLED //////// END COMPLETION ////////// @@ -4127,4 +4127,4 @@ static Error _lookup_symbol_from_base(const GDScriptParser::DataType &p_base, co return ERR_CANT_RESOLVE; } -#endif +#endif // TOOLS_ENABLED diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp index 111a39d730..ee8d53639c 100644 --- a/modules/gdscript/gdscript_parser.cpp +++ b/modules/gdscript/gdscript_parser.cpp @@ -244,7 +244,7 @@ void GDScriptParser::apply_pending_warnings() { pending_warnings.clear(); } -#endif +#endif // DEBUG_ENABLED void GDScriptParser::override_completion_context(const Node *p_for_node, CompletionType p_type, Node *p_node, int p_argument) { if (!for_completion) { @@ -1624,15 +1624,17 @@ GDScriptParser::AnnotationNode *GDScriptParser::parse_annotation(uint32_t p_vali valid = false; } - annotation->info = &valid_annotations[annotation->name]; + if (valid) { + annotation->info = &valid_annotations[annotation->name]; - if (!annotation->applies_to(p_valid_targets)) { - if (annotation->applies_to(AnnotationInfo::SCRIPT)) { - push_error(vformat(R"(Annotation "%s" must be at the top of the script, before "extends" and "class_name".)", annotation->name)); - } else { - push_error(vformat(R"(Annotation "%s" is not allowed in this level.)", annotation->name)); + if (!annotation->applies_to(p_valid_targets)) { + if (annotation->applies_to(AnnotationInfo::SCRIPT)) { + push_error(vformat(R"(Annotation "%s" must be at the top of the script, before "extends" and "class_name".)", annotation->name)); + } else { + push_error(vformat(R"(Annotation "%s" is not allowed in this level.)", annotation->name)); + } + valid = false; } - valid = false; } if (check(GDScriptTokenizer::Token::PARENTHESIS_OPEN)) { diff --git a/modules/gdscript/gdscript_parser.h b/modules/gdscript/gdscript_parser.h index 7f64ae902b..2ec33831a2 100644 --- a/modules/gdscript/gdscript_parser.h +++ b/modules/gdscript/gdscript_parser.h @@ -165,6 +165,10 @@ public: container_element_types.write[p_index] = DataType(p_type); } + _FORCE_INLINE_ int get_container_element_type_count() const { + return container_element_types.size(); + } + _FORCE_INLINE_ DataType get_container_element_type(int p_index) const { ERR_FAIL_INDEX_V(p_index, container_element_types.size(), get_variant_type()); return container_element_types[p_index]; diff --git a/modules/gdscript/gdscript_utility_functions.cpp b/modules/gdscript/gdscript_utility_functions.cpp index 59dd983ed2..8246069696 100644 --- a/modules/gdscript/gdscript_utility_functions.cpp +++ b/modules/gdscript/gdscript_utility_functions.cpp @@ -34,7 +34,6 @@ #include "core/io/resource_loader.h" #include "core/object/class_db.h" -#include "core/object/method_bind.h" #include "core/object/object.h" #include "core/templates/oa_hash_map.h" #include "core/templates/vector.h" @@ -42,101 +41,105 @@ #ifdef DEBUG_ENABLED -#define VALIDATE_ARG_COUNT(m_count) \ - if (p_arg_count < m_count) { \ - r_error.error = Callable::CallError::CALL_ERROR_TOO_FEW_ARGUMENTS; \ - r_error.expected = m_count; \ +#define DEBUG_VALIDATE_ARG_COUNT(m_min_count, m_max_count) \ + if (unlikely(p_arg_count < m_min_count)) { \ *r_ret = Variant(); \ + r_error.error = Callable::CallError::CALL_ERROR_TOO_FEW_ARGUMENTS; \ + r_error.expected = m_min_count; \ return; \ } \ - if (p_arg_count > m_count) { \ - r_error.error = Callable::CallError::CALL_ERROR_TOO_MANY_ARGUMENTS; \ - r_error.expected = m_count; \ + if (unlikely(p_arg_count > m_max_count)) { \ *r_ret = Variant(); \ + r_error.error = Callable::CallError::CALL_ERROR_TOO_MANY_ARGUMENTS; \ + r_error.expected = m_max_count; \ return; \ } -#define VALIDATE_ARG_INT(m_arg) \ - if (p_args[m_arg]->get_type() != Variant::INT) { \ +#define DEBUG_VALIDATE_ARG_TYPE(m_arg, m_type) \ + if (unlikely(!Variant::can_convert_strict(p_args[m_arg]->get_type(), m_type))) { \ + *r_ret = Variant(); \ + r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; \ + r_error.argument = m_arg; \ + r_error.expected = m_type; \ + return; \ + } + +#define DEBUG_VALIDATE_ARG_CUSTOM(m_arg, m_type, m_cond, m_msg) \ + if (unlikely(m_cond)) { \ + *r_ret = m_msg; \ r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; \ r_error.argument = m_arg; \ - r_error.expected = Variant::INT; \ - *r_ret = Variant(); \ + r_error.expected = m_type; \ return; \ } -#define VALIDATE_ARG_NUM(m_arg) \ - if (!p_args[m_arg]->is_num()) { \ +#else // !DEBUG_ENABLED + +#define DEBUG_VALIDATE_ARG_COUNT(m_min_count, m_max_count) +#define DEBUG_VALIDATE_ARG_TYPE(m_arg, m_type) +#define DEBUG_VALIDATE_ARG_CUSTOM(m_arg, m_type, m_cond, m_msg) + +#endif // DEBUG_ENABLED + +#define VALIDATE_ARG_CUSTOM(m_arg, m_type, m_cond, m_msg) \ + if (unlikely(m_cond)) { \ + *r_ret = m_msg; \ r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; \ r_error.argument = m_arg; \ - r_error.expected = Variant::FLOAT; \ - *r_ret = Variant(); \ + r_error.expected = m_type; \ return; \ } -#else - -#define VALIDATE_ARG_COUNT(m_count) -#define VALIDATE_ARG_INT(m_arg) -#define VALIDATE_ARG_NUM(m_arg) - -#endif +#define GDFUNC_FAIL_COND_MSG(m_cond, m_msg) \ + if (unlikely(m_cond)) { \ + *r_ret = m_msg; \ + r_error.error = Callable::CallError::CALL_ERROR_INVALID_METHOD; \ + return; \ + } struct GDScriptUtilityFunctionsDefinitions { #ifndef DISABLE_DEPRECATED static inline void convert(Variant *r_ret, const Variant **p_args, int p_arg_count, Callable::CallError &r_error) { - VALIDATE_ARG_COUNT(2); - VALIDATE_ARG_INT(1); + DEBUG_VALIDATE_ARG_COUNT(2, 2); + DEBUG_VALIDATE_ARG_TYPE(1, Variant::INT); + int type = *p_args[1]; - if (type < 0 || type >= Variant::VARIANT_MAX) { - *r_ret = RTR("Invalid type argument to convert(), use TYPE_* constants."); - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 0; - r_error.expected = Variant::INT; - return; + DEBUG_VALIDATE_ARG_CUSTOM(1, Variant::INT, type < 0 || type >= Variant::VARIANT_MAX, + RTR("Invalid type argument to convert(), use TYPE_* constants.")); - } else { - Variant::construct(Variant::Type(type), *r_ret, p_args, 1, r_error); - if (r_error.error != Callable::CallError::CALL_OK) { - *r_ret = vformat(RTR(R"(Cannot convert "%s" to "%s".)"), Variant::get_type_name(p_args[0]->get_type()), Variant::get_type_name(Variant::Type(type))); - } - } + Variant::construct(Variant::Type(type), *r_ret, p_args, 1, r_error); } #endif // DISABLE_DEPRECATED static inline void type_exists(Variant *r_ret, const Variant **p_args, int p_arg_count, Callable::CallError &r_error) { - VALIDATE_ARG_COUNT(1); + DEBUG_VALIDATE_ARG_COUNT(1, 1); + DEBUG_VALIDATE_ARG_TYPE(0, Variant::STRING_NAME); *r_ret = ClassDB::class_exists(*p_args[0]); } static inline void _char(Variant *r_ret, const Variant **p_args, int p_arg_count, Callable::CallError &r_error) { - VALIDATE_ARG_COUNT(1); - VALIDATE_ARG_INT(0); + DEBUG_VALIDATE_ARG_COUNT(1, 1); + DEBUG_VALIDATE_ARG_TYPE(0, Variant::INT); char32_t result[2] = { *p_args[0], 0 }; *r_ret = String(result); } static inline void range(Variant *r_ret, const Variant **p_args, int p_arg_count, Callable::CallError &r_error) { + DEBUG_VALIDATE_ARG_COUNT(1, 3); switch (p_arg_count) { - case 0: { - r_error.error = Callable::CallError::CALL_ERROR_TOO_FEW_ARGUMENTS; - r_error.expected = 1; - *r_ret = Variant(); - } break; case 1: { - VALIDATE_ARG_NUM(0); + DEBUG_VALIDATE_ARG_TYPE(0, Variant::INT); + int count = *p_args[0]; + Array arr; if (count <= 0) { *r_ret = arr; return; } + Error err = arr.resize(count); - if (err != OK) { - *r_ret = RTR("Cannot resize array."); - r_error.error = Callable::CallError::CALL_ERROR_INVALID_METHOD; - return; - } + GDFUNC_FAIL_COND_MSG(err != OK, RTR("Cannot resize array.")); for (int i = 0; i < count; i++) { arr[i] = i; @@ -145,8 +148,8 @@ struct GDScriptUtilityFunctionsDefinitions { *r_ret = arr; } break; case 2: { - VALIDATE_ARG_NUM(0); - VALIDATE_ARG_NUM(1); + DEBUG_VALIDATE_ARG_TYPE(0, Variant::INT); + DEBUG_VALIDATE_ARG_TYPE(1, Variant::INT); int from = *p_args[0]; int to = *p_args[1]; @@ -156,30 +159,26 @@ struct GDScriptUtilityFunctionsDefinitions { *r_ret = arr; return; } + Error err = arr.resize(to - from); - if (err != OK) { - *r_ret = RTR("Cannot resize array."); - r_error.error = Callable::CallError::CALL_ERROR_INVALID_METHOD; - return; - } + GDFUNC_FAIL_COND_MSG(err != OK, RTR("Cannot resize array.")); + for (int i = from; i < to; i++) { arr[i - from] = i; } + *r_ret = arr; } break; case 3: { - VALIDATE_ARG_NUM(0); - VALIDATE_ARG_NUM(1); - VALIDATE_ARG_NUM(2); + DEBUG_VALIDATE_ARG_TYPE(0, Variant::INT); + DEBUG_VALIDATE_ARG_TYPE(1, Variant::INT); + DEBUG_VALIDATE_ARG_TYPE(2, Variant::INT); int from = *p_args[0]; int to = *p_args[1]; int incr = *p_args[2]; - if (incr == 0) { - *r_ret = RTR("Step argument is zero!"); - r_error.error = Callable::CallError::CALL_ERROR_INVALID_METHOD; - return; - } + + VALIDATE_ARG_CUSTOM(2, Variant::INT, incr == 0, RTR("Step argument is zero!")); Array arr; if (from >= to && incr > 0) { @@ -200,12 +199,7 @@ struct GDScriptUtilityFunctionsDefinitions { } Error err = arr.resize(count); - - if (err != OK) { - *r_ret = RTR("Cannot resize array."); - r_error.error = Callable::CallError::CALL_ERROR_INVALID_METHOD; - return; - } + GDFUNC_FAIL_COND_MSG(err != OK, RTR("Cannot resize array.")); if (incr > 0) { int idx = 0; @@ -221,138 +215,79 @@ struct GDScriptUtilityFunctionsDefinitions { *r_ret = arr; } break; - default: { - r_error.error = Callable::CallError::CALL_ERROR_TOO_MANY_ARGUMENTS; - r_error.expected = 3; - *r_ret = Variant(); - - } break; } } static inline void load(Variant *r_ret, const Variant **p_args, int p_arg_count, Callable::CallError &r_error) { - VALIDATE_ARG_COUNT(1); - if (!p_args[0]->is_string()) { - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 0; - r_error.expected = Variant::STRING; - *r_ret = Variant(); - } else { - *r_ret = ResourceLoader::load(*p_args[0]); - } + DEBUG_VALIDATE_ARG_COUNT(1, 1); + DEBUG_VALIDATE_ARG_TYPE(0, Variant::STRING); + *r_ret = ResourceLoader::load(*p_args[0]); } static inline void inst_to_dict(Variant *r_ret, const Variant **p_args, int p_arg_count, Callable::CallError &r_error) { - VALIDATE_ARG_COUNT(1); + DEBUG_VALIDATE_ARG_COUNT(1, 1); + DEBUG_VALIDATE_ARG_TYPE(0, Variant::OBJECT); if (p_args[0]->get_type() == Variant::NIL) { *r_ret = Variant(); - } else if (p_args[0]->get_type() != Variant::OBJECT) { - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 0; - r_error.expected = Variant::OBJECT; + return; + } + + Object *obj = *p_args[0]; + if (!obj) { *r_ret = Variant(); - } else { - Object *obj = *p_args[0]; - if (!obj) { - *r_ret = Variant(); + return; + } - } else if (!obj->get_script_instance() || obj->get_script_instance()->get_language() != GDScriptLanguage::get_singleton()) { - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 0; - r_error.expected = Variant::DICTIONARY; - *r_ret = RTR("Not a script with an instance"); - return; - } else { - GDScriptInstance *ins = static_cast<GDScriptInstance *>(obj->get_script_instance()); - Ref<GDScript> base = ins->get_script(); - if (base.is_null()) { - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 0; - r_error.expected = Variant::DICTIONARY; - *r_ret = RTR("Not based on a script"); - return; - } + VALIDATE_ARG_CUSTOM(0, Variant::OBJECT, + !obj->get_script_instance() || obj->get_script_instance()->get_language() != GDScriptLanguage::get_singleton(), + RTR("Not a script with an instance.")); - GDScript *p = base.ptr(); - String path = p->get_script_path(); - Vector<StringName> sname; + GDScriptInstance *inst = static_cast<GDScriptInstance *>(obj->get_script_instance()); - while (p->_owner) { - sname.push_back(p->local_name); - p = p->_owner; - } - sname.reverse(); + Ref<GDScript> base = inst->get_script(); + VALIDATE_ARG_CUSTOM(0, Variant::OBJECT, base.is_null(), RTR("Not based on a script.")); - if (!path.is_resource_file()) { - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 0; - r_error.expected = Variant::DICTIONARY; - *r_ret = Variant(); + GDScript *p = base.ptr(); + String path = p->get_script_path(); + Vector<StringName> sname; - *r_ret = RTR("Not based on a resource file"); + while (p->_owner) { + sname.push_back(p->local_name); + p = p->_owner; + } + sname.reverse(); - return; - } + VALIDATE_ARG_CUSTOM(0, Variant::OBJECT, !path.is_resource_file(), RTR("Not based on a resource file.")); - NodePath cp(sname, Vector<StringName>(), false); + NodePath cp(sname, Vector<StringName>(), false); - Dictionary d; - d["@subpath"] = cp; - d["@path"] = path; + Dictionary d; + d["@subpath"] = cp; + d["@path"] = path; - for (const KeyValue<StringName, GDScript::MemberInfo> &E : base->member_indices) { - if (!d.has(E.key)) { - d[E.key] = ins->members[E.value.index]; - } - } - *r_ret = d; + for (const KeyValue<StringName, GDScript::MemberInfo> &E : base->member_indices) { + if (!d.has(E.key)) { + d[E.key] = inst->members[E.value.index]; } } + + *r_ret = d; } static inline void dict_to_inst(Variant *r_ret, const Variant **p_args, int p_arg_count, Callable::CallError &r_error) { - VALIDATE_ARG_COUNT(1); - - if (p_args[0]->get_type() != Variant::DICTIONARY) { - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 0; - r_error.expected = Variant::DICTIONARY; - *r_ret = Variant(); - - return; - } + DEBUG_VALIDATE_ARG_COUNT(1, 1); + DEBUG_VALIDATE_ARG_TYPE(0, Variant::DICTIONARY); Dictionary d = *p_args[0]; - if (!d.has("@path")) { - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 0; - r_error.expected = Variant::OBJECT; - *r_ret = RTR("Invalid instance dictionary format (missing @path)"); - - return; - } + VALIDATE_ARG_CUSTOM(0, Variant::DICTIONARY, !d.has("@path"), RTR("Invalid instance dictionary format (missing @path).")); Ref<Script> scr = ResourceLoader::load(d["@path"]); - if (!scr.is_valid()) { - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 0; - r_error.expected = Variant::OBJECT; - *r_ret = RTR("Invalid instance dictionary format (can't load script at @path)"); - return; - } + VALIDATE_ARG_CUSTOM(0, Variant::DICTIONARY, !scr.is_valid(), RTR("Invalid instance dictionary format (can't load script at @path).")); Ref<GDScript> gdscr = scr; - - if (!gdscr.is_valid()) { - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 0; - r_error.expected = Variant::OBJECT; - *r_ret = Variant(); - *r_ret = RTR("Invalid instance dictionary format (invalid script at @path)"); - return; - } + VALIDATE_ARG_CUSTOM(0, Variant::DICTIONARY, !gdscr.is_valid(), RTR("Invalid instance dictionary format (invalid script at @path).")); NodePath sub; if (d.has("@subpath")) { @@ -361,54 +296,35 @@ struct GDScriptUtilityFunctionsDefinitions { for (int i = 0; i < sub.get_name_count(); i++) { gdscr = gdscr->subclasses[sub.get_name(i)]; - if (!gdscr.is_valid()) { - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 0; - r_error.expected = Variant::OBJECT; - *r_ret = Variant(); - *r_ret = RTR("Invalid instance dictionary (invalid subclasses)"); - return; - } + VALIDATE_ARG_CUSTOM(0, Variant::DICTIONARY, !gdscr.is_valid(), RTR("Invalid instance dictionary (invalid subclasses).")); } - *r_ret = gdscr->_new(nullptr, -1 /*skip initializer*/, r_error); + *r_ret = gdscr->_new(nullptr, -1 /* skip initializer */, r_error); if (r_error.error != Callable::CallError::CALL_OK) { *r_ret = RTR("Cannot instantiate GDScript class."); return; } - GDScriptInstance *ins = static_cast<GDScriptInstance *>(static_cast<Object *>(*r_ret)->get_script_instance()); - Ref<GDScript> gd_ref = ins->get_script(); + GDScriptInstance *inst = static_cast<GDScriptInstance *>(static_cast<Object *>(*r_ret)->get_script_instance()); + Ref<GDScript> gd_ref = inst->get_script(); for (KeyValue<StringName, GDScript::MemberInfo> &E : gd_ref->member_indices) { if (d.has(E.key)) { - ins->members.write[E.value.index] = d[E.key]; + inst->members.write[E.value.index] = d[E.key]; } } } static inline void Color8(Variant *r_ret, const Variant **p_args, int p_arg_count, Callable::CallError &r_error) { - if (p_arg_count < 3) { - r_error.error = Callable::CallError::CALL_ERROR_TOO_FEW_ARGUMENTS; - r_error.expected = 3; - *r_ret = Variant(); - return; - } - if (p_arg_count > 4) { - r_error.error = Callable::CallError::CALL_ERROR_TOO_MANY_ARGUMENTS; - r_error.expected = 4; - *r_ret = Variant(); - return; - } - - VALIDATE_ARG_INT(0); - VALIDATE_ARG_INT(1); - VALIDATE_ARG_INT(2); + DEBUG_VALIDATE_ARG_COUNT(3, 4); + DEBUG_VALIDATE_ARG_TYPE(0, Variant::INT); + DEBUG_VALIDATE_ARG_TYPE(1, Variant::INT); + DEBUG_VALIDATE_ARG_TYPE(2, Variant::INT); Color color((int64_t)*p_args[0] / 255.0f, (int64_t)*p_args[1] / 255.0f, (int64_t)*p_args[2] / 255.0f); if (p_arg_count == 4) { - VALIDATE_ARG_INT(3); + DEBUG_VALIDATE_ARG_TYPE(3, Variant::INT); color.a = (int64_t)*p_args[3] / 255.0f; } @@ -435,7 +351,8 @@ struct GDScriptUtilityFunctionsDefinitions { } static inline void print_stack(Variant *r_ret, const Variant **p_args, int p_arg_count, Callable::CallError &r_error) { - VALIDATE_ARG_COUNT(0); + DEBUG_VALIDATE_ARG_COUNT(0, 0); + if (Thread::get_caller_id() != Thread::get_main_id()) { print_line("Cannot retrieve debug info outside the main thread. Thread ID: " + itos(Thread::get_caller_id())); return; @@ -449,7 +366,8 @@ struct GDScriptUtilityFunctionsDefinitions { } static inline void get_stack(Variant *r_ret, const Variant **p_args, int p_arg_count, Callable::CallError &r_error) { - VALIDATE_ARG_COUNT(0); + DEBUG_VALIDATE_ARG_COUNT(0, 0); + if (Thread::get_caller_id() != Thread::get_main_id()) { *r_ret = TypedArray<Dictionary>(); return; @@ -468,7 +386,7 @@ struct GDScriptUtilityFunctionsDefinitions { } static inline void len(Variant *r_ret, const Variant **p_args, int p_arg_count, Callable::CallError &r_error) { - VALIDATE_ARG_COUNT(1); + DEBUG_VALIDATE_ARG_COUNT(1, 1); switch (p_args[0]->get_type()) { case Variant::STRING: case Variant::STRING_NAME: { @@ -524,56 +442,34 @@ struct GDScriptUtilityFunctionsDefinitions { *r_ret = d.size(); } break; default: { + *r_ret = vformat(RTR("Value of type '%s' can't provide a length."), Variant::get_type_name(p_args[0]->get_type())); r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; r_error.argument = 0; r_error.expected = Variant::NIL; - *r_ret = vformat(RTR("Value of type '%s' can't provide a length."), Variant::get_type_name(p_args[0]->get_type())); - } + } break; } } static inline void is_instance_of(Variant *r_ret, const Variant **p_args, int p_arg_count, Callable::CallError &r_error) { - VALIDATE_ARG_COUNT(2); + DEBUG_VALIDATE_ARG_COUNT(2, 2); if (p_args[1]->get_type() == Variant::INT) { int builtin_type = *p_args[1]; - if (builtin_type < 0 || builtin_type >= Variant::VARIANT_MAX) { - *r_ret = RTR("Invalid type argument for is_instance_of(), use TYPE_* constants for built-in types."); - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 1; - r_error.expected = Variant::NIL; - return; - } + DEBUG_VALIDATE_ARG_CUSTOM(1, Variant::NIL, builtin_type < 0 || builtin_type >= Variant::VARIANT_MAX, + RTR("Invalid type argument for is_instance_of(), use TYPE_* constants for built-in types.")); *r_ret = p_args[0]->get_type() == builtin_type; return; } bool was_type_freed = false; Object *type_object = p_args[1]->get_validated_object_with_check(was_type_freed); - if (was_type_freed) { - *r_ret = RTR("Type argument is a previously freed instance."); - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 1; - r_error.expected = Variant::NIL; - return; - } - if (!type_object) { - *r_ret = RTR("Invalid type argument for is_instance_of(), should be a TYPE_* constant, a class or a script."); - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 1; - r_error.expected = Variant::NIL; - return; - } + VALIDATE_ARG_CUSTOM(1, Variant::NIL, was_type_freed, RTR("Type argument is a previously freed instance.")); + VALIDATE_ARG_CUSTOM(1, Variant::NIL, !type_object, + RTR("Invalid type argument for is_instance_of(), should be a TYPE_* constant, a class or a script.")); bool was_value_freed = false; Object *value_object = p_args[0]->get_validated_object_with_check(was_value_freed); - if (was_value_freed) { - *r_ret = RTR("Value argument is a previously freed instance."); - r_error.error = Callable::CallError::CALL_ERROR_INVALID_ARGUMENT; - r_error.argument = 0; - r_error.expected = Variant::NIL; - return; - } + VALIDATE_ARG_CUSTOM(0, Variant::NIL, was_value_freed, RTR("Value argument is a previously freed instance.")); if (!value_object) { *r_ret = false; return; @@ -618,113 +514,77 @@ struct GDScriptUtilityFunctionInfo { static OAHashMap<StringName, GDScriptUtilityFunctionInfo> utility_function_table; static List<StringName> utility_function_name_table; -static void _register_function(const String &p_name, const MethodInfo &p_method_info, GDScriptUtilityFunctions::FunctionPtr p_function, bool p_is_const) { - StringName sname(p_name); - - ERR_FAIL_COND(utility_function_table.has(sname)); +static void _register_function(const StringName &p_name, const MethodInfo &p_method_info, GDScriptUtilityFunctions::FunctionPtr p_function, bool p_is_const) { + ERR_FAIL_COND(utility_function_table.has(p_name)); GDScriptUtilityFunctionInfo function; function.function = p_function; function.info = p_method_info; function.is_constant = p_is_const; - utility_function_table.insert(sname, function); - utility_function_name_table.push_back(sname); + utility_function_table.insert(p_name, function); + utility_function_name_table.push_back(p_name); } -#define REGISTER_FUNC(m_func, m_is_const, m_return_type, ...) \ +#define REGISTER_FUNC(m_func, m_is_const, m_return, m_args, m_is_vararg, m_default_args) \ { \ String name(#m_func); \ if (name.begins_with("_")) { \ - name = name.substr(1, name.length() - 1); \ + name = name.substr(1); \ } \ - MethodInfo info = MethodInfo(name, __VA_ARGS__); \ - info.return_val.type = m_return_type; \ - _register_function(name, info, GDScriptUtilityFunctionsDefinitions::m_func, m_is_const); \ - } - -#define REGISTER_FUNC_NO_ARGS(m_func, m_is_const, m_return_type) \ - { \ - String name(#m_func); \ - if (name.begins_with("_")) { \ - name = name.substr(1, name.length() - 1); \ + MethodInfo info = m_args; \ + info.name = name; \ + info.return_val = m_return; \ + info.default_arguments = m_default_args; \ + if (m_is_vararg) { \ + info.flags |= METHOD_FLAG_VARARG; \ } \ - MethodInfo info = MethodInfo(name); \ - info.return_val.type = m_return_type; \ _register_function(name, info, GDScriptUtilityFunctionsDefinitions::m_func, m_is_const); \ } -#define REGISTER_VARARG_FUNC(m_func, m_is_const, m_return_type) \ - { \ - String name(#m_func); \ - if (name.begins_with("_")) { \ - name = name.substr(1, name.length() - 1); \ - } \ - MethodInfo info = MethodInfo(name); \ - info.return_val.type = m_return_type; \ - info.flags |= METHOD_FLAG_VARARG; \ - _register_function(name, info, GDScriptUtilityFunctionsDefinitions::m_func, m_is_const); \ - } +#define RET(m_type) \ + PropertyInfo(Variant::m_type, "") -#define REGISTER_VARIANT_FUNC(m_func, m_is_const, ...) \ - { \ - String name(#m_func); \ - if (name.begins_with("_")) { \ - name = name.substr(1, name.length() - 1); \ - } \ - MethodInfo info = MethodInfo(name, __VA_ARGS__); \ - info.return_val.type = Variant::NIL; \ - info.return_val.usage |= PROPERTY_USAGE_NIL_IS_VARIANT; \ - _register_function(name, info, GDScriptUtilityFunctionsDefinitions::m_func, m_is_const); \ - } +#define RETVAR \ + PropertyInfo(Variant::NIL, "", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NIL_IS_VARIANT) -#define REGISTER_CLASS_FUNC(m_func, m_is_const, m_return_type, ...) \ - { \ - String name(#m_func); \ - if (name.begins_with("_")) { \ - name = name.substr(1, name.length() - 1); \ - } \ - MethodInfo info = MethodInfo(name, __VA_ARGS__); \ - info.return_val.type = Variant::OBJECT; \ - info.return_val.hint = PROPERTY_HINT_RESOURCE_TYPE; \ - info.return_val.class_name = m_return_type; \ - _register_function(name, info, GDScriptUtilityFunctionsDefinitions::m_func, m_is_const); \ - } +#define RETCLS(m_class) \ + PropertyInfo(Variant::OBJECT, "", PROPERTY_HINT_RESOURCE_TYPE, m_class) -#define REGISTER_FUNC_DEF(m_func, m_is_const, m_default, m_return_type, ...) \ - { \ - String name(#m_func); \ - if (name.begins_with("_")) { \ - name = name.substr(1, name.length() - 1); \ - } \ - MethodInfo info = MethodInfo(name, __VA_ARGS__); \ - info.return_val.type = m_return_type; \ - info.default_arguments.push_back(m_default); \ - _register_function(name, info, GDScriptUtilityFunctionsDefinitions::m_func, m_is_const); \ - } +#define NOARGS \ + MethodInfo() + +#define ARGS(...) \ + MethodInfo("", __VA_ARGS__) #define ARG(m_name, m_type) \ - PropertyInfo(m_type, m_name) + PropertyInfo(Variant::m_type, m_name) + +#define ARGVAR(m_name) \ + PropertyInfo(Variant::NIL, m_name, PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NIL_IS_VARIANT) -#define VARARG(m_name) \ - PropertyInfo(Variant::NIL, m_name, PROPERTY_HINT_NONE, "", PROPERTY_USAGE_DEFAULT | PROPERTY_USAGE_NIL_IS_VARIANT) +#define ARGTYPE(m_name) \ + PropertyInfo(Variant::INT, m_name, PROPERTY_HINT_NONE, "", PROPERTY_USAGE_CLASS_IS_ENUM, "Variant.Type") void GDScriptUtilityFunctions::register_functions() { + /* clang-format off */ #ifndef DISABLE_DEPRECATED - REGISTER_VARIANT_FUNC(convert, true, VARARG("what"), ARG("type", Variant::INT)); + REGISTER_FUNC( convert, true, RETVAR, ARGS( ARGVAR("what"), ARGTYPE("type") ), false, varray( )); #endif // DISABLE_DEPRECATED - REGISTER_FUNC(type_exists, true, Variant::BOOL, ARG("type", Variant::STRING_NAME)); - REGISTER_FUNC(_char, true, Variant::STRING, ARG("char", Variant::INT)); - REGISTER_VARARG_FUNC(range, false, Variant::ARRAY); - REGISTER_CLASS_FUNC(load, false, "Resource", ARG("path", Variant::STRING)); - REGISTER_FUNC(inst_to_dict, false, Variant::DICTIONARY, ARG("instance", Variant::OBJECT)); - REGISTER_FUNC(dict_to_inst, false, Variant::OBJECT, ARG("dictionary", Variant::DICTIONARY)); - REGISTER_FUNC_DEF(Color8, true, 255, Variant::COLOR, ARG("r8", Variant::INT), ARG("g8", Variant::INT), ARG("b8", Variant::INT), ARG("a8", Variant::INT)); - REGISTER_VARARG_FUNC(print_debug, false, Variant::NIL); - REGISTER_FUNC_NO_ARGS(print_stack, false, Variant::NIL); - REGISTER_FUNC_NO_ARGS(get_stack, false, Variant::ARRAY); - REGISTER_FUNC(len, true, Variant::INT, VARARG("var")); - REGISTER_FUNC(is_instance_of, true, Variant::BOOL, VARARG("value"), VARARG("type")); + REGISTER_FUNC( type_exists, true, RET(BOOL), ARGS( ARG("type", STRING_NAME) ), false, varray( )); + REGISTER_FUNC( _char, true, RET(STRING), ARGS( ARG("char", INT) ), false, varray( )); + REGISTER_FUNC( range, false, RET(ARRAY), NOARGS, true, varray( )); + REGISTER_FUNC( load, false, RETCLS("Resource"), ARGS( ARG("path", STRING) ), false, varray( )); + REGISTER_FUNC( inst_to_dict, false, RET(DICTIONARY), ARGS( ARG("instance", OBJECT) ), false, varray( )); + REGISTER_FUNC( dict_to_inst, false, RET(OBJECT), ARGS( ARG("dictionary", DICTIONARY) ), false, varray( )); + REGISTER_FUNC( Color8, true, RET(COLOR), ARGS( ARG("r8", INT), ARG("g8", INT), + ARG("b8", INT), ARG("a8", INT) ), false, varray( 255 )); + REGISTER_FUNC( print_debug, false, RET(NIL), NOARGS, true, varray( )); + REGISTER_FUNC( print_stack, false, RET(NIL), NOARGS, false, varray( )); + REGISTER_FUNC( get_stack, false, RET(ARRAY), NOARGS, false, varray( )); + REGISTER_FUNC( len, true, RET(INT), ARGS( ARGVAR("var") ), false, varray( )); + REGISTER_FUNC( is_instance_of, true, RET(BOOL), ARGS( ARGVAR("value"), ARGVAR("type") ), false, varray( )); + /* clang-format on */ } void GDScriptUtilityFunctions::unregister_functions() { diff --git a/modules/gdscript/gdscript_vm.cpp b/modules/gdscript/gdscript_vm.cpp index d8139d913a..26c5cfe23c 100644 --- a/modules/gdscript/gdscript_vm.cpp +++ b/modules/gdscript/gdscript_vm.cpp @@ -397,32 +397,36 @@ void (*type_init_function_table[])(Variant *) = { #define OPCODES_OUT \ OPSOUT: #define OPCODE_SWITCH(m_test) goto *switch_table_ops[m_test]; + #ifdef DEBUG_ENABLED #define DISPATCH_OPCODE \ last_opcode = _code_ptr[ip]; \ goto *switch_table_ops[last_opcode] -#else +#else // !DEBUG_ENABLED #define DISPATCH_OPCODE goto *switch_table_ops[_code_ptr[ip]] -#endif +#endif // DEBUG_ENABLED + #define OPCODE_BREAK goto OPSEXIT #define OPCODE_OUT goto OPSOUT -#else +#else // !(defined(__GNUC__) || defined(__clang__)) #define OPCODES_TABLE #define OPCODE(m_op) case m_op: #define OPCODE_WHILE(m_test) while (m_test) #define OPCODES_END #define OPCODES_OUT #define DISPATCH_OPCODE continue + #ifdef _MSC_VER #define OPCODE_SWITCH(m_test) \ __assume(m_test <= OPCODE_END); \ switch (m_test) -#else +#else // !_MSC_VER #define OPCODE_SWITCH(m_test) switch (m_test) -#endif +#endif // _MSC_VER + #define OPCODE_BREAK break #define OPCODE_OUT break -#endif +#endif // defined(__GNUC__) || defined(__clang__) // Helpers for VariantInternal methods in macros. #define OP_GET_BOOL get_bool @@ -663,7 +667,7 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a OPCODE_BREAK; \ } -#else +#else // !DEBUG_ENABLED #define GD_ERR_BREAK(m_cond) #define CHECK_SPACE(m_space) @@ -676,7 +680,7 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a OPCODE_BREAK; \ } -#endif +#endif // DEBUG_ENABLED #define LOAD_INSTRUCTION_ARGS \ int instr_arg_count = _code_ptr[ip + 1]; \ @@ -1965,7 +1969,7 @@ Variant GDScriptFunction::call(GDScriptInstance *p_instance, const Variant **p_a err_text = _get_call_error("function '" + methodstr + (is_callable ? "" : "' in base '" + basestr) + "'", (const Variant **)argptrs, temp_ret, err); OPCODE_BREAK; } -#endif +#endif // DEBUG_ENABLED ip += 3; } diff --git a/modules/gdscript/tests/scripts/parser/errors/annotation_inapplicable.gd b/modules/gdscript/tests/scripts/parser/errors/annotation_inapplicable.gd new file mode 100644 index 0000000000..cfacb6a010 --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/errors/annotation_inapplicable.gd @@ -0,0 +1,3 @@ +@export +func test(): + pass diff --git a/modules/gdscript/tests/scripts/parser/errors/annotation_inapplicable.out b/modules/gdscript/tests/scripts/parser/errors/annotation_inapplicable.out new file mode 100644 index 0000000000..ed677cd39a --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/errors/annotation_inapplicable.out @@ -0,0 +1,2 @@ +GDTEST_PARSER_ERROR +Annotation "@export" cannot be applied to a function. diff --git a/modules/gdscript/tests/scripts/parser/errors/annotation_unrecognized.gd b/modules/gdscript/tests/scripts/parser/errors/annotation_unrecognized.gd new file mode 100644 index 0000000000..a6b171a428 --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/errors/annotation_unrecognized.gd @@ -0,0 +1,3 @@ +@hello_world +func test(): + pass diff --git a/modules/gdscript/tests/scripts/parser/errors/annotation_unrecognized.out b/modules/gdscript/tests/scripts/parser/errors/annotation_unrecognized.out new file mode 100644 index 0000000000..540e66f15b --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/errors/annotation_unrecognized.out @@ -0,0 +1,2 @@ +GDTEST_PARSER_ERROR +Unrecognized annotation: "@hello_world". diff --git a/modules/gdscript/tests/scripts/runtime/features/gdscript_utility_implicit_conversion.gd b/modules/gdscript/tests/scripts/runtime/features/gdscript_utility_implicit_conversion.gd new file mode 100644 index 0000000000..59bdb6eceb --- /dev/null +++ b/modules/gdscript/tests/scripts/runtime/features/gdscript_utility_implicit_conversion.gd @@ -0,0 +1,12 @@ +func test(): + const COLOR = Color8(255, 0.0, false) + var false_value := false + @warning_ignore("narrowing_conversion") + var color = Color8(255, 0.0, false_value) + print(var_to_str(COLOR)) + print(var_to_str(color)) + + var string := "Node" + var string_name := &"Node" + print(type_exists(string)) + print(type_exists(string_name)) diff --git a/modules/gdscript/tests/scripts/runtime/features/gdscript_utility_implicit_conversion.out b/modules/gdscript/tests/scripts/runtime/features/gdscript_utility_implicit_conversion.out new file mode 100644 index 0000000000..00913faa49 --- /dev/null +++ b/modules/gdscript/tests/scripts/runtime/features/gdscript_utility_implicit_conversion.out @@ -0,0 +1,5 @@ +GDTEST_OK +Color(1, 0, 0, 1) +Color(1, 0, 0, 1) +true +true diff --git a/modules/gdscript/tests/scripts/runtime/features/native_static_method_as_callable.gd b/modules/gdscript/tests/scripts/runtime/features/native_static_method_as_callable.gd new file mode 100644 index 0000000000..63d5935d1e --- /dev/null +++ b/modules/gdscript/tests/scripts/runtime/features/native_static_method_as_callable.gd @@ -0,0 +1,8 @@ +func get_parse_string(t: Variant): + return t.parse_string + +func test(): + var a: Callable = JSON.parse_string + var b: Callable = get_parse_string(JSON) + prints(a.call("{\"test\": \"a\"}"), a.is_valid()) + prints(b.call("{\"test\": \"b\"}"), b.is_valid()) diff --git a/modules/gdscript/tests/scripts/runtime/features/native_static_method_as_callable.out b/modules/gdscript/tests/scripts/runtime/features/native_static_method_as_callable.out new file mode 100644 index 0000000000..a2cb4b9a07 --- /dev/null +++ b/modules/gdscript/tests/scripts/runtime/features/native_static_method_as_callable.out @@ -0,0 +1,3 @@ +GDTEST_OK +{ "test": "a" } false +{ "test": "b" } false diff --git a/modules/gltf/gltf_document.cpp b/modules/gltf/gltf_document.cpp index 992075e980..0a487430a3 100644 --- a/modules/gltf/gltf_document.cpp +++ b/modules/gltf/gltf_document.cpp @@ -613,12 +613,8 @@ Error GLTFDocument::_parse_nodes(Ref<GLTFState> p_state) { if (n.has("scale")) { node->set_scale(_arr_to_vec3(n["scale"])); } - - Transform3D godot_rest_transform; - godot_rest_transform.basis.set_quaternion_scale(node->transform.basis.get_rotation_quaternion(), node->transform.basis.get_scale()); - godot_rest_transform.origin = node->transform.origin; - node->set_additional_data("GODOT_rest_transform", godot_rest_transform); } + node->set_additional_data("GODOT_rest_transform", node->transform); if (n.has("extensions")) { Dictionary extensions = n["extensions"]; diff --git a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/GeneratedSources/EventSignals_ScriptSignals.generated.cs b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/GeneratedSources/EventSignals_ScriptSignals.generated.cs index 188972e6fe..f54058b0d9 100644 --- a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/GeneratedSources/EventSignals_ScriptSignals.generated.cs +++ b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/GeneratedSources/EventSignals_ScriptSignals.generated.cs @@ -32,7 +32,7 @@ partial class EventSignals add => backing_MySignal += value; remove => backing_MySignal -= value; } - protected void OnMySignal(string str, int num) + protected void EmitSignalMySignal(string str, int num) { EmitSignal(SignalName.MySignal, str, num); } diff --git a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/GeneratedSources/ExportedFields_ScriptProperties.generated.cs b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/GeneratedSources/ExportedFields_ScriptProperties.generated.cs index c734dc7be1..ffde135930 100644 --- a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/GeneratedSources/ExportedFields_ScriptProperties.generated.cs +++ b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/GeneratedSources/ExportedFields_ScriptProperties.generated.cs @@ -807,7 +807,7 @@ partial class ExportedFields properties.Add(new(type: (global::Godot.Variant.Type)23, name: PropertyName.@_fieldRid, hint: (global::Godot.PropertyHint)0, hintString: "", usage: (global::Godot.PropertyUsageFlags)4102, exported: true)); properties.Add(new(type: (global::Godot.Variant.Type)27, name: PropertyName.@_fieldGodotDictionary, hint: (global::Godot.PropertyHint)0, hintString: "", usage: (global::Godot.PropertyUsageFlags)4102, exported: true)); properties.Add(new(type: (global::Godot.Variant.Type)28, name: PropertyName.@_fieldGodotArray, hint: (global::Godot.PropertyHint)0, hintString: "", usage: (global::Godot.PropertyUsageFlags)4102, exported: true)); - properties.Add(new(type: (global::Godot.Variant.Type)27, name: PropertyName.@_fieldGodotGenericDictionary, hint: (global::Godot.PropertyHint)38, hintString: "4/0:;1/0:", usage: (global::Godot.PropertyUsageFlags)4102, exported: true)); + properties.Add(new(type: (global::Godot.Variant.Type)27, name: PropertyName.@_fieldGodotGenericDictionary, hint: (global::Godot.PropertyHint)23, hintString: "4/0:;1/0:", usage: (global::Godot.PropertyUsageFlags)4102, exported: true)); properties.Add(new(type: (global::Godot.Variant.Type)28, name: PropertyName.@_fieldGodotGenericArray, hint: (global::Godot.PropertyHint)23, hintString: "2/0:", usage: (global::Godot.PropertyUsageFlags)4102, exported: true)); properties.Add(new(type: (global::Godot.Variant.Type)31, name: PropertyName.@_fieldEmptyInt64Array, hint: (global::Godot.PropertyHint)0, hintString: "", usage: (global::Godot.PropertyUsageFlags)4102, exported: true)); return properties; diff --git a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/GeneratedSources/ExportedProperties_ScriptProperties.generated.cs b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/GeneratedSources/ExportedProperties_ScriptProperties.generated.cs index 0de840aa34..94d447f61a 100644 --- a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/GeneratedSources/ExportedProperties_ScriptProperties.generated.cs +++ b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/GeneratedSources/ExportedProperties_ScriptProperties.generated.cs @@ -925,7 +925,7 @@ partial class ExportedProperties properties.Add(new(type: (global::Godot.Variant.Type)23, name: PropertyName.@PropertyRid, hint: (global::Godot.PropertyHint)0, hintString: "", usage: (global::Godot.PropertyUsageFlags)4102, exported: true)); properties.Add(new(type: (global::Godot.Variant.Type)27, name: PropertyName.@PropertyGodotDictionary, hint: (global::Godot.PropertyHint)0, hintString: "", usage: (global::Godot.PropertyUsageFlags)4102, exported: true)); properties.Add(new(type: (global::Godot.Variant.Type)28, name: PropertyName.@PropertyGodotArray, hint: (global::Godot.PropertyHint)0, hintString: "", usage: (global::Godot.PropertyUsageFlags)4102, exported: true)); - properties.Add(new(type: (global::Godot.Variant.Type)27, name: PropertyName.@PropertyGodotGenericDictionary, hint: (global::Godot.PropertyHint)38, hintString: "4/0:;1/0:", usage: (global::Godot.PropertyUsageFlags)4102, exported: true)); + properties.Add(new(type: (global::Godot.Variant.Type)27, name: PropertyName.@PropertyGodotGenericDictionary, hint: (global::Godot.PropertyHint)23, hintString: "4/0:;1/0:", usage: (global::Godot.PropertyUsageFlags)4102, exported: true)); properties.Add(new(type: (global::Godot.Variant.Type)28, name: PropertyName.@PropertyGodotGenericArray, hint: (global::Godot.PropertyHint)23, hintString: "2/0:", usage: (global::Godot.PropertyUsageFlags)4102, exported: true)); return properties; } diff --git a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/Sources/MustBeVariant.GD0301.cs b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/Sources/MustBeVariant.GD0301.cs index 053ae36ae7..5654060d38 100644 --- a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/Sources/MustBeVariant.GD0301.cs +++ b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators.Tests/TestData/Sources/MustBeVariant.GD0301.cs @@ -395,6 +395,11 @@ public class MustBeVariantAnnotatedMethods public void MethodWithWrongAttribute() { } + + [NestedGenericTypeAttributeContainer.NestedGenericTypeAttribute<bool>()] + public void MethodWithNestedAttribute() + { + } } [GenericTypeAttribute<bool>()] @@ -657,3 +662,11 @@ public class ClassNonVariantAnnotated public class GenericTypeAttribute<[MustBeVariant] T> : Attribute { } + +public class NestedGenericTypeAttributeContainer +{ + [AttributeUsage(AttributeTargets.Method, AllowMultiple = true)] + public class NestedGenericTypeAttribute<[MustBeVariant] T> : Attribute + { + } +} diff --git a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators/MustBeVariantAnalyzer.cs b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators/MustBeVariantAnalyzer.cs index e894e7a86c..a72a8c5880 100644 --- a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators/MustBeVariantAnalyzer.cs +++ b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators/MustBeVariantAnalyzer.cs @@ -135,7 +135,7 @@ namespace Godot.SourceGenerators { ITypeParameterSymbol? typeParamSymbol = parentSymbol switch { - IMethodSymbol methodSymbol when parentSyntax.Parent is AttributeSyntax && + IMethodSymbol methodSymbol when parentSyntax.Ancestors().Any(s => s is AttributeSyntax) && methodSymbol.ContainingType.TypeParameters.Length > 0 => methodSymbol.ContainingType.TypeParameters[typeArgumentIndex], diff --git a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators/ScriptPropertiesGenerator.cs b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators/ScriptPropertiesGenerator.cs index 0f86b3b91c..fc67e4f592 100644 --- a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators/ScriptPropertiesGenerator.cs +++ b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators/ScriptPropertiesGenerator.cs @@ -791,7 +791,7 @@ namespace Godot.SourceGenerators } } - hint = PropertyHint.DictionaryType; + hint = PropertyHint.TypeString; hintString = keyHintString != null && valueHintString != null ? $"{keyHintString};{valueHintString}" : null; return hintString != null; diff --git a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators/ScriptSignalsGenerator.cs b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators/ScriptSignalsGenerator.cs index 0dda43ab4c..702c50d461 100644 --- a/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators/ScriptSignalsGenerator.cs +++ b/modules/mono/editor/Godot.NET.Sdk/Godot.SourceGenerators/ScriptSignalsGenerator.cs @@ -282,7 +282,7 @@ namespace Godot.SourceGenerators .Append(" -= value;\n") .Append("}\n"); - // Generate On{EventName} method to raise the event + // Generate EmitSignal{EventName} method to raise the event var invokeMethodSymbol = signalDelegate.InvokeMethodData.Method; int paramCount = invokeMethodSymbol.Parameters.Length; @@ -291,7 +291,7 @@ namespace Godot.SourceGenerators "private" : "protected"; - source.Append($" {raiseMethodModifiers} void On{signalName}("); + source.Append($" {raiseMethodModifiers} void EmitSignal{signalName}("); for (int i = 0; i < paramCount; i++) { var paramSymbol = invokeMethodSymbol.Parameters[i]; diff --git a/modules/mono/editor/bindings_generator.cpp b/modules/mono/editor/bindings_generator.cpp index 946e997c1b..e97229c621 100644 --- a/modules/mono/editor/bindings_generator.cpp +++ b/modules/mono/editor/bindings_generator.cpp @@ -3275,10 +3275,10 @@ Error BindingsGenerator::_generate_cs_signal(const BindingsGenerator::TypeInterf p_output.append(CLOSE_BLOCK_L1); - // Generate On{EventName} method to raise the event. + // Generate EmitSignal{EventName} method to raise the event. if (!p_itype.is_singleton) { p_output.append(MEMBER_BEGIN "protected void "); - p_output << "On" << p_isignal.proxy_name; + p_output << "EmitSignal" << p_isignal.proxy_name; if (is_parameterless) { p_output.append("()\n" OPEN_BLOCK_L1 INDENT2); p_output << "EmitSignal(SignalName." << p_isignal.proxy_name << ");\n"; diff --git a/modules/navigation/nav_map.cpp b/modules/navigation/nav_map.cpp index dd77e81b45..8df1db533d 100644 --- a/modules/navigation/nav_map.cpp +++ b/modules/navigation/nav_map.cpp @@ -480,6 +480,8 @@ void NavMap::sync() { // connection, integration and path finding. _new_pm_edge_free_count = free_edges.size(); + real_t sqr_edge_connection_margin = edge_connection_margin * edge_connection_margin; + for (int i = 0; i < free_edges.size(); i++) { const gd::Edge::Connection &free_edge = free_edges[i]; Vector3 edge_p1 = free_edge.polygon->points[free_edge.edge].pos; @@ -510,7 +512,7 @@ void NavMap::sync() { } else { other1 = other_edge_p1.lerp(other_edge_p2, (1.0 - projected_p1_ratio) / (projected_p2_ratio - projected_p1_ratio)); } - if (other1.distance_to(self1) > edge_connection_margin) { + if (other1.distance_squared_to(self1) > sqr_edge_connection_margin) { continue; } @@ -521,7 +523,7 @@ void NavMap::sync() { } else { other2 = other_edge_p1.lerp(other_edge_p2, (0.0 - projected_p1_ratio) / (projected_p2_ratio - projected_p1_ratio)); } - if (other2.distance_to(self2) > edge_connection_margin) { + if (other2.distance_squared_to(self2) > sqr_edge_connection_margin) { continue; } @@ -549,11 +551,11 @@ void NavMap::sync() { const Vector3 end = link->get_end_position(); gd::Polygon *closest_start_polygon = nullptr; - real_t closest_start_distance = link_connection_radius; + real_t closest_start_sqr_dist = link_connection_radius * link_connection_radius; Vector3 closest_start_point; gd::Polygon *closest_end_polygon = nullptr; - real_t closest_end_distance = link_connection_radius; + real_t closest_end_sqr_dist = link_connection_radius * link_connection_radius; Vector3 closest_end_point; // Create link to any polygons within the search radius of the start point. @@ -564,11 +566,11 @@ void NavMap::sync() { for (uint32_t start_point_id = 2; start_point_id < start_poly.points.size(); start_point_id += 1) { const Face3 start_face(start_poly.points[0].pos, start_poly.points[start_point_id - 1].pos, start_poly.points[start_point_id].pos); const Vector3 start_point = start_face.get_closest_point_to(start); - const real_t start_distance = start_point.distance_to(start); + const real_t sqr_dist = start_point.distance_squared_to(start); // Pick the polygon that is within our radius and is closer than anything we've seen yet. - if (start_distance <= link_connection_radius && start_distance < closest_start_distance) { - closest_start_distance = start_distance; + if (sqr_dist < closest_start_sqr_dist) { + closest_start_sqr_dist = sqr_dist; closest_start_point = start_point; closest_start_polygon = &start_poly; } @@ -581,11 +583,11 @@ void NavMap::sync() { for (uint32_t end_point_id = 2; end_point_id < end_poly.points.size(); end_point_id += 1) { const Face3 end_face(end_poly.points[0].pos, end_poly.points[end_point_id - 1].pos, end_poly.points[end_point_id].pos); const Vector3 end_point = end_face.get_closest_point_to(end); - const real_t end_distance = end_point.distance_to(end); + const real_t sqr_dist = end_point.distance_squared_to(end); // Pick the polygon that is within our radius and is closer than anything we've seen yet. - if (end_distance <= link_connection_radius && end_distance < closest_end_distance) { - closest_end_distance = end_distance; + if (sqr_dist < closest_end_sqr_dist) { + closest_end_sqr_dist = sqr_dist; closest_end_point = end_point; closest_end_polygon = &end_poly; } diff --git a/modules/openxr/action_map/openxr_action_map.cpp b/modules/openxr/action_map/openxr_action_map.cpp index 5430a41d6d..f924386ecf 100644 --- a/modules/openxr/action_map/openxr_action_map.cpp +++ b/modules/openxr/action_map/openxr_action_map.cpp @@ -576,20 +576,15 @@ PackedStringArray OpenXRActionMap::get_top_level_paths(const Ref<OpenXRAction> p const OpenXRInteractionProfileMetadata::InteractionProfile *profile = OpenXRInteractionProfileMetadata::get_singleton()->get_profile(ip->get_interaction_profile_path()); if (profile != nullptr) { - for (int j = 0; j < ip->get_binding_count(); j++) { - Ref<OpenXRIPBinding> binding = ip->get_binding(j); - if (binding->get_action() == p_action) { - PackedStringArray paths = binding->get_paths(); - - for (int k = 0; k < paths.size(); k++) { - const OpenXRInteractionProfileMetadata::IOPath *io_path = profile->get_io_path(paths[k]); - if (io_path != nullptr) { - String top_path = io_path->top_level_path; - - if (!arr.has(top_path)) { - arr.push_back(top_path); - } - } + Vector<Ref<OpenXRIPBinding>> bindings = ip->get_bindings_for_action(p_action); + for (const Ref<OpenXRIPBinding> &binding : bindings) { + String binding_path = binding->get_binding_path(); + const OpenXRInteractionProfileMetadata::IOPath *io_path = profile->get_io_path(binding_path); + if (io_path != nullptr) { + String top_path = io_path->top_level_path; + + if (!arr.has(top_path)) { + arr.push_back(top_path); } } } diff --git a/modules/openxr/action_map/openxr_interaction_profile.cpp b/modules/openxr/action_map/openxr_interaction_profile.cpp index 1266457113..2aab55f6ec 100644 --- a/modules/openxr/action_map/openxr_interaction_profile.cpp +++ b/modules/openxr/action_map/openxr_interaction_profile.cpp @@ -35,23 +35,30 @@ void OpenXRIPBinding::_bind_methods() { ClassDB::bind_method(D_METHOD("get_action"), &OpenXRIPBinding::get_action); ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "action", PROPERTY_HINT_RESOURCE_TYPE, "OpenXRAction"), "set_action", "get_action"); - ClassDB::bind_method(D_METHOD("get_path_count"), &OpenXRIPBinding::get_path_count); + ClassDB::bind_method(D_METHOD("set_binding_path", "binding_path"), &OpenXRIPBinding::set_binding_path); + ClassDB::bind_method(D_METHOD("get_binding_path"), &OpenXRIPBinding::get_binding_path); + ADD_PROPERTY(PropertyInfo(Variant::STRING, "binding_path"), "set_binding_path", "get_binding_path"); + + // Deprecated +#ifndef DISABLE_DEPRECATED ClassDB::bind_method(D_METHOD("set_paths", "paths"), &OpenXRIPBinding::set_paths); ClassDB::bind_method(D_METHOD("get_paths"), &OpenXRIPBinding::get_paths); - ADD_PROPERTY(PropertyInfo(Variant::PACKED_STRING_ARRAY, "paths"), "set_paths", "get_paths"); + ADD_PROPERTY(PropertyInfo(Variant::PACKED_STRING_ARRAY, "paths", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NONE), "set_paths", "get_paths"); + ClassDB::bind_method(D_METHOD("get_path_count"), &OpenXRIPBinding::get_path_count); ClassDB::bind_method(D_METHOD("has_path", "path"), &OpenXRIPBinding::has_path); ClassDB::bind_method(D_METHOD("add_path", "path"), &OpenXRIPBinding::add_path); ClassDB::bind_method(D_METHOD("remove_path", "path"), &OpenXRIPBinding::remove_path); +#endif // DISABLE_DEPRECATED } -Ref<OpenXRIPBinding> OpenXRIPBinding::new_binding(const Ref<OpenXRAction> p_action, const char *p_paths) { +Ref<OpenXRIPBinding> OpenXRIPBinding::new_binding(const Ref<OpenXRAction> p_action, const String &p_binding_path) { // This is a helper function to help build our default action sets Ref<OpenXRIPBinding> binding; binding.instantiate(); binding->set_action(p_action); - binding->parse_paths(String(p_paths)); + binding->set_binding_path(p_binding_path); return binding; } @@ -65,42 +72,68 @@ Ref<OpenXRAction> OpenXRIPBinding::get_action() const { return action; } -int OpenXRIPBinding::get_path_count() const { - return paths.size(); +void OpenXRIPBinding::set_binding_path(const String &path) { + binding_path = path; + emit_changed(); } -void OpenXRIPBinding::set_paths(const PackedStringArray p_paths) { - paths = p_paths; - emit_changed(); +String OpenXRIPBinding::get_binding_path() const { + return binding_path; } -PackedStringArray OpenXRIPBinding::get_paths() const { +#ifndef DISABLE_DEPRECATED + +void OpenXRIPBinding::set_paths(const PackedStringArray p_paths) { // Deprecated, but needed for loading old action maps. + // Fallback logic, this should ONLY be called when loading older action maps. + // We'll parse this momentarily and extract individual bindings. + binding_path = ""; + for (const String &path : p_paths) { + if (!binding_path.is_empty()) { + binding_path += ","; + } + binding_path += path; + } +} + +PackedStringArray OpenXRIPBinding::get_paths() const { // Deprecated, but needed for converting old action maps. + // Fallback logic, return an array. + // If we just loaded an old action map from disc, this will be a comma separated list of actions. + // Once parsed there should be only one path in our array. + PackedStringArray paths = binding_path.split(",", false); + return paths; } -void OpenXRIPBinding::parse_paths(const String p_paths) { - paths = p_paths.split(",", false); - emit_changed(); +int OpenXRIPBinding::get_path_count() const { // Deprecated. + // Fallback logic, we only have one entry. + return binding_path.is_empty() ? 0 : 1; } -bool OpenXRIPBinding::has_path(const String p_path) const { - return paths.has(p_path); +bool OpenXRIPBinding::has_path(const String p_path) const { // Deprecated. + // Fallback logic, return true if this is our path. + return binding_path == p_path; } -void OpenXRIPBinding::add_path(const String p_path) { - if (!paths.has(p_path)) { - paths.push_back(p_path); +void OpenXRIPBinding::add_path(const String p_path) { // Deprecated. + // Fallback logic, only assign first time this is called. + if (binding_path != p_path) { + ERR_FAIL_COND_MSG(!binding_path.is_empty(), "Method add_path has been deprecated. A binding path was already set, create separate binding resources for each path and use set_binding_path instead."); + + binding_path = p_path; emit_changed(); } } -void OpenXRIPBinding::remove_path(const String p_path) { - if (paths.has(p_path)) { - paths.erase(p_path); - emit_changed(); - } +void OpenXRIPBinding::remove_path(const String p_path) { // Deprecated. + ERR_FAIL_COND_MSG(binding_path != p_path, "Method remove_path has been deprecated. Attempt at removing a different binding path, remove the correct binding record from the interaction profile instead."); + + // Fallback logic, clear if this is our path. + binding_path = p_path; + emit_changed(); } +#endif // DISABLE_DEPRECATED + OpenXRIPBinding::~OpenXRIPBinding() { action.unref(); } @@ -151,9 +184,18 @@ Ref<OpenXRIPBinding> OpenXRInteractionProfile::get_binding(int p_index) const { } void OpenXRInteractionProfile::set_bindings(Array p_bindings) { - // TODO add check here that our bindings don't contain duplicate actions + bindings.clear(); + + for (Ref<OpenXRIPBinding> binding : p_bindings) { + String binding_path = binding->get_binding_path(); + if (binding_path.find_char(',') >= 0) { + // Convert old binding approach to new... + add_new_binding(binding->get_action(), binding_path); + } else { + add_binding(binding); + } + } - bindings = p_bindings; emit_changed(); } @@ -161,10 +203,9 @@ Array OpenXRInteractionProfile::get_bindings() const { return bindings; } -Ref<OpenXRIPBinding> OpenXRInteractionProfile::get_binding_for_action(const Ref<OpenXRAction> p_action) const { - for (int i = 0; i < bindings.size(); i++) { - Ref<OpenXRIPBinding> binding = bindings[i]; - if (binding->get_action() == p_action) { +Ref<OpenXRIPBinding> OpenXRInteractionProfile::find_binding(const Ref<OpenXRAction> p_action, const String &p_binding_path) const { + for (Ref<OpenXRIPBinding> binding : bindings) { + if (binding->get_action() == p_action && binding->get_binding_path() == p_binding_path) { return binding; } } @@ -172,11 +213,23 @@ Ref<OpenXRIPBinding> OpenXRInteractionProfile::get_binding_for_action(const Ref< return Ref<OpenXRIPBinding>(); } +Vector<Ref<OpenXRIPBinding>> OpenXRInteractionProfile::get_bindings_for_action(const Ref<OpenXRAction> p_action) const { + Vector<Ref<OpenXRIPBinding>> ret_bindings; + + for (Ref<OpenXRIPBinding> binding : bindings) { + if (binding->get_action() == p_action) { + ret_bindings.push_back(binding); + } + } + + return ret_bindings; +} + void OpenXRInteractionProfile::add_binding(Ref<OpenXRIPBinding> p_binding) { ERR_FAIL_COND(p_binding.is_null()); if (!bindings.has(p_binding)) { - ERR_FAIL_COND_MSG(get_binding_for_action(p_binding->get_action()).is_valid(), "There is already a binding for this action in this interaction profile"); + ERR_FAIL_COND_MSG(find_binding(p_binding->get_action(), p_binding->get_binding_path()).is_valid(), "There is already a binding for this action and binding path in this interaction profile."); bindings.push_back(p_binding); emit_changed(); @@ -191,11 +244,15 @@ void OpenXRInteractionProfile::remove_binding(Ref<OpenXRIPBinding> p_binding) { } } -void OpenXRInteractionProfile::add_new_binding(const Ref<OpenXRAction> p_action, const char *p_paths) { +void OpenXRInteractionProfile::add_new_binding(const Ref<OpenXRAction> p_action, const String &p_paths) { // This is a helper function to help build our default action sets - Ref<OpenXRIPBinding> binding = OpenXRIPBinding::new_binding(p_action, p_paths); - add_binding(binding); + PackedStringArray paths = p_paths.split(",", false); + + for (const String &path : paths) { + Ref<OpenXRIPBinding> binding = OpenXRIPBinding::new_binding(p_action, path); + add_binding(binding); + } } void OpenXRInteractionProfile::remove_binding_for_action(const Ref<OpenXRAction> p_action) { diff --git a/modules/openxr/action_map/openxr_interaction_profile.h b/modules/openxr/action_map/openxr_interaction_profile.h index 479cc3c527..952f87a09d 100644 --- a/modules/openxr/action_map/openxr_interaction_profile.h +++ b/modules/openxr/action_map/openxr_interaction_profile.h @@ -41,26 +41,29 @@ class OpenXRIPBinding : public Resource { private: Ref<OpenXRAction> action; - PackedStringArray paths; + String binding_path; protected: static void _bind_methods(); public: - static Ref<OpenXRIPBinding> new_binding(const Ref<OpenXRAction> p_action, const char *p_paths); // Helper function for adding a new binding + static Ref<OpenXRIPBinding> new_binding(const Ref<OpenXRAction> p_action, const String &p_binding_path); // Helper function for adding a new binding. - void set_action(const Ref<OpenXRAction> p_action); // Set the action for this binding - Ref<OpenXRAction> get_action() const; // Get the action for this binding + void set_action(const Ref<OpenXRAction> p_action); // Set the action for this binding. + Ref<OpenXRAction> get_action() const; // Get the action for this binding. - int get_path_count() const; // Get the number of io paths - void set_paths(const PackedStringArray p_paths); // Set our paths (for loading from resource) - PackedStringArray get_paths() const; // Get our paths (for saving to resource) + void set_binding_path(const String &path); + String get_binding_path() const; - void parse_paths(const String p_paths); // Parse a comma separated string of io paths. - - bool has_path(const String p_path) const; // Has this io path - void add_path(const String p_path); // Add an io path - void remove_path(const String p_path); // Remove an io path + // Deprecated. +#ifndef DISABLE_DEPRECATED + void set_paths(const PackedStringArray p_paths); // Set our paths (for loading from resource), needed for loading old action maps. + PackedStringArray get_paths() const; // Get our paths (for saving to resource), needed for converted old action maps. + int get_path_count() const; // Get the number of io paths. + bool has_path(const String p_path) const; // Has this io path. + void add_path(const String p_path); // Add an io path. + void remove_path(const String p_path); // Remove an io path. +#endif // DISABLE_DEPRECATED // TODO add validation that we can display in the interface that checks if no two paths belong to the same top level path @@ -88,11 +91,12 @@ public: void set_bindings(Array p_bindings); // Set the bindings (for loading from a resource) Array get_bindings() const; // Get the bindings (for saving to a resource) - Ref<OpenXRIPBinding> get_binding_for_action(const Ref<OpenXRAction> p_action) const; // Get our binding record for a given action + Ref<OpenXRIPBinding> find_binding(const Ref<OpenXRAction> p_action, const String &p_binding_path) const; // Get our binding record + Vector<Ref<OpenXRIPBinding>> get_bindings_for_action(const Ref<OpenXRAction> p_action) const; // Get our binding record for a given action void add_binding(Ref<OpenXRIPBinding> p_binding); // Add a binding object void remove_binding(Ref<OpenXRIPBinding> p_binding); // Remove a binding object - void add_new_binding(const Ref<OpenXRAction> p_action, const char *p_paths); // Create a new binding for this profile + void add_new_binding(const Ref<OpenXRAction> p_action, const String &p_paths); // Create a new binding for this profile void remove_binding_for_action(const Ref<OpenXRAction> p_action); // Remove all bindings for this action bool has_binding_for_action(const Ref<OpenXRAction> p_action); // Returns true if we have a binding for this action diff --git a/modules/openxr/doc_classes/OpenXRIPBinding.xml b/modules/openxr/doc_classes/OpenXRIPBinding.xml index f274f0868e..ddd6fbe268 100644 --- a/modules/openxr/doc_classes/OpenXRIPBinding.xml +++ b/modules/openxr/doc_classes/OpenXRIPBinding.xml @@ -4,32 +4,32 @@ Defines a binding between an [OpenXRAction] and an XR input or output. </brief_description> <description> - This binding resource binds an [OpenXRAction] to inputs or outputs. As most controllers have left hand and right versions that are handled by the same interaction profile we can specify multiple bindings. For instance an action "Fire" could be bound to both "/user/hand/left/input/trigger" and "/user/hand/right/input/trigger". + This binding resource binds an [OpenXRAction] to an input or output. As most controllers have left hand and right versions that are handled by the same interaction profile we can specify multiple bindings. For instance an action "Fire" could be bound to both "/user/hand/left/input/trigger" and "/user/hand/right/input/trigger". This would require two binding entries. </description> <tutorials> </tutorials> <methods> - <method name="add_path"> + <method name="add_path" deprecated="Binding is for a single path."> <return type="void" /> <param index="0" name="path" type="String" /> <description> Add an input/output path to this binding. </description> </method> - <method name="get_path_count" qualifiers="const"> + <method name="get_path_count" qualifiers="const" deprecated="Binding is for a single path."> <return type="int" /> <description> Get the number of input/output paths in this binding. </description> </method> - <method name="has_path" qualifiers="const"> + <method name="has_path" qualifiers="const" deprecated="Binding is for a single path."> <return type="bool" /> <param index="0" name="path" type="String" /> <description> Returns [code]true[/code] if this input/output path is part of this binding. </description> </method> - <method name="remove_path"> + <method name="remove_path" deprecated="Binding is for a single path."> <return type="void" /> <param index="0" name="path" type="String" /> <description> @@ -39,9 +39,13 @@ </methods> <members> <member name="action" type="OpenXRAction" setter="set_action" getter="get_action"> - [OpenXRAction] that is bound to these paths. + [OpenXRAction] that is bound to [member binding_path]. </member> - <member name="paths" type="PackedStringArray" setter="set_paths" getter="get_paths" default="PackedStringArray()"> + <member name="binding_path" type="String" setter="set_binding_path" getter="get_binding_path" default=""""> + Binding path that defines the input or output bound to [member action]. + [b]Note:[/b] Binding paths are suggestions, an XR runtime may choose to bind the action to a different input or output emulating this input or output. + </member> + <member name="paths" type="PackedStringArray" setter="set_paths" getter="get_paths" deprecated="Use [member binding_path] instead."> Paths that define the inputs or outputs bound on the device. </member> </members> diff --git a/modules/openxr/editor/openxr_interaction_profile_editor.cpp b/modules/openxr/editor/openxr_interaction_profile_editor.cpp index 651171358c..09a9a990ed 100644 --- a/modules/openxr/editor/openxr_interaction_profile_editor.cpp +++ b/modules/openxr/editor/openxr_interaction_profile_editor.cpp @@ -73,17 +73,19 @@ void OpenXRInteractionProfileEditorBase::_add_binding(const String p_action, con Ref<OpenXRAction> action = action_map->get_action(p_action); ERR_FAIL_COND(action.is_null()); - Ref<OpenXRIPBinding> binding = interaction_profile->get_binding_for_action(action); + Ref<OpenXRIPBinding> binding = interaction_profile->find_binding(action, p_path); if (binding.is_null()) { // create a new binding binding.instantiate(); binding->set_action(action); + binding->set_binding_path(p_path); + + // add it to our interaction profile interaction_profile->add_binding(binding); interaction_profile->set_edited(true); - } - binding->add_path(p_path); - binding->set_edited(true); + binding->set_edited(true); + } // Update our toplevel paths action->set_toplevel_paths(action_map->get_top_level_paths(action)); @@ -98,15 +100,10 @@ void OpenXRInteractionProfileEditorBase::_remove_binding(const String p_action, Ref<OpenXRAction> action = action_map->get_action(p_action); ERR_FAIL_COND(action.is_null()); - Ref<OpenXRIPBinding> binding = interaction_profile->get_binding_for_action(action); + Ref<OpenXRIPBinding> binding = interaction_profile->find_binding(action, p_path); if (binding.is_valid()) { - binding->remove_path(p_path); - binding->set_edited(true); - - if (binding->get_path_count() == 0) { - interaction_profile->remove_binding(binding); - interaction_profile->set_edited(true); - } + interaction_profile->remove_binding(binding); + interaction_profile->set_edited(true); // Update our toplevel paths action->set_toplevel_paths(action_map->get_top_level_paths(action)); @@ -116,21 +113,22 @@ void OpenXRInteractionProfileEditorBase::_remove_binding(const String p_action, } void OpenXRInteractionProfileEditorBase::remove_all_bindings_for_action(Ref<OpenXRAction> p_action) { - Ref<OpenXRIPBinding> binding = interaction_profile->get_binding_for_action(p_action); - if (binding.is_valid()) { + Vector<Ref<OpenXRIPBinding>> bindings = interaction_profile->get_bindings_for_action(p_action); + if (bindings.size() > 0) { String action_name = p_action->get_name_with_set(); // for our undo/redo we process all paths undo_redo->create_action(TTR("Remove action from interaction profile")); - PackedStringArray paths = binding->get_paths(); - for (const String &path : paths) { - undo_redo->add_do_method(this, "_remove_binding", action_name, path); - undo_redo->add_undo_method(this, "_add_binding", action_name, path); + for (const Ref<OpenXRIPBinding> &binding : bindings) { + undo_redo->add_do_method(this, "_remove_binding", action_name, binding->get_binding_path()); + undo_redo->add_undo_method(this, "_add_binding", action_name, binding->get_binding_path()); } undo_redo->commit_action(false); // but we take a shortcut here :) - interaction_profile->remove_binding(binding); + for (const Ref<OpenXRIPBinding> &binding : bindings) { + interaction_profile->remove_binding(binding); + } interaction_profile->set_edited(true); // Update our toplevel paths @@ -228,9 +226,8 @@ void OpenXRInteractionProfileEditor::_add_io_path(VBoxContainer *p_container, co if (interaction_profile.is_valid()) { String io_path = String(p_io_path->openxr_path); Array bindings = interaction_profile->get_bindings(); - for (int i = 0; i < bindings.size(); i++) { - Ref<OpenXRIPBinding> binding = bindings[i]; - if (binding->has_path(io_path)) { + for (Ref<OpenXRIPBinding> binding : bindings) { + if (binding->get_binding_path() == io_path) { Ref<OpenXRAction> action = binding->get_action(); HBoxContainer *action_hb = memnew(HBoxContainer); diff --git a/modules/openxr/openxr_interface.cpp b/modules/openxr/openxr_interface.cpp index 73ac529537..500a58acc3 100644 --- a/modules/openxr/openxr_interface.cpp +++ b/modules/openxr/openxr_interface.cpp @@ -300,10 +300,7 @@ void OpenXRInterface::_load_action_map() { continue; } - PackedStringArray paths = xr_binding->get_paths(); - for (int k = 0; k < paths.size(); k++) { - openxr_api->interaction_profile_add_binding(ip, action->action_rid, paths[k]); - } + openxr_api->interaction_profile_add_binding(ip, action->action_rid, xr_binding->get_binding_path()); } // Now submit our suggestions diff --git a/modules/openxr/scene/openxr_composition_layer.cpp b/modules/openxr/scene/openxr_composition_layer.cpp index 697369d516..bc429e4632 100644 --- a/modules/openxr/scene/openxr_composition_layer.cpp +++ b/modules/openxr/scene/openxr_composition_layer.cpp @@ -56,6 +56,10 @@ OpenXRCompositionLayer::OpenXRCompositionLayer(XrCompositionLayerBaseHeader *p_c openxr_api = OpenXRAPI::get_singleton(); composition_layer_extension = OpenXRCompositionLayerExtension::get_singleton(); + if (openxr_api) { + openxr_session_running = openxr_api->is_running(); + } + Ref<OpenXRInterface> openxr_interface = XRServer::get_singleton()->find_interface("OpenXR"); if (openxr_interface.is_valid()) { openxr_interface->connect("session_begun", callable_mp(this, &OpenXRCompositionLayer::_on_openxr_session_begun)); diff --git a/modules/raycast/raycast_occlusion_cull.cpp b/modules/raycast/raycast_occlusion_cull.cpp index 94d8b267d1..54dc040583 100644 --- a/modules/raycast/raycast_occlusion_cull.cpp +++ b/modules/raycast/raycast_occlusion_cull.cpp @@ -181,17 +181,7 @@ void RaycastOcclusionCull::RaycastHZBuffer::sort_rays(const Vector3 &p_camera_di } int k = tile_i * TILE_SIZE + tile_j; int tile_index = i * tile_grid_size.x + j; - float d = camera_rays[tile_index].ray.tfar[k]; - - if (!p_orthogonal) { - const float &dir_x = camera_rays[tile_index].ray.dir_x[k]; - const float &dir_y = camera_rays[tile_index].ray.dir_y[k]; - const float &dir_z = camera_rays[tile_index].ray.dir_z[k]; - float cos_theta = p_camera_dir.x * dir_x + p_camera_dir.y * dir_y + p_camera_dir.z * dir_z; - d *= cos_theta; - } - - mips[0][y * buffer_size.x + x] = d; + mips[0][y * buffer_size.x + x] = camera_rays[tile_index].ray.tfar[k]; } } } diff --git a/modules/svg/image_loader_svg.cpp b/modules/svg/image_loader_svg.cpp index d903137195..b9d493b844 100644 --- a/modules/svg/image_loader_svg.cpp +++ b/modules/svg/image_loader_svg.cpp @@ -104,51 +104,33 @@ Error ImageLoaderSVG::create_image_from_utf8_buffer(Ref<Image> p_image, const ui picture->size(width, height); std::unique_ptr<tvg::SwCanvas> sw_canvas = tvg::SwCanvas::gen(); - // Note: memalloc here, be sure to memfree before any return. - uint32_t *buffer = (uint32_t *)memalloc(sizeof(uint32_t) * width * height); + Vector<uint8_t> buffer; + buffer.resize(sizeof(uint32_t) * width * height); - tvg::Result res = sw_canvas->target(buffer, width, width, height, tvg::SwCanvas::ARGB8888S); + tvg::Result res = sw_canvas->target((uint32_t *)buffer.ptrw(), width, width, height, tvg::SwCanvas::ABGR8888S); if (res != tvg::Result::Success) { - memfree(buffer); ERR_FAIL_V_MSG(FAILED, "ImageLoaderSVG: Couldn't set target on ThorVG canvas."); } res = sw_canvas->push(std::move(picture)); if (res != tvg::Result::Success) { - memfree(buffer); ERR_FAIL_V_MSG(FAILED, "ImageLoaderSVG: Couldn't insert ThorVG picture on canvas."); } res = sw_canvas->draw(); if (res != tvg::Result::Success) { - memfree(buffer); ERR_FAIL_V_MSG(FAILED, "ImageLoaderSVG: Couldn't draw ThorVG pictures on canvas."); } res = sw_canvas->sync(); if (res != tvg::Result::Success) { - memfree(buffer); ERR_FAIL_V_MSG(FAILED, "ImageLoaderSVG: Couldn't sync ThorVG canvas."); } - Vector<uint8_t> image; - image.resize(width * height * sizeof(uint32_t)); - - for (uint32_t y = 0; y < height; y++) { - for (uint32_t x = 0; x < width; x++) { - uint32_t n = buffer[y * width + x]; - const size_t offset = sizeof(uint32_t) * width * y + sizeof(uint32_t) * x; - image.write[offset + 0] = (n >> 16) & 0xff; - image.write[offset + 1] = (n >> 8) & 0xff; - image.write[offset + 2] = n & 0xff; - image.write[offset + 3] = (n >> 24) & 0xff; - } - } + p_image->set_data(width, height, false, Image::FORMAT_RGBA8, buffer); res = sw_canvas->clear(true); - memfree(buffer); - p_image->set_data(width, height, false, Image::FORMAT_RGBA8, image); return OK; } diff --git a/modules/tga/image_loader_tga.cpp b/modules/tga/image_loader_tga.cpp index e2bb89811c..b205dbbbf2 100644 --- a/modules/tga/image_loader_tga.cpp +++ b/modules/tga/image_loader_tga.cpp @@ -32,6 +32,7 @@ #include "core/error/error_macros.h" #include "core/io/file_access_memory.h" +#include "core/io/image.h" #include "core/os/os.h" #include "core/string/print_string.h" diff --git a/modules/theora/video_stream_theora.cpp b/modules/theora/video_stream_theora.cpp index d964fd7627..2725a61a82 100644 --- a/modules/theora/video_stream_theora.cpp +++ b/modules/theora/video_stream_theora.cpp @@ -31,6 +31,7 @@ #include "video_stream_theora.h" #include "core/config/project_settings.h" +#include "core/io/image.h" #include "core/os/os.h" #include "scene/resources/image_texture.h" diff --git a/modules/tinyexr/image_saver_tinyexr.cpp b/modules/tinyexr/image_saver_tinyexr.cpp index 31c1e06dee..b0a977f647 100644 --- a/modules/tinyexr/image_saver_tinyexr.cpp +++ b/modules/tinyexr/image_saver_tinyexr.cpp @@ -31,6 +31,7 @@ #include "image_saver_tinyexr.h" #include "core/math/math_funcs.h" +#include "core/os/os.h" #include <zlib.h> // Should come before including tinyexr. diff --git a/modules/tinyexr/image_saver_tinyexr.h b/modules/tinyexr/image_saver_tinyexr.h index 058eeae58e..014c2f2e19 100644 --- a/modules/tinyexr/image_saver_tinyexr.h +++ b/modules/tinyexr/image_saver_tinyexr.h @@ -31,7 +31,7 @@ #ifndef IMAGE_SAVER_TINYEXR_H #define IMAGE_SAVER_TINYEXR_H -#include "core/os/os.h" +#include "core/io/image.h" Error save_exr(const String &p_path, const Ref<Image> &p_img, bool p_grayscale); Vector<uint8_t> save_exr_buffer(const Ref<Image> &p_img, bool p_grayscale); diff --git a/platform/android/detect.py b/platform/android/detect.py index 0a10754e24..937bdbaa07 100644 --- a/platform/android/detect.py +++ b/platform/android/detect.py @@ -5,6 +5,7 @@ import sys from typing import TYPE_CHECKING from methods import print_error, print_warning +from platform_methods import validate_arch if TYPE_CHECKING: from SCons.Script.SConscript import SConsEnvironment @@ -98,12 +99,7 @@ def install_ndk_if_needed(env: "SConsEnvironment"): def configure(env: "SConsEnvironment"): # Validate arch. supported_arches = ["x86_32", "x86_64", "arm32", "arm64"] - if env["arch"] not in supported_arches: - print_error( - 'Unsupported CPU architecture "%s" for Android. Supported architectures are: %s.' - % (env["arch"], ", ".join(supported_arches)) - ) - sys.exit(255) + validate_arch(env["arch"], get_name(), supported_arches) if get_min_sdk_version(env["ndk_platform"]) < get_min_target_api(): print_warning( @@ -171,9 +167,7 @@ def configure(env: "SConsEnvironment"): env["AS"] = compiler_path + "/clang" env.Append( - CCFLAGS=( - "-fpic -ffunction-sections -funwind-tables -fstack-protector-strong -fvisibility=hidden -fno-strict-aliasing".split() - ) + CCFLAGS=("-fpic -ffunction-sections -funwind-tables -fstack-protector-strong -fvisibility=hidden".split()) ) if get_min_sdk_version(env["ndk_platform"]) >= 24: diff --git a/platform/android/doc_classes/EditorExportPlatformAndroid.xml b/platform/android/doc_classes/EditorExportPlatformAndroid.xml index 2fe5539e56..8c8bca2b7c 100644 --- a/platform/android/doc_classes/EditorExportPlatformAndroid.xml +++ b/platform/android/doc_classes/EditorExportPlatformAndroid.xml @@ -577,7 +577,7 @@ Allows an application to write to the user dictionary. </member> <member name="screen/immersive_mode" type="bool" setter="" getter=""> - If [code]true[/code], hides navigation and status bar. + If [code]true[/code], hides navigation and status bar. See [method DisplayServer.window_set_mode] to toggle it at runtime. </member> <member name="screen/support_large" type="bool" setter="" getter=""> Indicates whether the application supports larger screen form-factors. diff --git a/platform/android/export/export_plugin.h b/platform/android/export/export_plugin.h index 7e1d626486..15e80f824d 100644 --- a/platform/android/export/export_plugin.h +++ b/platform/android/export/export_plugin.h @@ -35,6 +35,7 @@ #include "godot_plugin_config.h" #endif // DISABLE_DEPRECATED +#include "core/io/image.h" #include "core/io/zip_io.h" #include "core/os/os.h" #include "editor/export/editor_export_platform.h" diff --git a/platform/android/java/lib/src/org/godotengine/godot/utils/PermissionsUtil.java b/platform/android/java/lib/src/org/godotengine/godot/utils/PermissionsUtil.java index 4e8e82a70a..c44a6dd472 100644 --- a/platform/android/java/lib/src/org/godotengine/godot/utils/PermissionsUtil.java +++ b/platform/android/java/lib/src/org/godotengine/godot/utils/PermissionsUtil.java @@ -108,7 +108,7 @@ public final class PermissionsUtil { } else { PermissionInfo permissionInfo = getPermissionInfo(activity, permission); int protectionLevel = Build.VERSION.SDK_INT >= Build.VERSION_CODES.P ? permissionInfo.getProtection() : permissionInfo.protectionLevel; - if (protectionLevel == PermissionInfo.PROTECTION_DANGEROUS && ContextCompat.checkSelfPermission(activity, permission) != PackageManager.PERMISSION_GRANTED) { + if ((protectionLevel & PermissionInfo.PROTECTION_DANGEROUS) == PermissionInfo.PROTECTION_DANGEROUS && ContextCompat.checkSelfPermission(activity, permission) != PackageManager.PERMISSION_GRANTED) { Log.d(TAG, "Requesting permission " + permission); requestedPermissions.add(permission); } @@ -174,7 +174,7 @@ public final class PermissionsUtil { try { PermissionInfo permissionInfo = getPermissionInfo(activity, permissionName); int protectionLevel = Build.VERSION.SDK_INT >= Build.VERSION_CODES.P ? permissionInfo.getProtection() : permissionInfo.protectionLevel; - if (protectionLevel == PermissionInfo.PROTECTION_DANGEROUS && ContextCompat.checkSelfPermission(activity, permissionName) != PackageManager.PERMISSION_GRANTED) { + if ((protectionLevel & PermissionInfo.PROTECTION_DANGEROUS) == PermissionInfo.PROTECTION_DANGEROUS && ContextCompat.checkSelfPermission(activity, permissionName) != PackageManager.PERMISSION_GRANTED) { activity.requestPermissions(new String[] { permissionName }, REQUEST_SINGLE_PERMISSION_REQ_CODE); return false; } @@ -259,7 +259,7 @@ public final class PermissionsUtil { } else { PermissionInfo permissionInfo = getPermissionInfo(context, manifestPermission); int protectionLevel = Build.VERSION.SDK_INT >= Build.VERSION_CODES.P ? permissionInfo.getProtection() : permissionInfo.protectionLevel; - if (protectionLevel == PermissionInfo.PROTECTION_DANGEROUS && ContextCompat.checkSelfPermission(context, manifestPermission) == PackageManager.PERMISSION_GRANTED) { + if ((protectionLevel & PermissionInfo.PROTECTION_DANGEROUS) == PermissionInfo.PROTECTION_DANGEROUS && ContextCompat.checkSelfPermission(context, manifestPermission) == PackageManager.PERMISSION_GRANTED) { grantedPermissions.add(manifestPermission); } } diff --git a/platform/ios/detect.py b/platform/ios/detect.py index 20a3a996bc..0f7f938852 100644 --- a/platform/ios/detect.py +++ b/platform/ios/detect.py @@ -3,6 +3,7 @@ import sys from typing import TYPE_CHECKING from methods import detect_darwin_sdk_path, print_error, print_warning +from platform_methods import validate_arch if TYPE_CHECKING: from SCons.Script.SConscript import SConsEnvironment @@ -60,12 +61,7 @@ def get_flags(): def configure(env: "SConsEnvironment"): # Validate arch. supported_arches = ["x86_64", "arm64"] - if env["arch"] not in supported_arches: - print_error( - 'Unsupported CPU architecture "%s" for iOS. Supported architectures are: %s.' - % (env["arch"], ", ".join(supported_arches)) - ) - sys.exit(255) + validate_arch(env["arch"], get_name(), supported_arches) ## LTO @@ -134,7 +130,7 @@ def configure(env: "SConsEnvironment"): elif env["arch"] == "arm64": env.Append( CCFLAGS=( - "-fobjc-arc -arch arm64 -fmessage-length=0 -fno-strict-aliasing" + "-fobjc-arc -arch arm64 -fmessage-length=0" " -fdiagnostics-print-source-range-info -fdiagnostics-show-category=id -fdiagnostics-parseable-fixits" " -fpascal-strings -fblocks -fvisibility=hidden -MMD -MT dependencies" " -isysroot $IOS_SDK_PATH".split() diff --git a/platform/linuxbsd/detect.py b/platform/linuxbsd/detect.py index a67434527c..2fd573da75 100644 --- a/platform/linuxbsd/detect.py +++ b/platform/linuxbsd/detect.py @@ -4,7 +4,7 @@ import sys from typing import TYPE_CHECKING from methods import get_compiler_version, print_error, print_warning, using_gcc -from platform_methods import detect_arch +from platform_methods import detect_arch, validate_arch if TYPE_CHECKING: from SCons.Script.SConscript import SConsEnvironment @@ -74,12 +74,7 @@ def get_flags(): def configure(env: "SConsEnvironment"): # Validate arch. supported_arches = ["x86_32", "x86_64", "arm32", "arm64", "rv64", "ppc32", "ppc64"] - if env["arch"] not in supported_arches: - print_error( - 'Unsupported CPU architecture "%s" for Linux / *BSD. Supported architectures are: %s.' - % (env["arch"], ", ".join(supported_arches)) - ) - sys.exit(255) + validate_arch(env["arch"], get_name(), supported_arches) ## Build type diff --git a/platform/linuxbsd/wayland/display_server_wayland.cpp b/platform/linuxbsd/wayland/display_server_wayland.cpp index 3fbbc263a0..71c721ca1d 100644 --- a/platform/linuxbsd/wayland/display_server_wayland.cpp +++ b/platform/linuxbsd/wayland/display_server_wayland.cpp @@ -1479,12 +1479,12 @@ DisplayServerWayland::DisplayServerWayland(const String &p_rendering_driver, Win driver_found = true; } } +#endif // GLES3_ENABLED if (!driver_found) { r_error = ERR_UNAVAILABLE; ERR_FAIL_MSG("Video driver not found."); } -#endif // GLES3_ENABLED cursor_set_shape(CURSOR_BUSY); diff --git a/platform/macos/detect.py b/platform/macos/detect.py index a8968b592e..cab91fd33c 100644 --- a/platform/macos/detect.py +++ b/platform/macos/detect.py @@ -3,7 +3,7 @@ import sys from typing import TYPE_CHECKING from methods import detect_darwin_sdk_path, get_compiler_version, is_vanilla_clang, print_error, print_warning -from platform_methods import detect_arch, detect_mvk +from platform_methods import detect_arch, detect_mvk, validate_arch if TYPE_CHECKING: from SCons.Script.SConscript import SConsEnvironment @@ -68,12 +68,7 @@ def get_flags(): def configure(env: "SConsEnvironment"): # Validate arch. supported_arches = ["x86_64", "arm64"] - if env["arch"] not in supported_arches: - print_error( - 'Unsupported CPU architecture "%s" for macOS. Supported architectures are: %s.' - % (env["arch"], ", ".join(supported_arches)) - ) - sys.exit(255) + validate_arch(env["arch"], get_name(), supported_arches) ## Build type diff --git a/platform/macos/doc_classes/EditorExportPlatformMacOS.xml b/platform/macos/doc_classes/EditorExportPlatformMacOS.xml index c261c252ba..dcaba9bbd2 100644 --- a/platform/macos/doc_classes/EditorExportPlatformMacOS.xml +++ b/platform/macos/doc_classes/EditorExportPlatformMacOS.xml @@ -75,6 +75,13 @@ <member name="codesign/custom_options" type="PackedStringArray" setter="" getter=""> Array of the additional command line arguments passed to the code signing tool. </member> + <member name="codesign/entitlements/additional" type="String" setter="" getter=""> + Additional data added to the root [code]<dict>[/code] section of the [url=https://developer.apple.com/documentation/bundleresources/entitlements].entitlements[/url] file. The value should be an XML section with pairs of key-value elements, e.g.: + [codeblock lang=text] + <key>key_name</key> + <string>value</string> + [/codeblock] + </member> <member name="codesign/entitlements/address_book" type="bool" setter="" getter=""> Enable to allow access to contacts in the user's address book, if it's enabled you should also provide usage message in the [member privacy/address_book_usage_description] option. See [url=https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_security_personal-information_addressbook]com.apple.security.personal-information.addressbook[/url]. </member> diff --git a/platform/macos/export/export_plugin.cpp b/platform/macos/export/export_plugin.cpp index c99e9cdd0c..7887e5d0ab 100644 --- a/platform/macos/export/export_plugin.cpp +++ b/platform/macos/export/export_plugin.cpp @@ -327,7 +327,7 @@ bool EditorExportPlatformMacOS::get_export_option_visibility(const EditorExportP } bool advanced_options_enabled = p_preset->are_advanced_options_enabled(); - if (p_option.begins_with("privacy")) { + if (p_option.begins_with("privacy") || p_option == "codesign/entitlements/additional") { return advanced_options_enabled; } } @@ -501,6 +501,7 @@ void EditorExportPlatformMacOS::get_export_options(List<ExportOption> *r_options r_options->push_back(ExportOption(PropertyInfo(Variant::INT, "codesign/entitlements/app_sandbox/files_movies", PROPERTY_HINT_ENUM, "No,Read-only,Read-write"), 0)); r_options->push_back(ExportOption(PropertyInfo(Variant::INT, "codesign/entitlements/app_sandbox/files_user_selected", PROPERTY_HINT_ENUM, "No,Read-only,Read-write"), 0)); r_options->push_back(ExportOption(PropertyInfo(Variant::ARRAY, "codesign/entitlements/app_sandbox/helper_executables", PROPERTY_HINT_ARRAY_TYPE, itos(Variant::STRING) + "/" + itos(PROPERTY_HINT_GLOBAL_FILE) + ":"), Array())); + r_options->push_back(ExportOption(PropertyInfo(Variant::STRING, "codesign/entitlements/additional", PROPERTY_HINT_MULTILINE_TEXT), "")); r_options->push_back(ExportOption(PropertyInfo(Variant::PACKED_STRING_ARRAY, "codesign/custom_options"), PackedStringArray())); #ifdef MACOS_ENABLED @@ -2126,6 +2127,11 @@ Error EditorExportPlatformMacOS::export_project(const Ref<EditorExportPreset> &p } } + const String &additional_entitlements = p_preset->get("codesign/entitlements/additional"); + if (!additional_entitlements.is_empty()) { + ent_f->store_line(additional_entitlements); + } + ent_f->store_line("</dict>"); ent_f->store_line("</plist>"); } else { @@ -2288,6 +2294,14 @@ Error EditorExportPlatformMacOS::export_project(const Ref<EditorExportPreset> &p } } + if (FileAccess::exists(ent_path)) { + print_verbose("entitlements:\n" + FileAccess::get_file_as_string(ent_path)); + } + + if (FileAccess::exists(hlp_ent_path)) { + print_verbose("helper entitlements:\n" + FileAccess::get_file_as_string(hlp_ent_path)); + } + // Clean up temporary entitlements files. if (FileAccess::exists(hlp_ent_path)) { DirAccess::remove_file_or_error(hlp_ent_path); diff --git a/platform/macos/export/export_plugin.h b/platform/macos/export/export_plugin.h index d88d347359..4ded2f3301 100644 --- a/platform/macos/export/export_plugin.h +++ b/platform/macos/export/export_plugin.h @@ -34,6 +34,7 @@ #include "core/config/project_settings.h" #include "core/io/dir_access.h" #include "core/io/file_access.h" +#include "core/io/image.h" #include "core/io/marshalls.h" #include "core/io/resource_saver.h" #include "core/os/os.h" diff --git a/platform/web/detect.py b/platform/web/detect.py index 735e2eaf4f..26bbbccffa 100644 --- a/platform/web/detect.py +++ b/platform/web/detect.py @@ -14,6 +14,7 @@ from emscripten_helpers import ( from SCons.Util import WhereIs from methods import get_compiler_version, print_error, print_warning +from platform_methods import validate_arch if TYPE_CHECKING: from SCons.Script.SConscript import SConsEnvironment @@ -86,12 +87,7 @@ def get_flags(): def configure(env: "SConsEnvironment"): # Validate arch. supported_arches = ["wasm32"] - if env["arch"] not in supported_arches: - print_error( - 'Unsupported CPU architecture "%s" for Web. Supported architectures are: %s.' - % (env["arch"], ", ".join(supported_arches)) - ) - sys.exit(255) + validate_arch(env["arch"], get_name(), supported_arches) try: env["initial_memory"] = int(env["initial_memory"]) diff --git a/platform/web/js/libs/library_godot_fetch.js b/platform/web/js/libs/library_godot_fetch.js index 00616bc1a5..eeb3978256 100644 --- a/platform/web/js/libs/library_godot_fetch.js +++ b/platform/web/js/libs/library_godot_fetch.js @@ -59,7 +59,12 @@ const GodotFetch = { }); obj.status = response.status; obj.response = response; - obj.reader = response.body.getReader(); + // `body` can be null per spec (for example, in cases where the request method is HEAD). + // As of the time of writing, Chromium (127.0.6533.72) does not follow the spec but Firefox (131.0.3) does. + // See godotengine/godot#76825 for more information. + // See Chromium revert (of the change to follow the spec): + // https://chromium.googlesource.com/chromium/src/+/135354b7bdb554cd03c913af7c90aceead03c4d4 + obj.reader = response.body?.getReader(); obj.chunked = chunked; }, @@ -121,6 +126,10 @@ const GodotFetch = { } obj.reading = true; obj.reader.read().then(GodotFetch.onread.bind(null, id)).catch(GodotFetch.onerror.bind(null, id)); + } else if (obj.reader == null && obj.response.body == null) { + // Emulate a stream closure to maintain the request lifecycle. + obj.reading = true; + GodotFetch.onread(id, { value: undefined, done: true }); } }, }, @@ -159,7 +168,10 @@ const GodotFetch = { if (!obj.response) { return 0; } - if (obj.reader) { + // If the reader is nullish, but there is no body, and the request is not marked as done, + // the same status should be returned as though the request is currently being read + // so that the proper lifecycle closure can be handled in `read()`. + if (obj.reader || (obj.response.body == null && !obj.done)) { return 1; } if (obj.done) { diff --git a/platform/windows/detect.py b/platform/windows/detect.py index 0ee52a09a7..ddcd29adc9 100644 --- a/platform/windows/detect.py +++ b/platform/windows/detect.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING import methods from methods import print_error, print_warning -from platform_methods import detect_arch +from platform_methods import detect_arch, validate_arch if TYPE_CHECKING: from SCons.Script.SConscript import SConsEnvironment @@ -483,9 +483,7 @@ def configure_msvc(env: "SConsEnvironment", vcvars_msvc_config): else: print_warning("Missing environment variable: WindowsSdkDir") - if int(env["target_win_version"], 16) < 0x0601: - print_error("`target_win_version` should be 0x0601 or higher (Windows 7).") - sys.exit(255) + validate_win_version(env) env.AppendUnique( CPPDEFINES=[ @@ -549,15 +547,7 @@ def configure_msvc(env: "SConsEnvironment", vcvars_msvc_config): LIBS += ["vulkan"] if env["d3d12"]: - # Check whether we have d3d12 dependencies installed. - if not os.path.exists(env["mesa_libs"]): - print_error( - "The Direct3D 12 rendering driver requires dependencies to be installed.\n" - "You can install them by running `python misc\\scripts\\install_d3d12_sdk_windows.py`.\n" - "See the documentation for more information:\n\t" - "https://docs.godotengine.org/en/latest/contributing/development/compiling/compiling_for_windows.html" - ) - sys.exit(255) + check_d3d12_installed(env) env.AppendUnique(CPPDEFINES=["D3D12_ENABLED", "RD_ENABLED"]) LIBS += ["dxgi", "dxguid"] @@ -820,9 +810,7 @@ def configure_mingw(env: "SConsEnvironment"): ## Compile flags - if int(env["target_win_version"], 16) < 0x0601: - print_error("`target_win_version` should be 0x0601 or higher (Windows 7).") - sys.exit(255) + validate_win_version(env) if not env["use_llvm"]: env.Append(CCFLAGS=["-mwindows"]) @@ -900,15 +888,7 @@ def configure_mingw(env: "SConsEnvironment"): env.Append(LIBS=["vulkan"]) if env["d3d12"]: - # Check whether we have d3d12 dependencies installed. - if not os.path.exists(env["mesa_libs"]): - print_error( - "The Direct3D 12 rendering driver requires dependencies to be installed.\n" - "You can install them by running `python misc\\scripts\\install_d3d12_sdk_windows.py`.\n" - "See the documentation for more information:\n\t" - "https://docs.godotengine.org/en/latest/contributing/development/compiling/compiling_for_windows.html" - ) - sys.exit(255) + check_d3d12_installed(env) env.AppendUnique(CPPDEFINES=["D3D12_ENABLED", "RD_ENABLED"]) env.Append(LIBS=["dxgi", "dxguid"]) @@ -951,12 +931,7 @@ def configure_mingw(env: "SConsEnvironment"): def configure(env: "SConsEnvironment"): # Validate arch. supported_arches = ["x86_32", "x86_64", "arm32", "arm64"] - if env["arch"] not in supported_arches: - print_error( - 'Unsupported CPU architecture "%s" for Windows. Supported architectures are: %s.' - % (env["arch"], ", ".join(supported_arches)) - ) - sys.exit(255) + validate_arch(env["arch"], get_name(), supported_arches) # At this point the env has been set up with basic tools/compilers. env.Prepend(CPPPATH=["#platform/windows"]) @@ -984,3 +959,20 @@ def configure(env: "SConsEnvironment"): else: # MinGW configure_mingw(env) + + +def check_d3d12_installed(env): + if not os.path.exists(env["mesa_libs"]): + print_error( + "The Direct3D 12 rendering driver requires dependencies to be installed.\n" + "You can install them by running `python misc\\scripts\\install_d3d12_sdk_windows.py`.\n" + "See the documentation for more information:\n\t" + "https://docs.godotengine.org/en/latest/contributing/development/compiling/compiling_for_windows.html" + ) + sys.exit(255) + + +def validate_win_version(env): + if int(env["target_win_version"], 16) < 0x0601: + print_error("`target_win_version` should be 0x0601 or higher (Windows 7).") + sys.exit(255) diff --git a/platform/windows/display_server_windows.cpp b/platform/windows/display_server_windows.cpp index ffa3840181..c427596829 100644 --- a/platform/windows/display_server_windows.cpp +++ b/platform/windows/display_server_windows.cpp @@ -67,6 +67,18 @@ #define DWMWA_USE_IMMERSIVE_DARK_MODE_BEFORE_20H1 19 #endif +#ifndef DWMWA_WINDOW_CORNER_PREFERENCE +#define DWMWA_WINDOW_CORNER_PREFERENCE 33 +#endif + +#ifndef DWMWCP_DEFAULT +#define DWMWCP_DEFAULT 0 +#endif + +#ifndef DWMWCP_DONOTROUND +#define DWMWCP_DONOTROUND 1 +#endif + #define WM_INDICATOR_CALLBACK_MESSAGE (WM_USER + 1) #if defined(__GNUC__) @@ -1483,6 +1495,9 @@ DisplayServer::WindowID DisplayServerWindows::create_sub_window(WindowMode p_mod if (p_flags & WINDOW_FLAG_ALWAYS_ON_TOP_BIT && p_mode != WINDOW_MODE_FULLSCREEN && p_mode != WINDOW_MODE_EXCLUSIVE_FULLSCREEN) { wd.always_on_top = true; } + if (p_flags & WINDOW_FLAG_SHARP_CORNERS_BIT) { + wd.sharp_corners = true; + } if (p_flags & WINDOW_FLAG_NO_FOCUS_BIT) { wd.no_focus = true; } @@ -2297,6 +2312,12 @@ void DisplayServerWindows::window_set_flag(WindowFlags p_flag, bool p_enabled, W wd.always_on_top = p_enabled; _update_window_style(p_window); } break; + case WINDOW_FLAG_SHARP_CORNERS: { + wd.sharp_corners = p_enabled; + DWORD value = wd.sharp_corners ? DWMWCP_DONOTROUND : DWMWCP_DEFAULT; + ::DwmSetWindowAttribute(wd.hWnd, DWMWA_WINDOW_CORNER_PREFERENCE, &value, sizeof(value)); + _update_window_style(p_window); + } break; case WINDOW_FLAG_TRANSPARENT: { if (p_enabled) { // Enable per-pixel alpha. @@ -3994,6 +4015,10 @@ LRESULT DisplayServerWindows::WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARA native_menu->_menu_activate(HMENU(lParam), (int)wParam); } break; case WM_CREATE: { + { + DWORD value = windows[window_id].sharp_corners ? DWMWCP_DONOTROUND : DWMWCP_DEFAULT; + ::DwmSetWindowAttribute(windows[window_id].hWnd, DWMWA_WINDOW_CORNER_PREFERENCE, &value, sizeof(value)); + } if (is_dark_mode_supported() && dark_title_available) { BOOL value = is_dark_mode(); @@ -5645,6 +5670,12 @@ DisplayServer::WindowID DisplayServerWindows::_create_window(WindowMode p_mode, wd_transient_parent->transient_children.insert(id); } + wd.sharp_corners = p_flags & WINDOW_FLAG_SHARP_CORNERS_BIT; + { + DWORD value = wd.sharp_corners ? DWMWCP_DONOTROUND : DWMWCP_DEFAULT; + ::DwmSetWindowAttribute(wd.hWnd, DWMWA_WINDOW_CORNER_PREFERENCE, &value, sizeof(value)); + } + if (is_dark_mode_supported() && dark_title_available) { BOOL value = is_dark_mode(); ::DwmSetWindowAttribute(wd.hWnd, use_legacy_dark_mode_before_20H1 ? DWMWA_USE_IMMERSIVE_DARK_MODE_BEFORE_20H1 : DWMWA_USE_IMMERSIVE_DARK_MODE, &value, sizeof(value)); @@ -6369,7 +6400,10 @@ DisplayServerWindows::DisplayServerWindows(const String &p_rendering_driver, Win } WindowID main_window = _create_window(p_mode, p_vsync_mode, p_flags, Rect2i(window_position, p_resolution), false, INVALID_WINDOW_ID); - ERR_FAIL_COND_MSG(main_window == INVALID_WINDOW_ID, "Failed to create main window."); + if (main_window == INVALID_WINDOW_ID) { + r_error = ERR_UNAVAILABLE; + ERR_FAIL_MSG("Failed to create main window."); + } joypad = new JoypadWindows(&windows[MAIN_WINDOW_ID].hWnd); diff --git a/platform/windows/display_server_windows.h b/platform/windows/display_server_windows.h index 7d6a3e96a6..fc72e05b1d 100644 --- a/platform/windows/display_server_windows.h +++ b/platform/windows/display_server_windows.h @@ -38,6 +38,7 @@ #include "core/config/project_settings.h" #include "core/input/input.h" +#include "core/io/image.h" #include "core/os/os.h" #include "drivers/unix/ip_unix.h" #include "drivers/wasapi/audio_driver_wasapi.h" @@ -473,6 +474,7 @@ class DisplayServerWindows : public DisplayServer { bool exclusive = false; bool context_created = false; bool mpass = false; + bool sharp_corners = false; // Used to transfer data between events using timer. WPARAM saved_wparam; diff --git a/platform/windows/native_menu_windows.h b/platform/windows/native_menu_windows.h index 235a4b332a..09e4640b40 100644 --- a/platform/windows/native_menu_windows.h +++ b/platform/windows/native_menu_windows.h @@ -31,6 +31,7 @@ #ifndef NATIVE_MENU_WINDOWS_H #define NATIVE_MENU_WINDOWS_H +#include "core/io/image.h" #include "core/templates/hash_map.h" #include "core/templates/rid_owner.h" #include "servers/display/native_menu.h" diff --git a/platform_methods.py b/platform_methods.py index 2b157da22b..2c4eb0d1dd 100644 --- a/platform_methods.py +++ b/platform_methods.py @@ -1,6 +1,7 @@ import os import platform import subprocess +import sys import methods @@ -40,6 +41,15 @@ def detect_arch(): return "x86_64" +def validate_arch(arch, platform_name, supported_arches): + if arch not in supported_arches: + methods.print_error( + 'Unsupported CPU architecture "%s" for %s. Supported architectures are: %s.' + % (arch, platform_name, ", ".join(supported_arches)) + ) + sys.exit(255) + + def get_build_version(short): import version diff --git a/scene/2d/mesh_instance_2d.cpp b/scene/2d/mesh_instance_2d.cpp index ae45d431fe..1031a67343 100644 --- a/scene/2d/mesh_instance_2d.cpp +++ b/scene/2d/mesh_instance_2d.cpp @@ -54,7 +54,20 @@ void MeshInstance2D::_bind_methods() { } void MeshInstance2D::set_mesh(const Ref<Mesh> &p_mesh) { + if (mesh == p_mesh) { + return; + } + + if (mesh.is_valid()) { + mesh->disconnect_changed(callable_mp((CanvasItem *)this, &CanvasItem::queue_redraw)); + } + mesh = p_mesh; + + if (mesh.is_valid()) { + mesh->connect_changed(callable_mp((CanvasItem *)this, &CanvasItem::queue_redraw)); + } + queue_redraw(); } diff --git a/scene/2d/path_2d.cpp b/scene/2d/path_2d.cpp index 5813ab02e3..7d645ba448 100644 --- a/scene/2d/path_2d.cpp +++ b/scene/2d/path_2d.cpp @@ -167,17 +167,16 @@ void Path2D::_curve_changed() { return; } - if (!Engine::get_singleton()->is_editor_hint() && !get_tree()->is_debugging_paths_hint()) { - return; - } - - queue_redraw(); for (int i = 0; i < get_child_count(); i++) { PathFollow2D *follow = Object::cast_to<PathFollow2D>(get_child(i)); if (follow) { follow->path_changed(); } } + + if (Engine::get_singleton()->is_editor_hint() || get_tree()->is_debugging_paths_hint()) { + queue_redraw(); + } } void Path2D::set_curve(const Ref<Curve2D> &p_curve) { diff --git a/scene/3d/cpu_particles_3d.cpp b/scene/3d/cpu_particles_3d.cpp index acbc443a93..5b84bf903f 100644 --- a/scene/3d/cpu_particles_3d.cpp +++ b/scene/3d/cpu_particles_3d.cpp @@ -1479,6 +1479,7 @@ void CPUParticles3D::_bind_methods() { ClassDB::bind_method(D_METHOD("get_mesh"), &CPUParticles3D::get_mesh); ClassDB::bind_method(D_METHOD("restart"), &CPUParticles3D::restart); + ClassDB::bind_method(D_METHOD("capture_aabb"), &CPUParticles3D::capture_aabb); ADD_PROPERTY(PropertyInfo(Variant::BOOL, "emitting"), "set_emitting", "is_emitting"); ADD_PROPERTY(PropertyInfo(Variant::INT, "amount", PROPERTY_HINT_RANGE, "1,1000000,1,exp"), "set_amount", "get_amount"); diff --git a/scene/3d/light_3d.cpp b/scene/3d/light_3d.cpp index 7b70986adc..2d18e62b10 100644 --- a/scene/3d/light_3d.cpp +++ b/scene/3d/light_3d.cpp @@ -146,6 +146,15 @@ bool Light3D::get_shadow_reverse_cull_face() const { return reverse_cull; } +void Light3D::set_shadow_caster_mask(uint32_t p_caster_mask) { + shadow_caster_mask = p_caster_mask; + RS::get_singleton()->light_set_shadow_caster_mask(light, shadow_caster_mask); +} + +uint32_t Light3D::get_shadow_caster_mask() const { + return shadow_caster_mask; +} + AABB Light3D::get_aabb() const { if (type == RenderingServer::LIGHT_DIRECTIONAL) { return AABB(Vector3(-1, -1, -1), Vector3(2, 2, 2)); @@ -300,7 +309,7 @@ bool Light3D::is_editor_only() const { } void Light3D::_validate_property(PropertyInfo &p_property) const { - if (!shadow && (p_property.name == "shadow_bias" || p_property.name == "shadow_normal_bias" || p_property.name == "shadow_reverse_cull_face" || p_property.name == "shadow_transmittance_bias" || p_property.name == "shadow_opacity" || p_property.name == "shadow_blur" || p_property.name == "distance_fade_shadow")) { + if (!shadow && (p_property.name == "shadow_bias" || p_property.name == "shadow_normal_bias" || p_property.name == "shadow_reverse_cull_face" || p_property.name == "shadow_transmittance_bias" || p_property.name == "shadow_opacity" || p_property.name == "shadow_blur" || p_property.name == "distance_fade_shadow" || p_property.name == "shadow_caster_mask")) { p_property.usage = PROPERTY_USAGE_NO_EDITOR; } @@ -354,6 +363,9 @@ void Light3D::_bind_methods() { ClassDB::bind_method(D_METHOD("set_shadow_reverse_cull_face", "enable"), &Light3D::set_shadow_reverse_cull_face); ClassDB::bind_method(D_METHOD("get_shadow_reverse_cull_face"), &Light3D::get_shadow_reverse_cull_face); + ClassDB::bind_method(D_METHOD("set_shadow_caster_mask", "caster_mask"), &Light3D::set_shadow_caster_mask); + ClassDB::bind_method(D_METHOD("get_shadow_caster_mask"), &Light3D::get_shadow_caster_mask); + ClassDB::bind_method(D_METHOD("set_bake_mode", "bake_mode"), &Light3D::set_bake_mode); ClassDB::bind_method(D_METHOD("get_bake_mode"), &Light3D::get_bake_mode); @@ -388,6 +400,7 @@ void Light3D::_bind_methods() { ADD_PROPERTYI(PropertyInfo(Variant::FLOAT, "shadow_transmittance_bias", PROPERTY_HINT_RANGE, "-16,16,0.001"), "set_param", "get_param", PARAM_TRANSMITTANCE_BIAS); ADD_PROPERTYI(PropertyInfo(Variant::FLOAT, "shadow_opacity", PROPERTY_HINT_RANGE, "0,1,0.01"), "set_param", "get_param", PARAM_SHADOW_OPACITY); ADD_PROPERTYI(PropertyInfo(Variant::FLOAT, "shadow_blur", PROPERTY_HINT_RANGE, "0,10,0.001"), "set_param", "get_param", PARAM_SHADOW_BLUR); + ADD_PROPERTY(PropertyInfo(Variant::INT, "shadow_caster_mask", PROPERTY_HINT_LAYERS_3D_RENDER), "set_shadow_caster_mask", "get_shadow_caster_mask"); ADD_GROUP("Distance Fade", "distance_fade_"); ADD_PROPERTY(PropertyInfo(Variant::BOOL, "distance_fade_enabled"), "set_enable_distance_fade", "is_distance_fade_enabled"); diff --git a/scene/3d/light_3d.h b/scene/3d/light_3d.h index d6eca8d8b6..5f549469c6 100644 --- a/scene/3d/light_3d.h +++ b/scene/3d/light_3d.h @@ -75,6 +75,7 @@ private: bool negative = false; bool reverse_cull = false; uint32_t cull_mask = 0; + uint32_t shadow_caster_mask = 0xFFFFFFFF; bool distance_fade_enabled = false; real_t distance_fade_begin = 40.0; real_t distance_fade_shadow = 50.0; @@ -136,6 +137,9 @@ public: void set_shadow_reverse_cull_face(bool p_enable); bool get_shadow_reverse_cull_face() const; + void set_shadow_caster_mask(uint32_t p_caster_mask); + uint32_t get_shadow_caster_mask() const; + void set_bake_mode(BakeMode p_mode); BakeMode get_bake_mode() const; diff --git a/scene/3d/lightmap_gi.cpp b/scene/3d/lightmap_gi.cpp index a1f32fccbe..012ef7860d 100644 --- a/scene/3d/lightmap_gi.cpp +++ b/scene/3d/lightmap_gi.cpp @@ -1602,19 +1602,16 @@ Ref<CameraAttributes> LightmapGI::get_camera_attributes() const { PackedStringArray LightmapGI::get_configuration_warnings() const { PackedStringArray warnings = VisualInstance3D::get_configuration_warnings(); -#ifndef MODULE_LIGHTMAPPER_RD_ENABLED -#if defined(ANDROID_ENABLED) || defined(IOS_ENABLED) - warnings.push_back(vformat(RTR("Lightmaps cannot be baked on %s. Rendering existing baked lightmaps will still work."), OS::get_singleton()->get_name())); -#else - warnings.push_back(RTR("Lightmaps cannot be baked, as the `lightmapper_rd` module was disabled at compile-time. Rendering existing baked lightmaps will still work.")); -#endif - return warnings; -#endif - +#ifdef MODULE_LIGHTMAPPER_RD_ENABLED if (!DisplayServer::get_singleton()->can_create_rendering_device()) { warnings.push_back(vformat(RTR("Lightmaps can only be baked from a GPU that supports the RenderingDevice backends.\nYour GPU (%s) does not support RenderingDevice, as it does not support Vulkan, Direct3D 12, or Metal.\nLightmap baking will not be available on this device, although rendering existing baked lightmaps will work."), RenderingServer::get_singleton()->get_video_adapter_name())); return warnings; } +#elif defined(ANDROID_ENABLED) || defined(IOS_ENABLED) + warnings.push_back(vformat(RTR("Lightmaps cannot be baked on %s. Rendering existing baked lightmaps will still work."), OS::get_singleton()->get_name())); +#else + warnings.push_back(RTR("Lightmaps cannot be baked, as the `lightmapper_rd` module was disabled at compile-time. Rendering existing baked lightmaps will still work.")); +#endif return warnings; } diff --git a/scene/3d/mesh_instance_3d.cpp b/scene/3d/mesh_instance_3d.cpp index e8166802f8..f551cb401c 100644 --- a/scene/3d/mesh_instance_3d.cpp +++ b/scene/3d/mesh_instance_3d.cpp @@ -332,8 +332,8 @@ void MeshInstance3D::create_multiple_convex_collisions(const Ref<MeshConvexDecom void MeshInstance3D::_notification(int p_what) { switch (p_what) { - case NOTIFICATION_READY: { - callable_mp(this, &MeshInstance3D::_resolve_skeleton_path).call_deferred(); + case NOTIFICATION_ENTER_TREE: { + _resolve_skeleton_path(); } break; case NOTIFICATION_TRANSLATION_CHANGED: { if (mesh.is_valid()) { diff --git a/scene/3d/skeleton_3d.cpp b/scene/3d/skeleton_3d.cpp index f778fccf09..c1c992588b 100644 --- a/scene/3d/skeleton_3d.cpp +++ b/scene/3d/skeleton_3d.cpp @@ -276,6 +276,8 @@ void Skeleton3D::_update_process_order() { concatenated_bone_names = StringName(); + _update_bones_nested_set(); + process_order_dirty = false; emit_signal("bone_list_changed"); @@ -324,6 +326,8 @@ void Skeleton3D::_notification(int p_what) { #ifndef DISABLE_DEPRECATED setup_simulator(); #endif // _DISABLE_DEPRECATED + update_flags = UPDATE_FLAG_POSE; + _notification(NOTIFICATION_UPDATE_SKELETON); } break; case NOTIFICATION_UPDATE_SKELETON: { // Update bone transforms to apply unprocessed poses. @@ -334,6 +338,8 @@ void Skeleton3D::_notification(int p_what) { Bone *bonesptr = bones.ptr(); int len = bones.size(); + thread_local LocalVector<bool> bone_global_pose_dirty_backup; + // Process modifiers. _find_modifiers(); if (!modifiers.is_empty()) { @@ -341,6 +347,9 @@ void Skeleton3D::_notification(int p_what) { for (uint32_t i = 0; i < bones.size(); i++) { bones_backup[i].save(bones[i]); } + // Store dirty flags for global bone poses. + bone_global_pose_dirty_backup = bone_global_pose_dirty; + _process_modifiers(); } @@ -415,6 +424,8 @@ void Skeleton3D::_notification(int p_what) { for (uint32_t i = 0; i < bones.size(); i++) { bones_backup[i].restore(bones[i]); } + // Restore dirty flags for global bone poses. + bone_global_pose_dirty = bone_global_pose_dirty_backup; } updating = false; @@ -457,10 +468,111 @@ void Skeleton3D::_make_modifiers_dirty() { _update_deferred(UPDATE_FLAG_MODIFIER); } +void Skeleton3D::_update_bones_nested_set() { + nested_set_offset_to_bone_index.resize(bones.size()); + bone_global_pose_dirty.resize(bones.size()); + _make_bone_global_poses_dirty(); + + int offset = 0; + for (int bone : parentless_bones) { + offset += _update_bone_nested_set(bone, offset); + } +} + +int Skeleton3D::_update_bone_nested_set(int p_bone, int p_offset) { + Bone &bone = bones[p_bone]; + int offset = p_offset + 1; + int span = 1; + + for (int child_bone : bone.child_bones) { + int subspan = _update_bone_nested_set(child_bone, offset); + offset += subspan; + span += subspan; + } + + nested_set_offset_to_bone_index[p_offset] = p_bone; + bone.nested_set_offset = p_offset; + bone.nested_set_span = span; + + return span; +} + +void Skeleton3D::_make_bone_global_poses_dirty() { + for (uint32_t i = 0; i < bone_global_pose_dirty.size(); i++) { + bone_global_pose_dirty[i] = true; + } +} + +void Skeleton3D::_make_bone_global_pose_subtree_dirty(int p_bone) { + if (process_order_dirty) { + return; + } + + const Bone &bone = bones[p_bone]; + int span_offset = bone.nested_set_offset; + // No need to make subtree dirty when bone is already dirty. + if (bone_global_pose_dirty[span_offset]) { + return; + } + + // Make global poses of subtree dirty. + int span_end = span_offset + bone.nested_set_span; + for (int i = span_offset; i < span_end; i++) { + bone_global_pose_dirty[i] = true; + } +} + +void Skeleton3D::_update_bone_global_pose(int p_bone) { + const int bone_size = bones.size(); + ERR_FAIL_INDEX(p_bone, bone_size); + + _update_process_order(); + + // Global pose is already calculated. + int nested_set_offset = bones[p_bone].nested_set_offset; + if (!bone_global_pose_dirty[nested_set_offset]) { + return; + } + + thread_local LocalVector<int> bone_list; + bone_list.clear(); + Transform3D global_pose; + + // Create list of parent bones for which the global pose needs to be recalculated. + for (int bone = p_bone; bone >= 0; bone = bones[bone].parent) { + int offset = bones[bone].nested_set_offset; + // Stop searching when global pose is not dirty. + if (!bone_global_pose_dirty[offset]) { + global_pose = bones[bone].global_pose; + break; + } + + bone_list.push_back(bone); + } + + // Calculate global poses for all parent bones and the current bone. + for (int i = bone_list.size() - 1; i >= 0; i--) { + int bone_idx = bone_list[i]; + Bone &bone = bones[bone_idx]; + bool bone_enabled = bone.enabled && !show_rest_only; + Transform3D bone_pose = bone_enabled ? get_bone_pose(bone_idx) : get_bone_rest(bone_idx); + + global_pose *= bone_pose; +#ifndef DISABLE_DEPRECATED + if (bone.global_pose_override_amount >= CMP_EPSILON) { + global_pose = global_pose.interpolate_with(bone.global_pose_override, bone.global_pose_override_amount); + } +#endif // _DISABLE_DEPRECATED + + bone.global_pose = global_pose; + bone_global_pose_dirty[bone.nested_set_offset] = false; + } +} + Transform3D Skeleton3D::get_bone_global_pose(int p_bone) const { const int bone_size = bones.size(); ERR_FAIL_INDEX_V(p_bone, bone_size, Transform3D()); - const_cast<Skeleton3D *>(this)->force_update_all_dirty_bones(); + const_cast<Skeleton3D *>(this)->_update_bone_global_pose(p_bone); return bones[p_bone].global_pose; } @@ -672,6 +784,7 @@ void Skeleton3D::set_bone_rest(int p_bone, const Transform3D &p_rest) { bones[p_bone].rest = p_rest; rest_dirty = true; _make_dirty(); + _make_bone_global_pose_subtree_dirty(p_bone); } Transform3D Skeleton3D::get_bone_rest(int p_bone) const { const int bone_size = bones.size(); @@ -695,6 +808,7 @@ void Skeleton3D::set_bone_enabled(int p_bone, bool p_enabled) { bones[p_bone].enabled = p_enabled; emit_signal(SceneStringName(bone_enabled_changed), p_bone); _make_dirty(); + _make_bone_global_pose_subtree_dirty(p_bone); } bool Skeleton3D::is_bone_enabled(int p_bone) const { @@ -707,6 +821,7 @@ void Skeleton3D::set_show_rest_only(bool p_enabled) { show_rest_only = p_enabled; emit_signal(SceneStringName(show_rest_only_changed)); _make_dirty(); + _make_bone_global_poses_dirty(); } bool Skeleton3D::is_show_rest_only() const { @@ -733,6 +848,7 @@ void Skeleton3D::set_bone_pose(int p_bone, const Transform3D &p_pose) { bones[p_bone].pose_cache_dirty = true; if (is_inside_tree()) { _make_dirty(); + _make_bone_global_pose_subtree_dirty(p_bone); } } @@ -744,6 +860,7 @@ void Skeleton3D::set_bone_pose_position(int p_bone, const Vector3 &p_position) { bones[p_bone].pose_cache_dirty = true; if (is_inside_tree()) { _make_dirty(); + _make_bone_global_pose_subtree_dirty(p_bone); } } void Skeleton3D::set_bone_pose_rotation(int p_bone, const Quaternion &p_rotation) { @@ -754,6 +871,7 @@ void Skeleton3D::set_bone_pose_rotation(int p_bone, const Quaternion &p_rotation bones[p_bone].pose_cache_dirty = true; if (is_inside_tree()) { _make_dirty(); + _make_bone_global_pose_subtree_dirty(p_bone); } } void Skeleton3D::set_bone_pose_scale(int p_bone, const Vector3 &p_scale) { @@ -764,6 +882,7 @@ void Skeleton3D::set_bone_pose_scale(int p_bone, const Vector3 &p_scale) { bones[p_bone].pose_cache_dirty = true; if (is_inside_tree()) { _make_dirty(); + _make_bone_global_pose_subtree_dirty(p_bone); } } @@ -938,14 +1057,14 @@ void Skeleton3D::force_update_bone_children_transforms(int p_bone_idx) { ERR_FAIL_INDEX(p_bone_idx, bone_size); Bone *bonesptr = bones.ptr(); - thread_local LocalVector<int> bones_to_process; - bones_to_process.clear(); - bones_to_process.push_back(p_bone_idx); - uint32_t index = 0; - while (index < bones_to_process.size()) { - int current_bone_idx = bones_to_process[index]; + // Loop through nested set. + for (int offset = 0; offset < bone_size; offset++) { + if (!bone_global_pose_dirty[offset]) { + continue; + } + int current_bone_idx = nested_set_offset_to_bone_index[offset]; Bone &b = bonesptr[current_bone_idx]; bool bone_enabled = b.enabled && !show_rest_only; @@ -992,13 +1111,7 @@ void Skeleton3D::force_update_bone_children_transforms(int p_bone_idx) { } #endif // _DISABLE_DEPRECATED - // Add the bone's children to the list of bones to be processed. - int child_bone_size = b.child_bones.size(); - for (int i = 0; i < child_bone_size; i++) { - bones_to_process.push_back(b.child_bones[i]); - } - - index++; + bone_global_pose_dirty[offset] = false; } } @@ -1176,6 +1289,7 @@ void Skeleton3D::clear_bones_global_pose_override() { bones[i].global_pose_override_reset = true; } _make_dirty(); + _make_bone_global_poses_dirty(); } void Skeleton3D::set_bone_global_pose_override(int p_bone, const Transform3D &p_pose, real_t p_amount, bool p_persistent) { @@ -1185,6 +1299,7 @@ void Skeleton3D::set_bone_global_pose_override(int p_bone, const Transform3D &p_ bones[p_bone].global_pose_override = p_pose; bones[p_bone].global_pose_override_reset = !p_persistent; _make_dirty(); + _make_bone_global_pose_subtree_dirty(p_bone); } Transform3D Skeleton3D::get_bone_global_pose_override(int p_bone) const { diff --git a/scene/3d/skeleton_3d.h b/scene/3d/skeleton_3d.h index 7213dc580c..ecfe095f1d 100644 --- a/scene/3d/skeleton_3d.h +++ b/scene/3d/skeleton_3d.h @@ -31,6 +31,7 @@ #ifndef SKELETON_3D_H #define SKELETON_3D_H +#include "core/templates/a_hash_map.h" #include "scene/3d/node_3d.h" #include "scene/resources/3d/skin.h" @@ -107,6 +108,8 @@ private: Quaternion pose_rotation; Vector3 pose_scale = Vector3(1, 1, 1); Transform3D global_pose; + int nested_set_offset = 0; // Offset in nested set of bone hierarchy. + int nested_set_span = 0; // Subtree span in nested set of bone hierarchy. void update_pose_cache() { if (pose_cache_dirty) { @@ -157,7 +160,7 @@ private: bool process_order_dirty = false; Vector<int> parentless_bones; - HashMap<String, int> name_to_bone_index; + AHashMap<String, int> name_to_bone_index; mutable StringName concatenated_bone_names = StringName(); void _update_bone_names() const; @@ -183,6 +186,15 @@ private: void _make_modifiers_dirty(); LocalVector<BonePoseBackup> bones_backup; + // Global bone pose calculation. + LocalVector<int> nested_set_offset_to_bone_index; // Map from Bone::nested_set_offset to bone index. + LocalVector<bool> bone_global_pose_dirty; // Indexable with Bone::nested_set_offset. + void _update_bones_nested_set(); + int _update_bone_nested_set(int p_bone, int p_offset); + void _make_bone_global_poses_dirty(); + void _make_bone_global_pose_subtree_dirty(int p_bone); + void _update_bone_global_pose(int p_bone); + #ifndef DISABLE_DEPRECATED void _add_bone_bind_compat_88791(const String &p_name); diff --git a/scene/animation/animation_mixer.cpp b/scene/animation/animation_mixer.cpp index eb8bc8c382..0fa6810d23 100644 --- a/scene/animation/animation_mixer.cpp +++ b/scene/animation/animation_mixer.cpp @@ -563,6 +563,7 @@ void AnimationMixer::_clear_caches() { memdelete(K.value); } track_cache.clear(); + animation_track_num_to_track_cashe.clear(); cache_valid = false; capture_cache.clear(); @@ -922,6 +923,27 @@ bool AnimationMixer::_update_caches() { idx++; } + for (KeyValue<Animation::TypeHash, TrackCache *> &K : track_cache) { + K.value->blend_idx = track_map[K.value->path]; + } + + animation_track_num_to_track_cashe.clear(); + LocalVector<TrackCache *> track_num_to_track_cashe; + for (const StringName &E : sname_list) { + Ref<Animation> anim = get_animation(E); + const Vector<Animation::Track *> tracks = anim->get_tracks(); + track_num_to_track_cashe.resize(tracks.size()); + for (int i = 0; i < tracks.size(); i++) { + TrackCache **track_ptr = track_cache.getptr(tracks[i]->thash); + if (track_ptr == nullptr) { + track_num_to_track_cashe[i] = nullptr; + } else { + track_num_to_track_cashe[i] = *track_ptr; + } + } + animation_track_num_to_track_cashe.insert(anim, track_num_to_track_cashe); + } + track_count = idx; cache_valid = true; @@ -946,7 +968,7 @@ void AnimationMixer::_process_animation(double p_delta, bool p_update_only) { clear_animation_instances(); } -Variant AnimationMixer::_post_process_key_value(const Ref<Animation> &p_anim, int p_track, Variant p_value, ObjectID p_object_id, int p_object_sub_idx) { +Variant AnimationMixer::_post_process_key_value(const Ref<Animation> &p_anim, int p_track, Variant &p_value, ObjectID p_object_id, int p_object_sub_idx) { #ifndef _3D_DISABLED switch (p_anim->track_get_type(p_track)) { case Animation::TYPE_POSITION_3D: { @@ -1033,7 +1055,7 @@ void AnimationMixer::_blend_init() { } } -bool AnimationMixer::_blend_pre_process(double p_delta, int p_track_count, const HashMap<NodePath, int> &p_track_map) { +bool AnimationMixer::_blend_pre_process(double p_delta, int p_track_count, const AHashMap<NodePath, int> &p_track_map) { return true; } @@ -1084,26 +1106,30 @@ void AnimationMixer::_blend_calc_total_weight() { real_t weight = ai.playback_info.weight; const real_t *track_weights_ptr = ai.playback_info.track_weights.ptr(); int track_weights_count = ai.playback_info.track_weights.size(); - static LocalVector<Animation::TypeHash> processed_hashes; + ERR_CONTINUE_EDMSG(!animation_track_num_to_track_cashe.has(a), "No animation in cache."); + LocalVector<TrackCache *> &track_num_to_track_cashe = animation_track_num_to_track_cashe[a]; + thread_local HashSet<Animation::TypeHash, HashHasher> processed_hashes; processed_hashes.clear(); const Vector<Animation::Track *> tracks = a->get_tracks(); - for (const Animation::Track *animation_track : tracks) { + Animation::Track *const *tracks_ptr = tracks.ptr(); + int count = tracks.size(); + for (int i = 0; i < count; i++) { + Animation::Track *animation_track = tracks_ptr[i]; if (!animation_track->enabled) { continue; } Animation::TypeHash thash = animation_track->thash; - TrackCache **track_ptr = track_cache.getptr(thash); - if (track_ptr == nullptr || processed_hashes.has(thash)) { + TrackCache *track = track_num_to_track_cashe[i]; + if (track == nullptr || processed_hashes.has(thash)) { // No path, but avoid error spamming. // Or, there is the case different track type with same path; These can be distinguished by hash. So don't add the weight doubly. continue; } - TrackCache *track = *track_ptr; - int blend_idx = track_map[track->path]; + int blend_idx = track->blend_idx; ERR_CONTINUE(blend_idx < 0 || blend_idx >= track_count); real_t blend = blend_idx < track_weights_count ? track_weights_ptr[blend_idx] * weight : weight; track->total_weight += blend; - processed_hashes.push_back(thash); + processed_hashes.insert(thash); } } } @@ -1130,6 +1156,8 @@ void AnimationMixer::_blend_process(double p_delta, bool p_update_only) { #ifndef _3D_DISABLED bool calc_root = !seeked || is_external_seeking; #endif // _3D_DISABLED + ERR_CONTINUE_EDMSG(!animation_track_num_to_track_cashe.has(a), "No animation in cache."); + LocalVector<TrackCache *> &track_num_to_track_cashe = animation_track_num_to_track_cashe[a]; const Vector<Animation::Track *> tracks = a->get_tracks(); Animation::Track *const *tracks_ptr = tracks.ptr(); real_t a_length = a->get_length(); @@ -1139,15 +1167,11 @@ void AnimationMixer::_blend_process(double p_delta, bool p_update_only) { if (!animation_track->enabled) { continue; } - Animation::TypeHash thash = animation_track->thash; - TrackCache **track_ptr = track_cache.getptr(thash); - if (track_ptr == nullptr) { + TrackCache *track = track_num_to_track_cashe[i]; + if (track == nullptr) { continue; // No path, but avoid error spamming. } - TrackCache *track = *track_ptr; - int *blend_idx_ptr = track_map.getptr(track->path); - ERR_CONTINUE(blend_idx_ptr == nullptr); - int blend_idx = *blend_idx_ptr; + int blend_idx = track->blend_idx; ERR_CONTINUE(blend_idx < 0 || blend_idx >= track_count); real_t blend = blend_idx < track_weights_count ? track_weights_ptr[blend_idx] * weight : weight; if (!deterministic) { @@ -1581,7 +1605,7 @@ void AnimationMixer::_blend_process(double p_delta, bool p_update_only) { track_info.loop = a->get_loop_mode() != Animation::LOOP_NONE; track_info.backward = backward; track_info.use_blend = a->audio_track_is_use_blend(i); - HashMap<int, PlayingAudioStreamInfo> &map = track_info.stream_info; + AHashMap<int, PlayingAudioStreamInfo> &map = track_info.stream_info; // Main process to fire key is started from here. if (p_update_only) { @@ -1850,7 +1874,7 @@ void AnimationMixer::_blend_apply() { PlayingAudioTrackInfo &track_info = L.value; float db = Math::linear_to_db(track_info.use_blend ? track_info.volume : 1.0); LocalVector<int> erase_streams; - HashMap<int, PlayingAudioStreamInfo> &map = track_info.stream_info; + AHashMap<int, PlayingAudioStreamInfo> &map = track_info.stream_info; for (const KeyValue<int, PlayingAudioStreamInfo> &M : map) { PlayingAudioStreamInfo pasi = M.value; @@ -2134,7 +2158,7 @@ void AnimationMixer::restore(const Ref<AnimatedValuesBackup> &p_backup) { ERR_FAIL_COND(p_backup.is_null()); track_cache = p_backup->get_data(); _blend_apply(); - track_cache = HashMap<Animation::TypeHash, AnimationMixer::TrackCache *>(); + track_cache = AHashMap<Animation::TypeHash, AnimationMixer::TrackCache *, HashHasher>(); cache_valid = false; } @@ -2370,7 +2394,7 @@ AnimationMixer::AnimationMixer() { AnimationMixer::~AnimationMixer() { } -void AnimatedValuesBackup::set_data(const HashMap<Animation::TypeHash, AnimationMixer::TrackCache *> p_data) { +void AnimatedValuesBackup::set_data(const AHashMap<Animation::TypeHash, AnimationMixer::TrackCache *, HashHasher> p_data) { clear_data(); for (const KeyValue<Animation::TypeHash, AnimationMixer::TrackCache *> &E : p_data) { @@ -2383,7 +2407,7 @@ void AnimatedValuesBackup::set_data(const HashMap<Animation::TypeHash, Animation } } -HashMap<Animation::TypeHash, AnimationMixer::TrackCache *> AnimatedValuesBackup::get_data() const { +AHashMap<Animation::TypeHash, AnimationMixer::TrackCache *, HashHasher> AnimatedValuesBackup::get_data() const { HashMap<Animation::TypeHash, AnimationMixer::TrackCache *> ret; for (const KeyValue<Animation::TypeHash, AnimationMixer::TrackCache *> &E : data) { AnimationMixer::TrackCache *track = get_cache_copy(E.value); diff --git a/scene/animation/animation_mixer.h b/scene/animation/animation_mixer.h index 27c9a00a9c..1906146c56 100644 --- a/scene/animation/animation_mixer.h +++ b/scene/animation/animation_mixer.h @@ -31,6 +31,7 @@ #ifndef ANIMATION_MIXER_H #define ANIMATION_MIXER_H +#include "core/templates/a_hash_map.h" #include "scene/animation/tween.h" #include "scene/main/node.h" #include "scene/resources/animation.h" @@ -102,7 +103,7 @@ public: protected: /* ---- Data lists ---- */ LocalVector<AnimationLibraryData> animation_libraries; - HashMap<StringName, AnimationData> animation_set; // HashMap<Library name + Animation name, AnimationData> + AHashMap<StringName, AnimationData> animation_set; // HashMap<Library name + Animation name, AnimationData> TypedArray<StringName> _get_animation_library_list() const; Vector<String> _get_animation_list() const { @@ -148,6 +149,7 @@ protected: uint64_t setup_pass = 0; Animation::TrackType type = Animation::TrackType::TYPE_ANIMATION; NodePath path; + int blend_idx = -1; ObjectID object_id; real_t total_weight = 0.0; @@ -269,7 +271,7 @@ protected: // Audio track information for mixng and ending. struct PlayingAudioTrackInfo { - HashMap<int, PlayingAudioStreamInfo> stream_info; + AHashMap<int, PlayingAudioStreamInfo> stream_info; double length = 0.0; double time = 0.0; real_t volume = 0.0; @@ -308,7 +310,8 @@ protected: }; RootMotionCache root_motion_cache; - HashMap<Animation::TypeHash, TrackCache *> track_cache; + AHashMap<Animation::TypeHash, TrackCache *, HashHasher> track_cache; + AHashMap<Ref<Animation>, LocalVector<TrackCache *>> animation_track_num_to_track_cashe; HashSet<TrackCache *> playing_caches; Vector<Node *> playing_audio_stream_players; @@ -324,7 +327,7 @@ protected: /* ---- Blending processor ---- */ LocalVector<AnimationInstance> animation_instances; - HashMap<NodePath, int> track_map; + AHashMap<NodePath, int> track_map; int track_count = 0; bool deterministic = false; @@ -359,12 +362,12 @@ protected: virtual void _process_animation(double p_delta, bool p_update_only = false); // For post process with retrieved key value during blending. - virtual Variant _post_process_key_value(const Ref<Animation> &p_anim, int p_track, Variant p_value, ObjectID p_object_id, int p_object_sub_idx = -1); + virtual Variant _post_process_key_value(const Ref<Animation> &p_anim, int p_track, Variant &p_value, ObjectID p_object_id, int p_object_sub_idx = -1); Variant post_process_key_value(const Ref<Animation> &p_anim, int p_track, Variant p_value, ObjectID p_object_id, int p_object_sub_idx = -1); GDVIRTUAL5RC(Variant, _post_process_key_value, Ref<Animation>, int, Variant, ObjectID, int); void _blend_init(); - virtual bool _blend_pre_process(double p_delta, int p_track_count, const HashMap<NodePath, int> &p_track_map); + virtual bool _blend_pre_process(double p_delta, int p_track_count, const AHashMap<NodePath, int> &p_track_map); virtual void _blend_capture(double p_delta); void _blend_calc_total_weight(); // For undeterministic blending. void _blend_process(double p_delta, bool p_update_only = false); @@ -485,11 +488,11 @@ public: class AnimatedValuesBackup : public RefCounted { GDCLASS(AnimatedValuesBackup, RefCounted); - HashMap<Animation::TypeHash, AnimationMixer::TrackCache *> data; + AHashMap<Animation::TypeHash, AnimationMixer::TrackCache *, HashHasher> data; public: - void set_data(const HashMap<Animation::TypeHash, AnimationMixer::TrackCache *> p_data); - HashMap<Animation::TypeHash, AnimationMixer::TrackCache *> get_data() const; + void set_data(const AHashMap<Animation::TypeHash, AnimationMixer::TrackCache *, HashHasher> p_data); + AHashMap<Animation::TypeHash, AnimationMixer::TrackCache *, HashHasher> get_data() const; void clear_data(); AnimationMixer::TrackCache *get_cache_copy(AnimationMixer::TrackCache *p_cache) const; diff --git a/scene/animation/animation_node_state_machine.cpp b/scene/animation/animation_node_state_machine.cpp index c3c5399a6b..5cc204100c 100644 --- a/scene/animation/animation_node_state_machine.cpp +++ b/scene/animation/animation_node_state_machine.cpp @@ -1619,7 +1619,7 @@ AnimationNode::NodeTimeInfo AnimationNodeStateMachine::_process(const AnimationM playback_new = playback_new->duplicate(); // Don't process original when testing. } - return playback_new->process(node_state.base_path, this, p_playback_info, p_test_only); + return playback_new->process(node_state.get_base_path(), this, p_playback_info, p_test_only); } String AnimationNodeStateMachine::get_caption() const { diff --git a/scene/animation/animation_player.cpp b/scene/animation/animation_player.cpp index 8a2ca47920..b3a75a75a0 100644 --- a/scene/animation/animation_player.cpp +++ b/scene/animation/animation_player.cpp @@ -133,7 +133,7 @@ void AnimationPlayer::_get_property_list(List<PropertyInfo> *p_list) const { List<PropertyInfo> anim_names; for (const KeyValue<StringName, AnimationData> &E : animation_set) { - HashMap<StringName, StringName>::ConstIterator F = animation_next_set.find(E.key); + AHashMap<StringName, StringName>::ConstIterator F = animation_next_set.find(E.key); if (F && F->value != StringName()) { anim_names.push_back(PropertyInfo(Variant::STRING, "next/" + String(E.key), PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR | PROPERTY_USAGE_INTERNAL)); } @@ -299,7 +299,7 @@ void AnimationPlayer::_blend_playback_data(double p_delta, bool p_started) { } } -bool AnimationPlayer::_blend_pre_process(double p_delta, int p_track_count, const HashMap<NodePath, int> &p_track_map) { +bool AnimationPlayer::_blend_pre_process(double p_delta, int p_track_count, const AHashMap<NodePath, int> &p_track_map) { if (!playback.current.from) { _set_process(false); return false; diff --git a/scene/animation/animation_player.h b/scene/animation/animation_player.h index 3223e2522d..06b3eecb89 100644 --- a/scene/animation/animation_player.h +++ b/scene/animation/animation_player.h @@ -52,7 +52,7 @@ public: #endif // DISABLE_DEPRECATED private: - HashMap<StringName, StringName> animation_next_set; // For auto advance. + AHashMap<StringName, StringName> animation_next_set; // For auto advance. float speed_scale = 1.0; double default_blend_time = 0.0; @@ -138,7 +138,7 @@ protected: static void _bind_methods(); // Make animation instances. - virtual bool _blend_pre_process(double p_delta, int p_track_count, const HashMap<NodePath, int> &p_track_map) override; + virtual bool _blend_pre_process(double p_delta, int p_track_count, const AHashMap<NodePath, int> &p_track_map) override; virtual void _blend_capture(double p_delta) override; virtual void _blend_post_process() override; diff --git a/scene/animation/animation_tree.cpp b/scene/animation/animation_tree.cpp index 19080e61de..d676e2acf4 100644 --- a/scene/animation/animation_tree.cpp +++ b/scene/animation/animation_tree.cpp @@ -75,20 +75,34 @@ void AnimationNode::set_parameter(const StringName &p_name, const Variant &p_val if (process_state->is_testing) { return; } + + const AHashMap<StringName, int>::Iterator it = property_cache.find(p_name); + if (it) { + process_state->tree->property_map.get_by_index(it->value).value.first = p_value; + return; + } + ERR_FAIL_COND(!process_state->tree->property_parent_map.has(node_state.base_path)); ERR_FAIL_COND(!process_state->tree->property_parent_map[node_state.base_path].has(p_name)); StringName path = process_state->tree->property_parent_map[node_state.base_path][p_name]; - - process_state->tree->property_map[path].first = p_value; + int idx = process_state->tree->property_map.get_index(path); + property_cache.insert_new(p_name, idx); + process_state->tree->property_map.get_by_index(idx).value.first = p_value; } Variant AnimationNode::get_parameter(const StringName &p_name) const { ERR_FAIL_NULL_V(process_state, Variant()); + const AHashMap<StringName, int>::ConstIterator it = property_cache.find(p_name); + if (it) { + return process_state->tree->property_map.get_by_index(it->value).value.first; + } ERR_FAIL_COND_V(!process_state->tree->property_parent_map.has(node_state.base_path), Variant()); ERR_FAIL_COND_V(!process_state->tree->property_parent_map[node_state.base_path].has(p_name), Variant()); StringName path = process_state->tree->property_parent_map[node_state.base_path][p_name]; - return process_state->tree->property_map[path].first; + int idx = process_state->tree->property_map.get_index(path); + property_cache.insert_new(p_name, idx); + return process_state->tree->property_map.get_by_index(idx).value.first; } void AnimationNode::set_node_time_info(const NodeTimeInfo &p_node_time_info) { @@ -203,7 +217,7 @@ AnimationNode::NodeTimeInfo AnimationNode::_blend_node(Ref<AnimationNode> p_node } for (const KeyValue<NodePath, bool> &E : filter) { - const HashMap<NodePath, int> &map = *process_state->track_map; + const AHashMap<NodePath, int> &map = *process_state->track_map; if (!map.has(E.key)) { continue; } @@ -292,7 +306,7 @@ AnimationNode::NodeTimeInfo AnimationNode::_blend_node(Ref<AnimationNode> p_node // This process, which depends on p_sync is needed to process sync correctly in the case of // that a synced AnimationNodeSync exists under the un-synced AnimationNodeSync. - p_node->node_state.base_path = new_path; + p_node->set_node_state_base_path(new_path); p_node->node_state.parent = new_parent; if (!p_playback_info.seeked && !p_sync && !any_valid) { p_playback_info.delta = 0.0; @@ -603,7 +617,7 @@ Ref<AnimationRootNode> AnimationTree::get_root_animation_node() const { return root_animation_node; } -bool AnimationTree::_blend_pre_process(double p_delta, int p_track_count, const HashMap<NodePath, int> &p_track_map) { +bool AnimationTree::_blend_pre_process(double p_delta, int p_track_count, const AHashMap<NodePath, int> &p_track_map) { _update_properties(); // If properties need updating, update them. if (!root_animation_node.is_valid()) { @@ -627,7 +641,7 @@ bool AnimationTree::_blend_pre_process(double p_delta, int p_track_count, const for (int i = 0; i < p_track_count; i++) { src_blendsw[i] = 1.0; // By default all go to 1 for the root input. } - root_animation_node->node_state.base_path = SNAME(Animation::PARAMETERS_BASE_PATH.ascii().get_data()); + root_animation_node->set_node_state_base_path(SNAME(Animation::PARAMETERS_BASE_PATH.ascii().get_data())); root_animation_node->node_state.parent = nullptr; } @@ -732,7 +746,7 @@ void AnimationTree::_animation_node_removed(const ObjectID &p_oid, const StringN void AnimationTree::_update_properties_for_node(const String &p_base_path, Ref<AnimationNode> p_node) { ERR_FAIL_COND(p_node.is_null()); if (!property_parent_map.has(p_base_path)) { - property_parent_map[p_base_path] = HashMap<StringName, StringName>(); + property_parent_map[p_base_path] = AHashMap<StringName, StringName>(); } if (!property_reference_map.has(p_node->get_instance_id())) { property_reference_map[p_node->get_instance_id()] = p_base_path; @@ -767,7 +781,7 @@ void AnimationTree::_update_properties_for_node(const String &p_base_path, Ref<A pinfo.name = p_base_path + key; properties.push_back(pinfo); } - + p_node->make_cache_dirty(); List<AnimationNode::ChildNode> children; p_node->get_child_nodes(&children); diff --git a/scene/animation/animation_tree.h b/scene/animation/animation_tree.h index d4b7bf31c9..8ee80f29ee 100644 --- a/scene/animation/animation_tree.h +++ b/scene/animation/animation_tree.h @@ -60,7 +60,7 @@ public: bool closable = false; Vector<Input> inputs; - HashMap<NodePath, bool> filter; + AHashMap<NodePath, bool> filter; bool filter_enabled = false; // To propagate information from upstream for use in estimation of playback progress. @@ -97,22 +97,57 @@ public: // Temporary state for blending process which needs to be stored in each AnimationNodes. struct NodeState { + friend AnimationNode; + + private: StringName base_path; + + public: AnimationNode *parent = nullptr; Vector<StringName> connections; Vector<real_t> track_weights; + + const StringName get_base_path() const { + return base_path; + } + } node_state; // Temporary state for blending process which needs to be started in the AnimationTree, pass through the AnimationNodes, and then return to the AnimationTree. struct ProcessState { AnimationTree *tree = nullptr; - const HashMap<NodePath, int> *track_map; // TODO: Is there a better way to manage filter/tracks? + const AHashMap<NodePath, int> *track_map; // TODO: Is there a better way to manage filter/tracks? bool is_testing = false; bool valid = false; String invalid_reasons; uint64_t last_pass = 0; } *process_state = nullptr; +private: + mutable AHashMap<StringName, int> property_cache; + +public: + void set_node_state_base_path(const StringName p_base_path) { + if (p_base_path != node_state.base_path) { + node_state.base_path = p_base_path; + make_cache_dirty(); + } + } + + void set_node_state_base_path(const String p_base_path) { + if (p_base_path != node_state.base_path) { + node_state.base_path = p_base_path; + make_cache_dirty(); + } + } + + const StringName get_node_state_base_path() const { + return node_state.get_base_path(); + } + + void make_cache_dirty() { + property_cache.clear(); + } Array _get_filters() const; void _set_filters(const Array &p_filters); friend class AnimationNodeBlendTree; @@ -250,9 +285,9 @@ private: friend class AnimationNode; List<PropertyInfo> properties; - HashMap<StringName, HashMap<StringName, StringName>> property_parent_map; - HashMap<ObjectID, StringName> property_reference_map; - HashMap<StringName, Pair<Variant, bool>> property_map; // Property value and read-only flag. + AHashMap<StringName, AHashMap<StringName, StringName>> property_parent_map; + AHashMap<ObjectID, StringName> property_reference_map; + AHashMap<StringName, Pair<Variant, bool>> property_map; // Property value and read-only flag. bool properties_dirty = true; @@ -286,7 +321,7 @@ private: virtual void _set_active(bool p_active) override; // Make animation instances. - virtual bool _blend_pre_process(double p_delta, int p_track_count, const HashMap<NodePath, int> &p_track_map) override; + virtual bool _blend_pre_process(double p_delta, int p_track_count, const AHashMap<NodePath, int> &p_track_map) override; #ifndef DISABLE_DEPRECATED void _set_process_callback_bind_compat_80813(AnimationProcessCallback p_mode); diff --git a/scene/debugger/scene_debugger.cpp b/scene/debugger/scene_debugger.cpp index 07c32eef13..22e5238fae 100644 --- a/scene/debugger/scene_debugger.cpp +++ b/scene/debugger/scene_debugger.cpp @@ -535,7 +535,21 @@ SceneDebuggerTree::SceneDebuggerTree(Node *p_root) { } } } - nodes.push_back(RemoteNode(count, n->get_name(), n->get_class(), n->get_instance_id(), n->get_scene_file_path(), view_flags)); + + String class_name; + ScriptInstance *script_instance = n->get_script_instance(); + if (script_instance) { + Ref<Script> script = script_instance->get_script(); + if (script.is_valid()) { + class_name = script->get_global_name(); + + if (class_name.is_empty()) { + // If there is no class_name in this script we just take the script path. + class_name = script->get_path(); + } + } + } + nodes.push_back(RemoteNode(count, n->get_name(), class_name.is_empty() ? n->get_class() : class_name, n->get_instance_id(), n->get_scene_file_path(), view_flags)); } } diff --git a/scene/gui/button.cpp b/scene/gui/button.cpp index 0a5f2ec6c7..9df4bfde92 100644 --- a/scene/gui/button.cpp +++ b/scene/gui/button.cpp @@ -296,19 +296,12 @@ void Button::_notification(int p_what) { } } break; case DRAW_HOVER_PRESSED: { - // Edge case for CheckButton and CheckBox. - if (has_theme_stylebox("hover_pressed")) { - if (has_theme_color(SNAME("font_hover_pressed_color"))) { - font_color = theme_cache.font_hover_pressed_color; - } - if (has_theme_color(SNAME("icon_hover_pressed_color"))) { - icon_modulate_color = theme_cache.icon_hover_pressed_color; - } - - break; + font_color = theme_cache.font_hover_pressed_color; + if (has_theme_color(SNAME("icon_hover_pressed_color"))) { + icon_modulate_color = theme_cache.icon_hover_pressed_color; } - } - [[fallthrough]]; + + } break; case DRAW_PRESSED: { if (has_theme_color(SNAME("font_pressed_color"))) { font_color = theme_cache.font_pressed_color; diff --git a/scene/gui/code_edit.cpp b/scene/gui/code_edit.cpp index 635228670d..7346c9dcd3 100644 --- a/scene/gui/code_edit.cpp +++ b/scene/gui/code_edit.cpp @@ -277,15 +277,17 @@ void CodeEdit::gui_input(const Ref<InputEvent> &p_gui_input) { code_completion_force_item_center = -1; queue_redraw(); } - code_completion_pan_offset += 1.0f; + code_completion_pan_offset = 0; } else if (code_completion_pan_offset >= +1.0) { if (code_completion_current_selected < code_completion_options.size() - 1) { code_completion_current_selected++; code_completion_force_item_center = -1; queue_redraw(); } - code_completion_pan_offset -= 1.0f; + code_completion_pan_offset = 0; } + accept_event(); + return; } Ref<InputEventMouseButton> mb = p_gui_input; diff --git a/scene/gui/color_picker.cpp b/scene/gui/color_picker.cpp index fe4c91cb56..5532176b2d 100644 --- a/scene/gui/color_picker.cpp +++ b/scene/gui/color_picker.cpp @@ -245,21 +245,7 @@ void ColorPicker::finish_shaders() { } void ColorPicker::set_focus_on_line_edit() { - bool has_hardware_keyboard = true; -#if defined(ANDROID_ENABLED) || defined(IOS_ENABLED) - has_hardware_keyboard = DisplayServer::get_singleton()->has_hardware_keyboard(); -#endif // ANDROID_ENABLED || IOS_ENABLED - if (has_hardware_keyboard) { - callable_mp((Control *)c_text, &Control::grab_focus).call_deferred(); - } else { - // A hack to avoid showing the virtual keyboard when the ColorPicker window popups and - // no hardware keyboard is detected on Android and IOS. - // This will only focus the LineEdit without editing, the virtual keyboard will only be visible when - // we touch the LineEdit to enter edit mode. - callable_mp(c_text, &LineEdit::set_editable).call_deferred(false); - callable_mp((Control *)c_text, &Control::grab_focus).call_deferred(); - callable_mp(c_text, &LineEdit::set_editable).call_deferred(true); - } + callable_mp((Control *)c_text, &Control::grab_focus).call_deferred(); } void ColorPicker::_update_controls() { @@ -1571,19 +1557,16 @@ void ColorPicker::_pick_button_pressed_legacy() { picker_texture_rect->set_default_cursor_shape(CURSOR_POINTING_HAND); picker_texture_rect->connect(SceneStringName(gui_input), callable_mp(this, &ColorPicker::_picker_texture_input)); - picker_preview = memnew(Panel); - picker_preview->set_anchors_preset(Control::PRESET_CENTER_TOP); - picker_preview->set_mouse_filter(MOUSE_FILTER_IGNORE); - picker_window->add_child(picker_preview); - picker_preview_label = memnew(Label); - picker_preview->set_anchors_preset(Control::PRESET_CENTER_TOP); + picker_preview_label->set_anchors_preset(Control::PRESET_CENTER_TOP); picker_preview_label->set_text(ETR("Color Picking active")); - picker_preview->add_child(picker_preview_label); - picker_preview_style_box = (Ref<StyleBoxFlat>)memnew(StyleBoxFlat); + picker_preview_style_box.instantiate(); picker_preview_style_box->set_bg_color(Color(1.0, 1.0, 1.0)); - picker_preview->add_theme_style_override(SceneStringName(panel), picker_preview_style_box); + picker_preview_style_box->set_content_margin_all(4.0); + picker_preview_label->add_theme_style_override(CoreStringName(normal), picker_preview_style_box); + + picker_window->add_child(picker_preview_label); } Rect2i screen_rect; @@ -1625,7 +1608,7 @@ void ColorPicker::_pick_button_pressed_legacy() { } picker_window->set_size(screen_rect.size); - picker_preview->set_size(screen_rect.size / 10.0); // 10% of size in each axis. + picker_preview_label->set_custom_minimum_size(screen_rect.size / 10); // 10% of size in each axis. picker_window->popup(); } @@ -1648,7 +1631,7 @@ void ColorPicker::_picker_texture_input(const Ref<InputEvent> &p_event) { Vector2 ofs = mev->get_position(); picker_color = img->get_pixel(ofs.x, ofs.y); picker_preview_style_box->set_bg_color(picker_color); - picker_preview_label->set_self_modulate(picker_color.get_luminance() < 0.5 ? Color(1.0f, 1.0f, 1.0f) : Color(0.0f, 0.0f, 0.0f)); + picker_preview_label->add_theme_color_override(SceneStringName(font_color), picker_color.get_luminance() < 0.5 ? Color(1.0f, 1.0f, 1.0f) : Color(0.0f, 0.0f, 0.0f)); } } } @@ -2103,7 +2086,9 @@ void ColorPickerButton::pressed() { float v_offset = show_above ? -minsize.y : get_size().y; popup->set_position(get_screen_position() + Vector2(h_offset, v_offset)); popup->popup(); - picker->set_focus_on_line_edit(); + if (DisplayServer::get_singleton()->has_hardware_keyboard()) { + picker->set_focus_on_line_edit(); + } } void ColorPickerButton::_notification(int p_what) { diff --git a/scene/gui/color_picker.h b/scene/gui/color_picker.h index ad028584b1..59540d9ace 100644 --- a/scene/gui/color_picker.h +++ b/scene/gui/color_picker.h @@ -130,7 +130,6 @@ private: Popup *picker_window = nullptr; // Legacy color picking. TextureRect *picker_texture_rect = nullptr; - Panel *picker_preview = nullptr; Label *picker_preview_label = nullptr; Ref<StyleBoxFlat> picker_preview_style_box; Color picker_color; diff --git a/scene/gui/control.cpp b/scene/gui/control.cpp index c1d197ea9b..5052deb65a 100644 --- a/scene/gui/control.cpp +++ b/scene/gui/control.cpp @@ -3042,7 +3042,7 @@ void Control::set_layout_direction(Control::LayoutDirection p_direction) { if (data.layout_dir == p_direction) { return; } - ERR_FAIL_INDEX((int)p_direction, 4); + ERR_FAIL_INDEX(p_direction, LAYOUT_DIRECTION_MAX); data.layout_dir = p_direction; @@ -3115,13 +3115,20 @@ bool Control::is_layout_rtl() const { String locale = TranslationServer::get_singleton()->get_tool_locale(); const_cast<Control *>(this)->data.is_rtl = TS->is_locale_right_to_left(locale); } - } else if (data.layout_dir == LAYOUT_DIRECTION_LOCALE) { + } else if (data.layout_dir == LAYOUT_DIRECTION_APPLICATION_LOCALE) { if (GLOBAL_GET(SNAME("internationalization/rendering/force_right_to_left_layout_direction"))) { const_cast<Control *>(this)->data.is_rtl = true; } else { String locale = TranslationServer::get_singleton()->get_tool_locale(); const_cast<Control *>(this)->data.is_rtl = TS->is_locale_right_to_left(locale); } + } else if (data.layout_dir == LAYOUT_DIRECTION_SYSTEM_LOCALE) { + if (GLOBAL_GET(SNAME("internationalization/rendering/force_right_to_left_layout_direction"))) { + const_cast<Control *>(this)->data.is_rtl = true; + } else { + String locale = OS::get_singleton()->get_locale(); + const_cast<Control *>(this)->data.is_rtl = TS->is_locale_right_to_left(locale); + } } else { const_cast<Control *>(this)->data.is_rtl = (data.layout_dir == LAYOUT_DIRECTION_RTL); } @@ -3574,7 +3581,7 @@ void Control::_bind_methods() { ADD_GROUP("Layout", ""); ADD_PROPERTY(PropertyInfo(Variant::BOOL, "clip_contents"), "set_clip_contents", "is_clipping_contents"); ADD_PROPERTY(PropertyInfo(Variant::VECTOR2, "custom_minimum_size", PROPERTY_HINT_NONE, "suffix:px"), "set_custom_minimum_size", "get_custom_minimum_size"); - ADD_PROPERTY(PropertyInfo(Variant::INT, "layout_direction", PROPERTY_HINT_ENUM, "Inherited,Based on Locale,Left-to-Right,Right-to-Left"), "set_layout_direction", "get_layout_direction"); + ADD_PROPERTY(PropertyInfo(Variant::INT, "layout_direction", PROPERTY_HINT_ENUM, "Inherited,Based on Application Locale,Left-to-Right,Right-to-Left,Based on System Locale"), "set_layout_direction", "get_layout_direction"); ADD_PROPERTY(PropertyInfo(Variant::INT, "layout_mode", PROPERTY_HINT_ENUM, "Position,Anchors,Container,Uncontrolled", PROPERTY_USAGE_DEFAULT | PROPERTY_USAGE_INTERNAL), "_set_layout_mode", "_get_layout_mode"); ADD_PROPERTY_DEFAULT("layout_mode", LayoutMode::LAYOUT_MODE_POSITION); @@ -3723,9 +3730,14 @@ void Control::_bind_methods() { BIND_ENUM_CONSTANT(ANCHOR_END); BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_INHERITED); - BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_LOCALE); + BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_APPLICATION_LOCALE); BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_LTR); BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_RTL); + BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_SYSTEM_LOCALE); + BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_MAX); +#ifndef DISABLE_DEPRECATED + BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_LOCALE); +#endif // DISABLE_DEPRECATED BIND_ENUM_CONSTANT(TEXT_DIRECTION_INHERITED); BIND_ENUM_CONSTANT(TEXT_DIRECTION_AUTO); diff --git a/scene/gui/control.h b/scene/gui/control.h index 6e1621ce58..6cabf10971 100644 --- a/scene/gui/control.h +++ b/scene/gui/control.h @@ -140,9 +140,14 @@ public: enum LayoutDirection { LAYOUT_DIRECTION_INHERITED, - LAYOUT_DIRECTION_LOCALE, + LAYOUT_DIRECTION_APPLICATION_LOCALE, LAYOUT_DIRECTION_LTR, - LAYOUT_DIRECTION_RTL + LAYOUT_DIRECTION_RTL, + LAYOUT_DIRECTION_SYSTEM_LOCALE, + LAYOUT_DIRECTION_MAX, +#ifndef DISABLE_DEPRECATED + LAYOUT_DIRECTION_LOCALE = LAYOUT_DIRECTION_APPLICATION_LOCALE, +#endif // DISABLE_DEPRECATED }; enum TextDirection { diff --git a/scene/gui/item_list.cpp b/scene/gui/item_list.cpp index f3cf29d0cf..1ba5ef309b 100644 --- a/scene/gui/item_list.cpp +++ b/scene/gui/item_list.cpp @@ -1056,7 +1056,7 @@ void ItemList::_notification(int p_what) { scroll_bar->set_anchor_and_offset(SIDE_BOTTOM, ANCHOR_END, -theme_cache.panel_style->get_margin(SIDE_BOTTOM)); Size2 size = get_size(); - int width = size.width - theme_cache.panel_style->get_margin(SIDE_RIGHT); + int width = size.width - theme_cache.panel_style->get_minimum_size().width; if (scroll_bar->is_visible()) { width -= scroll_bar_minwidth; } diff --git a/scene/gui/menu_bar.cpp b/scene/gui/menu_bar.cpp index 46c9c7cccc..9853d699d4 100644 --- a/scene/gui/menu_bar.cpp +++ b/scene/gui/menu_bar.cpp @@ -510,6 +510,7 @@ void MenuBar::_refresh_menu_names() { if (!popups[i]->has_meta("_menu_name") && String(popups[i]->get_name()) != get_menu_title(i)) { menu_cache.write[i].name = popups[i]->get_name(); shape(menu_cache.write[i]); + queue_redraw(); if (is_global && menu_cache[i].submenu_rid.is_valid()) { int item_idx = nmenu->find_item_index_with_submenu(main_menu, menu_cache[i].submenu_rid); if (item_idx >= 0) { diff --git a/scene/gui/tree.cpp b/scene/gui/tree.cpp index 646cd9c70e..40917ee8f1 100644 --- a/scene/gui/tree.cpp +++ b/scene/gui/tree.cpp @@ -961,19 +961,17 @@ TreeItem *TreeItem::_get_prev_in_tree(bool p_wrap, bool p_include_invisible) { if (!prev_item) { current = current->parent; - if (current == tree->root && tree->hide_root) { - return nullptr; - } else if (!current) { - if (p_wrap) { - current = this; - TreeItem *temp = get_next_visible(); - while (temp) { - current = temp; - temp = temp->get_next_visible(); - } - } else { + if (!current || (current == tree->root && tree->hide_root)) { + if (!p_wrap) { return nullptr; } + // Wrap around to the last visible item. + current = this; + TreeItem *temp = get_next_visible(); + while (temp) { + current = temp; + temp = temp->get_next_visible(); + } } } else { current = prev_item; @@ -2165,12 +2163,18 @@ int Tree::draw_item(const Point2i &p_pos, const Point2 &p_draw_ofs, const Size2 int ofs = p_pos.x + ((p_item->disable_folding || hide_folding) ? theme_cache.h_separation : theme_cache.item_margin); int skip2 = 0; + + bool is_row_hovered = (!cache.hover_header_row && cache.hover_item == p_item); + for (int i = 0; i < columns.size(); i++) { if (skip2) { skip2--; continue; } + bool is_col_hovered = cache.hover_column == i; + bool is_cell_hovered = is_row_hovered && is_col_hovered; + bool is_cell_button_hovered = is_cell_hovered && cache.hover_button_index_in_column != -1; int item_width = get_column_width(i); if (i == 0) { @@ -2203,6 +2207,8 @@ int Tree::draw_item(const Point2i &p_pos, const Point2 &p_draw_ofs, const Size2 int total_ofs = ofs - theme_cache.offset.x; + // If part of the column is beyond the right side of the control due to scrolling, clamp the label width + // so that all buttons attached to the cell remain within view. if (total_ofs + item_width > p_draw_size.width) { item_width = MAX(buttons_width, p_draw_size.width - total_ofs); } @@ -2247,17 +2253,42 @@ int Tree::draw_item(const Point2i &p_pos, const Point2 &p_draw_ofs, const Size2 RenderingServer::get_singleton()->canvas_item_add_line(ci, Point2i(r.position.x, r.position.y + r.size.height), r.position + r.size, theme_cache.guide_color, 1); } - if (i == 0) { - if (p_item->cells[0].selected && select_mode == SELECT_ROW) { + if (i == 0 && select_mode == SELECT_ROW) { + if (p_item->cells[0].selected || is_row_hovered) { const Rect2 content_rect = _get_content_rect(); Rect2i row_rect = Rect2i(Point2i(content_rect.position.x, item_rect.position.y), Size2i(content_rect.size.x, item_rect.size.y)); if (rtl) { row_rect.position.x = get_size().width - row_rect.position.x - row_rect.size.x; } - if (has_focus()) { - theme_cache.selected_focus->draw(ci, row_rect); + + if (p_item->cells[0].selected) { + if (has_focus()) { + theme_cache.selected_focus->draw(ci, row_rect); + } else { + theme_cache.selected->draw(ci, row_rect); + } + } else if (!drop_mode_flags) { + if (is_cell_button_hovered) { + theme_cache.hovered_dimmed->draw(ci, row_rect); + } else { + theme_cache.hovered->draw(ci, row_rect); + } + } + } + } + + if (select_mode != SELECT_ROW) { + Rect2i r = cell_rect; + if (rtl) { + r.position.x = get_size().width - r.position.x - r.size.x; + } + + // Cell hover. + if (is_cell_hovered && !p_item->cells[i].selected && !drop_mode_flags) { + if (is_cell_button_hovered) { + theme_cache.hovered_dimmed->draw(ci, r); } else { - theme_cache.selected->draw(ci, row_rect); + theme_cache.hovered->draw(ci, r); } } } @@ -2335,7 +2366,9 @@ int Tree::draw_item(const Point2i &p_pos, const Point2 &p_draw_ofs, const Size2 if (p_item->cells[i].custom_color) { cell_color = p_item->cells[i].color; } else { - cell_color = p_item->cells[i].selected ? theme_cache.font_selected_color : theme_cache.font_color; + bool draw_as_hover = !drop_mode_flags && (select_mode == SELECT_ROW ? is_row_hovered : is_cell_hovered); + bool draw_as_hover_dim = draw_as_hover && is_cell_button_hovered; + cell_color = p_item->cells[i].selected ? theme_cache.font_selected_color : (draw_as_hover_dim ? theme_cache.font_hovered_dimmed_color : (draw_as_hover ? theme_cache.font_hovered_color : theme_cache.font_color)); } Color font_outline_color = theme_cache.font_outline_color; @@ -2487,7 +2520,7 @@ int Tree::draw_item(const Point2i &p_pos, const Point2 &p_draw_ofs, const Size2 ir.size.width -= downarrow->get_width(); if (p_item->cells[i].custom_button) { - if (cache.hover_item == p_item && cache.hover_cell == i) { + if (cache.hover_item == p_item && cache.hover_column == i) { if (Input::get_singleton()->is_mouse_button_pressed(MouseButton::LEFT)) { draw_style_box(theme_cache.custom_button_pressed, ir); } else { @@ -2508,19 +2541,26 @@ int Tree::draw_item(const Point2i &p_pos, const Point2 &p_draw_ofs, const Size2 } break; } + // Draw the buttons inside the cell. for (int j = p_item->cells[i].buttons.size() - 1; j >= 0; j--) { Ref<Texture2D> button_texture = p_item->cells[i].buttons[j].texture; Size2 button_size = button_texture->get_size() + theme_cache.button_pressed->get_minimum_size(); Point2i button_ofs = Point2i(ofs + item_width_with_buttons - button_size.width, p_pos.y) - theme_cache.offset + p_draw_ofs; - if (cache.click_type == Cache::CLICK_BUTTON && cache.click_item == p_item && cache.click_column == i && cache.click_index == j && !p_item->cells[i].buttons[j].disabled) { - // Being pressed. + bool should_draw_pressed = cache.click_type == Cache::CLICK_BUTTON && cache.click_item == p_item && cache.click_column == i && cache.click_index == j && !p_item->cells[i].buttons[j].disabled; + bool should_draw_hovered = !should_draw_pressed && !drop_mode_flags && cache.hover_item == p_item && cache.hover_column == i && cache.hover_button_index_in_column == j && !p_item->cells[i].buttons[j].disabled; + + if (should_draw_pressed || should_draw_hovered) { Point2 od = button_ofs; if (rtl) { od.x = get_size().width - od.x - button_size.x; } - theme_cache.button_pressed->draw(get_canvas_item(), Rect2(od.x, od.y, button_size.width, MAX(button_size.height, label_h))); + if (should_draw_pressed) { + theme_cache.button_pressed->draw(get_canvas_item(), Rect2(od.x, od.y, button_size.width, MAX(button_size.height, label_h))); + } else { + theme_cache.button_hover->draw(get_canvas_item(), Rect2(od.x, od.y, button_size.width, MAX(button_size.height, label_h))); + } } button_ofs.y += (label_h - button_size.height) / 2; @@ -2551,6 +2591,7 @@ int Tree::draw_item(const Point2i &p_pos, const Point2 &p_draw_ofs, const Size2 } } + // Draw the folding arrow. if (!p_item->disable_folding && !hide_folding && p_item->first_child && p_item->get_visible_child_count() != 0) { //has visible children, draw the guide box Ref<Texture2D> arrow; @@ -2966,6 +3007,7 @@ int Tree::propagate_mouse_event(const Point2i &p_pos, int x_ofs, int y_ofs, int col_width = MAX(button_w, MIN(limit_w, col_width)); } + // Cell button detection code. for (int j = c.buttons.size() - 1; j >= 0; j--) { Ref<Texture2D> b = c.buttons[j].texture; int w = b->get_size().width + theme_cache.button_pressed->get_minimum_size().width; @@ -2978,7 +3020,7 @@ int Tree::propagate_mouse_event(const Point2i &p_pos, int x_ofs, int y_ofs, int } // Make sure the click is correct. - Point2 click_pos = get_global_mouse_position() - get_global_position(); + const Point2 click_pos = get_local_mouse_position(); if (!get_item_at_position(click_pos)) { pressed_button = -1; cache.click_type = Cache::CLICK_NONE; @@ -3485,7 +3527,11 @@ bool Tree::_scroll(bool p_horizontal, float p_pages) { double prev_value = scroll->get_value(); scroll->set_value(scroll->get_value() + scroll->get_page() * p_pages); - return scroll->get_value() != prev_value; + bool scroll_happened = scroll->get_value() != prev_value; + if (scroll_happened) { + _determine_hovered_item(); + } + return scroll_happened; } Rect2 Tree::_get_scrollbar_layout_rect() const { @@ -3710,92 +3756,10 @@ void Tree::gui_input(const Ref<InputEvent> &p_event) { Ref<InputEventMouseMotion> mm = p_event; if (mm.is_valid()) { - Ref<StyleBox> bg = theme_cache.panel_style; - bool rtl = is_layout_rtl(); - - Point2 pos = mm->get_position(); - if (rtl) { - pos.x = get_size().width - pos.x; - } - pos -= theme_cache.panel_style->get_offset(); - - Cache::ClickType old_hover = cache.hover_type; - int old_index = cache.hover_index; - - cache.hover_type = Cache::CLICK_NONE; - cache.hover_index = 0; - if (show_column_titles) { - pos.y -= _get_title_button_height(); - if (pos.y < 0) { - pos.x += theme_cache.offset.x; - int len = 0; - for (int i = 0; i < columns.size(); i++) { - len += get_column_width(i); - if (pos.x < len) { - cache.hover_type = Cache::CLICK_TITLE; - cache.hover_index = i; - break; - } - } - } - } - - if (root) { - Point2 mpos = mm->get_position(); - if (rtl) { - mpos.x = get_size().width - mpos.x; - } - mpos -= theme_cache.panel_style->get_offset(); - mpos.y -= _get_title_button_height(); - if (mpos.y >= 0) { - if (h_scroll->is_visible_in_tree()) { - mpos.x += h_scroll->get_value(); - } - if (v_scroll->is_visible_in_tree()) { - mpos.y += v_scroll->get_value(); - } - - TreeItem *old_it = cache.hover_item; - int old_col = cache.hover_cell; - - int col, h, section; - TreeItem *it = _find_item_at_pos(root, mpos, col, h, section); - - if (drop_mode_flags) { - if (it != drop_mode_over) { - drop_mode_over = it; - queue_redraw(); - } - if (it && section != drop_mode_section) { - drop_mode_section = section; - queue_redraw(); - } - } - - cache.hover_item = it; - cache.hover_cell = col; - - if (it != old_it || col != old_col) { - if (old_it && old_col >= old_it->cells.size()) { - // Columns may have changed since last redraw(). - queue_redraw(); - } else { - // Only need to update if mouse enters/exits a button - bool was_over_button = old_it && old_it->cells[old_col].custom_button; - bool is_over_button = it && it->cells[col].custom_button; - if (was_over_button || is_over_button) { - queue_redraw(); - } - } - } - } - } - - // Update if mouse enters/exits columns - if (cache.hover_type != old_hover || cache.hover_index != old_index) { - queue_redraw(); - } + hovered_pos = mm->get_position(); + _determine_hovered_item(); + bool rtl = is_layout_rtl(); if (pressing_for_editor && popup_pressing_edited_item && (popup_pressing_edited_item->get_cell_mode(popup_pressing_edited_item_column) == TreeItem::CELL_MODE_RANGE)) { /* This needs to happen now, because the popup can be closed when pressing another item, and must remain the popup edited item until it actually closes */ popup_edited_item = popup_pressing_edited_item; @@ -4080,6 +4044,174 @@ void Tree::gui_input(const Ref<InputEvent> &p_event) { } } +void Tree::_determine_hovered_item() { + Ref<StyleBox> bg = theme_cache.panel_style; + bool rtl = is_layout_rtl(); + + Point2 pos = hovered_pos; + if (rtl) { + pos.x = get_size().width - pos.x; + } + pos -= theme_cache.panel_style->get_offset(); + + bool old_header_row = cache.hover_header_row; + int old_header_column = cache.hover_header_column; + TreeItem *old_item = cache.hover_item; + int old_column = cache.hover_column; + int old_button_index_in_column = cache.hover_button_index_in_column; + + // Determine hover on column headers. + cache.hover_header_row = false; + cache.hover_header_column = 0; + if (show_column_titles && is_mouse_hovering) { + pos.y -= _get_title_button_height(); + if (pos.y < 0) { + pos.x += theme_cache.offset.x; + int len = 0; + for (int i = 0; i < columns.size(); i++) { + len += get_column_width(i); + if (pos.x < len) { + cache.hover_header_row = true; + cache.hover_header_column = i; + cache.hover_button_index_in_column = -1; + break; + } + } + } + } + + // Determine hover on rows and items. + if (root && is_mouse_hovering) { + Point2 mpos = hovered_pos; + if (rtl) { + mpos.x = get_size().width - mpos.x; + } + mpos -= theme_cache.panel_style->get_offset(); + mpos.y -= _get_title_button_height(); + if (mpos.y >= 0) { + if (h_scroll->is_visible_in_tree()) { + mpos.x += h_scroll->get_value(); + } + if (v_scroll->is_visible_in_tree()) { + mpos.y += v_scroll->get_value(); + } + + int col, h, section; + TreeItem *it = _find_item_at_pos(root, mpos, col, h, section); + + // Find possible hovered button in cell. + int col_button_index = -1; + + Point2 cpos = mpos; + + if (it) { + const TreeItem::Cell &c = it->cells[col]; + int col_width = get_column_width(col); + + // In the first column, tree nesting indent impacts the leftmost possible buttons position + // and the clickable area of the folding arrow. + int col_indent = 0; + if (col == 0) { + col_indent = _get_item_h_offset(it); + } + + // Compute total width of buttons block including spacings. + int buttons_width = 0; + for (int j = c.buttons.size() - 1; j >= 0; j--) { + Ref<Texture2D> b = c.buttons[j].texture; + Size2 size = b->get_size() + theme_cache.button_pressed->get_minimum_size(); + buttons_width += size.width + theme_cache.button_margin; + } + + // Adjust when buttons are shifted left into view so that they remain visible even + // if part of the cell is beyond the right border due to horizontal scrolling and + // a long string in one of the items. This matches the drawing & click handling algorithms + // that are based on recursion. + int clamped_column_offset = 0; + int col_left = 0; + + for (int i = 0; i < col; i++) { + int i_col_w = get_column_width(i); + cpos.x -= i_col_w; + col_left += i_col_w; + } + col_left -= theme_cache.offset.x; + + // Compute buttons offset that makes them visible, in comparison to what would be their + // natural position that would cut them off. + if (!rtl) { + const Rect2 content_rect = _get_content_rect(); + int cw = content_rect.size.width; + int col_right = col_left + col_width; + if (col_right > cw) { + clamped_column_offset = col_right - cw - theme_cache.scrollbar_h_separation; + int max_clamp_offset = col_width - col_indent - buttons_width; + if (clamped_column_offset > max_clamp_offset) { + clamped_column_offset = max_clamp_offset; + } + } + } + col_width -= clamped_column_offset; + + // Find the actual button under coordinates. + for (int j = c.buttons.size() - 1; j >= 0; j--) { + Ref<Texture2D> b = c.buttons[j].texture; + Size2 size = b->get_size() + theme_cache.button_pressed->get_minimum_size(); + if (cpos.x > col_width - size.width && col_button_index == -1) { + col_button_index = j; + } + col_width -= size.width + theme_cache.button_margin; + } + } + + if (drop_mode_flags) { + if (it != drop_mode_over) { + drop_mode_over = it; + queue_redraw(); + } + if (it && section != drop_mode_section) { + drop_mode_section = section; + queue_redraw(); + } + } + + cache.hover_item = it; + cache.hover_column = col; + cache.hover_button_index_in_column = col_button_index; + + if (it != old_item || col != old_column) { + if (old_item && old_column >= old_item->cells.size()) { + // Columns may have changed since last redraw(). + queue_redraw(); + } else { + // Only need to update if mouse enters/exits a button. + bool was_over_button = old_item && old_item->cells[old_column].custom_button; + bool is_over_button = it && it->cells[col].custom_button; + if (was_over_button || is_over_button) { + queue_redraw(); + } + } + } + } + } + + // Reduce useless redraw calls. + + bool hovered_cell_button_changed = (cache.hover_button_index_in_column != old_button_index_in_column); + bool hovered_column_changed = (cache.hover_column != old_column); + + // Mouse has moved from row to row, or from cell to cell within same row unless selection mode is full row which saves a useless redraw. + bool item_hover_needs_redraw = !cache.hover_header_row && (cache.hover_item != old_item || hovered_cell_button_changed || (select_mode != SELECT_ROW && hovered_column_changed)); + // Mouse has moved between two different column header sections. + bool header_hover_needs_redraw = cache.hover_header_row && cache.hover_header_column != old_header_column; + // Mouse has moved between header and "main" areas. + bool whole_needs_redraw = cache.hover_header_row != old_header_row; + + if (whole_needs_redraw || header_hover_needs_redraw || item_hover_needs_redraw) { + queue_redraw(); + } +} + bool Tree::edit_selected(bool p_force_edit) { TreeItem *s = get_selected(); ERR_FAIL_NULL_V_MSG(s, false, "No item selected."); @@ -4304,9 +4436,20 @@ void Tree::_notification(int p_what) { } } break; + case NOTIFICATION_MOUSE_ENTER: { + is_mouse_hovering = true; + _determine_hovered_item(); + } break; + case NOTIFICATION_MOUSE_EXIT: { - if (cache.hover_type != Cache::CLICK_NONE) { - cache.hover_type = Cache::CLICK_NONE; + is_mouse_hovering = false; + // Clear hovered item cache. + if (cache.hover_header_row || cache.hover_item != nullptr) { + cache.hover_header_row = false; + cache.hover_header_column = -1; + cache.hover_item = nullptr; + cache.hover_column = -1; + cache.hover_button_index_in_column = -1; queue_redraw(); } } break; @@ -4420,7 +4563,7 @@ void Tree::_notification(int p_what) { //title buttons int ofs2 = theme_cache.panel_style->get_margin(SIDE_LEFT); for (int i = 0; i < columns.size(); i++) { - Ref<StyleBox> sb = (cache.click_type == Cache::CLICK_TITLE && cache.click_index == i) ? theme_cache.title_button_pressed : ((cache.hover_type == Cache::CLICK_TITLE && cache.hover_index == i) ? theme_cache.title_button_hover : theme_cache.title_button); + Ref<StyleBox> sb = (cache.click_type == Cache::CLICK_TITLE && cache.click_index == i) ? theme_cache.title_button_pressed : ((cache.hover_header_row && cache.hover_header_column == i) ? theme_cache.title_button_hover : theme_cache.title_button); Rect2 tbrect = Rect2(ofs2 - theme_cache.offset.x, bg->get_margin(SIDE_TOP), get_column_width(i), tbh); if (cache.rtl) { tbrect.position.x = get_size().width - tbrect.size.x - tbrect.position.x; @@ -4536,6 +4679,8 @@ TreeItem *Tree::create_item(TreeItem *p_parent, int p_index) { } } + _determine_hovered_item(); + return ti; } @@ -4679,6 +4824,8 @@ void Tree::clear() { popup_edited_item = nullptr; popup_pressing_edited_item = nullptr; + _determine_hovered_item(); + queue_redraw(); }; @@ -4931,6 +5078,7 @@ int Tree::get_columns() const { } void Tree::_scroll_moved(float) { + _determine_hovered_item(); queue_redraw(); } @@ -4972,7 +5120,47 @@ int Tree::get_item_offset(TreeItem *p_item) const { } } - return -1; //not found + return -1; // Not found. +} + +int Tree::_get_item_h_offset(TreeItem *p_item) const { + TreeItem *it = root; + int nesting_level = 0; + if (!it) { + return 0; + } + + while (true) { + if (it == p_item) { + if (!hide_root) { + nesting_level += 1; + } + if (hide_folding) { + nesting_level -= 1; + } + return nesting_level * theme_cache.item_margin; + } + + if (it->first_child && !it->collapsed) { + it = it->first_child; + nesting_level += 1; + + } else if (it->next) { + it = it->next; + } else { + while (!it->next) { + it = it->parent; + nesting_level -= 1; + if (it == nullptr) { + return 0; + } + } + + it = it->next; + } + } + + return -1; // Not found. } void Tree::ensure_cursor_is_visible() { @@ -5803,10 +5991,13 @@ void Tree::_bind_methods() { BIND_THEME_ITEM_CUSTOM(Theme::DATA_TYPE_FONT, Tree, tb_font, "title_button_font"); BIND_THEME_ITEM_CUSTOM(Theme::DATA_TYPE_FONT_SIZE, Tree, tb_font_size, "title_button_font_size"); + BIND_THEME_ITEM(Theme::DATA_TYPE_STYLEBOX, Tree, hovered); + BIND_THEME_ITEM(Theme::DATA_TYPE_STYLEBOX, Tree, hovered_dimmed); BIND_THEME_ITEM(Theme::DATA_TYPE_STYLEBOX, Tree, selected); BIND_THEME_ITEM(Theme::DATA_TYPE_STYLEBOX, Tree, selected_focus); BIND_THEME_ITEM(Theme::DATA_TYPE_STYLEBOX, Tree, cursor); BIND_THEME_ITEM_CUSTOM(Theme::DATA_TYPE_STYLEBOX, Tree, cursor_unfocus, "cursor_unfocused"); + BIND_THEME_ITEM(Theme::DATA_TYPE_STYLEBOX, Tree, button_hover); BIND_THEME_ITEM(Theme::DATA_TYPE_STYLEBOX, Tree, button_pressed); BIND_THEME_ITEM(Theme::DATA_TYPE_ICON, Tree, checked); @@ -5827,6 +6018,8 @@ void Tree::_bind_methods() { BIND_THEME_ITEM(Theme::DATA_TYPE_COLOR, Tree, custom_button_font_highlight); BIND_THEME_ITEM(Theme::DATA_TYPE_COLOR, Tree, font_color); + BIND_THEME_ITEM(Theme::DATA_TYPE_COLOR, Tree, font_hovered_color); + BIND_THEME_ITEM(Theme::DATA_TYPE_COLOR, Tree, font_hovered_dimmed_color); BIND_THEME_ITEM(Theme::DATA_TYPE_COLOR, Tree, font_selected_color); BIND_THEME_ITEM(Theme::DATA_TYPE_COLOR, Tree, font_disabled_color); BIND_THEME_ITEM(Theme::DATA_TYPE_COLOR, Tree, drop_position_color); diff --git a/scene/gui/tree.h b/scene/gui/tree.h index 86efdfec52..b417b7ee31 100644 --- a/scene/gui/tree.h +++ b/scene/gui/tree.h @@ -450,6 +450,9 @@ private: Vector2 pressing_pos; Rect2 pressing_item_rect; + Vector2 hovered_pos; + bool is_mouse_hovering = false; + float range_drag_base = 0.0; bool range_drag_enabled = false; Vector2 range_drag_capture_pos; @@ -545,10 +548,13 @@ private: int font_size = 0; int tb_font_size = 0; + Ref<StyleBox> hovered; + Ref<StyleBox> hovered_dimmed; Ref<StyleBox> selected; Ref<StyleBox> selected_focus; Ref<StyleBox> cursor; Ref<StyleBox> cursor_unfocus; + Ref<StyleBox> button_hover; Ref<StyleBox> button_pressed; Ref<StyleBox> title_button; Ref<StyleBox> title_button_hover; @@ -572,6 +578,8 @@ private: Ref<Texture2D> updown; Color font_color; + Color font_hovered_color; + Color font_hovered_dimmed_color; Color font_selected_color; Color font_disabled_color; Color guide_color; @@ -623,16 +631,17 @@ private: }; ClickType click_type = Cache::CLICK_NONE; - ClickType hover_type = Cache::CLICK_NONE; int click_index = -1; int click_id = -1; TreeItem *click_item = nullptr; int click_column = 0; - int hover_index = -1; + int hover_header_column = -1; + bool hover_header_row = false; Point2 click_pos; TreeItem *hover_item = nullptr; - int hover_cell = -1; + int hover_column = -1; + int hover_button_index_in_column = -1; bool rtl = false; } cache; @@ -660,6 +669,7 @@ private: TreeItem *_search_item_text(TreeItem *p_at, const String &p_find, int *r_col, bool p_selectable, bool p_backwards = false); TreeItem *_find_item_at_pos(TreeItem *p_item, const Point2 &p_pos, int &r_column, int &h, int §ion) const; + int _get_item_h_offset(TreeItem *p_item) const; void _find_button_at_pos(const Point2 &p_pos, TreeItem *&r_item, int &r_column, int &r_index) const; @@ -689,6 +699,8 @@ private: bool enable_recursive_folding = true; + void _determine_hovered_item(); + int _count_selected_items(TreeItem *p_from) const; bool _is_branch_selected(TreeItem *p_from) const; bool _is_sibling_branch_selected(TreeItem *p_from) const; diff --git a/scene/main/canvas_item.cpp b/scene/main/canvas_item.cpp index c0386b056f..7c8bf9c809 100644 --- a/scene/main/canvas_item.cpp +++ b/scene/main/canvas_item.cpp @@ -1253,7 +1253,7 @@ void CanvasItem::_bind_methods() { ClassDB::bind_method(D_METHOD("force_update_transform"), &CanvasItem::force_update_transform); - ClassDB::bind_method(D_METHOD("make_canvas_position_local", "screen_point"), &CanvasItem::make_canvas_position_local); + ClassDB::bind_method(D_METHOD("make_canvas_position_local", "viewport_point"), &CanvasItem::make_canvas_position_local); ClassDB::bind_method(D_METHOD("make_input_local", "event"), &CanvasItem::make_input_local); ClassDB::bind_method(D_METHOD("set_visibility_layer", "layer"), &CanvasItem::set_visibility_layer); diff --git a/scene/main/viewport.cpp b/scene/main/viewport.cpp index 169a5dcb01..8755d5f51e 100644 --- a/scene/main/viewport.cpp +++ b/scene/main/viewport.cpp @@ -1448,7 +1448,11 @@ void Viewport::_gui_show_tooltip() { &tooltip_owner); gui.tooltip_text = gui.tooltip_text.strip_edges(); - if (gui.tooltip_text.is_empty()) { + // Controls can implement `make_custom_tooltip` to provide their own tooltip. + // This should be a Control node which will be added as child to a TooltipPanel. + Control *base_tooltip = tooltip_owner ? tooltip_owner->make_custom_tooltip(gui.tooltip_text) : nullptr; + + if (gui.tooltip_text.is_empty() && !base_tooltip) { return; // Nothing to show. } @@ -1469,10 +1473,6 @@ void Viewport::_gui_show_tooltip() { // Ensure no opaque background behind the panel as its StyleBox can be partially transparent (e.g. corners). panel->set_transparent_background(true); - // Controls can implement `make_custom_tooltip` to provide their own tooltip. - // This should be a Control node which will be added as child to a TooltipPanel. - Control *base_tooltip = tooltip_owner->make_custom_tooltip(gui.tooltip_text); - // If no custom tooltip is given, use a default implementation. if (!base_tooltip) { gui.tooltip_label = memnew(Label); @@ -1921,7 +1921,7 @@ void Viewport::_gui_input_event(Ref<InputEvent> p_event) { String tooltip = _gui_get_tooltip(over, gui.tooltip_control->get_global_transform_with_canvas().affine_inverse().xform(mpos)); tooltip = tooltip.strip_edges(); - if (tooltip.is_empty() || tooltip != gui.tooltip_text) { + if (tooltip != gui.tooltip_text) { _gui_cancel_tooltip(); } else { is_tooltip_shown = true; diff --git a/scene/main/window.cpp b/scene/main/window.cpp index 803ce89bc9..045c3ae02d 100644 --- a/scene/main/window.cpp +++ b/scene/main/window.cpp @@ -2635,7 +2635,7 @@ void Window::set_unparent_when_invisible(bool p_unparent) { void Window::set_layout_direction(Window::LayoutDirection p_direction) { ERR_MAIN_THREAD_GUARD; - ERR_FAIL_INDEX((int)p_direction, 4); + ERR_FAIL_INDEX(p_direction, LAYOUT_DIRECTION_MAX); layout_dir = p_direction; propagate_notification(Control::NOTIFICATION_LAYOUT_DIRECTION_CHANGED); @@ -2700,13 +2700,20 @@ bool Window::is_layout_rtl() const { String locale = TranslationServer::get_singleton()->get_tool_locale(); return TS->is_locale_right_to_left(locale); } - } else if (layout_dir == LAYOUT_DIRECTION_LOCALE) { + } else if (layout_dir == LAYOUT_DIRECTION_APPLICATION_LOCALE) { if (GLOBAL_GET(SNAME("internationalization/rendering/force_right_to_left_layout_direction"))) { return true; } else { String locale = TranslationServer::get_singleton()->get_tool_locale(); return TS->is_locale_right_to_left(locale); } + } else if (layout_dir == LAYOUT_DIRECTION_SYSTEM_LOCALE) { + if (GLOBAL_GET(SNAME("internationalization/rendering/force_right_to_left_layout_direction"))) { + return true; + } else { + String locale = OS::get_singleton()->get_locale(); + return TS->is_locale_right_to_left(locale); + } } else { return (layout_dir == LAYOUT_DIRECTION_RTL); } @@ -3001,6 +3008,7 @@ void Window::_bind_methods() { ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "popup_window"), "set_flag", "get_flag", FLAG_POPUP); ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "extend_to_title"), "set_flag", "get_flag", FLAG_EXTEND_TO_TITLE); ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "mouse_passthrough"), "set_flag", "get_flag", FLAG_MOUSE_PASSTHROUGH); + ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "sharp_corners"), "set_flag", "get_flag", FLAG_SHARP_CORNERS); ADD_PROPERTY(PropertyInfo(Variant::BOOL, "force_native"), "set_force_native", "get_force_native"); ADD_GROUP("Limits", ""); @@ -3054,6 +3062,7 @@ void Window::_bind_methods() { BIND_ENUM_CONSTANT(FLAG_POPUP); BIND_ENUM_CONSTANT(FLAG_EXTEND_TO_TITLE); BIND_ENUM_CONSTANT(FLAG_MOUSE_PASSTHROUGH); + BIND_ENUM_CONSTANT(FLAG_SHARP_CORNERS); BIND_ENUM_CONSTANT(FLAG_MAX); BIND_ENUM_CONSTANT(CONTENT_SCALE_MODE_DISABLED); @@ -3070,9 +3079,14 @@ void Window::_bind_methods() { BIND_ENUM_CONSTANT(CONTENT_SCALE_STRETCH_INTEGER); BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_INHERITED); - BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_LOCALE); + BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_APPLICATION_LOCALE); BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_LTR); BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_RTL); + BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_SYSTEM_LOCALE); + BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_MAX); +#ifndef DISABLE_DEPRECATED + BIND_ENUM_CONSTANT(LAYOUT_DIRECTION_LOCALE); +#endif // DISABLE_DEPRECATED BIND_ENUM_CONSTANT(WINDOW_INITIAL_POSITION_ABSOLUTE); BIND_ENUM_CONSTANT(WINDOW_INITIAL_POSITION_CENTER_PRIMARY_SCREEN); diff --git a/scene/main/window.h b/scene/main/window.h index 47aaf73728..6517350b78 100644 --- a/scene/main/window.h +++ b/scene/main/window.h @@ -62,6 +62,7 @@ public: FLAG_POPUP = DisplayServer::WINDOW_FLAG_POPUP, FLAG_EXTEND_TO_TITLE = DisplayServer::WINDOW_FLAG_EXTEND_TO_TITLE, FLAG_MOUSE_PASSTHROUGH = DisplayServer::WINDOW_FLAG_MOUSE_PASSTHROUGH, + FLAG_SHARP_CORNERS = DisplayServer::WINDOW_FLAG_SHARP_CORNERS, FLAG_MAX = DisplayServer::WINDOW_FLAG_MAX, }; @@ -86,9 +87,14 @@ public: enum LayoutDirection { LAYOUT_DIRECTION_INHERITED, - LAYOUT_DIRECTION_LOCALE, + LAYOUT_DIRECTION_APPLICATION_LOCALE, LAYOUT_DIRECTION_LTR, - LAYOUT_DIRECTION_RTL + LAYOUT_DIRECTION_RTL, + LAYOUT_DIRECTION_SYSTEM_LOCALE, + LAYOUT_DIRECTION_MAX, +#ifndef DISABLE_DEPRECATED + LAYOUT_DIRECTION_LOCALE = LAYOUT_DIRECTION_APPLICATION_LOCALE, +#endif // DISABLE_DEPRECATED }; enum { diff --git a/scene/property_utils.cpp b/scene/property_utils.cpp index 94a037bd9b..f068e34beb 100644 --- a/scene/property_utils.cpp +++ b/scene/property_utils.cpp @@ -89,6 +89,16 @@ Variant PropertyUtils::get_property_default_value(const Object *p_object, const *r_is_valid = false; } + // Handle special case "script" property, where the default value is either null or the custom type script. + // Do this only if there's no states stack cache to trace for default values. + if (!p_states_stack_cache && p_property == CoreStringName(script) && p_object->has_meta(SceneStringName(_custom_type_script))) { + Ref<Script> ct_scr = p_object->get_meta(SceneStringName(_custom_type_script)); + if (r_is_valid) { + *r_is_valid = true; + } + return ct_scr; + } + Ref<Script> topmost_script; if (const Node *node = Object::cast_to<Node>(p_object)) { diff --git a/scene/resources/2d/tile_set.cpp b/scene/resources/2d/tile_set.cpp index 229e18be23..e624bdb32f 100644 --- a/scene/resources/2d/tile_set.cpp +++ b/scene/resources/2d/tile_set.cpp @@ -6480,9 +6480,9 @@ int TileData::get_terrain_set() const { } void TileData::set_terrain(int p_terrain) { - ERR_FAIL_COND(terrain_set < 0); ERR_FAIL_COND(p_terrain < -1); - if (tile_set) { + ERR_FAIL_COND(terrain_set < 0 && p_terrain != -1); + if (tile_set && terrain_set >= 0) { ERR_FAIL_COND(p_terrain >= tile_set->get_terrains_count(terrain_set)); } terrain = p_terrain; @@ -6495,9 +6495,9 @@ int TileData::get_terrain() const { void TileData::set_terrain_peering_bit(TileSet::CellNeighbor p_peering_bit, int p_terrain_index) { ERR_FAIL_INDEX(p_peering_bit, TileSet::CellNeighbor::CELL_NEIGHBOR_MAX); - ERR_FAIL_COND(terrain_set < 0); ERR_FAIL_COND(p_terrain_index < -1); - if (tile_set) { + ERR_FAIL_COND(terrain_set < 0 && p_terrain_index != -1); + if (tile_set && terrain_set >= 0) { ERR_FAIL_COND(p_terrain_index >= tile_set->get_terrains_count(terrain_set)); ERR_FAIL_COND(!is_valid_terrain_peering_bit(p_peering_bit)); } diff --git a/scene/resources/external_texture.cpp b/scene/resources/external_texture.cpp index 0552bbd081..c8b714372a 100644 --- a/scene/resources/external_texture.cpp +++ b/scene/resources/external_texture.cpp @@ -39,12 +39,14 @@ void ExternalTexture::_bind_methods() { } uint64_t ExternalTexture::get_external_texture_id() const { + _ensure_created(); return RenderingServer::get_singleton()->texture_get_native_handle(texture); } void ExternalTexture::set_size(const Size2 &p_size) { if (p_size.width > 0 && p_size.height > 0 && p_size != size) { size = p_size; + _ensure_created(); RenderingServer::get_singleton()->texture_external_update(texture, size.width, size.height, external_buffer); emit_changed(); } @@ -57,6 +59,7 @@ Size2 ExternalTexture::get_size() const { void ExternalTexture::set_external_buffer_id(uint64_t p_external_buffer) { if (p_external_buffer != external_buffer) { external_buffer = p_external_buffer; + _ensure_created(); RenderingServer::get_singleton()->texture_external_update(texture, size.width, size.height, external_buffer); } } @@ -74,11 +77,29 @@ bool ExternalTexture::has_alpha() const { } RID ExternalTexture::get_rid() const { + if (!texture.is_valid()) { + texture = RenderingServer::get_singleton()->texture_2d_placeholder_create(); + using_placeholder = true; + } return texture; } +void ExternalTexture::_ensure_created() const { + if (texture.is_valid() && !using_placeholder) { + return; + } + + RID new_texture = RenderingServer::get_singleton()->texture_external_create(size.width, size.height); + if (using_placeholder) { + DEV_ASSERT(texture.is_valid()); + RenderingServer::get_singleton()->texture_replace(texture, new_texture); + using_placeholder = false; + } else { + texture = new_texture; + } +} + ExternalTexture::ExternalTexture() { - texture = RenderingServer::get_singleton()->texture_external_create(size.width, size.height); } ExternalTexture::~ExternalTexture() { diff --git a/scene/resources/external_texture.h b/scene/resources/external_texture.h index 96bcd8d0fe..cd60bcc030 100644 --- a/scene/resources/external_texture.h +++ b/scene/resources/external_texture.h @@ -38,10 +38,13 @@ class ExternalTexture : public Texture2D { GDCLASS(ExternalTexture, Texture2D); private: - RID texture; + mutable RID texture; + mutable bool using_placeholder = false; Size2 size = Size2(256, 256); uint64_t external_buffer = 0; + void _ensure_created() const; + protected: static void _bind_methods(); diff --git a/scene/resources/immediate_mesh.cpp b/scene/resources/immediate_mesh.cpp index 907c0ab4ca..072542f0ad 100644 --- a/scene/resources/immediate_mesh.cpp +++ b/scene/resources/immediate_mesh.cpp @@ -312,6 +312,8 @@ void ImmediateMesh::surface_end() { uses_uv2s = false; surface_active = false; + + emit_changed(); } void ImmediateMesh::clear_surfaces() { diff --git a/scene/resources/resource_format_text.cpp b/scene/resources/resource_format_text.cpp index e234a81c88..4a318a10f0 100644 --- a/scene/resources/resource_format_text.cpp +++ b/scene/resources/resource_format_text.cpp @@ -1431,8 +1431,8 @@ void ResourceFormatLoaderText::get_recognized_extensions_for_type(const String & p_extensions->push_back("tscn"); } - // Don't allow .tres for PackedScenes. - if (p_type != "PackedScene") { + // Don't allow .tres for PackedScenes or GDExtension. + if (p_type != "PackedScene" && p_type != "GDExtension") { p_extensions->push_back("tres"); } } diff --git a/scene/resources/shader.cpp b/scene/resources/shader.cpp index 24d17108d5..d163a42fa9 100644 --- a/scene/resources/shader.cpp +++ b/scene/resources/shader.cpp @@ -32,6 +32,7 @@ #include "shader.compat.inc" #include "core/io/file_access.h" +#include "scene/main/scene_tree.h" #include "servers/rendering/shader_language.h" #include "servers/rendering/shader_preprocessor.h" #include "servers/rendering_server.h" @@ -138,6 +139,14 @@ String Shader::get_code() const { return code; } +void Shader::inspect_native_shader_code() { + SceneTree *st = SceneTree::get_singleton(); + RID _shader = get_rid(); + if (st && _shader.is_valid()) { + st->call_group_flags(SceneTree::GROUP_CALL_DEFERRED, "_native_shader_source_visualizer", "_inspect_shader", _shader); + } +} + void Shader::get_shader_uniform_list(List<PropertyInfo> *p_params, bool p_get_groups) const { _update_shader(); _check_shader_rid(); @@ -267,6 +276,9 @@ void Shader::_bind_methods() { ClassDB::bind_method(D_METHOD("get_shader_uniform_list", "get_groups"), &Shader::_get_shader_uniform_list, DEFVAL(false)); + ClassDB::bind_method(D_METHOD("inspect_native_shader_code"), &Shader::inspect_native_shader_code); + ClassDB::set_method_flags(get_class_static(), _scs_create("inspect_native_shader_code"), METHOD_FLAGS_DEFAULT | METHOD_FLAG_EDITOR); + ADD_PROPERTY(PropertyInfo(Variant::STRING, "code", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR), "set_code", "get_code"); BIND_ENUM_CONSTANT(MODE_SPATIAL); diff --git a/scene/resources/shader.h b/scene/resources/shader.h index 18197419f3..7234d37579 100644 --- a/scene/resources/shader.h +++ b/scene/resources/shader.h @@ -88,6 +88,8 @@ public: void set_code(const String &p_code); String get_code() const; + void inspect_native_shader_code(); + void get_shader_uniform_list(List<PropertyInfo> *p_params, bool p_get_groups = false) const; void set_default_texture_parameter(const StringName &p_name, const Ref<Texture> &p_texture, int p_index = 0); diff --git a/scene/scene_string_names.cpp b/scene/scene_string_names.cpp index 140e588291..31daeb3ae3 100644 --- a/scene/scene_string_names.cpp +++ b/scene/scene_string_names.cpp @@ -130,6 +130,8 @@ SceneStringNames::SceneStringNames() { shader_overrides_group = StaticCString::create("_shader_overrides_group_"); shader_overrides_group_active = StaticCString::create("_shader_overrides_group_active_"); + _custom_type_script = StaticCString::create("_custom_type_script"); + pressed = StaticCString::create("pressed"); id_pressed = StaticCString::create("id_pressed"); toggled = StaticCString::create("toggled"); diff --git a/scene/scene_string_names.h b/scene/scene_string_names.h index fc22be33b2..0a2ebeda7a 100644 --- a/scene/scene_string_names.h +++ b/scene/scene_string_names.h @@ -143,6 +143,8 @@ public: StringName shader_overrides_group; StringName shader_overrides_group_active; + StringName _custom_type_script; + StringName pressed; StringName id_pressed; StringName toggled; diff --git a/scene/theme/default_theme.cpp b/scene/theme/default_theme.cpp index caf44ac392..f5065e8de1 100644 --- a/scene/theme/default_theme.cpp +++ b/scene/theme/default_theme.cpp @@ -30,6 +30,7 @@ #include "default_theme.h" +#include "core/io/image.h" #include "core/os/os.h" #include "default_font.gen.h" #include "default_theme_icons.gen.h" @@ -832,10 +833,13 @@ void fill_default_theme(Ref<Theme> &theme, const Ref<Font> &default_font, const theme->set_stylebox(SceneStringName(panel), "Tree", make_flat_stylebox(style_normal_color, 4, 4, 4, 5)); theme->set_stylebox("focus", "Tree", focus); + theme->set_stylebox("hovered", "Tree", make_flat_stylebox(Color(1, 1, 1, 0.07))); + theme->set_stylebox("hovered_dimmed", "Tree", make_flat_stylebox(Color(1, 1, 1, 0.03))); theme->set_stylebox("selected", "Tree", make_flat_stylebox(style_selected_color)); theme->set_stylebox("selected_focus", "Tree", make_flat_stylebox(style_selected_color)); theme->set_stylebox("cursor", "Tree", focus); theme->set_stylebox("cursor_unfocused", "Tree", focus); + theme->set_stylebox("button_hover", "Tree", make_flat_stylebox(Color(1, 1, 1, 0.07))); theme->set_stylebox("button_pressed", "Tree", button_pressed); theme->set_stylebox("title_button_normal", "Tree", make_flat_stylebox(style_pressed_color, 4, 4, 4, 4)); theme->set_stylebox("title_button_pressed", "Tree", make_flat_stylebox(style_hover_color, 4, 4, 4, 4)); @@ -863,6 +867,8 @@ void fill_default_theme(Ref<Theme> &theme, const Ref<Font> &default_font, const theme->set_color("title_button_color", "Tree", control_font_color); theme->set_color(SceneStringName(font_color), "Tree", control_font_low_color); + theme->set_color("font_hovered_color", "Tree", control_font_hover_color); + theme->set_color("font_hovered_dimmed_color", "Tree", control_font_color); theme->set_color("font_selected_color", "Tree", control_font_pressed_color); theme->set_color("font_disabled_color", "Tree", control_font_disabled_color); theme->set_color("font_outline_color", "Tree", Color(0, 0, 0)); diff --git a/servers/audio/audio_stream.cpp b/servers/audio/audio_stream.cpp index 1886ebe1ac..d400b5790f 100644 --- a/servers/audio/audio_stream.cpp +++ b/servers/audio/audio_stream.cpp @@ -76,6 +76,42 @@ int AudioStreamPlayback::mix(AudioFrame *p_buffer, float p_rate_scale, int p_fra return ret; } +PackedVector2Array AudioStreamPlayback::_mix_audio_bind(float p_rate_scale, int p_frames) { + Vector<AudioFrame> frames = mix_audio(p_rate_scale, p_frames); + + PackedVector2Array res; + res.resize(frames.size()); + + Vector2 *res_ptrw = res.ptrw(); + for (int i = 0; i < frames.size(); i++) { + res_ptrw[i] = Vector2(frames[i].left, frames[i].right); + } + + return res; +} + +Vector<AudioFrame> AudioStreamPlayback::mix_audio(float p_rate_scale, int p_frames) { + Vector<AudioFrame> res; + res.resize(p_frames); + + int frames = mix(res.ptrw(), p_rate_scale, p_frames); + res.resize(frames); + + return res; +} + +void AudioStreamPlayback::start_playback(double p_from_pos) { + start(p_from_pos); +} + +void AudioStreamPlayback::stop_playback() { + stop(); +} + +void AudioStreamPlayback::seek_playback(double p_time) { + seek(p_time); +} + void AudioStreamPlayback::tag_used_streams() { GDVIRTUAL_CALL(_tag_used_streams); } @@ -108,6 +144,13 @@ void AudioStreamPlayback::_bind_methods() { ClassDB::bind_method(D_METHOD("set_sample_playback", "playback_sample"), &AudioStreamPlayback::set_sample_playback); ClassDB::bind_method(D_METHOD("get_sample_playback"), &AudioStreamPlayback::get_sample_playback); + ClassDB::bind_method(D_METHOD("mix_audio", "rate_scale", "frames"), &AudioStreamPlayback::_mix_audio_bind); + ClassDB::bind_method(D_METHOD("start", "from_pos"), &AudioStreamPlayback::start_playback, DEFVAL(0.0)); + ClassDB::bind_method(D_METHOD("seek", "time"), &AudioStreamPlayback::seek_playback, DEFVAL(0.0)); + ClassDB::bind_method(D_METHOD("stop"), &AudioStreamPlayback::stop_playback); + ClassDB::bind_method(D_METHOD("get_loop_count"), &AudioStreamPlayback::get_loop_count); + ClassDB::bind_method(D_METHOD("get_playback_position"), &AudioStreamPlayback::get_playback_position); + ClassDB::bind_method(D_METHOD("is_playing"), &AudioStreamPlayback::is_playing); } AudioStreamPlayback::AudioStreamPlayback() {} diff --git a/servers/audio/audio_stream.h b/servers/audio/audio_stream.h index 3feaa53630..65efccdc28 100644 --- a/servers/audio/audio_stream.h +++ b/servers/audio/audio_stream.h @@ -83,6 +83,7 @@ class AudioStreamPlayback : public RefCounted { protected: static void _bind_methods(); + PackedVector2Array _mix_audio_bind(float p_rate_scale, int p_frames); GDVIRTUAL1(_start, double) GDVIRTUAL0(_stop) GDVIRTUAL0RC(bool, _is_playing) @@ -118,6 +119,11 @@ public: AudioStreamPlayback(); ~AudioStreamPlayback(); + + Vector<AudioFrame> mix_audio(float p_rate_scale, int p_frames); + void start_playback(double p_from_pos = 0.0); + void stop_playback(); + void seek_playback(double p_time); }; class AudioStreamPlaybackResampled : public AudioStreamPlayback { diff --git a/servers/audio_server.cpp b/servers/audio_server.cpp index 70ef88e36d..17b573ab7b 100644 --- a/servers/audio_server.cpp +++ b/servers/audio_server.cpp @@ -1440,6 +1440,10 @@ uint64_t AudioServer::get_mixed_frames() const { return mix_frames; } +String AudioServer::get_driver_name() const { + return AudioDriver::get_singleton()->get_name(); +} + void AudioServer::notify_listener_changed() { for (CallbackItem *ci : listener_changed_callback_list) { ci->callback(ci->userdata); @@ -1947,6 +1951,8 @@ void AudioServer::_bind_methods() { ClassDB::bind_method(D_METHOD("get_speaker_mode"), &AudioServer::get_speaker_mode); ClassDB::bind_method(D_METHOD("get_mix_rate"), &AudioServer::get_mix_rate); + ClassDB::bind_method(D_METHOD("get_driver_name"), &AudioServer::get_driver_name); + ClassDB::bind_method(D_METHOD("get_output_device_list"), &AudioServer::get_output_device_list); ClassDB::bind_method(D_METHOD("get_output_device"), &AudioServer::get_output_device); ClassDB::bind_method(D_METHOD("set_output_device", "name"), &AudioServer::set_output_device); diff --git a/servers/audio_server.h b/servers/audio_server.h index 16fcc029b3..d4e1aa9995 100644 --- a/servers/audio_server.h +++ b/servers/audio_server.h @@ -427,6 +427,8 @@ public: uint64_t get_mix_count() const; uint64_t get_mixed_frames() const; + String get_driver_name() const; + void notify_listener_changed(); virtual void init(); diff --git a/servers/display_server.cpp b/servers/display_server.cpp index ce0d6cb996..428fb77c74 100644 --- a/servers/display_server.cpp +++ b/servers/display_server.cpp @@ -1128,6 +1128,7 @@ void DisplayServer::_bind_methods() { BIND_ENUM_CONSTANT(WINDOW_FLAG_POPUP); BIND_ENUM_CONSTANT(WINDOW_FLAG_EXTEND_TO_TITLE); BIND_ENUM_CONSTANT(WINDOW_FLAG_MOUSE_PASSTHROUGH); + BIND_ENUM_CONSTANT(WINDOW_FLAG_SHARP_CORNERS); BIND_ENUM_CONSTANT(WINDOW_FLAG_MAX); BIND_ENUM_CONSTANT(WINDOW_EVENT_MOUSE_ENTER); @@ -1229,6 +1230,10 @@ void DisplayServer::_input_set_custom_mouse_cursor_func(const Ref<Resource> &p_i } bool DisplayServer::can_create_rendering_device() { + if (get_singleton()->get_name() == "headless") { + return false; + } + #if defined(RD_ENABLED) RenderingDevice *device = RenderingDevice::get_singleton(); if (device) { diff --git a/servers/display_server.h b/servers/display_server.h index 36798bd011..f25bf334a4 100644 --- a/servers/display_server.h +++ b/servers/display_server.h @@ -32,6 +32,7 @@ #define DISPLAY_SERVER_H #include "core/input/input.h" +#include "core/io/image.h" #include "core/io/resource.h" #include "core/os/os.h" #include "core/variant/callable.h" @@ -39,7 +40,6 @@ #include "display/native_menu.h" class Texture2D; -class Image; class DisplayServer : public Object { GDCLASS(DisplayServer, Object) @@ -381,6 +381,7 @@ public: WINDOW_FLAG_POPUP, WINDOW_FLAG_EXTEND_TO_TITLE, WINDOW_FLAG_MOUSE_PASSTHROUGH, + WINDOW_FLAG_SHARP_CORNERS, WINDOW_FLAG_MAX, }; @@ -394,6 +395,7 @@ public: WINDOW_FLAG_POPUP_BIT = (1 << WINDOW_FLAG_POPUP), WINDOW_FLAG_EXTEND_TO_TITLE_BIT = (1 << WINDOW_FLAG_EXTEND_TO_TITLE), WINDOW_FLAG_MOUSE_PASSTHROUGH_BIT = (1 << WINDOW_FLAG_MOUSE_PASSTHROUGH), + WINDOW_FLAG_SHARP_CORNERS_BIT = (1 << WINDOW_FLAG_SHARP_CORNERS), }; virtual WindowID create_sub_window(WindowMode p_mode, VSyncMode p_vsync_mode, uint32_t p_flags, const Rect2i &p_rect = Rect2i(), bool p_exclusive = false, WindowID p_transient_parent = INVALID_WINDOW_ID); diff --git a/servers/movie_writer/movie_writer.h b/servers/movie_writer/movie_writer.h index e1dc8ef8cf..69d6b1ba2b 100644 --- a/servers/movie_writer/movie_writer.h +++ b/servers/movie_writer/movie_writer.h @@ -31,6 +31,7 @@ #ifndef MOVIE_WRITER_H #define MOVIE_WRITER_H +#include "core/io/image.h" #include "core/templates/local_vector.h" #include "servers/audio/audio_driver_dummy.h" #include "servers/audio_server.h" diff --git a/servers/rendering/dummy/storage/light_storage.h b/servers/rendering/dummy/storage/light_storage.h index c3b63cdbf6..d25523753c 100644 --- a/servers/rendering/dummy/storage/light_storage.h +++ b/servers/rendering/dummy/storage/light_storage.h @@ -78,6 +78,8 @@ public: virtual void light_set_cull_mask(RID p_light, uint32_t p_mask) override {} virtual void light_set_distance_fade(RID p_light, bool p_enabled, float p_begin, float p_shadow, float p_length) override {} virtual void light_set_reverse_cull_face_mode(RID p_light, bool p_enabled) override {} + virtual void light_set_shadow_caster_mask(RID p_light, uint32_t p_caster_mask) override {} + virtual uint32_t light_get_shadow_caster_mask(RID p_light) const override { return 0xFFFFFFFF; } virtual void light_set_bake_mode(RID p_light, RS::LightBakeMode p_bake_mode) override {} virtual void light_set_max_sdfgi_cascade(RID p_light, uint32_t p_cascade) override {} diff --git a/servers/rendering/renderer_canvas_cull.cpp b/servers/rendering/renderer_canvas_cull.cpp index 0ec161d8cf..701b4da8f8 100644 --- a/servers/rendering/renderer_canvas_cull.cpp +++ b/servers/rendering/renderer_canvas_cull.cpp @@ -51,7 +51,7 @@ void RendererCanvasCull::_render_canvas_item_tree(RID p_to_render_target, Canvas memset(z_last_list, 0, z_range * sizeof(RendererCanvasRender::Item *)); for (int i = 0; i < p_child_item_count; i++) { - _cull_canvas_item(p_child_items[i].item, p_transform, p_clip_rect, Color(1, 1, 1, 1), 0, z_list, z_last_list, nullptr, nullptr, true, p_canvas_cull_mask, Point2(), 1, nullptr); + _cull_canvas_item(p_child_items[i].item, p_transform, p_clip_rect, Color(1, 1, 1, 1), 0, z_list, z_last_list, nullptr, nullptr, false, p_canvas_cull_mask, Point2(), 1, nullptr); } RendererCanvasRender::Item *list = nullptr; @@ -79,45 +79,71 @@ void RendererCanvasCull::_render_canvas_item_tree(RID p_to_render_target, Canvas } } -void _collect_ysort_children(RendererCanvasCull::Item *p_canvas_item, const Transform2D &p_transform, RendererCanvasCull::Item *p_material_owner, const Color &p_modulate, RendererCanvasCull::Item **r_items, int &r_index, int p_z) { +void RendererCanvasCull::_collect_ysort_children(RendererCanvasCull::Item *p_canvas_item, RendererCanvasCull::Item *p_material_owner, const Color &p_modulate, RendererCanvasCull::Item **r_items, int &r_index, int p_z) { int child_item_count = p_canvas_item->child_items.size(); RendererCanvasCull::Item **child_items = p_canvas_item->child_items.ptrw(); for (int i = 0; i < child_item_count; i++) { - int abs_z = 0; if (child_items[i]->visible) { - if (r_items) { - r_items[r_index] = child_items[i]; - child_items[i]->ysort_xform = p_transform; - child_items[i]->ysort_pos = p_transform.xform(child_items[i]->xform_curr.columns[2]); - child_items[i]->material_owner = child_items[i]->use_parent_material ? p_material_owner : nullptr; - child_items[i]->ysort_modulate = p_modulate; - child_items[i]->ysort_index = r_index; - child_items[i]->ysort_parent_abs_z_index = p_z; - - if (!child_items[i]->repeat_source) { - child_items[i]->repeat_size = p_canvas_item->repeat_size; - child_items[i]->repeat_times = p_canvas_item->repeat_times; - child_items[i]->repeat_source_item = p_canvas_item->repeat_source_item; - } + // To y-sort according to the item's final position, physics interpolation + // and transform snapping need to be applied before y-sorting. + Transform2D child_xform; + if (!_interpolation_data.interpolation_enabled || !child_items[i]->interpolated) { + child_xform = child_items[i]->xform_curr; + } else { + real_t f = Engine::get_singleton()->get_physics_interpolation_fraction(); + TransformInterpolator::interpolate_transform_2d(child_items[i]->xform_prev, child_items[i]->xform_curr, child_xform, f); + } - // Y sorted canvas items are flattened into r_items. Calculate their absolute z index to use when rendering r_items. - if (child_items[i]->z_relative) { - abs_z = CLAMP(p_z + child_items[i]->z_index, RS::CANVAS_ITEM_Z_MIN, RS::CANVAS_ITEM_Z_MAX); - } else { - abs_z = child_items[i]->z_index; - } + if (snapping_2d_transforms_to_pixel) { + child_xform.columns[2] = (child_xform.columns[2] + Point2(0.5, 0.5)).floor(); + } + + r_items[r_index] = child_items[i]; + child_items[i]->ysort_xform = p_canvas_item->ysort_xform * child_xform; + child_items[i]->material_owner = child_items[i]->use_parent_material ? p_material_owner : nullptr; + child_items[i]->ysort_modulate = p_modulate; + child_items[i]->ysort_index = r_index; + child_items[i]->ysort_parent_abs_z_index = p_z; + + if (!child_items[i]->repeat_source) { + child_items[i]->repeat_size = p_canvas_item->repeat_size; + child_items[i]->repeat_times = p_canvas_item->repeat_times; + child_items[i]->repeat_source_item = p_canvas_item->repeat_source_item; + } + + // Y sorted canvas items are flattened into r_items. Calculate their absolute z index to use when rendering r_items. + int abs_z = 0; + if (child_items[i]->z_relative) { + abs_z = CLAMP(p_z + child_items[i]->z_index, RS::CANVAS_ITEM_Z_MIN, RS::CANVAS_ITEM_Z_MAX); + } else { + abs_z = child_items[i]->z_index; } r_index++; if (child_items[i]->sort_y) { - _collect_ysort_children(child_items[i], p_transform * child_items[i]->xform_curr, child_items[i]->use_parent_material ? p_material_owner : child_items[i], p_modulate * child_items[i]->modulate, r_items, r_index, abs_z); + _collect_ysort_children(child_items[i], child_items[i]->use_parent_material ? p_material_owner : child_items[i], p_modulate * child_items[i]->modulate, r_items, r_index, abs_z); } } } } -void _mark_ysort_dirty(RendererCanvasCull::Item *ysort_owner, RID_Owner<RendererCanvasCull::Item, true> &canvas_item_owner) { +int RendererCanvasCull::_count_ysort_children(RendererCanvasCull::Item *p_canvas_item) { + int ysort_children_count = 0; + int child_item_count = p_canvas_item->child_items.size(); + RendererCanvasCull::Item *const *child_items = p_canvas_item->child_items.ptr(); + for (int i = 0; i < child_item_count; i++) { + if (child_items[i]->visible) { + ysort_children_count++; + if (child_items[i]->sort_y) { + ysort_children_count += _count_ysort_children(child_items[i]); + } + } + } + return ysort_children_count; +} + +void RendererCanvasCull::_mark_ysort_dirty(RendererCanvasCull::Item *ysort_owner) { do { ysort_owner->ysort_children_count = -1; ysort_owner = canvas_item_owner.owns(ysort_owner->parent) ? canvas_item_owner.get_or_null(ysort_owner->parent) : nullptr; @@ -236,7 +262,7 @@ void RendererCanvasCull::_attach_canvas_item_for_draw(RendererCanvasCull::Item * } } -void RendererCanvasCull::_cull_canvas_item(Item *p_canvas_item, const Transform2D &p_parent_xform, const Rect2 &p_clip_rect, const Color &p_modulate, int p_z, RendererCanvasRender::Item **r_z_list, RendererCanvasRender::Item **r_z_last_list, Item *p_canvas_clip, Item *p_material_owner, bool p_allow_y_sort, uint32_t p_canvas_cull_mask, const Point2 &p_repeat_size, int p_repeat_times, RendererCanvasRender::Item *p_repeat_source_item) { +void RendererCanvasCull::_cull_canvas_item(Item *p_canvas_item, const Transform2D &p_parent_xform, const Rect2 &p_clip_rect, const Color &p_modulate, int p_z, RendererCanvasRender::Item **r_z_list, RendererCanvasRender::Item **r_z_last_list, Item *p_canvas_clip, Item *p_material_owner, bool p_is_already_y_sorted, uint32_t p_canvas_cull_mask, const Point2 &p_repeat_size, int p_repeat_times, RendererCanvasRender::Item *p_repeat_source_item) { Item *ci = p_canvas_item; if (!ci->visible) { @@ -260,15 +286,29 @@ void RendererCanvasCull::_cull_canvas_item(Item *p_canvas_item, const Transform2 } } + Transform2D self_xform; Transform2D final_xform; - if (!_interpolation_data.interpolation_enabled || !ci->interpolated) { - final_xform = ci->xform_curr; + if (p_is_already_y_sorted) { + // Y-sorted item's final transform is calculated before y-sorting, + // and is passed as `p_parent_xform` afterwards. No need to recalculate. + final_xform = p_parent_xform; } else { - real_t f = Engine::get_singleton()->get_physics_interpolation_fraction(); - TransformInterpolator::interpolate_transform_2d(ci->xform_prev, ci->xform_curr, final_xform, f); - } + if (!_interpolation_data.interpolation_enabled || !ci->interpolated) { + self_xform = ci->xform_curr; + } else { + real_t f = Engine::get_singleton()->get_physics_interpolation_fraction(); + TransformInterpolator::interpolate_transform_2d(ci->xform_prev, ci->xform_curr, self_xform, f); + } - Transform2D parent_xform = p_parent_xform; + Transform2D parent_xform = p_parent_xform; + + if (snapping_2d_transforms_to_pixel) { + self_xform.columns[2] = (self_xform.columns[2] + Point2(0.5, 0.5)).floor(); + parent_xform.columns[2] = (parent_xform.columns[2] + Point2(0.5, 0.5)).floor(); + } + + final_xform = parent_xform * self_xform; + } Point2 repeat_size = p_repeat_size; int repeat_times = p_repeat_times; @@ -284,13 +324,6 @@ void RendererCanvasCull::_cull_canvas_item(Item *p_canvas_item, const Transform2 ci->repeat_source_item = repeat_source_item; } - if (snapping_2d_transforms_to_pixel) { - final_xform.columns[2] = (final_xform.columns[2] + Point2(0.5, 0.5)).floor(); - parent_xform.columns[2] = (parent_xform.columns[2] + Point2(0.5, 0.5)).floor(); - } - - final_xform = parent_xform * final_xform; - Rect2 global_rect = final_xform.xform(rect); if (repeat_source_item && (repeat_size.x || repeat_size.y)) { // Top-left repeated rect. @@ -355,29 +388,27 @@ void RendererCanvasCull::_cull_canvas_item(Item *p_canvas_item, const Transform2 } if (ci->sort_y) { - if (p_allow_y_sort) { + if (!p_is_already_y_sorted) { if (ci->ysort_children_count == -1) { - ci->ysort_children_count = 0; - _collect_ysort_children(ci, Transform2D(), p_material_owner, Color(1, 1, 1, 1), nullptr, ci->ysort_children_count, p_z); + ci->ysort_children_count = _count_ysort_children(ci); } child_item_count = ci->ysort_children_count + 1; child_items = (Item **)alloca(child_item_count * sizeof(Item *)); - ci->ysort_xform = ci->xform_curr.affine_inverse(); - ci->ysort_pos = Vector2(); + ci->ysort_xform = Transform2D(); ci->ysort_modulate = Color(1, 1, 1, 1); ci->ysort_index = 0; ci->ysort_parent_abs_z_index = parent_z; child_items[0] = ci; int i = 1; - _collect_ysort_children(ci, Transform2D(), p_material_owner, Color(1, 1, 1, 1), child_items, i, p_z); + _collect_ysort_children(ci, p_material_owner, Color(1, 1, 1, 1), child_items, i, p_z); - SortArray<Item *, ItemPtrSort> sorter; + SortArray<Item *, ItemYSort> sorter; sorter.sort(child_items, child_item_count); for (i = 0; i < child_item_count; i++) { - _cull_canvas_item(child_items[i], final_xform * child_items[i]->ysort_xform, p_clip_rect, modulate * child_items[i]->ysort_modulate, child_items[i]->ysort_parent_abs_z_index, r_z_list, r_z_last_list, (Item *)ci->final_clip_owner, (Item *)child_items[i]->material_owner, false, p_canvas_cull_mask, child_items[i]->repeat_size, child_items[i]->repeat_times, child_items[i]->repeat_source_item); + _cull_canvas_item(child_items[i], final_xform * child_items[i]->ysort_xform, p_clip_rect, modulate * child_items[i]->ysort_modulate, child_items[i]->ysort_parent_abs_z_index, r_z_list, r_z_last_list, (Item *)ci->final_clip_owner, (Item *)child_items[i]->material_owner, true, p_canvas_cull_mask, child_items[i]->repeat_size, child_items[i]->repeat_times, child_items[i]->repeat_source_item); } } else { RendererCanvasRender::Item *canvas_group_from = nullptr; @@ -401,14 +432,14 @@ void RendererCanvasCull::_cull_canvas_item(Item *p_canvas_item, const Transform2 if (!child_items[i]->behind && !use_canvas_group) { continue; } - _cull_canvas_item(child_items[i], final_xform, p_clip_rect, modulate, p_z, r_z_list, r_z_last_list, (Item *)ci->final_clip_owner, p_material_owner, true, p_canvas_cull_mask, repeat_size, repeat_times, repeat_source_item); + _cull_canvas_item(child_items[i], final_xform, p_clip_rect, modulate, p_z, r_z_list, r_z_last_list, (Item *)ci->final_clip_owner, p_material_owner, false, p_canvas_cull_mask, repeat_size, repeat_times, repeat_source_item); } _attach_canvas_item_for_draw(ci, p_canvas_clip, r_z_list, r_z_last_list, final_xform, p_clip_rect, global_rect, modulate, p_z, p_material_owner, use_canvas_group, canvas_group_from); for (int i = 0; i < child_item_count; i++) { if (child_items[i]->behind || use_canvas_group) { continue; } - _cull_canvas_item(child_items[i], final_xform, p_clip_rect, modulate, p_z, r_z_list, r_z_last_list, (Item *)ci->final_clip_owner, p_material_owner, true, p_canvas_cull_mask, repeat_size, repeat_times, repeat_source_item); + _cull_canvas_item(child_items[i], final_xform, p_clip_rect, modulate, p_z, r_z_list, r_z_last_list, (Item *)ci->final_clip_owner, p_material_owner, false, p_canvas_cull_mask, repeat_size, repeat_times, repeat_source_item); } } } @@ -509,7 +540,7 @@ void RendererCanvasCull::canvas_item_set_parent(RID p_item, RID p_parent) { item_owner->child_items.erase(canvas_item); if (item_owner->sort_y) { - _mark_ysort_dirty(item_owner, canvas_item_owner); + _mark_ysort_dirty(item_owner); } } @@ -529,7 +560,7 @@ void RendererCanvasCull::canvas_item_set_parent(RID p_item, RID p_parent) { item_owner->children_order_dirty = true; if (item_owner->sort_y) { - _mark_ysort_dirty(item_owner, canvas_item_owner); + _mark_ysort_dirty(item_owner); } } else { @@ -546,7 +577,7 @@ void RendererCanvasCull::canvas_item_set_visible(RID p_item, bool p_visible) { canvas_item->visible = p_visible; - _mark_ysort_dirty(canvas_item, canvas_item_owner); + _mark_ysort_dirty(canvas_item); } void RendererCanvasCull::canvas_item_set_light_mask(RID p_item, int p_mask) { @@ -1742,7 +1773,7 @@ void RendererCanvasCull::canvas_item_set_sort_children_by_y(RID p_item, bool p_e canvas_item->sort_y = p_enable; - _mark_ysort_dirty(canvas_item, canvas_item_owner); + _mark_ysort_dirty(canvas_item); } void RendererCanvasCull::canvas_item_set_z_index(RID p_item, int p_z) { @@ -2423,7 +2454,7 @@ bool RendererCanvasCull::free(RID p_rid) { item_owner->child_items.erase(canvas_item); if (item_owner->sort_y) { - _mark_ysort_dirty(item_owner, canvas_item_owner); + _mark_ysort_dirty(item_owner); } } } diff --git a/servers/rendering/renderer_canvas_cull.h b/servers/rendering/renderer_canvas_cull.h index 91c03054f7..9a088d94ed 100644 --- a/servers/rendering/renderer_canvas_cull.h +++ b/servers/rendering/renderer_canvas_cull.h @@ -50,8 +50,7 @@ public: bool children_order_dirty; int ysort_children_count; Color ysort_modulate; - Transform2D ysort_xform; - Vector2 ysort_pos; + Transform2D ysort_xform; // Relative to y-sorted subtree's root item (identity for such root). Its `origin.y` is used for sorting. int ysort_index; int ysort_parent_abs_z_index; // Absolute Z index of parent. Only populated and used when y-sorting. uint32_t visibility_layer = 0xffffffff; @@ -84,7 +83,6 @@ public: index = 0; ysort_children_count = -1; ysort_xform = Transform2D(); - ysort_pos = Vector2(); ysort_index = 0; ysort_parent_abs_z_index = 0; } @@ -96,13 +94,15 @@ public: } }; - struct ItemPtrSort { + struct ItemYSort { _FORCE_INLINE_ bool operator()(const Item *p_left, const Item *p_right) const { - if (Math::is_equal_approx(p_left->ysort_pos.y, p_right->ysort_pos.y)) { + const real_t left_y = p_left->ysort_xform.columns[2].y; + const real_t right_y = p_right->ysort_xform.columns[2].y; + if (Math::is_equal_approx(left_y, right_y)) { return p_left->ysort_index < p_right->ysort_index; } - return p_left->ysort_pos.y < p_right->ysort_pos.y; + return left_y < right_y; } }; @@ -187,7 +187,11 @@ public: private: void _render_canvas_item_tree(RID p_to_render_target, Canvas::ChildItem *p_child_items, int p_child_item_count, const Transform2D &p_transform, const Rect2 &p_clip_rect, const Color &p_modulate, RendererCanvasRender::Light *p_lights, RendererCanvasRender::Light *p_directional_lights, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, uint32_t p_canvas_cull_mask, RenderingMethod::RenderInfo *r_render_info = nullptr); - void _cull_canvas_item(Item *p_canvas_item, const Transform2D &p_parent_xform, const Rect2 &p_clip_rect, const Color &p_modulate, int p_z, RendererCanvasRender::Item **r_z_list, RendererCanvasRender::Item **r_z_last_list, Item *p_canvas_clip, Item *p_material_owner, bool p_allow_y_sort, uint32_t p_canvas_cull_mask, const Point2 &p_repeat_size, int p_repeat_times, RendererCanvasRender::Item *p_repeat_source_item); + void _cull_canvas_item(Item *p_canvas_item, const Transform2D &p_parent_xform, const Rect2 &p_clip_rect, const Color &p_modulate, int p_z, RendererCanvasRender::Item **r_z_list, RendererCanvasRender::Item **r_z_last_list, Item *p_canvas_clip, Item *p_material_owner, bool p_is_already_y_sorted, uint32_t p_canvas_cull_mask, const Point2 &p_repeat_size, int p_repeat_times, RendererCanvasRender::Item *p_repeat_source_item); + + void _collect_ysort_children(RendererCanvasCull::Item *p_canvas_item, RendererCanvasCull::Item *p_material_owner, const Color &p_modulate, RendererCanvasCull::Item **r_items, int &r_index, int p_z); + int _count_ysort_children(RendererCanvasCull::Item *p_canvas_item); + void _mark_ysort_dirty(RendererCanvasCull::Item *ysort_owner); static constexpr int z_range = RS::CANVAS_ITEM_Z_MAX - RS::CANVAS_ITEM_Z_MIN + 1; diff --git a/servers/rendering/renderer_rd/environment/fog.cpp b/servers/rendering/renderer_rd/environment/fog.cpp index 2dfcd67411..903d73ff2b 100644 --- a/servers/rendering/renderer_rd/environment/fog.cpp +++ b/servers/rendering/renderer_rd/environment/fog.cpp @@ -622,6 +622,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); bool any_uses_time = false; + Vector3 cam_position = p_cam_transform.get_origin(); for (int i = 0; i < (int)p_fog_volumes.size(); i++) { FogVolumeInstance *fog_volume_instance = fog_volume_instance_owner.get_or_null(p_fog_volumes[i]); @@ -652,41 +653,68 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P any_uses_time |= shader_data->uses_time; - Vector3i min; - Vector3i max; + Vector3i froxel_min; + Vector3i froxel_max; Vector3i kernel_size; - Vector3 position = fog_volume_instance->transform.get_origin(); + Vector3 fog_position = fog_volume_instance->transform.get_origin(); RS::FogVolumeShape volume_type = RendererRD::Fog::get_singleton()->fog_volume_get_shape(fog_volume); Vector3 extents = RendererRD::Fog::get_singleton()->fog_volume_get_size(fog_volume) / 2; if (volume_type != RS::FOG_VOLUME_SHAPE_WORLD) { // Local fog volume. - Vector3i points[8]; Vector3 fog_size = Vector3(fog->width, fog->height, fog->depth); float volumetric_fog_detail_spread = RendererSceneRenderRD::get_singleton()->environment_get_volumetric_fog_detail_spread(p_settings.env); - points[0] = _point_get_position_in_froxel_volume(fog_volume_instance->transform.xform(Vector3(extents.x, extents.y, extents.z)), fog_end, fog_near_size, fog_far_size, volumetric_fog_detail_spread, fog_size, p_cam_transform); - points[1] = _point_get_position_in_froxel_volume(fog_volume_instance->transform.xform(Vector3(-extents.x, extents.y, extents.z)), fog_end, fog_near_size, fog_far_size, volumetric_fog_detail_spread, fog_size, p_cam_transform); - points[2] = _point_get_position_in_froxel_volume(fog_volume_instance->transform.xform(Vector3(extents.x, -extents.y, extents.z)), fog_end, fog_near_size, fog_far_size, volumetric_fog_detail_spread, fog_size, p_cam_transform); - points[3] = _point_get_position_in_froxel_volume(fog_volume_instance->transform.xform(Vector3(-extents.x, -extents.y, extents.z)), fog_end, fog_near_size, fog_far_size, volumetric_fog_detail_spread, fog_size, p_cam_transform); - points[4] = _point_get_position_in_froxel_volume(fog_volume_instance->transform.xform(Vector3(extents.x, extents.y, -extents.z)), fog_end, fog_near_size, fog_far_size, volumetric_fog_detail_spread, fog_size, p_cam_transform); - points[5] = _point_get_position_in_froxel_volume(fog_volume_instance->transform.xform(Vector3(-extents.x, extents.y, -extents.z)), fog_end, fog_near_size, fog_far_size, volumetric_fog_detail_spread, fog_size, p_cam_transform); - points[6] = _point_get_position_in_froxel_volume(fog_volume_instance->transform.xform(Vector3(extents.x, -extents.y, -extents.z)), fog_end, fog_near_size, fog_far_size, volumetric_fog_detail_spread, fog_size, p_cam_transform); - points[7] = _point_get_position_in_froxel_volume(fog_volume_instance->transform.xform(Vector3(-extents.x, -extents.y, -extents.z)), fog_end, fog_near_size, fog_far_size, volumetric_fog_detail_spread, fog_size, p_cam_transform); - - min = Vector3i(int32_t(fog->width) - 1, int32_t(fog->height) - 1, int32_t(fog->depth) - 1); - max = Vector3i(1, 1, 1); - + Vector3 corners[8]{ + fog_volume_instance->transform.xform(Vector3(extents.x, extents.y, extents.z)), + fog_volume_instance->transform.xform(Vector3(-extents.x, extents.y, extents.z)), + fog_volume_instance->transform.xform(Vector3(extents.x, -extents.y, extents.z)), + fog_volume_instance->transform.xform(Vector3(-extents.x, -extents.y, extents.z)), + fog_volume_instance->transform.xform(Vector3(extents.x, extents.y, -extents.z)), + fog_volume_instance->transform.xform(Vector3(-extents.x, extents.y, -extents.z)), + fog_volume_instance->transform.xform(Vector3(extents.x, -extents.y, -extents.z)), + fog_volume_instance->transform.xform(Vector3(-extents.x, -extents.y, -extents.z)) + }; + Vector3i froxels[8]; + Vector3 corner_min = corners[0]; + Vector3 corner_max = corners[0]; for (int j = 0; j < 8; j++) { - min = min.min(points[j]); - max = max.max(points[j]); + froxels[j] = _point_get_position_in_froxel_volume(corners[j], fog_end, fog_near_size, fog_far_size, volumetric_fog_detail_spread, fog_size, p_cam_transform); + corner_min = corner_min.min(corners[j]); + corner_max = corner_max.max(corners[j]); + } + + froxel_min = Vector3i(int32_t(fog->width) - 1, int32_t(fog->height) - 1, int32_t(fog->depth) - 1); + froxel_max = Vector3i(1, 1, 1); + + // Tracking just the corners of the fog volume can result in missing some fog: + // when the camera's near plane is inside the fog, we must always consider the entire screen + Vector3 near_plane_corner(frustum_near_size.x, frustum_near_size.y, z_near); + float expand = near_plane_corner.length(); + if (cam_position.x > (corner_min.x - expand) && cam_position.x < (corner_max.x + expand) && + cam_position.y > (corner_min.y - expand) && cam_position.y < (corner_max.y + expand) && + cam_position.z > (corner_min.z - expand) && cam_position.z < (corner_max.z + expand)) { + froxel_min.x = 0; + froxel_min.y = 0; + froxel_min.z = 0; + froxel_max.x = int32_t(fog->width); + froxel_max.y = int32_t(fog->height); + for (int j = 0; j < 8; j++) { + froxel_max.z = MAX(froxel_max.z, froxels[j].z); + } + } else { + // Camera is guaranteed to be outside the fog volume + for (int j = 0; j < 8; j++) { + froxel_min = froxel_min.min(froxels[j]); + froxel_max = froxel_max.max(froxels[j]); + } } - kernel_size = max - min; + kernel_size = froxel_max - froxel_min; } else { // Volume type global runs on all cells extents = Vector3(fog->width, fog->height, fog->depth); - min = Vector3i(0, 0, 0); + froxel_min = Vector3i(0, 0, 0); kernel_size = Vector3i(int32_t(fog->width), int32_t(fog->height), int32_t(fog->depth)); } @@ -695,15 +723,15 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P } VolumetricFogShader::FogPushConstant push_constant; - push_constant.position[0] = position.x; - push_constant.position[1] = position.y; - push_constant.position[2] = position.z; + push_constant.position[0] = fog_position.x; + push_constant.position[1] = fog_position.y; + push_constant.position[2] = fog_position.z; push_constant.size[0] = extents.x * 2; push_constant.size[1] = extents.y * 2; push_constant.size[2] = extents.z * 2; - push_constant.corner[0] = min.x; - push_constant.corner[1] = min.y; - push_constant.corner[2] = min.z; + push_constant.corner[0] = froxel_min.x; + push_constant.corner[1] = froxel_min.y; + push_constant.corner[2] = froxel_min.z; push_constant.shape = uint32_t(RendererRD::Fog::get_singleton()->fog_volume_get_shape(fog_volume)); RendererRD::MaterialStorage::store_transform(fog_volume_instance->transform.affine_inverse(), push_constant.transform); diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp index 53982af590..532b5b6484 100644 --- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp @@ -610,6 +610,8 @@ void SceneShaderForwardClustered::init(const String p_defines) { actions.renames["PI"] = _MKSTR(Math_PI); actions.renames["TAU"] = _MKSTR(Math_TAU); actions.renames["E"] = _MKSTR(Math_E); + actions.renames["OUTPUT_IS_SRGB"] = "SHADER_IS_SRGB"; + actions.renames["CLIP_SPACE_FAR"] = "SHADER_SPACE_FAR"; actions.renames["VIEWPORT_SIZE"] = "read_viewport_size"; actions.renames["FRAGCOORD"] = "gl_FragCoord"; @@ -649,8 +651,6 @@ void SceneShaderForwardClustered::init(const String p_defines) { actions.renames["CUSTOM1"] = "custom1_attrib"; actions.renames["CUSTOM2"] = "custom2_attrib"; actions.renames["CUSTOM3"] = "custom3_attrib"; - actions.renames["OUTPUT_IS_SRGB"] = "SHADER_IS_SRGB"; - actions.renames["CLIP_SPACE_FAR"] = "SHADER_SPACE_FAR"; actions.renames["LIGHT_VERTEX"] = "light_vertex"; actions.renames["NODE_POSITION_WORLD"] = "read_model_matrix[3].xyz"; diff --git a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp index 69f084f4c0..6dcaadddd3 100644 --- a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp @@ -521,6 +521,8 @@ void SceneShaderForwardMobile::init(const String p_defines) { actions.renames["PI"] = _MKSTR(Math_PI); actions.renames["TAU"] = _MKSTR(Math_TAU); actions.renames["E"] = _MKSTR(Math_E); + actions.renames["OUTPUT_IS_SRGB"] = "SHADER_IS_SRGB"; + actions.renames["CLIP_SPACE_FAR"] = "SHADER_SPACE_FAR"; actions.renames["VIEWPORT_SIZE"] = "read_viewport_size"; actions.renames["FRAGCOORD"] = "gl_FragCoord"; @@ -560,8 +562,6 @@ void SceneShaderForwardMobile::init(const String p_defines) { actions.renames["CUSTOM1"] = "custom1_attrib"; actions.renames["CUSTOM2"] = "custom2_attrib"; actions.renames["CUSTOM3"] = "custom3_attrib"; - actions.renames["OUTPUT_IS_SRGB"] = "SHADER_IS_SRGB"; - actions.renames["CLIP_SPACE_FAR"] = "SHADER_SPACE_FAR"; actions.renames["LIGHT_VERTEX"] = "light_vertex"; actions.renames["NODE_POSITION_WORLD"] = "read_model_matrix[3].xyz"; diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.h b/servers/rendering/renderer_rd/renderer_compositor_rd.h index dcd3e90e1b..2547f08715 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.h +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.h @@ -31,6 +31,7 @@ #ifndef RENDERER_COMPOSITOR_RD_H #define RENDERER_COMPOSITOR_RD_H +#include "core/io/image.h" #include "core/os/os.h" #include "servers/rendering/renderer_compositor.h" #include "servers/rendering/renderer_rd/environment/fog.h" diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp index 0c21fec282..dc2605b670 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -31,6 +31,7 @@ #include "renderer_scene_render_rd.h" #include "core/config/project_settings.h" +#include "core/io/image.h" #include "core/os/os.h" #include "renderer_compositor_rd.h" #include "servers/rendering/renderer_rd/environment/fog.h" diff --git a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl index a797891ab6..90bbdfe685 100644 --- a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl @@ -513,6 +513,7 @@ void main() { shadow_sample.z = 1.0 + abs(shadow_sample.z); vec3 pos = vec3(shadow_sample.xy / shadow_sample.z, shadow_len - omni_lights.data[light_index].shadow_bias); pos.z *= omni_lights.data[light_index].inv_radius; + pos.z = 1.0 - pos.z; pos.xy = pos.xy * 0.5 + 0.5; pos.xy = uv_rect.xy + pos.xy * uv_rect.zw; diff --git a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl index 9d47711599..2f0e4e0bea 100644 --- a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl @@ -105,10 +105,6 @@ layout(location = 6) mediump out vec3 binormal_interp; layout(location = 7) highp out vec4 diffuse_light_interp; layout(location = 8) highp out vec4 specular_light_interp; -layout(constant_id = 9) const bool sc_disable_omni_lights = false; -layout(constant_id = 10) const bool sc_disable_spot_lights = false; -layout(constant_id = 12) const bool sc_disable_directional_lights = false; - #include "../scene_forward_vertex_lights_inc.glsl" #endif // !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) && defined(USE_VERTEX_LIGHTING) #ifdef MATERIAL_UNIFORMS_USED @@ -1606,7 +1602,6 @@ void main() { #endif #undef BIAS_FUNC } -#endif if (i < 4) { shadow0 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << (i * 8); @@ -1614,6 +1609,7 @@ void main() { shadow1 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << ((i - 4) * 8); } } +#endif // SHADOWS_DISABLED #ifndef USE_VERTEX_LIGHTING for (uint i = 0; i < scene_data.directional_light_count; i++) { diff --git a/servers/rendering/renderer_rd/storage_rd/light_storage.cpp b/servers/rendering/renderer_rd/storage_rd/light_storage.cpp index 8f71909154..3639b5739b 100644 --- a/servers/rendering/renderer_rd/storage_rd/light_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/light_storage.cpp @@ -285,6 +285,23 @@ void LightStorage::light_set_reverse_cull_face_mode(RID p_light, bool p_enabled) light->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_LIGHT); } +void LightStorage::light_set_shadow_caster_mask(RID p_light, uint32_t p_caster_mask) { + Light *light = light_owner.get_or_null(p_light); + ERR_FAIL_NULL(light); + + light->shadow_caster_mask = p_caster_mask; + + light->version++; + light->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_LIGHT); +} + +uint32_t LightStorage::light_get_shadow_caster_mask(RID p_light) const { + Light *light = light_owner.get_or_null(p_light); + ERR_FAIL_NULL_V(light, 0); + + return light->shadow_caster_mask; +} + void LightStorage::light_set_bake_mode(RID p_light, RS::LightBakeMode p_bake_mode) { Light *light = light_owner.get_or_null(p_light); ERR_FAIL_NULL(light); diff --git a/servers/rendering/renderer_rd/storage_rd/light_storage.h b/servers/rendering/renderer_rd/storage_rd/light_storage.h index 59303e8a73..80c62a7bc4 100644 --- a/servers/rendering/renderer_rd/storage_rd/light_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/light_storage.h @@ -72,6 +72,7 @@ private: RS::LightBakeMode bake_mode = RS::LIGHT_BAKE_DYNAMIC; uint32_t max_sdfgi_cascade = 2; uint32_t cull_mask = 0xFFFFFFFF; + uint32_t shadow_caster_mask = 0xFFFFFFFF; bool distance_fade = false; real_t distance_fade_begin = 40.0; real_t distance_fade_shadow = 50.0; @@ -480,6 +481,8 @@ public: virtual void light_set_cull_mask(RID p_light, uint32_t p_mask) override; virtual void light_set_distance_fade(RID p_light, bool p_enabled, float p_begin, float p_shadow, float p_length) override; virtual void light_set_reverse_cull_face_mode(RID p_light, bool p_enabled) override; + virtual void light_set_shadow_caster_mask(RID p_light, uint32_t p_caster_mask) override; + virtual uint32_t light_get_shadow_caster_mask(RID p_light) const override; virtual void light_set_bake_mode(RID p_light, RS::LightBakeMode p_bake_mode) override; virtual void light_set_max_sdfgi_cascade(RID p_light, uint32_t p_cascade) override; diff --git a/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp b/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp index 9dc606620b..42fce65b2d 100644 --- a/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp @@ -518,6 +518,32 @@ TextureStorage::TextureStorage() { rt_sdf.pipelines[i] = RD::get_singleton()->compute_pipeline_create(rt_sdf.shader.version_get_shader(rt_sdf.shader_version, i)); } } + + // Initialize texture placeholder data for the `texture_*_placeholder_initialize()` methods. + + constexpr int placeholder_size = 4; + texture_2d_placeholder = Image::create_empty(placeholder_size, placeholder_size, false, Image::FORMAT_RGBA8); + // Draw a magenta/black checkerboard pattern. + for (int i = 0; i < placeholder_size * placeholder_size; i++) { + const int x = i % placeholder_size; + const int y = i / placeholder_size; + texture_2d_placeholder->set_pixel(x, y, (x + y) % 2 == 0 ? Color(1, 0, 1) : Color(0, 0, 0)); + } + + texture_2d_array_placeholder.push_back(texture_2d_placeholder); + + for (int i = 0; i < 6; i++) { + cubemap_placeholder.push_back(texture_2d_placeholder); + } + + Ref<Image> texture_2d_placeholder_rotated; + texture_2d_placeholder_rotated.instantiate(); + texture_2d_placeholder_rotated->copy_from(texture_2d_placeholder); + texture_2d_placeholder_rotated->rotate_90(CLOCKWISE); + for (int i = 0; i < 4; i++) { + // Alternate checkerboard pattern on odd layers (by using a copy that is rotated 90 degrees). + texture_3d_placeholder.push_back(i % 2 == 0 ? texture_2d_placeholder : texture_2d_placeholder_rotated); + } } TextureStorage::~TextureStorage() { @@ -1365,46 +1391,19 @@ void TextureStorage::texture_proxy_update(RID p_texture, RID p_proxy_to) { //these two APIs can be used together or in combination with the others. void TextureStorage::texture_2d_placeholder_initialize(RID p_texture) { - //this could be better optimized to reuse an existing image , done this way - //for now to get it working - Ref<Image> image = Image::create_empty(4, 4, false, Image::FORMAT_RGBA8); - image->fill(Color(1, 0, 1, 1)); - - texture_2d_initialize(p_texture, image); + texture_2d_initialize(p_texture, texture_2d_placeholder); } void TextureStorage::texture_2d_layered_placeholder_initialize(RID p_texture, RS::TextureLayeredType p_layered_type) { - //this could be better optimized to reuse an existing image , done this way - //for now to get it working - Ref<Image> image = Image::create_empty(4, 4, false, Image::FORMAT_RGBA8); - image->fill(Color(1, 0, 1, 1)); - - Vector<Ref<Image>> images; if (p_layered_type == RS::TEXTURE_LAYERED_2D_ARRAY) { - images.push_back(image); + texture_2d_layered_initialize(p_texture, texture_2d_array_placeholder, p_layered_type); } else { - //cube - for (int i = 0; i < 6; i++) { - images.push_back(image); - } + texture_2d_layered_initialize(p_texture, cubemap_placeholder, p_layered_type); } - - texture_2d_layered_initialize(p_texture, images, p_layered_type); } void TextureStorage::texture_3d_placeholder_initialize(RID p_texture) { - //this could be better optimized to reuse an existing image , done this way - //for now to get it working - Ref<Image> image = Image::create_empty(4, 4, false, Image::FORMAT_RGBA8); - image->fill(Color(1, 0, 1, 1)); - - Vector<Ref<Image>> images; - //cube - for (int i = 0; i < 4; i++) { - images.push_back(image); - } - - texture_3d_initialize(p_texture, Image::FORMAT_RGBA8, 4, 4, 4, false, images); + texture_3d_initialize(p_texture, Image::FORMAT_RGBA8, 4, 4, 4, false, texture_3d_placeholder); } Ref<Image> TextureStorage::texture_2d_get(RID p_texture) const { @@ -2269,6 +2268,16 @@ void TextureStorage::_texture_format_from_rd(RD::DataFormat p_rd_format, Texture r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; } break; + case RD::DATA_FORMAT_B8G8R8A8_UNORM: + case RD::DATA_FORMAT_B8G8R8A8_SRGB: { + r_format.image_format = Image::FORMAT_RGBA8; + r_format.rd_format = RD::DATA_FORMAT_B8G8R8A8_UNORM; + r_format.rd_format_srgb = RD::DATA_FORMAT_B8G8R8A8_SRGB; + r_format.swizzle_r = RD::TEXTURE_SWIZZLE_R; + r_format.swizzle_g = RD::TEXTURE_SWIZZLE_G; + r_format.swizzle_b = RD::TEXTURE_SWIZZLE_B; + r_format.swizzle_a = RD::TEXTURE_SWIZZLE_A; + } break; case RD::DATA_FORMAT_B4G4R4A4_UNORM_PACK16: { r_format.image_format = Image::FORMAT_RGBA4444; r_format.rd_format = RD::DATA_FORMAT_B4G4R4A4_UNORM_PACK16; diff --git a/servers/rendering/renderer_rd/storage_rd/texture_storage.h b/servers/rendering/renderer_rd/storage_rd/texture_storage.h index 2135ee3e3b..866fdd50ac 100644 --- a/servers/rendering/renderer_rd/storage_rd/texture_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/texture_storage.h @@ -520,6 +520,11 @@ public: virtual void texture_external_update(RID p_texture, int p_width, int p_height, uint64_t p_external_buffer) override; virtual void texture_proxy_update(RID p_proxy, RID p_base) override; + Ref<Image> texture_2d_placeholder; + Vector<Ref<Image>> texture_2d_array_placeholder; + Vector<Ref<Image>> cubemap_placeholder; + Vector<Ref<Image>> texture_3d_placeholder; + //these two APIs can be used together or in combination with the others. virtual void texture_2d_placeholder_initialize(RID p_texture) override; virtual void texture_2d_layered_placeholder_initialize(RID p_texture, RenderingServer::TextureLayeredType p_layered_type) override; diff --git a/servers/rendering/renderer_scene_cull.cpp b/servers/rendering/renderer_scene_cull.cpp index 8b7ec08868..45f980ab41 100644 --- a/servers/rendering/renderer_scene_cull.cpp +++ b/servers/rendering/renderer_scene_cull.cpp @@ -1998,6 +1998,9 @@ void RendererSceneCull::_update_instance(Instance *p_instance) { pair.bvh2 = &p_instance->scenario->indexers[Scenario::INDEXER_VOLUMES]; } pair.cull_mask = RSG::light_storage->light_get_cull_mask(p_instance->base); + } else if (p_instance->base_type == RS::INSTANCE_LIGHTMAP) { + pair.pair_mask = RS::INSTANCE_GEOMETRY_MASK; + pair.bvh = &p_instance->scenario->indexers[Scenario::INDEXER_GEOMETRY]; } else if (geometry_instance_pair_mask & (1 << RS::INSTANCE_REFLECTION_PROBE) && (p_instance->base_type == RS::INSTANCE_REFLECTION_PROBE)) { pair.pair_mask = RS::INSTANCE_GEOMETRY_MASK; pair.bvh = &p_instance->scenario->indexers[Scenario::INDEXER_GEOMETRY]; @@ -2296,6 +2299,7 @@ void RendererSceneCull::_light_instance_setup_directional_shadow(int p_shadow_in cull.shadow_count = p_shadow_index + 1; cull.shadows[p_shadow_index].cascade_count = splits; cull.shadows[p_shadow_index].light_instance = light->instance; + cull.shadows[p_shadow_index].caster_mask = RSG::light_storage->light_get_shadow_caster_mask(p_instance->base); for (int i = 0; i < splits; i++) { RENDER_TIMESTAMP("Cull DirectionalLight3D, Split " + itos(i)); @@ -2526,7 +2530,7 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) { Instance *instance = instance_shadow_cull_result[j]; - if (!instance->visible || !((1 << instance->base_type) & RS::INSTANCE_GEOMETRY_MASK) || !static_cast<InstanceGeometryData *>(instance->base_data)->can_cast_shadows || !(p_visible_layers & instance->layer_mask)) { + if (!instance->visible || !((1 << instance->base_type) & RS::INSTANCE_GEOMETRY_MASK) || !static_cast<InstanceGeometryData *>(instance->base_data)->can_cast_shadows || !(p_visible_layers & instance->layer_mask & RSG::light_storage->light_get_shadow_caster_mask(p_instance->base))) { continue; } else { if (static_cast<InstanceGeometryData *>(instance->base_data)->material_is_animated) { @@ -2608,7 +2612,7 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) { Instance *instance = instance_shadow_cull_result[j]; - if (!instance->visible || !((1 << instance->base_type) & RS::INSTANCE_GEOMETRY_MASK) || !static_cast<InstanceGeometryData *>(instance->base_data)->can_cast_shadows || !(p_visible_layers & instance->layer_mask)) { + if (!instance->visible || !((1 << instance->base_type) & RS::INSTANCE_GEOMETRY_MASK) || !static_cast<InstanceGeometryData *>(instance->base_data)->can_cast_shadows || !(p_visible_layers & instance->layer_mask & RSG::light_storage->light_get_shadow_caster_mask(p_instance->base))) { continue; } else { if (static_cast<InstanceGeometryData *>(instance->base_data)->material_is_animated) { @@ -2675,7 +2679,7 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) { Instance *instance = instance_shadow_cull_result[j]; - if (!instance->visible || !((1 << instance->base_type) & RS::INSTANCE_GEOMETRY_MASK) || !static_cast<InstanceGeometryData *>(instance->base_data)->can_cast_shadows || !(p_visible_layers & instance->layer_mask)) { + if (!instance->visible || !((1 << instance->base_type) & RS::INSTANCE_GEOMETRY_MASK) || !static_cast<InstanceGeometryData *>(instance->base_data)->can_cast_shadows || !(p_visible_layers & instance->layer_mask & RSG::light_storage->light_get_shadow_caster_mask(p_instance->base))) { continue; } else { if (static_cast<InstanceGeometryData *>(instance->base_data)->material_is_animated) { @@ -3033,6 +3037,10 @@ void RendererSceneCull::_scene_cull(CullData &cull_data, InstanceCullResult &cul for (const Instance *E : geom->lights) { InstanceLightData *light = static_cast<InstanceLightData *>(E->base_data); + if (!(RSG::light_storage->light_get_cull_mask(E->base) & idata.layer_mask)) { + continue; + } + instance_pair_buffer[idx++] = light->instance; if (idx == MAX_INSTANCE_PAIRS) { break; @@ -3137,7 +3145,7 @@ void RendererSceneCull::_scene_cull(CullData &cull_data, InstanceCullResult &cul if (IN_FRUSTUM(cull_data.cull->shadows[j].cascades[k].frustum) && VIS_CHECK) { uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK; - if (((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) && idata.flags & InstanceData::FLAG_CAST_SHADOWS && LAYER_CHECK) { + if (((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) && idata.flags & InstanceData::FLAG_CAST_SHADOWS && (LAYER_CHECK & cull_data.cull->shadows[j].caster_mask)) { cull_result.directional_shadows[j].cascade_geometry_instances[k].push_back(idata.instance_geometry); mesh_visible = true; } diff --git a/servers/rendering/renderer_scene_cull.h b/servers/rendering/renderer_scene_cull.h index 5aae59eb51..d3f03c4789 100644 --- a/servers/rendering/renderer_scene_cull.h +++ b/servers/rendering/renderer_scene_cull.h @@ -1121,6 +1121,7 @@ public: struct Cull { struct Shadow { RID light_instance; + uint32_t caster_mask; struct Cascade { Frustum frustum; diff --git a/servers/rendering/renderer_scene_occlusion_cull.h b/servers/rendering/renderer_scene_occlusion_cull.h index df403c5484..0ef92dd9ff 100644 --- a/servers/rendering/renderer_scene_occlusion_cull.h +++ b/servers/rendering/renderer_scene_occlusion_cull.h @@ -72,7 +72,7 @@ public: return false; } - float min_depth = -closest_point_view.z * 0.95f; + float min_depth = (closest_point - p_cam_position).length(); Vector2 rect_min = Vector2(FLT_MAX, FLT_MAX); Vector2 rect_max = Vector2(FLT_MIN, FLT_MIN); @@ -83,6 +83,10 @@ public: Vector3 corner = Vector3(p_bounds[0] * c.x + p_bounds[3] * nc.x, p_bounds[1] * c.y + p_bounds[4] * nc.y, p_bounds[2] * c.z + p_bounds[5] * nc.z); Vector3 view = p_cam_inv_transform.xform(corner); + if (p_cam_projection.is_orthogonal()) { + min_depth = MIN(min_depth, view.z); + } + Plane vp = Plane(view, 1.0); Plane projected = p_cam_projection.xform4(vp); diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index b287550986..6eb1386749 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -1243,6 +1243,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons TransferWorker *transfer_worker = nullptr; const uint8_t *read_ptr = p_data.ptr(); uint8_t *write_ptr = nullptr; + const RDD::TextureLayout copy_dst_layout = driver->api_trait_get(RDD::API_TRAIT_USE_GENERAL_IN_COPY_QUEUES) ? RDD::TEXTURE_LAYOUT_GENERAL : RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL; for (uint32_t pass = 0; pass < 2; pass++) { const bool copy_pass = (pass == 1); if (copy_pass) { @@ -1267,7 +1268,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons tb.texture = texture->driver_id; tb.dst_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT; tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED; - tb.next_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL; + tb.next_layout = copy_dst_layout; tb.subresources.aspect = texture->barrier_aspect_flags; tb.subresources.mipmap_count = texture->mipmaps; tb.subresources.base_layer = p_layer; @@ -1313,7 +1314,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons copy_region.texture_subresources.layer_count = 1; copy_region.texture_offset = Vector3i(0, 0, z); copy_region.texture_region_size = Vector3i(logic_width, logic_height, 1); - driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, copy_region); + driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, copy_dst_layout, copy_region); } staging_local_offset += to_allocate; @@ -1332,14 +1333,13 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons RDD::TextureBarrier tb; tb.texture = texture->driver_id; tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT; - tb.prev_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL; + tb.prev_layout = copy_dst_layout; tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; tb.subresources.aspect = texture->barrier_aspect_flags; tb.subresources.mipmap_count = texture->mipmaps; tb.subresources.base_layer = p_layer; tb.subresources.layer_count = 1; - - driver->command_pipeline_barrier(transfer_worker->command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb); + transfer_worker->texture_barriers.push_back(tb); } _release_transfer_worker(transfer_worker); @@ -5052,7 +5052,6 @@ RenderingDevice::TransferWorker *RenderingDevice::_acquire_transfer_worker(uint3 // No existing worker was picked, we create a new one. transfer_worker = memnew(TransferWorker); transfer_worker->command_fence = driver->fence_create(); - transfer_worker->command_semaphore = driver->semaphore_create(); transfer_worker->command_pool = driver->command_pool_create(transfer_queue_family, RDD::COMMAND_BUFFER_TYPE_PRIMARY); transfer_worker->command_buffer = driver->command_buffer_create(transfer_worker->command_pool); transfer_worker->index = transfer_worker_pool.size(); @@ -5075,7 +5074,7 @@ RenderingDevice::TransferWorker *RenderingDevice::_acquire_transfer_worker(uint3 // If there's not enough bytes to use on the staging buffer, we submit everything pending from the worker and wait for the work to be finished. if (transfer_worker->recording) { _end_transfer_worker(transfer_worker); - _submit_transfer_worker(transfer_worker, false); + _submit_transfer_worker(transfer_worker); } if (transfer_worker->submitted) { @@ -5128,12 +5127,12 @@ void RenderingDevice::_end_transfer_worker(TransferWorker *p_transfer_worker) { p_transfer_worker->recording = false; } -void RenderingDevice::_submit_transfer_worker(TransferWorker *p_transfer_worker, bool p_signal_semaphore) { - const VectorView<RDD::SemaphoreID> execute_semaphore = p_signal_semaphore ? p_transfer_worker->command_semaphore : VectorView<RDD::SemaphoreID>(); - driver->command_queue_execute_and_present(transfer_queue, {}, p_transfer_worker->command_buffer, execute_semaphore, p_transfer_worker->command_fence, {}); - if (p_signal_semaphore) { +void RenderingDevice::_submit_transfer_worker(TransferWorker *p_transfer_worker, VectorView<RDD::SemaphoreID> p_signal_semaphores) { + driver->command_queue_execute_and_present(transfer_queue, {}, p_transfer_worker->command_buffer, p_signal_semaphores, p_transfer_worker->command_fence, {}); + + for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) { // Indicate the frame should wait on these semaphores before executing the main command buffer. - frames[frame].semaphores_to_wait_on.push_back(p_transfer_worker->command_semaphore); + frames[frame].semaphores_to_wait_on.push_back(p_signal_semaphores[i]); } p_transfer_worker->submitted = true; @@ -5153,6 +5152,21 @@ void RenderingDevice::_wait_for_transfer_worker(TransferWorker *p_transfer_worke MutexLock lock(p_transfer_worker->operations_mutex); p_transfer_worker->operations_processed = p_transfer_worker->operations_submitted; } + + if (!p_transfer_worker->texture_barriers.is_empty()) { + MutexLock transfer_worker_lock(transfer_worker_pool_mutex); + _flush_barriers_for_transfer_worker(p_transfer_worker); + } +} + +void RenderingDevice::_flush_barriers_for_transfer_worker(TransferWorker *p_transfer_worker) { + if (!p_transfer_worker->texture_barriers.is_empty()) { + for (uint32_t i = 0; i < p_transfer_worker->texture_barriers.size(); i++) { + transfer_worker_pool_texture_barriers.push_back(p_transfer_worker->texture_barriers[i]); + } + + p_transfer_worker->texture_barriers.clear(); + } } void RenderingDevice::_check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation) { @@ -5194,10 +5208,11 @@ void RenderingDevice::_check_transfer_worker_index_array(IndexArray *p_index_arr } } -void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) { +void RenderingDevice::_submit_transfer_workers(RDD::CommandBufferID p_draw_command_buffer) { MutexLock transfer_worker_lock(transfer_worker_pool_mutex); - for (TransferWorker *worker : transfer_worker_pool) { - if (p_operations_used_by_draw) { + for (uint32_t i = 0; i < transfer_worker_pool.size(); i++) { + TransferWorker *worker = transfer_worker_pool[i]; + if (p_draw_command_buffer) { MutexLock lock(worker->operations_mutex); if (worker->operations_processed >= transfer_worker_operation_used_by_draw[worker->index]) { // The operation used by the draw has already been processed, we don't need to wait on the worker. @@ -5208,11 +5223,21 @@ void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) { { MutexLock lock(worker->thread_mutex); if (worker->recording) { + VectorView<RDD::SemaphoreID> semaphores = p_draw_command_buffer ? frames[frame].transfer_worker_semaphores[i] : VectorView<RDD::SemaphoreID>(); _end_transfer_worker(worker); - _submit_transfer_worker(worker, true); + _submit_transfer_worker(worker, semaphores); + } + + if (p_draw_command_buffer) { + _flush_barriers_for_transfer_worker(worker); } } } + + if (p_draw_command_buffer && !transfer_worker_pool_texture_barriers.is_empty()) { + driver->command_pipeline_barrier(p_draw_command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, {}, {}, transfer_worker_pool_texture_barriers); + transfer_worker_pool_texture_barriers.clear(); + } } void RenderingDevice::_wait_for_transfer_workers() { @@ -5228,7 +5253,6 @@ void RenderingDevice::_wait_for_transfer_workers() { void RenderingDevice::_free_transfer_workers() { MutexLock transfer_worker_lock(transfer_worker_pool_mutex); for (TransferWorker *worker : transfer_worker_pool) { - driver->semaphore_free(worker->command_semaphore); driver->fence_free(worker->command_fence); driver->buffer_free(worker->staging_buffer); driver->command_pool_free(worker->command_pool); @@ -5807,10 +5831,10 @@ void RenderingDevice::_end_frame() { ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work)."); } - _submit_transfer_workers(true); - // The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use. RDD::CommandBufferID command_buffer = frames[frame].command_buffer; + _submit_transfer_workers(command_buffer); + draw_graph.end(RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool); driver->command_buffer_end(command_buffer); driver->end_segment(); @@ -6014,6 +6038,9 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ present_queue_family = main_queue_family; } + // Use the processor count as the max amount of transfer workers that can be created. + transfer_worker_pool_max_size = OS::get_singleton()->get_processor_count(); + // Create data for all the frames. for (uint32_t i = 0; i < frames.size(); i++) { frames[i].index = 0; @@ -6041,6 +6068,13 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ // Assign the main queue family and command pool to the command buffer pool. frames[i].command_buffer_pool.pool = frames[i].command_pool; + + // Create the semaphores for the transfer workers. + frames[i].transfer_worker_semaphores.resize(transfer_worker_pool_max_size); + for (uint32_t j = 0; j < transfer_worker_pool_max_size; j++) { + frames[i].transfer_worker_semaphores[j] = driver->semaphore_create(); + ERR_FAIL_COND_V(!frames[i].transfer_worker_semaphores[j], FAILED); + } } // Start from frame count, so everything else is immediately old. @@ -6087,9 +6121,6 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ ERR_FAIL_COND_V(err, FAILED); } - // TODO: How should this max size be determined? - transfer_worker_pool_max_size = OS::get_singleton()->get_processor_count(); - draw_list = nullptr; compute_list = nullptr; @@ -6380,7 +6411,7 @@ void RenderingDevice::finalize() { } // Wait for transfer workers to finish. - _submit_transfer_workers(false); + _submit_transfer_workers(); _wait_for_transfer_workers(); // Delete everything the graph has created. @@ -6452,6 +6483,10 @@ void RenderingDevice::finalize() { for (uint32_t j = 0; j < buffer_pool.buffers.size(); j++) { driver->semaphore_free(buffer_pool.semaphores[j]); } + + for (uint32_t j = 0; j < frames[i].transfer_worker_semaphores.size(); j++) { + driver->semaphore_free(frames[i].transfer_worker_semaphores[j]); + } } if (pipeline_cache_enabled) { @@ -7225,6 +7260,10 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(DEBUG_PASS); } +void RenderingDevice::make_current() { + render_thread_id = Thread::get_caller_id(); +} + RenderingDevice::~RenderingDevice() { finalize(); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 19d21dfda2..c440e11cd4 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -1267,7 +1267,7 @@ private: RDD::CommandBufferID command_buffer; RDD::CommandPoolID command_pool; RDD::FenceID command_fence; - RDD::SemaphoreID command_semaphore; + LocalVector<RDD::TextureBarrier> texture_barriers; bool recording = false; bool submitted = false; BinaryMutex thread_mutex; @@ -1281,20 +1281,22 @@ private: uint32_t transfer_worker_pool_max_size = 1; LocalVector<uint64_t> transfer_worker_operation_used_by_draw; LocalVector<uint32_t> transfer_worker_pool_available_list; + LocalVector<RDD::TextureBarrier> transfer_worker_pool_texture_barriers; BinaryMutex transfer_worker_pool_mutex; ConditionVariable transfer_worker_pool_condition; TransferWorker *_acquire_transfer_worker(uint32_t p_transfer_size, uint32_t p_required_align, uint32_t &r_staging_offset); void _release_transfer_worker(TransferWorker *p_transfer_worker); void _end_transfer_worker(TransferWorker *p_transfer_worker); - void _submit_transfer_worker(TransferWorker *p_transfer_worker, bool p_signal_semaphore); + void _submit_transfer_worker(TransferWorker *p_transfer_worker, VectorView<RDD::SemaphoreID> p_signal_semaphores = VectorView<RDD::SemaphoreID>()); void _wait_for_transfer_worker(TransferWorker *p_transfer_worker); + void _flush_barriers_for_transfer_worker(TransferWorker *p_transfer_worker); void _check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation); void _check_transfer_worker_buffer(Buffer *p_buffer); void _check_transfer_worker_texture(Texture *p_texture); void _check_transfer_worker_vertex_array(VertexArray *p_vertex_array); void _check_transfer_worker_index_array(IndexArray *p_index_array); - void _submit_transfer_workers(bool p_operations_used_by_draw); + void _submit_transfer_workers(RDD::CommandBufferID p_draw_command_buffer = RDD::CommandBufferID()); void _wait_for_transfer_workers(); void _free_transfer_workers(); @@ -1372,6 +1374,10 @@ private: // Swap chains prepared for drawing during the frame that must be presented. LocalVector<RDD::SwapChainID> swap_chains_to_present; + // Semaphores the transfer workers can use to wait before rendering the frame. + // This must have the same size of the transfer worker pool. + TightLocalVector<RDD::SemaphoreID> transfer_worker_semaphores; + // Extra command buffer pool used for driver workarounds. RDG::CommandBufferPool command_buffer_pool; @@ -1490,6 +1496,8 @@ public: static RenderingDevice *get_singleton(); + void make_current(); + RenderingDevice(); ~RenderingDevice(); diff --git a/servers/rendering/rendering_device_binds.cpp b/servers/rendering/rendering_device_binds.cpp index d9ca286b15..e41a56b0a3 100644 --- a/servers/rendering/rendering_device_binds.cpp +++ b/servers/rendering/rendering_device_binds.cpp @@ -31,7 +31,10 @@ #include "rendering_device_binds.h" Error RDShaderFile::parse_versions_from_text(const String &p_text, const String p_defines, OpenIncludeFunction p_include_func, void *p_include_func_userdata) { - ERR_FAIL_NULL_V(RenderingDevice::get_singleton(), ERR_UNAVAILABLE); + ERR_FAIL_NULL_V_MSG( + RenderingDevice::get_singleton(), + ERR_UNAVAILABLE, + "Cannot import custom .glsl shaders when running without a RenderingDevice. This can happen if you are using the headless more or the Compatibility backend."); Vector<String> lines = p_text.split("\n"); diff --git a/servers/rendering/rendering_device_driver.cpp b/servers/rendering/rendering_device_driver.cpp index 3b8e3efeb8..9ff7b83215 100644 --- a/servers/rendering/rendering_device_driver.cpp +++ b/servers/rendering/rendering_device_driver.cpp @@ -374,6 +374,8 @@ uint64_t RenderingDeviceDriver::api_trait_get(ApiTrait p_trait) { return 1; case API_TRAIT_CLEARS_WITH_COPY_ENGINE: return true; + case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES: + return false; default: ERR_FAIL_V(0); } diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index 91da67c8d7..637d52c060 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -220,6 +220,7 @@ public: enum TextureLayout { TEXTURE_LAYOUT_UNDEFINED, + TEXTURE_LAYOUT_GENERAL, TEXTURE_LAYOUT_STORAGE_OPTIMAL, TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, @@ -750,6 +751,7 @@ public: API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP, API_TRAIT_SECONDARY_VIEWPORT_SCISSOR, API_TRAIT_CLEARS_WITH_COPY_ENGINE, + API_TRAIT_USE_GENERAL_IN_COPY_QUEUES, }; enum ShaderChangeInvalidation { diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index abcb76cd43..0ecd818805 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -34,6 +34,7 @@ #define FORCE_FULL_ACCESS_BITS 0 #define PRINT_RESOURCE_TRACKER_TOTAL 0 #define PRINT_COMMAND_RECORDING 0 +#define INSERT_BREADCRUMBS 1 RenderingDeviceGraph::RenderingDeviceGraph() { driver_honors_barriers = false; @@ -438,6 +439,15 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr // Always update the access of the tracker according to the latest usage. resource_tracker->usage_access = new_usage_access; + // Always accumulate the stages of the tracker with the commands that use it. + search_tracker->current_frame_stages = search_tracker->current_frame_stages | r_command->self_stages; + + if (!search_tracker->previous_frame_stages.is_empty()) { + // Add to the command the stages the tracker was used on in the previous frame. + r_command->previous_stages = r_command->previous_stages | search_tracker->previous_frame_stages; + search_tracker->previous_frame_stages.clear(); + } + if (different_usage) { // Even if the usage of the resource isn't a write usage explicitly, a different usage implies a transition and it should therefore be considered a write. write_usage = true; @@ -823,7 +833,7 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC const RecordedDrawListCommand *draw_list_command = reinterpret_cast<const RecordedDrawListCommand *>(command); const VectorView clear_values(draw_list_command->clear_values(), draw_list_command->clear_values_count); -#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) +#if INSERT_BREADCRUMBS driver->command_insert_breadcrumb(r_command_buffer, draw_list_command->breadcrumb); #endif driver->command_begin_render_pass(r_command_buffer, draw_list_command->render_pass, draw_list_command->framebuffer, draw_list_command->command_buffer_type, draw_list_command->region, clear_values); @@ -874,7 +884,7 @@ void RenderingDeviceGraph::_run_label_command_change(RDD::CommandBufferID p_comm } if (p_ignore_previous_value || p_new_label_index != r_current_label_index || p_new_level != r_current_label_level) { - if (!p_ignore_previous_value && (p_use_label_for_empty || r_current_label_index >= 0)) { + if (!p_ignore_previous_value && (p_use_label_for_empty || r_current_label_index >= 0 || r_current_label_level >= 0)) { // End the current label. driver->command_end_label(p_command_buffer); } @@ -888,6 +898,8 @@ void RenderingDeviceGraph::_run_label_command_change(RDD::CommandBufferID p_comm } else if (p_use_label_for_empty) { label_name = "Command graph"; label_color = Color(1, 1, 1, 1); + } else { + return; } // Add the level to the name. @@ -2064,7 +2076,7 @@ void RenderingDeviceGraph::end(bool p_reorder_commands, bool p_full_barriers, RD } } - _run_label_command_change(r_command_buffer, -1, -1, true, false, nullptr, 0, current_label_index, current_label_level); + _run_label_command_change(r_command_buffer, -1, -1, false, false, nullptr, 0, current_label_index, current_label_level); #if PRINT_COMMAND_RECORDING print_line(vformat("Recorded %d commands", command_count)); diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index e13e3a0429..0534d4ee1e 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -151,6 +151,8 @@ public: struct ResourceTracker { uint32_t reference_count = 0; int64_t command_frame = -1; + BitField<RDD::PipelineStageBits> previous_frame_stages; + BitField<RDD::PipelineStageBits> current_frame_stages; int32_t read_full_command_list_index = -1; int32_t read_slice_command_list_index = -1; int32_t write_command_or_list_index = -1; @@ -174,8 +176,9 @@ public: _FORCE_INLINE_ void reset_if_outdated(int64_t new_command_frame) { if (new_command_frame != command_frame) { - usage_access.clear(); command_frame = new_command_frame; + previous_frame_stages = current_frame_stages; + current_frame_stages.clear(); read_full_command_list_index = -1; read_slice_command_list_index = -1; write_command_or_list_index = -1; diff --git a/servers/rendering/rendering_server_default.cpp b/servers/rendering/rendering_server_default.cpp index 20f1f9ad6f..2ec693cbbf 100644 --- a/servers/rendering/rendering_server_default.cpp +++ b/servers/rendering/rendering_server_default.cpp @@ -370,6 +370,8 @@ Size2i RenderingServerDefault::get_maximum_viewport_size() const { void RenderingServerDefault::_assign_mt_ids(WorkerThreadPool::TaskID p_pump_task_id) { server_thread = Thread::get_caller_id(); server_task_id = p_pump_task_id; + // This is needed because the main RD is created on the main thread. + RenderingDevice::get_singleton()->make_current(); } void RenderingServerDefault::_thread_exit() { diff --git a/servers/rendering/rendering_server_default.h b/servers/rendering/rendering_server_default.h index 225a67fb52..766ec8fa96 100644 --- a/servers/rendering/rendering_server_default.h +++ b/servers/rendering/rendering_server_default.h @@ -443,6 +443,7 @@ public: FUNC2(light_set_cull_mask, RID, uint32_t) FUNC5(light_set_distance_fade, RID, bool, float, float, float) FUNC2(light_set_reverse_cull_face_mode, RID, bool) + FUNC2(light_set_shadow_caster_mask, RID, uint32_t) FUNC2(light_set_bake_mode, RID, LightBakeMode) FUNC2(light_set_max_sdfgi_cascade, RID, uint32_t) diff --git a/servers/rendering/shader_compiler.cpp b/servers/rendering/shader_compiler.cpp index e7339b2cdd..119ac677eb 100644 --- a/servers/rendering/shader_compiler.cpp +++ b/servers/rendering/shader_compiler.cpp @@ -355,7 +355,7 @@ void ShaderCompiler::_dump_function_deps(const SL::ShaderNode *p_node, const Str } header += " "; - header += _mkid(fnode->name); + header += _mkid(fnode->rname); header += "("; for (int i = 0; i < fnode->arguments.size(); i++) { @@ -1190,7 +1190,7 @@ String ShaderCompiler::_dump_node_code(const SL::Node *p_node, int p_level, Gene } else if (p_default_actions.renames.has(vnode->name)) { code += p_default_actions.renames[vnode->name]; } else { - code += _mkid(vnode->name); + code += _mkid(vnode->rname); } } diff --git a/servers/rendering/shader_language.cpp b/servers/rendering/shader_language.cpp index 77035a64f7..7c4128b0e3 100644 --- a/servers/rendering/shader_language.cpp +++ b/servers/rendering/shader_language.cpp @@ -356,7 +356,7 @@ const ShaderLanguage::KeyWord ShaderLanguage::keyword_list[] = { { TK_CF_BREAK, "break", CF_BLOCK, {}, {} }, { TK_CF_CONTINUE, "continue", CF_BLOCK, {}, {} }, { TK_CF_RETURN, "return", CF_BLOCK, {}, {} }, - { TK_CF_DISCARD, "discard", CF_BLOCK, { "particles", "sky", "fog" }, { "fragment" } }, + { TK_CF_DISCARD, "discard", CF_BLOCK, { "particles", "sky", "fog" }, { "vertex" } }, // function specifier keywords @@ -1305,6 +1305,7 @@ void ShaderLanguage::clear() { include_markers_handled.clear(); calls_info.clear(); + function_overload_count.clear(); #ifdef DEBUG_ENABLED keyword_completion_context = CF_UNSPECIFIED; @@ -3554,6 +3555,7 @@ bool ShaderLanguage::_validate_function_call(BlockNode *p_block, const FunctionI ERR_FAIL_COND_V(p_func->arguments[0]->type != Node::NODE_TYPE_VARIABLE, false); StringName name = static_cast<VariableNode *>(p_func->arguments[0])->name.operator String(); + StringName rname = static_cast<VariableNode *>(p_func->arguments[0])->rname.operator String(); for (int i = 1; i < p_func->arguments.size(); i++) { args.push_back(p_func->arguments[i]->get_datatype()); @@ -3563,28 +3565,33 @@ bool ShaderLanguage::_validate_function_call(BlockNode *p_block, const FunctionI int argcount = args.size(); - if (p_function_info.stage_functions.has(name)) { - //stage based function - const StageFunctionInfo &sf = p_function_info.stage_functions[name]; - if (argcount != sf.arguments.size()) { - _set_error(vformat(RTR("Invalid number of arguments when calling stage function '%s', which expects %d arguments."), String(name), sf.arguments.size())); - return false; - } - //validate arguments - for (int i = 0; i < argcount; i++) { - if (args[i] != sf.arguments[i].type) { - _set_error(vformat(RTR("Invalid argument type when calling stage function '%s', type expected is '%s'."), String(name), get_datatype_name(sf.arguments[i].type))); - return false; - } - } + if (stages) { + // Stage functions can be used in custom functions as well, that why need to check them all. + for (const KeyValue<StringName, FunctionInfo> &E : *stages) { + if (E.value.stage_functions.has(name)) { + // Stage-based function. + const StageFunctionInfo &sf = E.value.stage_functions[name]; + if (argcount != sf.arguments.size()) { + _set_error(vformat(RTR("Invalid number of arguments when calling stage function '%s', which expects %d arguments."), String(name), sf.arguments.size())); + return false; + } + // Validate arguments. + for (int i = 0; i < argcount; i++) { + if (args[i] != sf.arguments[i].type) { + _set_error(vformat(RTR("Invalid argument type when calling stage function '%s', type expected is '%s'."), String(name), get_datatype_name(sf.arguments[i].type))); + return false; + } + } - if (r_ret_type) { - *r_ret_type = sf.return_type; - } - if (r_ret_type_str) { - *r_ret_type_str = ""; + if (r_ret_type) { + *r_ret_type = sf.return_type; + } + if (r_ret_type_str) { + *r_ret_type_str = ""; + } + return true; + } } - return true; } bool failed_builtin = false; @@ -3891,9 +3898,10 @@ bool ShaderLanguage::_validate_function_call(BlockNode *p_block, const FunctionI } bool fail = false; + bool use_constant_conversion = function_overload_count[rname] == 0; for (int j = 0; j < args.size(); j++) { - if (get_scalar_type(args[j]) == args[j] && p_func->arguments[j + 1]->type == Node::NODE_TYPE_CONSTANT && args3[j] == 0 && convert_constant(static_cast<ConstantNode *>(p_func->arguments[j + 1]), pfunc->arguments[j].type)) { + if (use_constant_conversion && get_scalar_type(args[j]) == args[j] && p_func->arguments[j + 1]->type == Node::NODE_TYPE_CONSTANT && args3[j] == 0 && convert_constant(static_cast<ConstantNode *>(p_func->arguments[j + 1]), pfunc->arguments[j].type)) { //all good, but it needs implicit conversion later } else if (args[j] != pfunc->arguments[j].type || (args[j] == TYPE_STRUCT && args2[j] != pfunc->arguments[j].struct_name) || args3[j] != pfunc->arguments[j].array_size) { String func_arg_name; @@ -3919,7 +3927,7 @@ bool ShaderLanguage::_validate_function_call(BlockNode *p_block, const FunctionI arg_name += "]"; } - _set_error(vformat(RTR("Invalid argument for \"%s(%s)\" function: argument %d should be %s but is %s."), String(name), arg_list, j + 1, func_arg_name, arg_name)); + _set_error(vformat(RTR("Invalid argument for \"%s(%s)\" function: argument %d should be %s but is %s."), String(rname), arg_list, j + 1, func_arg_name, arg_name)); fail = true; break; } @@ -3960,9 +3968,9 @@ bool ShaderLanguage::_validate_function_call(BlockNode *p_block, const FunctionI if (exists) { if (last_arg_count > args.size()) { - _set_error(vformat(RTR("Too few arguments for \"%s(%s)\" call. Expected at least %d but received %d."), String(name), arg_list, last_arg_count, args.size())); + _set_error(vformat(RTR("Too few arguments for \"%s(%s)\" call. Expected at least %d but received %d."), String(rname), arg_list, last_arg_count, args.size())); } else if (last_arg_count < args.size()) { - _set_error(vformat(RTR("Too many arguments for \"%s(%s)\" call. Expected at most %d but received %d."), String(name), arg_list, last_arg_count, args.size())); + _set_error(vformat(RTR("Too many arguments for \"%s(%s)\" call. Expected at most %d but received %d."), String(rname), arg_list, last_arg_count, args.size())); } } @@ -5822,42 +5830,16 @@ ShaderLanguage::Node *ShaderLanguage::_parse_expression(BlockNode *p_block, cons } } - const StringName &name = identifier; - - if (name != current_function) { // Recursion is not allowed. - // Register call. - if (calls_info.has(name)) { - calls_info[current_function].calls.push_back(&calls_info[name]); - } - - int idx = 0; - bool is_builtin = false; - - while (frag_only_func_defs[idx].name) { - if (frag_only_func_defs[idx].name == name) { - // If a built-in function not found for the current shader type, then it shouldn't be parsed further. - if (!is_supported_frag_only_funcs) { - _set_error(vformat(RTR("Built-in function '%s' is not supported for the '%s' shader type."), name, shader_type_identifier)); - return nullptr; - } - // Register usage of the restricted function. - calls_info[current_function].uses_restricted_items.push_back(Pair<StringName, CallInfo::Item>(name, CallInfo::Item(CallInfo::Item::ITEM_TYPE_BUILTIN, _get_tkpos()))); - is_builtin = true; - break; - } - idx++; - } - - // Recursively checks for the restricted function call. - if (is_supported_frag_only_funcs && current_function == "vertex" && stages->has(current_function) && !_validate_restricted_func(name, &calls_info[current_function], is_builtin)) { - return nullptr; - } - } + const StringName &rname = identifier; + StringName name = identifier; OperatorNode *func = alloc_node<OperatorNode>(); func->op = OP_CALL; + VariableNode *funcname = alloc_node<VariableNode>(); funcname->name = name; + funcname->rname = name; + func->arguments.push_back(funcname); int carg = -1; @@ -5874,22 +5856,72 @@ ShaderLanguage::Node *ShaderLanguage::_parse_expression(BlockNode *p_block, cons bnode = bnode->parent_block; } - //test if function was parsed first + // Test if function was parsed first. int function_index = -1; - for (int i = 0; i < shader->vfunctions.size(); i++) { - if (shader->vfunctions[i].name == name) { - //add to current function as dependency - for (int j = 0; j < shader->vfunctions.size(); j++) { - if (shader->vfunctions[j].name == current_function) { - shader->vfunctions.write[j].uses_function.insert(name); - break; + for (int i = 0, max_valid_args = 0; i < shader->vfunctions.size(); i++) { + if (!shader->vfunctions[i].callable || shader->vfunctions[i].rname != rname) { + continue; + } + + bool found = true; + int valid_args = 0; + + // Search for correct overload. + for (int j = 1; j < func->arguments.size(); j++) { + if (j - 1 == shader->vfunctions[i].function->arguments.size()) { + found = false; + break; + } + + const FunctionNode::Argument &a = shader->vfunctions[i].function->arguments[j - 1]; + Node *b = func->arguments[j]; + + if (a.type == b->get_datatype() && a.array_size == b->get_array_size()) { + if (a.type == TYPE_STRUCT) { + if (a.struct_name != b->get_datatype_name()) { + found = false; + break; + } else { + valid_args++; + } + } else { + valid_args++; } + } else { + if (function_overload_count[rname] == 0 && get_scalar_type(a.type) == a.type && b->type == Node::NODE_TYPE_CONSTANT && a.array_size == 0 && convert_constant(static_cast<ConstantNode *>(b), a.type)) { + // Implicit cast if no overloads. + continue; + } + found = false; + break; } + } - //see if texture arguments must connect - function_index = i; - break; + // Using the best match index for completion hint if the function not found. + if (valid_args > max_valid_args) { + name = shader->vfunctions[i].name; + funcname->name = name; + max_valid_args = valid_args; + } + + if (!found) { + continue; } + + // Add to current function as dependency. + for (int j = 0; j < shader->vfunctions.size(); j++) { + if (shader->vfunctions[j].name == current_function) { + shader->vfunctions.write[j].uses_function.insert(name); + break; + } + } + + name = shader->vfunctions[i].name; + funcname->name = name; + + // See if texture arguments must connect. + function_index = i; + break; } if (carg >= 0) { @@ -5904,9 +5936,52 @@ ShaderLanguage::Node *ShaderLanguage::_parse_expression(BlockNode *p_block, cons return nullptr; } + if (name != current_function) { // Recursion is not allowed. + // Register call. + if (calls_info.has(name)) { + calls_info[current_function].calls.push_back(&calls_info[name]); + } + + bool is_builtin = false; + + if (is_supported_frag_only_funcs && stages) { + for (const KeyValue<StringName, FunctionInfo> &E : *stages) { + if (E.value.stage_functions.has(name)) { + // Register usage of the restricted stage function. + calls_info[current_function].uses_restricted_items.push_back(Pair<StringName, CallInfo::Item>(name, CallInfo::Item(CallInfo::Item::ITEM_TYPE_BUILTIN, _get_tkpos()))); + is_builtin = true; + break; + } + } + } + + if (!is_builtin) { + int idx = 0; + while (frag_only_func_defs[idx].name) { + if (frag_only_func_defs[idx].name == name) { + // If a built-in function not found for the current shader type, then it shouldn't be parsed further. + if (!is_supported_frag_only_funcs) { + _set_error(vformat(RTR("Built-in function '%s' is not supported for the '%s' shader type."), name, shader_type_identifier)); + return nullptr; + } + // Register usage of the restricted function. + calls_info[current_function].uses_restricted_items.push_back(Pair<StringName, CallInfo::Item>(name, CallInfo::Item(CallInfo::Item::ITEM_TYPE_BUILTIN, _get_tkpos()))); + is_builtin = true; + break; + } + idx++; + } + } + + // Recursively checks for the restricted function call. + if (is_supported_frag_only_funcs && current_function == "vertex" && stages->has(current_function) && !_validate_restricted_func(name, &calls_info[current_function], is_builtin)) { + return nullptr; + } + } + bool is_custom_func = false; if (!_validate_function_call(p_block, p_function_info, func, &func->return_cache, &func->struct_name, &is_custom_func)) { - _set_error(vformat(RTR("No matching function found for: '%s'."), String(funcname->name))); + _set_error(vformat(RTR("No matching function found for: '%s'."), String(funcname->rname))); return nullptr; } completion_class = TAG_GLOBAL; // reset sub-class @@ -6095,7 +6170,7 @@ ShaderLanguage::Node *ShaderLanguage::_parse_expression(BlockNode *p_block, cons } expr = func; #ifdef DEBUG_ENABLED - if (check_warnings) { + if (check_warnings && is_custom_func) { StringName func_name; if (p_block && p_block->parent_function) { @@ -8524,6 +8599,11 @@ Error ShaderLanguage::_parse_block(BlockNode *p_block, const FunctionInfo &p_fun block = block->parent_block; } } else if (tk.type == TK_CF_DISCARD) { + if (!is_discard_supported) { + _set_error(vformat(RTR("Use of '%s' is not supported for the '%s' shader type."), "discard", shader_type_identifier)); + return ERR_PARSE_ERROR; + } + //check return type BlockNode *b = p_block; while (b && !b->parent_function) { @@ -8535,7 +8615,7 @@ Error ShaderLanguage::_parse_block(BlockNode *p_block, const FunctionInfo &p_fun } if (!b->parent_function->can_discard) { - _set_error(vformat(RTR("Use of '%s' is not allowed here."), "discard")); + _set_error(vformat(RTR("'%s' cannot be used within the '%s' processor function."), "discard", b->parent_function->name)); return ERR_PARSE_ERROR; } @@ -8544,6 +8624,9 @@ Error ShaderLanguage::_parse_block(BlockNode *p_block, const FunctionInfo &p_fun pos = _get_tkpos(); tk = _get_token(); + + calls_info[b->parent_function->name].uses_restricted_items.push_back(Pair<StringName, CallInfo::Item>("discard", CallInfo::Item(CallInfo::Item::ITEM_TYPE_BUILTIN, pos))); + if (tk.type != TK_SEMICOLON) { _set_expected_after_error(";", "discard"); return ERR_PARSE_ERROR; @@ -8781,7 +8864,9 @@ Error ShaderLanguage::_parse_shader(const HashMap<StringName, FunctionInfo> &p_f ShaderNode::Uniform::Scope uniform_scope = ShaderNode::Uniform::SCOPE_LOCAL; stages = &p_functions; - is_supported_frag_only_funcs = shader_type_identifier == "canvas_item" || shader_type_identifier == "spatial" || shader_type_identifier == "sky"; + + is_discard_supported = shader_type_identifier == "canvas_item" || shader_type_identifier == "spatial"; + is_supported_frag_only_funcs = is_discard_supported || shader_type_identifier == "sky"; const FunctionInfo &constants = p_functions.has("constants") ? p_functions["constants"] : FunctionInfo(); @@ -9932,7 +10017,8 @@ Error ShaderLanguage::_parse_shader(const HashMap<StringName, FunctionInfo> &p_f return ERR_PARSE_ERROR; } - if (shader->structs.has(name) || _find_identifier(nullptr, false, constants, name) || has_builtin(p_functions, name, !is_constant)) { + IdentifierType itype; + if (shader->structs.has(name) || (_find_identifier(nullptr, false, constants, name, nullptr, &itype) && itype != IDENTIFIER_FUNCTION) || has_builtin(p_functions, name, !is_constant)) { _set_redefinition_error(String(name)); return ERR_PARSE_ERROR; } @@ -10260,20 +10346,13 @@ Error ShaderLanguage::_parse_shader(const HashMap<StringName, FunctionInfo> &p_f function.callable = !p_functions.has(name); function.name = name; + function.rname = name; FunctionNode *func_node = alloc_node<FunctionNode>(); - function.function = func_node; - shader->functions.insert(name, function); - shader->vfunctions.push_back(function); - - CallInfo call_info; - call_info.name = name; - - calls_info.insert(name, call_info); - func_node->name = name; + func_node->rname = name; func_node->return_type = type; func_node->return_struct_name = struct_name; func_node->return_precision = precision; @@ -10282,11 +10361,13 @@ Error ShaderLanguage::_parse_shader(const HashMap<StringName, FunctionInfo> &p_f if (p_functions.has(name)) { func_node->can_discard = p_functions[name].can_discard; } else { -#ifdef DEBUG_ENABLED - if (check_warnings && HAS_WARNING(ShaderWarning::UNUSED_FUNCTION_FLAG)) { - used_functions.insert(name, Usage(tk_line)); - } -#endif // DEBUG_ENABLED + func_node->can_discard = is_discard_supported; // Allow use it for custom functions (in supported shader types). + } + + if (!function_overload_count.has(name)) { + function_overload_count.insert(name, 0); + } else { + function_overload_count[name]++; } func_node->body = alloc_node<BlockNode>(); @@ -10466,7 +10547,6 @@ Error ShaderLanguage::_parse_shader(const HashMap<StringName, FunctionInfo> &p_f param_name = tk.text; - ShaderLanguage::IdentifierType itype; if (_find_identifier(func_node->body, false, builtins, param_name, (ShaderLanguage::DataType *)nullptr, &itype)) { if (itype != IDENTIFIER_FUNCTION) { _set_redefinition_error(String(param_name)); @@ -10512,6 +10592,66 @@ Error ShaderLanguage::_parse_shader(const HashMap<StringName, FunctionInfo> &p_f } } + // Searches for function index and check for the exact duplicate in overloads. + int function_index = 0; + for (int i = 0; i < shader->vfunctions.size(); i++) { + if (!shader->vfunctions[i].callable || shader->vfunctions[i].rname != name) { + continue; + } + + function_index++; + + if (shader->vfunctions[i].function->arguments.size() != func_node->arguments.size()) { + continue; + } + + bool is_same = true; + + for (int j = 0; j < shader->vfunctions[i].function->arguments.size(); j++) { + FunctionNode::Argument a = func_node->arguments[j]; + FunctionNode::Argument b = shader->vfunctions[i].function->arguments[j]; + + if (a.type == b.type && a.array_size == b.array_size) { + if (a.type == TYPE_STRUCT) { + is_same = a.struct_name == b.struct_name; + } + } else { + is_same = false; + } + + if (!is_same) { + break; + } + } + + if (is_same) { + _set_redefinition_error(String(name)); + return ERR_PARSE_ERROR; + } + } + + // Creates a fake name for function overload, which will be replaced by the real name by the compiler. + String name2 = name; + if (function_index > 0) { + name2 = vformat("%s@%s", name, itos(function_index + 1)); + + function.name = name2; + func_node->name = name2; + } + + shader->functions.insert(name2, function); + shader->vfunctions.push_back(function); + + CallInfo call_info; + call_info.name = name2; + calls_info.insert(name2, call_info); + +#ifdef DEBUG_ENABLED + if (check_warnings && HAS_WARNING(ShaderWarning::UNUSED_FUNCTION_FLAG) && !p_functions.has(name)) { + used_functions.insert(name2, Usage(tk_line)); + } +#endif // DEBUG_ENABLED + if (p_functions.has(name)) { //if one of the core functions, make sure they are of the correct form if (func_node->arguments.size() > 0) { @@ -10531,7 +10671,7 @@ Error ShaderLanguage::_parse_shader(const HashMap<StringName, FunctionInfo> &p_f return ERR_PARSE_ERROR; } - current_function = name; + current_function = name2; #ifdef DEBUG_ENABLED keyword_completion_context = CF_BLOCK; @@ -10812,10 +10952,7 @@ Error ShaderLanguage::complete(const String &p_code, const ShaderCompileInfo &p_ break; // Ignore hint keywords (parsed below). } if (keyword_list[i].flags & keyword_completion_context) { - if (keyword_list[i].excluded_shader_types.has(shader_type_identifier)) { - continue; - } - if (!keyword_list[i].functions.is_empty() && !keyword_list[i].functions.has(current_function)) { + if (keyword_list[i].excluded_shader_types.has(shader_type_identifier) || keyword_list[i].excluded_functions.has(current_function)) { continue; } ScriptLanguage::CodeCompletionOption option(keyword_list[i].text, ScriptLanguage::CODE_COMPLETION_KIND_PLAIN_TEXT); @@ -11044,15 +11181,21 @@ Error ShaderLanguage::complete(const String &p_code, const ShaderCompileInfo &p_ if (!shader->vfunctions[i].callable || shader->vfunctions[i].name == skip_function) { continue; } - matches.insert(String(shader->vfunctions[i].name), ScriptLanguage::CODE_COMPLETION_KIND_FUNCTION); + matches.insert(String(shader->vfunctions[i].rname), ScriptLanguage::CODE_COMPLETION_KIND_FUNCTION); } int idx = 0; bool low_end = RenderingServer::get_singleton()->is_low_end(); - if (stages && stages->has(skip_function)) { - for (const KeyValue<StringName, StageFunctionInfo> &E : (*stages)[skip_function].stage_functions) { - matches.insert(String(E.key), ScriptLanguage::CODE_COMPLETION_KIND_FUNCTION); + if (stages) { + // Stage functions can be used in custom functions as well, that why need to check them all. + for (const KeyValue<StringName, FunctionInfo> &E : *stages) { + for (const KeyValue<StringName, StageFunctionInfo> &F : E.value.stage_functions) { + if (F.value.skip_function == skip_function && stages->has(skip_function)) { + continue; + } + matches.insert(String(F.key), ScriptLanguage::CODE_COMPLETION_KIND_FUNCTION); + } } } @@ -11095,6 +11238,7 @@ Error ShaderLanguage::complete(const String &p_code, const ShaderCompileInfo &p_ case COMPLETION_CALL_ARGUMENTS: { StringName block_function; BlockNode *block = completion_block; + String calltip; while (block) { if (block->parent_function) { @@ -11103,89 +11247,93 @@ Error ShaderLanguage::complete(const String &p_code, const ShaderCompileInfo &p_ block = block->parent_block; } - for (int i = 0; i < shader->vfunctions.size(); i++) { - if (!shader->vfunctions[i].callable) { + for (int i = 0, overload_index = 0; i < shader->vfunctions.size(); i++) { + if (!shader->vfunctions[i].callable || shader->vfunctions[i].rname != completion_function) { continue; } - if (shader->vfunctions[i].name == completion_function) { - String calltip; - if (shader->vfunctions[i].function->return_type == TYPE_STRUCT) { - calltip += String(shader->vfunctions[i].function->return_struct_name); - } else { - calltip += get_datatype_name(shader->vfunctions[i].function->return_type); - } - - if (shader->vfunctions[i].function->return_array_size > 0) { - calltip += "["; - calltip += itos(shader->vfunctions[i].function->return_array_size); - calltip += "]"; - } + if (shader->vfunctions[i].function->return_type == TYPE_STRUCT) { + calltip += String(shader->vfunctions[i].function->return_struct_name); + } else { + calltip += get_datatype_name(shader->vfunctions[i].function->return_type); + } - calltip += " "; - calltip += shader->vfunctions[i].name; - calltip += "("; + if (shader->vfunctions[i].function->return_array_size > 0) { + calltip += "["; + calltip += itos(shader->vfunctions[i].function->return_array_size); + calltip += "]"; + } - for (int j = 0; j < shader->vfunctions[i].function->arguments.size(); j++) { - if (j > 0) { - calltip += ", "; - } else { - calltip += " "; - } + calltip += " "; + calltip += shader->vfunctions[i].rname; + calltip += "("; - if (j == completion_argument) { - calltip += char32_t(0xFFFF); - } + for (int j = 0; j < shader->vfunctions[i].function->arguments.size(); j++) { + if (j > 0) { + calltip += ", "; + } else { + calltip += " "; + } - if (shader->vfunctions[i].function->arguments[j].is_const) { - calltip += "const "; - } + if (j == completion_argument) { + calltip += char32_t(0xFFFF); + } - if (shader->vfunctions[i].function->arguments[j].qualifier != ArgumentQualifier::ARGUMENT_QUALIFIER_IN) { - if (shader->vfunctions[i].function->arguments[j].qualifier == ArgumentQualifier::ARGUMENT_QUALIFIER_OUT) { - calltip += "out "; - } else { // ArgumentQualifier::ARGUMENT_QUALIFIER_INOUT - calltip += "inout "; - } - } + if (shader->vfunctions[i].function->arguments[j].is_const) { + calltip += "const "; + } - if (shader->vfunctions[i].function->arguments[j].type == TYPE_STRUCT) { - calltip += String(shader->vfunctions[i].function->arguments[j].struct_name); - } else { - calltip += get_datatype_name(shader->vfunctions[i].function->arguments[j].type); + if (shader->vfunctions[i].function->arguments[j].qualifier != ArgumentQualifier::ARGUMENT_QUALIFIER_IN) { + if (shader->vfunctions[i].function->arguments[j].qualifier == ArgumentQualifier::ARGUMENT_QUALIFIER_OUT) { + calltip += "out "; + } else { // ArgumentQualifier::ARGUMENT_QUALIFIER_INOUT + calltip += "inout "; } - calltip += " "; - calltip += shader->vfunctions[i].function->arguments[j].name; + } - if (shader->vfunctions[i].function->arguments[j].array_size > 0) { - calltip += "["; - calltip += itos(shader->vfunctions[i].function->arguments[j].array_size); - calltip += "]"; - } + if (shader->vfunctions[i].function->arguments[j].type == TYPE_STRUCT) { + calltip += String(shader->vfunctions[i].function->arguments[j].struct_name); + } else { + calltip += get_datatype_name(shader->vfunctions[i].function->arguments[j].type); + } + calltip += " "; + calltip += shader->vfunctions[i].function->arguments[j].name; - if (j == completion_argument) { - calltip += char32_t(0xFFFF); - } + if (shader->vfunctions[i].function->arguments[j].array_size > 0) { + calltip += "["; + calltip += itos(shader->vfunctions[i].function->arguments[j].array_size); + calltip += "]"; } - if (shader->vfunctions[i].function->arguments.size()) { - calltip += " "; + if (j == completion_argument) { + calltip += char32_t(0xFFFF); } - calltip += ")"; + } - r_call_hint = calltip; - return OK; + if (shader->vfunctions[i].function->arguments.size()) { + calltip += " "; } - } + calltip += ")"; - int idx = 0; + if (overload_index < function_overload_count[shader->vfunctions[i].rname]) { + overload_index++; + calltip += "\n"; + continue; + } - String calltip; - bool low_end = RenderingServer::get_singleton()->is_low_end(); + r_call_hint = calltip; + return OK; + } + + if (stages) { + // Stage functions can be used in custom functions as well, that why need to check them all. + for (const KeyValue<StringName, FunctionInfo> &S : *stages) { + for (const KeyValue<StringName, StageFunctionInfo> &E : S.value.stage_functions) { + // No need to check for the skip function here. + if (completion_function != E.key) { + continue; + } - if (stages && stages->has(block_function)) { - for (const KeyValue<StringName, StageFunctionInfo> &E : (*stages)[block_function].stage_functions) { - if (completion_function == E.key) { calltip += get_datatype_name(E.value.return_type); calltip += " "; calltip += E.key; @@ -11222,6 +11370,9 @@ Error ShaderLanguage::complete(const String &p_code, const ShaderCompileInfo &p_ } } + int idx = 0; + bool low_end = RenderingServer::get_singleton()->is_low_end(); + while (builtin_func_defs[idx].name) { if ((low_end && builtin_func_defs[idx].high_end) || _check_restricted_func(builtin_func_defs[idx].name, block_function)) { idx++; diff --git a/servers/rendering/shader_language.h b/servers/rendering/shader_language.h index 48df77f6bb..ddd4c41059 100644 --- a/servers/rendering/shader_language.h +++ b/servers/rendering/shader_language.h @@ -427,6 +427,7 @@ public: struct VariableNode : public Node { DataType datatype_cache = TYPE_VOID; StringName name; + StringName rname; StringName struct_name; bool is_const = false; bool is_local = false; @@ -604,6 +605,7 @@ public: struct Function { StringName name; + StringName rname; FunctionNode *function = nullptr; HashSet<StringName> uses_function; bool callable; @@ -729,6 +731,7 @@ public: }; StringName name; + StringName rname; DataType return_type = TYPE_VOID; StringName return_struct_name; DataPrecision return_precision = PRECISION_DEFAULT; @@ -856,6 +859,7 @@ public: Vector<Argument> arguments; DataType return_type = TYPE_VOID; + String skip_function; }; struct ModeInfo { @@ -931,7 +935,7 @@ private: const char *text; uint32_t flags; const Vector<String> excluded_shader_types; - const Vector<String> functions; + const Vector<String> excluded_functions; }; static const KeyWord keyword_list[]; @@ -944,6 +948,7 @@ private: Vector<FilePosition> include_positions; HashSet<String> include_markers_handled; + HashMap<StringName, int> function_overload_count; // Additional function information (eg. call hierarchy). No need to expose it to compiler. struct CallInfo { @@ -1146,6 +1151,7 @@ private: const HashMap<StringName, FunctionInfo> *stages = nullptr; bool is_supported_frag_only_funcs = false; + bool is_discard_supported = false; bool _get_completable_identifier(BlockNode *p_block, CompletionType p_type, StringName &identifier); static const BuiltinFuncDef builtin_func_defs[]; diff --git a/servers/rendering/shader_preprocessor.cpp b/servers/rendering/shader_preprocessor.cpp index 27e39551ba..0e41a178b5 100644 --- a/servers/rendering/shader_preprocessor.cpp +++ b/servers/rendering/shader_preprocessor.cpp @@ -393,6 +393,8 @@ void ShaderPreprocessor::process_directive(Tokenizer *p_tokenizer) { process_else(p_tokenizer); } else if (directive == "endif") { process_endif(p_tokenizer); + } else if (directive == "error") { + process_error(p_tokenizer); } else if (directive == "define") { process_define(p_tokenizer); } else if (directive == "undef") { @@ -466,7 +468,7 @@ void ShaderPreprocessor::process_elif(Tokenizer *p_tokenizer) { const int line = p_tokenizer->get_line(); if (state->current_branch == nullptr || state->current_branch->else_defined) { - set_error(RTR("Unmatched elif."), line); + set_error(vformat(RTR("Unmatched '%s' directive."), "elif"), line); return; } if (state->previous_region != nullptr) { @@ -523,7 +525,7 @@ void ShaderPreprocessor::process_else(Tokenizer *p_tokenizer) { const int line = p_tokenizer->get_line(); if (state->current_branch == nullptr || state->current_branch->else_defined) { - set_error(RTR("Unmatched else."), line); + set_error(vformat(RTR("Unmatched '%s' directive."), "else"), line); return; } if (state->previous_region != nullptr) { @@ -531,7 +533,7 @@ void ShaderPreprocessor::process_else(Tokenizer *p_tokenizer) { } if (!p_tokenizer->consume_empty_line()) { - set_error(RTR("Invalid else."), p_tokenizer->get_line()); + set_error(vformat(RTR("Invalid '%s' directive."), "else"), line); } bool skip = false; @@ -559,7 +561,7 @@ void ShaderPreprocessor::process_endif(Tokenizer *p_tokenizer) { state->condition_depth--; if (state->condition_depth < 0) { - set_error(RTR("Unmatched endif."), line); + set_error(vformat(RTR("Unmatched '%s' directive."), "endif"), line); return; } if (state->previous_region != nullptr) { @@ -568,13 +570,28 @@ void ShaderPreprocessor::process_endif(Tokenizer *p_tokenizer) { } if (!p_tokenizer->consume_empty_line()) { - set_error(RTR("Invalid endif."), line); + set_error(vformat(RTR("Invalid '%s' directive."), "endif"), line); } state->current_branch = state->current_branch->parent; state->branches.pop_back(); } +void ShaderPreprocessor::process_error(Tokenizer *p_tokenizer) { + const int line = p_tokenizer->get_line(); + + const String body = tokens_to_string(p_tokenizer->advance('\n')).strip_edges(); + if (body.is_empty()) { + set_error(" ", line); + } else { + set_error(body, line); + } + + if (!p_tokenizer->consume_empty_line()) { + set_error(vformat(RTR("Invalid '%s' directive."), "error"), line); + } +} + void ShaderPreprocessor::process_if(Tokenizer *p_tokenizer) { const int line = p_tokenizer->get_line(); @@ -626,7 +643,7 @@ void ShaderPreprocessor::process_ifdef(Tokenizer *p_tokenizer) { } if (!p_tokenizer->consume_empty_line()) { - set_error(RTR("Invalid ifdef."), line); + set_error(vformat(RTR("Invalid '%s' directive."), "ifdef"), line); return; } @@ -648,7 +665,7 @@ void ShaderPreprocessor::process_ifndef(Tokenizer *p_tokenizer) { } if (!p_tokenizer->consume_empty_line()) { - set_error(RTR("Invalid ifndef."), line); + set_error(vformat(RTR("Invalid '%s' directive."), "ifndef"), line); return; } @@ -771,21 +788,21 @@ void ShaderPreprocessor::process_pragma(Tokenizer *p_tokenizer) { } if (label.is_empty()) { - set_error(RTR("Invalid pragma directive."), line); + set_error(vformat(RTR("Invalid '%s' directive."), "pragma"), line); return; } - // Rxplicitly handle pragma values here. + // Explicitly handle pragma values here. // If more pragma options are created, then refactor into a more defined structure. if (label == "disable_preprocessor") { state->disabled = true; } else { - set_error(RTR("Invalid pragma directive."), line); + set_error(vformat(RTR("Invalid '%s' directive."), "pragma"), line); return; } if (!p_tokenizer->consume_empty_line()) { - set_error(RTR("Invalid pragma directive."), line); + set_error(vformat(RTR("Invalid '%s' directive."), "pragma"), line); return; } } @@ -794,7 +811,7 @@ void ShaderPreprocessor::process_undef(Tokenizer *p_tokenizer) { const int line = p_tokenizer->get_line(); const String label = p_tokenizer->get_identifier(); if (label.is_empty() || !p_tokenizer->consume_empty_line()) { - set_error(RTR("Invalid undef."), line); + set_error(vformat(RTR("Invalid '%s' directive."), "undef"), line); return; } @@ -1383,6 +1400,7 @@ void ShaderPreprocessor::get_keyword_list(List<String> *r_keywords, bool p_inclu r_keywords->push_back("else"); } r_keywords->push_back("endif"); + r_keywords->push_back("error"); if (p_include_shader_keywords) { r_keywords->push_back("if"); } diff --git a/servers/rendering/shader_preprocessor.h b/servers/rendering/shader_preprocessor.h index b29239105a..0a90aec958 100644 --- a/servers/rendering/shader_preprocessor.h +++ b/servers/rendering/shader_preprocessor.h @@ -191,6 +191,7 @@ private: void process_elif(Tokenizer *p_tokenizer); void process_else(Tokenizer *p_tokenizer); void process_endif(Tokenizer *p_tokenizer); + void process_error(Tokenizer *p_tokenizer); void process_if(Tokenizer *p_tokenizer); void process_ifdef(Tokenizer *p_tokenizer); void process_ifndef(Tokenizer *p_tokenizer); diff --git a/servers/rendering/shader_types.cpp b/servers/rendering/shader_types.cpp index f498c0bf93..821009f07c 100644 --- a/servers/rendering/shader_types.cpp +++ b/servers/rendering/shader_types.cpp @@ -63,6 +63,8 @@ ShaderTypes::ShaderTypes() { shader_modes[RS::SHADER_SPATIAL].functions["constants"].built_ins["PI"] = constt(ShaderLanguage::TYPE_FLOAT); shader_modes[RS::SHADER_SPATIAL].functions["constants"].built_ins["TAU"] = constt(ShaderLanguage::TYPE_FLOAT); shader_modes[RS::SHADER_SPATIAL].functions["constants"].built_ins["E"] = constt(ShaderLanguage::TYPE_FLOAT); + shader_modes[RS::SHADER_SPATIAL].functions["constants"].built_ins["OUTPUT_IS_SRGB"] = constt(ShaderLanguage::TYPE_BOOL); + shader_modes[RS::SHADER_SPATIAL].functions["constants"].built_ins["CLIP_SPACE_FAR"] = constt(ShaderLanguage::TYPE_FLOAT); shader_modes[RS::SHADER_SPATIAL].functions["vertex"].built_ins["VERTEX"] = ShaderLanguage::TYPE_VEC3; shader_modes[RS::SHADER_SPATIAL].functions["vertex"].built_ins["NORMAL"] = ShaderLanguage::TYPE_VEC3; @@ -96,8 +98,6 @@ ShaderTypes::ShaderTypes() { shader_modes[RS::SHADER_SPATIAL].functions["vertex"].built_ins["MODELVIEW_MATRIX"] = ShaderLanguage::TYPE_MAT4; shader_modes[RS::SHADER_SPATIAL].functions["vertex"].built_ins["MODELVIEW_NORMAL_MATRIX"] = ShaderLanguage::TYPE_MAT3; shader_modes[RS::SHADER_SPATIAL].functions["vertex"].built_ins["VIEWPORT_SIZE"] = constt(ShaderLanguage::TYPE_VEC2); - shader_modes[RS::SHADER_SPATIAL].functions["vertex"].built_ins["OUTPUT_IS_SRGB"] = constt(ShaderLanguage::TYPE_BOOL); - shader_modes[RS::SHADER_SPATIAL].functions["vertex"].built_ins["CLIP_SPACE_FAR"] = constt(ShaderLanguage::TYPE_FLOAT); shader_modes[RS::SHADER_SPATIAL].functions["vertex"].built_ins["MAIN_CAM_INV_VIEW_MATRIX"] = constt(ShaderLanguage::TYPE_MAT4); shader_modes[RS::SHADER_SPATIAL].functions["vertex"].built_ins["NODE_POSITION_WORLD"] = constt(ShaderLanguage::TYPE_VEC3); @@ -159,9 +159,6 @@ ShaderTypes::ShaderTypes() { shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["VIEW_RIGHT"] = constt(ShaderLanguage::TYPE_INT); shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["EYE_OFFSET"] = constt(ShaderLanguage::TYPE_VEC3); - shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["OUTPUT_IS_SRGB"] = constt(ShaderLanguage::TYPE_BOOL); - shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["CLIP_SPACE_FAR"] = constt(ShaderLanguage::TYPE_FLOAT); - shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["MODEL_MATRIX"] = constt(ShaderLanguage::TYPE_MAT4); shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["MODEL_NORMAL_MATRIX"] = constt(ShaderLanguage::TYPE_MAT3); shader_modes[RS::SHADER_SPATIAL].functions["fragment"].built_ins["VIEW_MATRIX"] = constt(ShaderLanguage::TYPE_MAT4); @@ -203,8 +200,6 @@ ShaderTypes::ShaderTypes() { shader_modes[RS::SHADER_SPATIAL].functions["light"].built_ins["ROUGHNESS"] = constt(ShaderLanguage::TYPE_FLOAT); shader_modes[RS::SHADER_SPATIAL].functions["light"].built_ins["DIFFUSE_LIGHT"] = ShaderLanguage::TYPE_VEC3; shader_modes[RS::SHADER_SPATIAL].functions["light"].built_ins["SPECULAR_LIGHT"] = ShaderLanguage::TYPE_VEC3; - shader_modes[RS::SHADER_SPATIAL].functions["light"].built_ins["OUTPUT_IS_SRGB"] = constt(ShaderLanguage::TYPE_BOOL); - shader_modes[RS::SHADER_SPATIAL].functions["light"].built_ins["CLIP_SPACE_FAR"] = constt(ShaderLanguage::TYPE_FLOAT); shader_modes[RS::SHADER_SPATIAL].functions["light"].built_ins["ALPHA"] = ShaderLanguage::TYPE_FLOAT; shader_modes[RS::SHADER_SPATIAL].functions["light"].can_discard = true; @@ -284,6 +279,7 @@ ShaderTypes::ShaderTypes() { { ShaderLanguage::StageFunctionInfo func; + func.skip_function = "vertex"; func.arguments.push_back(ShaderLanguage::StageFunctionInfo::Argument("sdf_pos", ShaderLanguage::TYPE_VEC2)); func.return_type = ShaderLanguage::TYPE_FLOAT; //whether it could emit shader_modes[RS::SHADER_CANVAS_ITEM].functions["fragment"].stage_functions["texture_sdf"] = func; @@ -297,6 +293,7 @@ ShaderTypes::ShaderTypes() { { ShaderLanguage::StageFunctionInfo func; + func.skip_function = "vertex"; func.arguments.push_back(ShaderLanguage::StageFunctionInfo::Argument("uv", ShaderLanguage::TYPE_VEC2)); func.return_type = ShaderLanguage::TYPE_VEC2; //whether it could emit shader_modes[RS::SHADER_CANVAS_ITEM].functions["fragment"].stage_functions["screen_uv_to_sdf"] = func; diff --git a/servers/rendering/storage/light_storage.h b/servers/rendering/storage/light_storage.h index 6a0adfa596..1e149e3a97 100644 --- a/servers/rendering/storage/light_storage.h +++ b/servers/rendering/storage/light_storage.h @@ -59,6 +59,8 @@ public: virtual void light_set_cull_mask(RID p_light, uint32_t p_mask) = 0; virtual void light_set_distance_fade(RID p_light, bool p_enabled, float p_begin, float p_shadow, float p_length) = 0; virtual void light_set_reverse_cull_face_mode(RID p_light, bool p_enabled) = 0; + virtual void light_set_shadow_caster_mask(RID p_light, uint32_t p_caster_mask) = 0; + virtual uint32_t light_get_shadow_caster_mask(RID p_light) const = 0; virtual void light_set_bake_mode(RID p_light, RS::LightBakeMode p_bake_mode) = 0; virtual void light_set_max_sdfgi_cascade(RID p_light, uint32_t p_cascade) = 0; diff --git a/servers/rendering_server.cpp b/servers/rendering_server.cpp index 32ef5261f3..53dda24dc3 100644 --- a/servers/rendering_server.cpp +++ b/servers/rendering_server.cpp @@ -2479,6 +2479,7 @@ void RenderingServer::_bind_methods() { ClassDB::bind_method(D_METHOD("light_set_cull_mask", "light", "mask"), &RenderingServer::light_set_cull_mask); ClassDB::bind_method(D_METHOD("light_set_distance_fade", "decal", "enabled", "begin", "shadow", "length"), &RenderingServer::light_set_distance_fade); ClassDB::bind_method(D_METHOD("light_set_reverse_cull_face_mode", "light", "enabled"), &RenderingServer::light_set_reverse_cull_face_mode); + ClassDB::bind_method(D_METHOD("light_set_shadow_caster_mask", "light", "mask"), &RenderingServer::light_set_shadow_caster_mask); ClassDB::bind_method(D_METHOD("light_set_bake_mode", "light", "bake_mode"), &RenderingServer::light_set_bake_mode); ClassDB::bind_method(D_METHOD("light_set_max_sdfgi_cascade", "light", "cascade"), &RenderingServer::light_set_max_sdfgi_cascade); diff --git a/servers/rendering_server.h b/servers/rendering_server.h index 0208a640a5..6c1e1274d4 100644 --- a/servers/rendering_server.h +++ b/servers/rendering_server.h @@ -542,6 +542,7 @@ public: virtual void light_set_cull_mask(RID p_light, uint32_t p_mask) = 0; virtual void light_set_distance_fade(RID p_light, bool p_enabled, float p_begin, float p_shadow, float p_length) = 0; virtual void light_set_reverse_cull_face_mode(RID p_light, bool p_enabled) = 0; + virtual void light_set_shadow_caster_mask(RID p_light, uint32_t p_caster_mask) = 0; enum LightBakeMode { LIGHT_BAKE_DISABLED, diff --git a/servers/text_server.h b/servers/text_server.h index ba3fdaa35e..7dd9669818 100644 --- a/servers/text_server.h +++ b/servers/text_server.h @@ -31,6 +31,7 @@ #ifndef TEXT_SERVER_H #define TEXT_SERVER_H +#include "core/io/image.h" #include "core/object/ref_counted.h" #include "core/os/os.h" #include "core/templates/rid.h" diff --git a/tests/core/string/test_string.h b/tests/core/string/test_string.h index 8559737e74..8d6137cf62 100644 --- a/tests/core/string/test_string.h +++ b/tests/core/string/test_string.h @@ -460,11 +460,27 @@ TEST_CASE("[String] Number to string") { CHECK(String::num(-0.0) == "-0"); // Includes sign even for zero. CHECK(String::num(3.141593) == "3.141593"); CHECK(String::num(3.141593, 3) == "3.142"); + CHECK(String::num(42.100023, 4) == "42.1"); // No trailing zeros. CHECK(String::num_scientific(30000000) == "3e+07"); + + // String::num_int64 tests. CHECK(String::num_int64(3141593) == "3141593"); + CHECK(String::num_int64(-3141593) == "-3141593"); CHECK(String::num_int64(0xA141593, 16) == "a141593"); CHECK(String::num_int64(0xA141593, 16, true) == "A141593"); - CHECK(String::num(42.100023, 4) == "42.1"); // No trailing zeros. + ERR_PRINT_OFF; + CHECK(String::num_int64(3141593, 1) == ""); // Invalid base < 2. + CHECK(String::num_int64(3141593, 37) == ""); // Invalid base > 36. + ERR_PRINT_ON; + + // String::num_uint64 tests. + CHECK(String::num_uint64(4294967295) == "4294967295"); + CHECK(String::num_uint64(0xF141593, 16) == "f141593"); + CHECK(String::num_uint64(0xF141593, 16, true) == "F141593"); + ERR_PRINT_OFF; + CHECK(String::num_uint64(4294967295, 1) == ""); // Invalid base < 2. + CHECK(String::num_uint64(4294967295, 37) == ""); // Invalid base > 36. + ERR_PRINT_ON; // String::num_real tests. CHECK(String::num_real(1.0) == "1.0"); diff --git a/tests/core/templates/test_a_hash_map.h b/tests/core/templates/test_a_hash_map.h new file mode 100644 index 0000000000..e67ee7b441 --- /dev/null +++ b/tests/core/templates/test_a_hash_map.h @@ -0,0 +1,295 @@ +/**************************************************************************/ +/* test_a_hash_map.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef TEST_A_HASH_MAP_H +#define TEST_A_HASH_MAP_H + +#include "core/templates/a_hash_map.h" + +#include "tests/test_macros.h" + +namespace TestAHashMap { + +TEST_CASE("[AHashMap] Insert element") { + AHashMap<int, int> map; + AHashMap<int, int>::Iterator e = map.insert(42, 84); + + CHECK(e); + CHECK(e->key == 42); + CHECK(e->value == 84); + CHECK(map[42] == 84); + CHECK(map.has(42)); + CHECK(map.find(42)); +} + +TEST_CASE("[AHashMap] Overwrite element") { + AHashMap<int, int> map; + map.insert(42, 84); + map.insert(42, 1234); + + CHECK(map[42] == 1234); +} + +TEST_CASE("[AHashMap] Erase via element") { + AHashMap<int, int> map; + AHashMap<int, int>::Iterator e = map.insert(42, 84); + map.remove(e); + CHECK(!map.has(42)); + CHECK(!map.find(42)); +} + +TEST_CASE("[AHashMap] Erase via key") { + AHashMap<int, int> map; + map.insert(42, 84); + map.erase(42); + CHECK(!map.has(42)); + CHECK(!map.find(42)); +} + +TEST_CASE("[AHashMap] Size") { + AHashMap<int, int> map; + map.insert(42, 84); + map.insert(123, 84); + map.insert(123, 84); + map.insert(0, 84); + map.insert(123485, 84); + + CHECK(map.size() == 4); +} + +TEST_CASE("[AHashMap] Iteration") { + AHashMap<int, int> map; + + map.insert(42, 84); + map.insert(123, 12385); + map.insert(0, 12934); + map.insert(123485, 1238888); + map.insert(123, 111111); + + Vector<Pair<int, int>> expected; + expected.push_back(Pair<int, int>(42, 84)); + expected.push_back(Pair<int, int>(123, 111111)); + expected.push_back(Pair<int, int>(0, 12934)); + expected.push_back(Pair<int, int>(123485, 1238888)); + + int idx = 0; + for (const KeyValue<int, int> &E : map) { + CHECK(expected[idx] == Pair<int, int>(E.key, E.value)); + idx++; + } + + idx--; + for (AHashMap<int, int>::Iterator it = map.last(); it; --it) { + CHECK(expected[idx] == Pair<int, int>(it->key, it->value)); + idx--; + } +} + +TEST_CASE("[AHashMap] Const iteration") { + AHashMap<int, int> map; + map.insert(42, 84); + map.insert(123, 12385); + map.insert(0, 12934); + map.insert(123485, 1238888); + map.insert(123, 111111); + + const AHashMap<int, int> const_map = map; + + Vector<Pair<int, int>> expected; + expected.push_back(Pair<int, int>(42, 84)); + expected.push_back(Pair<int, int>(123, 111111)); + expected.push_back(Pair<int, int>(0, 12934)); + expected.push_back(Pair<int, int>(123485, 1238888)); + expected.push_back(Pair<int, int>(123, 111111)); + + int idx = 0; + for (const KeyValue<int, int> &E : const_map) { + CHECK(expected[idx] == Pair<int, int>(E.key, E.value)); + idx++; + } + + idx--; + for (AHashMap<int, int>::ConstIterator it = const_map.last(); it; --it) { + CHECK(expected[idx] == Pair<int, int>(it->key, it->value)); + idx--; + } +} + +TEST_CASE("[AHashMap] Replace key") { + AHashMap<int, int> map; + map.insert(42, 84); + map.insert(0, 12934); + CHECK(map.replace_key(0, 1)); + CHECK(map.has(1)); + CHECK(map[1] == 12934); +} + +TEST_CASE("[AHashMap] Clear") { + AHashMap<int, int> map; + map.insert(42, 84); + map.insert(123, 12385); + map.insert(0, 12934); + + map.clear(); + CHECK(!map.has(42)); + CHECK(map.size() == 0); + CHECK(map.is_empty()); +} + +TEST_CASE("[AHashMap] Get") { + AHashMap<int, int> map; + map.insert(42, 84); + map.insert(123, 12385); + map.insert(0, 12934); + + CHECK(map.get(123) == 12385); + map.get(123) = 10; + CHECK(map.get(123) == 10); + + CHECK(*map.getptr(0) == 12934); + *map.getptr(0) = 1; + CHECK(*map.getptr(0) == 1); + + CHECK(map.get(42) == 84); + CHECK(map.getptr(-10) == nullptr); +} + +TEST_CASE("[AHashMap] Insert, iterate and remove many elements") { + const int elem_max = 1234; + AHashMap<int, int> map; + for (int i = 0; i < elem_max; i++) { + map.insert(i, i); + } + + //insert order should have been kept + int idx = 0; + for (auto &K : map) { + CHECK(idx == K.key); + CHECK(idx == K.value); + CHECK(map.has(idx)); + idx++; + } + + Vector<int> elems_still_valid; + + for (int i = 0; i < elem_max; i++) { + if ((i % 5) == 0) { + map.erase(i); + } else { + elems_still_valid.push_back(i); + } + } + + CHECK(elems_still_valid.size() == map.size()); + + for (int i = 0; i < elems_still_valid.size(); i++) { + CHECK(map.has(elems_still_valid[i])); + } +} + +TEST_CASE("[AHashMap] Insert, iterate and remove many strings") { + const int elem_max = 432; + AHashMap<String, String> map; + for (int i = 0; i < elem_max; i++) { + map.insert(itos(i), itos(i)); + } + + //insert order should have been kept + int idx = 0; + for (auto &K : map) { + CHECK(itos(idx) == K.key); + CHECK(itos(idx) == K.value); + CHECK(map.has(itos(idx))); + idx++; + } + + Vector<String> elems_still_valid; + + for (int i = 0; i < elem_max; i++) { + if ((i % 5) == 0) { + map.erase(itos(i)); + } else { + elems_still_valid.push_back(itos(i)); + } + } + + CHECK(elems_still_valid.size() == map.size()); + + for (int i = 0; i < elems_still_valid.size(); i++) { + CHECK(map.has(elems_still_valid[i])); + } + + elems_still_valid.clear(); +} + +TEST_CASE("[AHashMap] Copy constructor") { + AHashMap<int, int> map0; + const uint32_t count = 5; + for (uint32_t i = 0; i < count; i++) { + map0.insert(i, i); + } + AHashMap<int, int> map1(map0); + CHECK(map0.size() == map1.size()); + CHECK(map0.get_capacity() == map1.get_capacity()); + CHECK(*map0.getptr(0) == *map1.getptr(0)); +} + +TEST_CASE("[AHashMap] Operator =") { + AHashMap<int, int> map0; + AHashMap<int, int> map1; + const uint32_t count = 5; + map1.insert(1234, 1234); + for (uint32_t i = 0; i < count; i++) { + map0.insert(i, i); + } + map1 = map0; + CHECK(map0.size() == map1.size()); + CHECK(map0.get_capacity() == map1.get_capacity()); + CHECK(*map0.getptr(0) == *map1.getptr(0)); +} + +TEST_CASE("[AHashMap] Array methods") { + AHashMap<int, int> map; + for (int i = 0; i < 100; i++) { + map.insert(100 - i, i); + } + for (int i = 0; i < 100; i++) { + CHECK(map.get_by_index(i).value == i); + } + int index = map.get_index(1); + CHECK(map.get_by_index(index).value == 99); + CHECK(map.erase_by_index(index)); + CHECK(!map.erase_by_index(index)); + CHECK(map.get_index(1) == -1); +} + +} // namespace TestAHashMap + +#endif // TEST_A_HASH_MAP_H diff --git a/tests/core/variant/test_variant.h b/tests/core/variant/test_variant.h index be615975f8..599a282b20 100644 --- a/tests/core/variant/test_variant.h +++ b/tests/core/variant/test_variant.h @@ -1806,6 +1806,14 @@ TEST_CASE("[Variant] Writer and parser dictionary") { CHECK_MESSAGE(d_parsed == Variant(d), "Should parse back."); } +TEST_CASE("[Variant] Writer key sorting") { + Dictionary d = build_dictionary(StringName("C"), 3, "A", 1, StringName("B"), 2, "D", 4); + String d_str; + VariantWriter::write_to_string(d, d_str); + + CHECK_EQ(d_str, "{\n\"A\": 1,\n&\"B\": 2,\n&\"C\": 3,\n\"D\": 4\n}"); +} + TEST_CASE("[Variant] Writer recursive dictionary") { // There is no way to accurately represent a recursive dictionary, // the only thing we can do is make sure the writer doesn't blow up diff --git a/tests/scene/test_tree.h b/tests/scene/test_tree.h index e19f8311e2..a74158d328 100644 --- a/tests/scene/test_tree.h +++ b/tests/scene/test_tree.h @@ -139,18 +139,30 @@ TEST_CASE("[SceneTree][Tree]") { TreeItem *child1 = tree->create_item(); TreeItem *child2 = tree->create_item(); TreeItem *child3 = tree->create_item(); + CHECK_EQ(root->get_next(), nullptr); + CHECK_EQ(root->get_next_visible(), child1); + CHECK_EQ(root->get_next_in_tree(), child1); CHECK_EQ(child1->get_next(), child2); + CHECK_EQ(child1->get_next_visible(), child2); CHECK_EQ(child1->get_next_in_tree(), child2); CHECK_EQ(child2->get_next(), child3); + CHECK_EQ(child2->get_next_visible(), child3); CHECK_EQ(child2->get_next_in_tree(), child3); CHECK_EQ(child3->get_next(), nullptr); + CHECK_EQ(child3->get_next_visible(), nullptr); CHECK_EQ(child3->get_next_in_tree(), nullptr); + CHECK_EQ(root->get_prev(), nullptr); + CHECK_EQ(root->get_prev_visible(), nullptr); + CHECK_EQ(root->get_prev_in_tree(), nullptr); CHECK_EQ(child1->get_prev(), nullptr); + CHECK_EQ(child1->get_prev_visible(), root); CHECK_EQ(child1->get_prev_in_tree(), root); CHECK_EQ(child2->get_prev(), child1); + CHECK_EQ(child2->get_prev_visible(), child1); CHECK_EQ(child2->get_prev_in_tree(), child1); CHECK_EQ(child3->get_prev(), child2); + CHECK_EQ(child3->get_prev_visible(), child2); CHECK_EQ(child3->get_prev_in_tree(), child2); TreeItem *nested1 = tree->create_item(child2); @@ -158,13 +170,127 @@ TEST_CASE("[SceneTree][Tree]") { TreeItem *nested3 = tree->create_item(child2); CHECK_EQ(child1->get_next(), child2); + CHECK_EQ(child1->get_next_visible(), child2); CHECK_EQ(child1->get_next_in_tree(), child2); CHECK_EQ(child2->get_next(), child3); + CHECK_EQ(child2->get_next_visible(), nested1); CHECK_EQ(child2->get_next_in_tree(), nested1); CHECK_EQ(child3->get_prev(), child2); + CHECK_EQ(child3->get_prev_visible(), nested3); CHECK_EQ(child3->get_prev_in_tree(), nested3); CHECK_EQ(nested1->get_prev_in_tree(), child2); CHECK_EQ(nested1->get_next_in_tree(), nested2); + CHECK_EQ(nested3->get_next_in_tree(), child3); + + memdelete(tree); + } + + SUBCASE("[Tree] Previous and Next items with hide root.") { + Tree *tree = memnew(Tree); + tree->set_hide_root(true); + TreeItem *root = tree->create_item(); + + TreeItem *child1 = tree->create_item(); + TreeItem *child2 = tree->create_item(); + TreeItem *child3 = tree->create_item(); + CHECK_EQ(root->get_next(), nullptr); + CHECK_EQ(root->get_next_visible(), child1); + CHECK_EQ(root->get_next_in_tree(), child1); + CHECK_EQ(child1->get_next(), child2); + CHECK_EQ(child1->get_next_visible(), child2); + CHECK_EQ(child1->get_next_in_tree(), child2); + CHECK_EQ(child2->get_next(), child3); + CHECK_EQ(child2->get_next_visible(), child3); + CHECK_EQ(child2->get_next_in_tree(), child3); + CHECK_EQ(child3->get_next(), nullptr); + CHECK_EQ(child3->get_next_visible(), nullptr); + CHECK_EQ(child3->get_next_in_tree(), nullptr); + + CHECK_EQ(root->get_prev(), nullptr); + CHECK_EQ(root->get_prev_visible(), nullptr); + CHECK_EQ(root->get_prev_in_tree(), nullptr); + CHECK_EQ(child1->get_prev(), nullptr); + CHECK_EQ(child1->get_prev_visible(), nullptr); + CHECK_EQ(child1->get_prev_in_tree(), nullptr); + CHECK_EQ(child2->get_prev(), child1); + CHECK_EQ(child2->get_prev_visible(), child1); + CHECK_EQ(child2->get_prev_in_tree(), child1); + CHECK_EQ(child3->get_prev(), child2); + CHECK_EQ(child3->get_prev_visible(), child2); + CHECK_EQ(child3->get_prev_in_tree(), child2); + + memdelete(tree); + } + + SUBCASE("[Tree] Previous and Next items wrapping.") { + Tree *tree = memnew(Tree); + TreeItem *root = tree->create_item(); + + TreeItem *child1 = tree->create_item(); + TreeItem *child2 = tree->create_item(); + TreeItem *child3 = tree->create_item(); + CHECK_EQ(root->get_next_visible(true), child1); + CHECK_EQ(root->get_next_in_tree(true), child1); + CHECK_EQ(child1->get_next_visible(true), child2); + CHECK_EQ(child1->get_next_in_tree(true), child2); + CHECK_EQ(child2->get_next_visible(true), child3); + CHECK_EQ(child2->get_next_in_tree(true), child3); + CHECK_EQ(child3->get_next_visible(true), root); + CHECK_EQ(child3->get_next_in_tree(true), root); + + CHECK_EQ(root->get_prev_visible(true), child3); + CHECK_EQ(root->get_prev_in_tree(true), child3); + CHECK_EQ(child1->get_prev_visible(true), root); + CHECK_EQ(child1->get_prev_in_tree(true), root); + CHECK_EQ(child2->get_prev_visible(true), child1); + CHECK_EQ(child2->get_prev_in_tree(true), child1); + CHECK_EQ(child3->get_prev_visible(true), child2); + CHECK_EQ(child3->get_prev_in_tree(true), child2); + + TreeItem *nested1 = tree->create_item(child2); + TreeItem *nested2 = tree->create_item(child2); + TreeItem *nested3 = tree->create_item(child2); + + CHECK_EQ(child1->get_next_visible(true), child2); + CHECK_EQ(child1->get_next_in_tree(true), child2); + CHECK_EQ(child2->get_next_visible(true), nested1); + CHECK_EQ(child2->get_next_in_tree(true), nested1); + CHECK_EQ(nested3->get_next_visible(true), child3); + CHECK_EQ(nested3->get_next_in_tree(true), child3); + CHECK_EQ(child3->get_prev_visible(true), nested3); + CHECK_EQ(child3->get_prev_in_tree(true), nested3); + CHECK_EQ(nested1->get_prev_in_tree(true), child2); + CHECK_EQ(nested1->get_next_in_tree(true), nested2); + CHECK_EQ(nested3->get_next_in_tree(true), child3); + + memdelete(tree); + } + + SUBCASE("[Tree] Previous and Next items wrapping with hide root.") { + Tree *tree = memnew(Tree); + tree->set_hide_root(true); + TreeItem *root = tree->create_item(); + + TreeItem *child1 = tree->create_item(); + TreeItem *child2 = tree->create_item(); + TreeItem *child3 = tree->create_item(); + CHECK_EQ(root->get_next_visible(true), child1); + CHECK_EQ(root->get_next_in_tree(true), child1); + CHECK_EQ(child1->get_next_visible(true), child2); + CHECK_EQ(child1->get_next_in_tree(true), child2); + CHECK_EQ(child2->get_next_visible(true), child3); + CHECK_EQ(child2->get_next_in_tree(true), child3); + CHECK_EQ(child3->get_next_visible(true), root); + CHECK_EQ(child3->get_next_in_tree(true), root); + + CHECK_EQ(root->get_prev_visible(true), child3); + CHECK_EQ(root->get_prev_in_tree(true), child3); + CHECK_EQ(child1->get_prev_visible(true), child3); + CHECK_EQ(child1->get_prev_in_tree(true), child3); + CHECK_EQ(child2->get_prev_visible(true), child1); + CHECK_EQ(child2->get_prev_in_tree(true), child1); + CHECK_EQ(child3->get_prev_visible(true), child2); + CHECK_EQ(child3->get_prev_in_tree(true), child2); memdelete(tree); } diff --git a/tests/test_main.cpp b/tests/test_main.cpp index 465484d605..979aee8001 100644 --- a/tests/test_main.cpp +++ b/tests/test_main.cpp @@ -86,6 +86,7 @@ #include "tests/core/string/test_string.h" #include "tests/core/string/test_translation.h" #include "tests/core/string/test_translation_server.h" +#include "tests/core/templates/test_a_hash_map.h" #include "tests/core/templates/test_command_queue.h" #include "tests/core/templates/test_hash_map.h" #include "tests/core/templates/test_hash_set.h" diff --git a/thirdparty/README.md b/thirdparty/README.md index 58226261f4..2ce82e82df 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -59,12 +59,13 @@ Files extracted from upstream source: ## basis_universal - Upstream: https://github.com/BinomialLLC/basis_universal -- Version: 1.16.4 (900e40fb5d2502927360fe2f31762bdbb624455f, 2023) +- Version: 1.50.0 (051ad6d8a64bb95a79e8601c317055fd1782ad3e, 2024) - License: Apache 2.0 Files extracted from upstream source: -- `encoder/` and `transcoder/` folders, minus `jpgd.{cpp,h}` +- `encoder/` and `transcoder/` folders, with the following files removed from `encoder`: + `jpgd.{cpp,h}`, `3rdparty/{qoi.h,tinydds.h,tinyexr.cpp,tinyexr.h}` - `LICENSE` Applied upstream PR https://github.com/BinomialLLC/basis_universal/pull/344 to @@ -78,7 +79,7 @@ fix build with our own copy of zstd (patch in `patches`). Files extracted from upstream source: -- `bc6h.glsl`, `bc1.glsl`, `CrossPlatformSettings_piece_all.glsl` and `UavCrossPlatform_piece_all.glsl`. +- `bc6h.glsl`, `bc1.glsl`, `bc4.glsl`, `CrossPlatformSettings_piece_all.glsl` and `UavCrossPlatform_piece_all.glsl`. - `LICENSE.md` diff --git a/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.cpp b/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.cpp new file mode 100644 index 0000000000..5abfe2faf9 --- /dev/null +++ b/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.cpp @@ -0,0 +1,2052 @@ +// File: android_astc_decomp.cpp + +/*------------------------------------------------------------------------- + * drawElements Quality Program Tester Core + * ---------------------------------------- + * + * Copyright 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * rg: Removed external dependencies, minor fix to decompress() so it converts non-sRGB + * output to 8-bits correctly. I've compared this decoder's output + * vs. astc-codec with random inputs. + * + *//*! + * \file + * \brief ASTC Utilities. + *//*--------------------------------------------------------------------*/ +#include "android_astc_decomp.h" +#include <assert.h> +#include <algorithm> +#include <fenv.h> +#include <math.h> + +#define DE_LENGTH_OF_ARRAY(x) (sizeof(x)/sizeof(x[0])) +#define DE_UNREF(x) (void)x + +typedef uint8_t deUint8; +typedef int8_t deInt8; +typedef uint32_t deUint32; +typedef int32_t deInt32; +typedef uint16_t deUint16; +typedef int16_t deInt16; +typedef int64_t deInt64; +typedef uint64_t deUint64; + +#define DE_ASSERT assert + +#ifdef _MSC_VER +#pragma warning (disable:4505) // unreferenced local function has been removed +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +namespace basisu_astc +{ + template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; } + template <typename S> inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } + template <typename S> inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } + + static bool inBounds(int v, int l, int h) + { + return (v >= l) && (v < h); + } + + static bool inRange(int v, int l, int h) + { + return (v >= l) && (v <= h); + } + + template<typename T> + static inline T max(T a, T b) + { + return (a > b) ? a : b; + } + + template<typename T> + static inline T min(T a, T b) + { + return (a < b) ? a : b; + } + + template<typename T> + static inline T clamp(T a, T l, T h) + { + if (a < l) + return l; + else if (a > h) + return h; + return a; + } + + struct UVec4 + { + uint32_t m_c[4]; + + UVec4() + { + m_c[0] = 0; + m_c[1] = 0; + m_c[2] = 0; + m_c[3] = 0; + } + + UVec4(uint32_t x, uint32_t y, uint32_t z, uint32_t w) + { + m_c[0] = x; + m_c[1] = y; + m_c[2] = z; + m_c[3] = w; + } + + uint32_t x() const { return m_c[0]; } + uint32_t y() const { return m_c[1]; } + uint32_t z() const { return m_c[2]; } + uint32_t w() const { return m_c[3]; } + + uint32_t& x() { return m_c[0]; } + uint32_t& y() { return m_c[1]; } + uint32_t& z() { return m_c[2]; } + uint32_t& w() { return m_c[3]; } + + uint32_t operator[] (uint32_t idx) const { assert(idx < 4); return m_c[idx]; } + uint32_t& operator[] (uint32_t idx) { assert(idx < 4); return m_c[idx]; } + }; + + struct IVec4 + { + int32_t m_c[4]; + + IVec4() + { + m_c[0] = 0; + m_c[1] = 0; + m_c[2] = 0; + m_c[3] = 0; + } + + IVec4(int32_t x, int32_t y, int32_t z, int32_t w) + { + m_c[0] = x; + m_c[1] = y; + m_c[2] = z; + m_c[3] = w; + } + + int32_t x() const { return m_c[0]; } + int32_t y() const { return m_c[1]; } + int32_t z() const { return m_c[2]; } + int32_t w() const { return m_c[3]; } + + int32_t& x() { return m_c[0]; } + int32_t& y() { return m_c[1]; } + int32_t& z() { return m_c[2]; } + int32_t& w() { return m_c[3]; } + + UVec4 asUint() const + { + return UVec4(maximum(0, m_c[0]), maximum(0, m_c[1]), maximum(0, m_c[2]), maximum(0, m_c[3])); + } + + int32_t operator[] (uint32_t idx) const { assert(idx < 4); return m_c[idx]; } + int32_t& operator[] (uint32_t idx) { assert(idx < 4); return m_c[idx]; } + }; + + struct IVec3 + { + int32_t m_c[3]; + + IVec3() + { + m_c[0] = 0; + m_c[1] = 0; + m_c[2] = 0; + } + + IVec3(int32_t x, int32_t y, int32_t z) + { + m_c[0] = x; + m_c[1] = y; + m_c[2] = z; + } + + int32_t x() const { return m_c[0]; } + int32_t y() const { return m_c[1]; } + int32_t z() const { return m_c[2]; } + + int32_t& x() { return m_c[0]; } + int32_t& y() { return m_c[1]; } + int32_t& z() { return m_c[2]; } + + int32_t operator[] (uint32_t idx) const { assert(idx < 3); return m_c[idx]; } + int32_t& operator[] (uint32_t idx) { assert(idx < 3); return m_c[idx]; } + }; + + static uint32_t deDivRoundUp32(uint32_t a, uint32_t b) + { + return (a + b - 1) / b; + } + + static bool deInBounds32(uint32_t v, uint32_t l, uint32_t h) + { + return (v >= l) && (v < h); + } + +namespace astc +{ + +using std::vector; + +namespace +{ + +// Common utilities +enum +{ + MAX_BLOCK_WIDTH = 12, + MAX_BLOCK_HEIGHT = 12 +}; + +inline deUint32 getBit (deUint32 src, int ndx) +{ + DE_ASSERT(basisu_astc::inBounds(ndx, 0, 32)); + return (src >> ndx) & 1; +} + +inline deUint32 getBits (deUint32 src, int low, int high) +{ + const int numBits = (high-low) + 1; + DE_ASSERT(basisu_astc::inRange(numBits, 1, 32)); + + if (numBits < 32) + return (deUint32)((src >> low) & ((1u<<numBits)-1)); + else + return (deUint32)((src >> low) & 0xFFFFFFFFu); +} + +inline bool isBitSet (deUint32 src, int ndx) +{ + return getBit(src, ndx) != 0; +} + +inline deUint32 reverseBits (deUint32 src, int numBits) +{ + DE_ASSERT(basisu_astc::inRange(numBits, 0, 32)); + + deUint32 result = 0; + for (int i = 0; i < numBits; i++) + result |= ((src >> i) & 1) << (numBits-1-i); + + return result; +} + +inline deUint32 bitReplicationScale (deUint32 src, int numSrcBits, int numDstBits) +{ + DE_ASSERT(numSrcBits <= numDstBits); + DE_ASSERT((src & ((1<<numSrcBits)-1)) == src); + + deUint32 dst = 0; + for (int shift = numDstBits-numSrcBits; shift > -numSrcBits; shift -= numSrcBits) + dst |= (shift >= 0) ? (src << shift) : (src >> -shift); + + return dst; +} + +inline deInt32 signExtend (deInt32 src, int numSrcBits) +{ + DE_ASSERT(basisu_astc::inRange(numSrcBits, 2, 31)); + + const bool negative = (src & (1 << (numSrcBits-1))) != 0; + return src | (negative ? ~((1 << numSrcBits) - 1) : 0); +} + +typedef uint16_t deFloat16; + +inline bool isFloat16InfOrNan (deFloat16 v) +{ + return getBits(v, 10, 14) == 31; +} + +float deFloat16To32(deFloat16 val16) +{ + deUint32 sign; + deUint32 expotent; + deUint32 mantissa; + + union + { + float f; + deUint32 u; + } x; + + x.u = 0u; + + sign = ((deUint32)val16 >> 15u) & 0x00000001u; + expotent = ((deUint32)val16 >> 10u) & 0x0000001fu; + mantissa = (deUint32)val16 & 0x000003ffu; + + if (expotent == 0u) + { + if (mantissa == 0u) + { + /* +/- 0 */ + x.u = sign << 31u; + return x.f; + } + else + { + /* Denormalized, normalize it. */ + + while (!(mantissa & 0x00000400u)) + { + mantissa <<= 1u; + expotent -= 1u; + } + + expotent += 1u; + mantissa &= ~0x00000400u; + } + } + else if (expotent == 31u) + { + if (mantissa == 0u) + { + /* +/- InF */ + x.u = (sign << 31u) | 0x7f800000u; + return x.f; + } + else + { + /* +/- NaN */ + x.u = (sign << 31u) | 0x7f800000u | (mantissa << 13u); + return x.f; + } + } + + expotent = expotent + (127u - 15u); + mantissa = mantissa << 13u; + + x.u = (sign << 31u) | (expotent << 23u) | mantissa; + return x.f; +} + +enum ISEMode +{ + ISEMODE_TRIT = 0, + ISEMODE_QUINT, + ISEMODE_PLAIN_BIT, + ISEMODE_LAST +}; + +struct ISEParams +{ + ISEMode mode; + int numBits; + ISEParams (ISEMode mode_, int numBits_) : mode(mode_), numBits(numBits_) {} +}; + +inline int computeNumRequiredBits (const ISEParams& iseParams, int numValues) +{ + switch (iseParams.mode) + { + case ISEMODE_TRIT: return deDivRoundUp32(numValues*8, 5) + numValues*iseParams.numBits; + case ISEMODE_QUINT: return deDivRoundUp32(numValues*7, 3) + numValues*iseParams.numBits; + case ISEMODE_PLAIN_BIT: return numValues*iseParams.numBits; + default: + DE_ASSERT(false); + return -1; + } +} + +ISEParams computeMaximumRangeISEParams (int numAvailableBits, int numValuesInSequence) +{ + int curBitsForTritMode = 6; + int curBitsForQuintMode = 5; + int curBitsForPlainBitMode = 8; + + while (true) + { + DE_ASSERT(curBitsForTritMode > 0 || curBitsForQuintMode > 0 || curBitsForPlainBitMode > 0); + const int tritRange = (curBitsForTritMode > 0) ? (3 << curBitsForTritMode) - 1 : -1; + const int quintRange = (curBitsForQuintMode > 0) ? (5 << curBitsForQuintMode) - 1 : -1; + const int plainBitRange = (curBitsForPlainBitMode > 0) ? (1 << curBitsForPlainBitMode) - 1 : -1; + const int maxRange = basisu_astc::max(basisu_astc::max(tritRange, quintRange), plainBitRange); + + if (maxRange == tritRange) + { + const ISEParams params(ISEMODE_TRIT, curBitsForTritMode); + + if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits) + return ISEParams(ISEMODE_TRIT, curBitsForTritMode); + + curBitsForTritMode--; + } + else if (maxRange == quintRange) + { + const ISEParams params(ISEMODE_QUINT, curBitsForQuintMode); + + if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits) + return ISEParams(ISEMODE_QUINT, curBitsForQuintMode); + + curBitsForQuintMode--; + } + else + { + const ISEParams params(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode); + DE_ASSERT(maxRange == plainBitRange); + + if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits) + return ISEParams(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode); + + curBitsForPlainBitMode--; + } + } +} + +inline int computeNumColorEndpointValues (deUint32 endpointMode) +{ + DE_ASSERT(endpointMode < 16); + return (endpointMode/4 + 1) * 2; +} + +// Decompression utilities +enum DecompressResult +{ + DECOMPRESS_RESULT_VALID_BLOCK = 0, //!< Decompressed valid block + DECOMPRESS_RESULT_ERROR, //!< Encountered error while decompressing, error color written + DECOMPRESS_RESULT_LAST +}; + +// A helper for getting bits from a 128-bit block. +class Block128 +{ +private: + typedef deUint64 Word; + + enum + { + WORD_BYTES = sizeof(Word), + WORD_BITS = 8*WORD_BYTES, + NUM_WORDS = 128 / WORD_BITS + }; + //DE_STATIC_ASSERT(128 % WORD_BITS == 0); + +public: + Block128 (const deUint8* src) + { + for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++) + { + m_words[wordNdx] = 0; + for (int byteNdx = 0; byteNdx < WORD_BYTES; byteNdx++) + m_words[wordNdx] |= (Word)src[wordNdx*WORD_BYTES + byteNdx] << (8*byteNdx); + } + } + + deUint32 getBit (int ndx) const + { + DE_ASSERT(basisu_astc::inBounds(ndx, 0, 128)); + return (m_words[ndx / WORD_BITS] >> (ndx % WORD_BITS)) & 1; + } + + deUint32 getBits (int low, int high) const + { + DE_ASSERT(basisu_astc::inBounds(low, 0, 128)); + DE_ASSERT(basisu_astc::inBounds(high, 0, 128)); + DE_ASSERT(basisu_astc::inRange(high-low+1, 0, 32)); + + if (high-low+1 == 0) + return 0; + + const int word0Ndx = low / WORD_BITS; + const int word1Ndx = high / WORD_BITS; + // \note "foo << bar << 1" done instead of "foo << (bar+1)" to avoid overflow, i.e. shift amount being too big. + if (word0Ndx == word1Ndx) + return (deUint32)((m_words[word0Ndx] & ((((Word)1 << high%WORD_BITS << 1) - 1))) >> ((Word)low % WORD_BITS)); + else + { + DE_ASSERT(word1Ndx == word0Ndx + 1); + return (deUint32)(m_words[word0Ndx] >> (low%WORD_BITS)) | + (deUint32)((m_words[word1Ndx] & (((Word)1 << high%WORD_BITS << 1) - 1)) << (high-low - high%WORD_BITS)); + } + } + + bool isBitSet (int ndx) const + { + DE_ASSERT(basisu_astc::inBounds(ndx, 0, 128)); + return getBit(ndx) != 0; + } + +private: + Word m_words[NUM_WORDS]; +}; + +// A helper for sequential access into a Block128. +class BitAccessStream +{ +public: + BitAccessStream (const Block128& src, int startNdxInSrc, int length, bool forward) + : m_src (src) + , m_startNdxInSrc (startNdxInSrc) + , m_length (length) + , m_forward (forward) + , m_ndx (0) + { + } + + // Get the next num bits. Bits at positions greater than or equal to m_length are zeros. + deUint32 getNext (int num) + { + if (num == 0 || m_ndx >= m_length) + return 0; + const int end = m_ndx + num; + const int numBitsFromSrc = basisu_astc::max(0, basisu_astc::min(m_length, end) - m_ndx); + const int low = m_ndx; + const int high = m_ndx + numBitsFromSrc - 1; + + m_ndx += num; + + return m_forward ? m_src.getBits(m_startNdxInSrc + low, m_startNdxInSrc + high) + : reverseBits(m_src.getBits(m_startNdxInSrc - high, m_startNdxInSrc - low), numBitsFromSrc); + } + +private: + const Block128& m_src; + const int m_startNdxInSrc; + const int m_length; + const bool m_forward; + int m_ndx; +}; + +struct ISEDecodedResult +{ + deUint32 m; + deUint32 tq; //!< Trit or quint value, depending on ISE mode. + deUint32 v; +}; + +// Data from an ASTC block's "block mode" part (i.e. bits [0,10]). +struct ASTCBlockMode +{ + bool isError; + // \note Following fields only relevant if !isError. + bool isVoidExtent; + // \note Following fields only relevant if !isVoidExtent. + bool isDualPlane; + int weightGridWidth; + int weightGridHeight; + ISEParams weightISEParams; + + ASTCBlockMode (void) + : isError (true) + , isVoidExtent (true) + , isDualPlane (true) + , weightGridWidth (-1) + , weightGridHeight (-1) + , weightISEParams (ISEMODE_LAST, -1) + { + } +}; + +inline int computeNumWeights (const ASTCBlockMode& mode) +{ + return mode.weightGridWidth * mode.weightGridHeight * (mode.isDualPlane ? 2 : 1); +} + +struct ColorEndpointPair +{ + UVec4 e0; + UVec4 e1; +}; + +struct TexelWeightPair +{ + deUint32 w[2]; +}; + +ASTCBlockMode getASTCBlockMode (deUint32 blockModeData) +{ + ASTCBlockMode blockMode; + blockMode.isError = true; // \note Set to false later, if not error. + blockMode.isVoidExtent = getBits(blockModeData, 0, 8) == 0x1fc; + if (!blockMode.isVoidExtent) + { + if ((getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 6, 8) == 7) || getBits(blockModeData, 0, 3) == 0) + return blockMode; // Invalid ("reserved"). + + deUint32 r = (deUint32)-1; // \note Set in the following branches. + + if (getBits(blockModeData, 0, 1) == 0) + { + const deUint32 r0 = getBit(blockModeData, 4); + const deUint32 r1 = getBit(blockModeData, 2); + const deUint32 r2 = getBit(blockModeData, 3); + const deUint32 i78 = getBits(blockModeData, 7, 8); + + r = (r2 << 2) | (r1 << 1) | (r0 << 0); + + if (i78 == 3) + { + const bool i5 = isBitSet(blockModeData, 5); + blockMode.weightGridWidth = i5 ? 10 : 6; + blockMode.weightGridHeight = i5 ? 6 : 10; + } + else + { + const deUint32 a = getBits(blockModeData, 5, 6); + + switch (i78) + { + case 0: blockMode.weightGridWidth = 12; blockMode.weightGridHeight = a + 2; break; + case 1: blockMode.weightGridWidth = a + 2; blockMode.weightGridHeight = 12; break; + case 2: blockMode.weightGridWidth = a + 6; blockMode.weightGridHeight = getBits(blockModeData, 9, 10) + 6; break; + default: DE_ASSERT(false); + } + } + } + else + { + const deUint32 r0 = getBit(blockModeData, 4); + const deUint32 r1 = getBit(blockModeData, 0); + const deUint32 r2 = getBit(blockModeData, 1); + const deUint32 i23 = getBits(blockModeData, 2, 3); + const deUint32 a = getBits(blockModeData, 5, 6); + + r = (r2 << 2) | (r1 << 1) | (r0 << 0); + if (i23 == 3) + { + const deUint32 b = getBit(blockModeData, 7); + const bool i8 = isBitSet(blockModeData, 8); + blockMode.weightGridWidth = i8 ? b+2 : a+2; + blockMode.weightGridHeight = i8 ? a+2 : b+6; + } + else + { + const deUint32 b = getBits(blockModeData, 7, 8); + switch (i23) + { + case 0: blockMode.weightGridWidth = b + 4; blockMode.weightGridHeight = a + 2; break; + case 1: blockMode.weightGridWidth = b + 8; blockMode.weightGridHeight = a + 2; break; + case 2: blockMode.weightGridWidth = a + 2; blockMode.weightGridHeight = b + 8; break; + default: DE_ASSERT(false); + } + } + } + + const bool zeroDH = getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 7, 8) == 2; + const bool h = zeroDH ? 0 : isBitSet(blockModeData, 9); + blockMode.isDualPlane = zeroDH ? 0 : isBitSet(blockModeData, 10); + + { + ISEMode& m = blockMode.weightISEParams.mode; + int& b = blockMode.weightISEParams.numBits; + m = ISEMODE_PLAIN_BIT; + b = 0; + if (h) + { + switch (r) + { + case 2: m = ISEMODE_QUINT; b = 1; break; + case 3: m = ISEMODE_TRIT; b = 2; break; + case 4: b = 4; break; + case 5: m = ISEMODE_QUINT; b = 2; break; + case 6: m = ISEMODE_TRIT; b = 3; break; + case 7: b = 5; break; + default: DE_ASSERT(false); + } + } + else + { + switch (r) + { + case 2: b = 1; break; + case 3: m = ISEMODE_TRIT; break; + case 4: b = 2; break; + case 5: m = ISEMODE_QUINT; break; + case 6: m = ISEMODE_TRIT; b = 1; break; + case 7: b = 3; break; + default: DE_ASSERT(false); + } + } + } + } + + blockMode.isError = false; + return blockMode; +} + +inline void setASTCErrorColorBlock (void* dst, int blockWidth, int blockHeight, bool isSRGB) +{ + if (isSRGB) + { + deUint8* const dstU = (deUint8*)dst; + for (int i = 0; i < blockWidth*blockHeight; i++) + { + dstU[4*i + 0] = 0xff; + dstU[4*i + 1] = 0; + dstU[4*i + 2] = 0xff; + dstU[4*i + 3] = 0xff; + } + } + else + { + float* const dstF = (float*)dst; + for (int i = 0; i < blockWidth*blockHeight; i++) + { + dstF[4*i + 0] = 1.0f; + dstF[4*i + 1] = 0.0f; + dstF[4*i + 2] = 1.0f; + dstF[4*i + 3] = 1.0f; + } + } +} + +DecompressResult decodeVoidExtentBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode) +{ + const deUint32 minSExtent = blockData.getBits(12, 24); + const deUint32 maxSExtent = blockData.getBits(25, 37); + const deUint32 minTExtent = blockData.getBits(38, 50); + const deUint32 maxTExtent = blockData.getBits(51, 63); + const bool allExtentsAllOnes = (minSExtent == 0x1fff) && (maxSExtent == 0x1fff) && (minTExtent == 0x1fff) && (maxTExtent == 0x1fff); + const bool isHDRBlock = blockData.isBitSet(9); + + if ((isLDRMode && isHDRBlock) || (!allExtentsAllOnes && (minSExtent >= maxSExtent || minTExtent >= maxTExtent))) + { + setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB); + return DECOMPRESS_RESULT_ERROR; + } + + const deUint32 rgba[4] = + { + blockData.getBits(64, 79), + blockData.getBits(80, 95), + blockData.getBits(96, 111), + blockData.getBits(112, 127) + }; + + if (isSRGB) + { + deUint8* const dstU = (deUint8*)dst; + for (int i = 0; i < blockWidth * blockHeight; i++) + { + for (int c = 0; c < 4; c++) + dstU[i * 4 + c] = (deUint8)((rgba[c] & 0xff00) >> 8); + } + } + else + { + float* const dstF = (float*)dst; + + if (isHDRBlock) + { + for (int c = 0; c < 4; c++) + { + if (isFloat16InfOrNan((deFloat16)rgba[c])) + { + //throw InternalError("Infinity or NaN color component in HDR void extent block in ASTC texture (behavior undefined by ASTC specification)"); + setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB); + return DECOMPRESS_RESULT_ERROR; + } + } + + for (int i = 0; i < blockWidth * blockHeight; i++) + { + for (int c = 0; c < 4; c++) + dstF[i * 4 + c] = deFloat16To32((deFloat16)rgba[c]); + } + } + else + { + for (int i = 0; i < blockWidth * blockHeight; i++) + { + for (int c = 0; c < 4; c++) + dstF[i * 4 + c] = (rgba[c] == 65535) ? 1.0f : ((float)rgba[c] / 65536.0f); + } + } + } + + return DECOMPRESS_RESULT_VALID_BLOCK; +} + +void decodeColorEndpointModes (deUint32* endpointModesDst, const Block128& blockData, int numPartitions, int extraCemBitsStart) +{ + if (numPartitions == 1) + endpointModesDst[0] = blockData.getBits(13, 16); + else + { + const deUint32 highLevelSelector = blockData.getBits(23, 24); + + if (highLevelSelector == 0) + { + const deUint32 mode = blockData.getBits(25, 28); + + for (int i = 0; i < numPartitions; i++) + endpointModesDst[i] = mode; + } + else + { + for (int partNdx = 0; partNdx < numPartitions; partNdx++) + { + const deUint32 cemClass = highLevelSelector - (blockData.isBitSet(25 + partNdx) ? 0 : 1); + const deUint32 lowBit0Ndx = numPartitions + 2*partNdx; + const deUint32 lowBit1Ndx = numPartitions + 2*partNdx + 1; + const deUint32 lowBit0 = blockData.getBit(lowBit0Ndx < 4 ? 25+lowBit0Ndx : extraCemBitsStart+lowBit0Ndx-4); + const deUint32 lowBit1 = blockData.getBit(lowBit1Ndx < 4 ? 25+lowBit1Ndx : extraCemBitsStart+lowBit1Ndx-4); + + endpointModesDst[partNdx] = (cemClass << 2) | (lowBit1 << 1) | lowBit0; + } + } + } +} + +int computeNumColorEndpointValues (const deUint32* endpointModes, int numPartitions) +{ + int result = 0; + + for (int i = 0; i < numPartitions; i++) + result += computeNumColorEndpointValues(endpointModes[i]); + + return result; +} + +void decodeISETritBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits) +{ + DE_ASSERT(basisu_astc::inRange(numValues, 1, 5)); + + deUint32 m[5]; + m[0] = data.getNext(numBits); + deUint32 T01 = data.getNext(2); + m[1] = data.getNext(numBits); + deUint32 T23 = data.getNext(2); + m[2] = data.getNext(numBits); + deUint32 T4 = data.getNext(1); + m[3] = data.getNext(numBits); + deUint32 T56 = data.getNext(2); + m[4] = data.getNext(numBits); + deUint32 T7 = data.getNext(1); + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wimplicit-fallthrough=" +#endif + switch (numValues) + { + // \note Fall-throughs. + case 1: T23 = 0; + case 2: T4 = 0; + case 3: T56 = 0; + case 4: T7 = 0; + case 5: break; + default: + DE_ASSERT(false); + } +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + + const deUint32 T = (T7 << 7) | (T56 << 5) | (T4 << 4) | (T23 << 2) | (T01 << 0); + + static const deUint32 tritsFromT[256][5] = + { + { 0,0,0,0,0 }, { 1,0,0,0,0 }, { 2,0,0,0,0 }, { 0,0,2,0,0 }, { 0,1,0,0,0 }, { 1,1,0,0,0 }, { 2,1,0,0,0 }, { 1,0,2,0,0 }, { 0,2,0,0,0 }, { 1,2,0,0,0 }, { 2,2,0,0,0 }, { 2,0,2,0,0 }, { 0,2,2,0,0 }, { 1,2,2,0,0 }, { 2,2,2,0,0 }, { 2,0,2,0,0 }, + { 0,0,1,0,0 }, { 1,0,1,0,0 }, { 2,0,1,0,0 }, { 0,1,2,0,0 }, { 0,1,1,0,0 }, { 1,1,1,0,0 }, { 2,1,1,0,0 }, { 1,1,2,0,0 }, { 0,2,1,0,0 }, { 1,2,1,0,0 }, { 2,2,1,0,0 }, { 2,1,2,0,0 }, { 0,0,0,2,2 }, { 1,0,0,2,2 }, { 2,0,0,2,2 }, { 0,0,2,2,2 }, + { 0,0,0,1,0 }, { 1,0,0,1,0 }, { 2,0,0,1,0 }, { 0,0,2,1,0 }, { 0,1,0,1,0 }, { 1,1,0,1,0 }, { 2,1,0,1,0 }, { 1,0,2,1,0 }, { 0,2,0,1,0 }, { 1,2,0,1,0 }, { 2,2,0,1,0 }, { 2,0,2,1,0 }, { 0,2,2,1,0 }, { 1,2,2,1,0 }, { 2,2,2,1,0 }, { 2,0,2,1,0 }, + { 0,0,1,1,0 }, { 1,0,1,1,0 }, { 2,0,1,1,0 }, { 0,1,2,1,0 }, { 0,1,1,1,0 }, { 1,1,1,1,0 }, { 2,1,1,1,0 }, { 1,1,2,1,0 }, { 0,2,1,1,0 }, { 1,2,1,1,0 }, { 2,2,1,1,0 }, { 2,1,2,1,0 }, { 0,1,0,2,2 }, { 1,1,0,2,2 }, { 2,1,0,2,2 }, { 1,0,2,2,2 }, + { 0,0,0,2,0 }, { 1,0,0,2,0 }, { 2,0,0,2,0 }, { 0,0,2,2,0 }, { 0,1,0,2,0 }, { 1,1,0,2,0 }, { 2,1,0,2,0 }, { 1,0,2,2,0 }, { 0,2,0,2,0 }, { 1,2,0,2,0 }, { 2,2,0,2,0 }, { 2,0,2,2,0 }, { 0,2,2,2,0 }, { 1,2,2,2,0 }, { 2,2,2,2,0 }, { 2,0,2,2,0 }, + { 0,0,1,2,0 }, { 1,0,1,2,0 }, { 2,0,1,2,0 }, { 0,1,2,2,0 }, { 0,1,1,2,0 }, { 1,1,1,2,0 }, { 2,1,1,2,0 }, { 1,1,2,2,0 }, { 0,2,1,2,0 }, { 1,2,1,2,0 }, { 2,2,1,2,0 }, { 2,1,2,2,0 }, { 0,2,0,2,2 }, { 1,2,0,2,2 }, { 2,2,0,2,2 }, { 2,0,2,2,2 }, + { 0,0,0,0,2 }, { 1,0,0,0,2 }, { 2,0,0,0,2 }, { 0,0,2,0,2 }, { 0,1,0,0,2 }, { 1,1,0,0,2 }, { 2,1,0,0,2 }, { 1,0,2,0,2 }, { 0,2,0,0,2 }, { 1,2,0,0,2 }, { 2,2,0,0,2 }, { 2,0,2,0,2 }, { 0,2,2,0,2 }, { 1,2,2,0,2 }, { 2,2,2,0,2 }, { 2,0,2,0,2 }, + { 0,0,1,0,2 }, { 1,0,1,0,2 }, { 2,0,1,0,2 }, { 0,1,2,0,2 }, { 0,1,1,0,2 }, { 1,1,1,0,2 }, { 2,1,1,0,2 }, { 1,1,2,0,2 }, { 0,2,1,0,2 }, { 1,2,1,0,2 }, { 2,2,1,0,2 }, { 2,1,2,0,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,0,2,2,2 }, + { 0,0,0,0,1 }, { 1,0,0,0,1 }, { 2,0,0,0,1 }, { 0,0,2,0,1 }, { 0,1,0,0,1 }, { 1,1,0,0,1 }, { 2,1,0,0,1 }, { 1,0,2,0,1 }, { 0,2,0,0,1 }, { 1,2,0,0,1 }, { 2,2,0,0,1 }, { 2,0,2,0,1 }, { 0,2,2,0,1 }, { 1,2,2,0,1 }, { 2,2,2,0,1 }, { 2,0,2,0,1 }, + { 0,0,1,0,1 }, { 1,0,1,0,1 }, { 2,0,1,0,1 }, { 0,1,2,0,1 }, { 0,1,1,0,1 }, { 1,1,1,0,1 }, { 2,1,1,0,1 }, { 1,1,2,0,1 }, { 0,2,1,0,1 }, { 1,2,1,0,1 }, { 2,2,1,0,1 }, { 2,1,2,0,1 }, { 0,0,1,2,2 }, { 1,0,1,2,2 }, { 2,0,1,2,2 }, { 0,1,2,2,2 }, + { 0,0,0,1,1 }, { 1,0,0,1,1 }, { 2,0,0,1,1 }, { 0,0,2,1,1 }, { 0,1,0,1,1 }, { 1,1,0,1,1 }, { 2,1,0,1,1 }, { 1,0,2,1,1 }, { 0,2,0,1,1 }, { 1,2,0,1,1 }, { 2,2,0,1,1 }, { 2,0,2,1,1 }, { 0,2,2,1,1 }, { 1,2,2,1,1 }, { 2,2,2,1,1 }, { 2,0,2,1,1 }, + { 0,0,1,1,1 }, { 1,0,1,1,1 }, { 2,0,1,1,1 }, { 0,1,2,1,1 }, { 0,1,1,1,1 }, { 1,1,1,1,1 }, { 2,1,1,1,1 }, { 1,1,2,1,1 }, { 0,2,1,1,1 }, { 1,2,1,1,1 }, { 2,2,1,1,1 }, { 2,1,2,1,1 }, { 0,1,1,2,2 }, { 1,1,1,2,2 }, { 2,1,1,2,2 }, { 1,1,2,2,2 }, + { 0,0,0,2,1 }, { 1,0,0,2,1 }, { 2,0,0,2,1 }, { 0,0,2,2,1 }, { 0,1,0,2,1 }, { 1,1,0,2,1 }, { 2,1,0,2,1 }, { 1,0,2,2,1 }, { 0,2,0,2,1 }, { 1,2,0,2,1 }, { 2,2,0,2,1 }, { 2,0,2,2,1 }, { 0,2,2,2,1 }, { 1,2,2,2,1 }, { 2,2,2,2,1 }, { 2,0,2,2,1 }, + { 0,0,1,2,1 }, { 1,0,1,2,1 }, { 2,0,1,2,1 }, { 0,1,2,2,1 }, { 0,1,1,2,1 }, { 1,1,1,2,1 }, { 2,1,1,2,1 }, { 1,1,2,2,1 }, { 0,2,1,2,1 }, { 1,2,1,2,1 }, { 2,2,1,2,1 }, { 2,1,2,2,1 }, { 0,2,1,2,2 }, { 1,2,1,2,2 }, { 2,2,1,2,2 }, { 2,1,2,2,2 }, + { 0,0,0,1,2 }, { 1,0,0,1,2 }, { 2,0,0,1,2 }, { 0,0,2,1,2 }, { 0,1,0,1,2 }, { 1,1,0,1,2 }, { 2,1,0,1,2 }, { 1,0,2,1,2 }, { 0,2,0,1,2 }, { 1,2,0,1,2 }, { 2,2,0,1,2 }, { 2,0,2,1,2 }, { 0,2,2,1,2 }, { 1,2,2,1,2 }, { 2,2,2,1,2 }, { 2,0,2,1,2 }, + { 0,0,1,1,2 }, { 1,0,1,1,2 }, { 2,0,1,1,2 }, { 0,1,2,1,2 }, { 0,1,1,1,2 }, { 1,1,1,1,2 }, { 2,1,1,1,2 }, { 1,1,2,1,2 }, { 0,2,1,1,2 }, { 1,2,1,1,2 }, { 2,2,1,1,2 }, { 2,1,2,1,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,1,2,2,2 } + }; + + const deUint32 (& trits)[5] = tritsFromT[T]; + for (int i = 0; i < numValues; i++) + { + dst[i].m = m[i]; + dst[i].tq = trits[i]; + dst[i].v = (trits[i] << numBits) + m[i]; + } +} + +void decodeISEQuintBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits) +{ + DE_ASSERT(basisu_astc::inRange(numValues, 1, 3)); + + deUint32 m[3]; + m[0] = data.getNext(numBits); + deUint32 Q012 = data.getNext(3); + m[1] = data.getNext(numBits); + deUint32 Q34 = data.getNext(2); + m[2] = data.getNext(numBits); + deUint32 Q56 = data.getNext(2); + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wimplicit-fallthrough=" +#endif + switch (numValues) + { + // \note Fall-throughs. + case 1: Q34 = 0; + case 2: Q56 = 0; + case 3: break; + default: + DE_ASSERT(false); + } +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + + const deUint32 Q = (Q56 << 5) | (Q34 << 3) | (Q012 << 0); + + static const deUint32 quintsFromQ[256][3] = + { + { 0,0,0 }, { 1,0,0 }, { 2,0,0 }, { 3,0,0 }, { 4,0,0 }, { 0,4,0 }, { 4,4,0 }, { 4,4,4 }, { 0,1,0 }, { 1,1,0 }, { 2,1,0 }, { 3,1,0 }, { 4,1,0 }, { 1,4,0 }, { 4,4,1 }, { 4,4,4 }, + { 0,2,0 }, { 1,2,0 }, { 2,2,0 }, { 3,2,0 }, { 4,2,0 }, { 2,4,0 }, { 4,4,2 }, { 4,4,4 }, { 0,3,0 }, { 1,3,0 }, { 2,3,0 }, { 3,3,0 }, { 4,3,0 }, { 3,4,0 }, { 4,4,3 }, { 4,4,4 }, + { 0,0,1 }, { 1,0,1 }, { 2,0,1 }, { 3,0,1 }, { 4,0,1 }, { 0,4,1 }, { 4,0,4 }, { 0,4,4 }, { 0,1,1 }, { 1,1,1 }, { 2,1,1 }, { 3,1,1 }, { 4,1,1 }, { 1,4,1 }, { 4,1,4 }, { 1,4,4 }, + { 0,2,1 }, { 1,2,1 }, { 2,2,1 }, { 3,2,1 }, { 4,2,1 }, { 2,4,1 }, { 4,2,4 }, { 2,4,4 }, { 0,3,1 }, { 1,3,1 }, { 2,3,1 }, { 3,3,1 }, { 4,3,1 }, { 3,4,1 }, { 4,3,4 }, { 3,4,4 }, + { 0,0,2 }, { 1,0,2 }, { 2,0,2 }, { 3,0,2 }, { 4,0,2 }, { 0,4,2 }, { 2,0,4 }, { 3,0,4 }, { 0,1,2 }, { 1,1,2 }, { 2,1,2 }, { 3,1,2 }, { 4,1,2 }, { 1,4,2 }, { 2,1,4 }, { 3,1,4 }, + { 0,2,2 }, { 1,2,2 }, { 2,2,2 }, { 3,2,2 }, { 4,2,2 }, { 2,4,2 }, { 2,2,4 }, { 3,2,4 }, { 0,3,2 }, { 1,3,2 }, { 2,3,2 }, { 3,3,2 }, { 4,3,2 }, { 3,4,2 }, { 2,3,4 }, { 3,3,4 }, + { 0,0,3 }, { 1,0,3 }, { 2,0,3 }, { 3,0,3 }, { 4,0,3 }, { 0,4,3 }, { 0,0,4 }, { 1,0,4 }, { 0,1,3 }, { 1,1,3 }, { 2,1,3 }, { 3,1,3 }, { 4,1,3 }, { 1,4,3 }, { 0,1,4 }, { 1,1,4 }, + { 0,2,3 }, { 1,2,3 }, { 2,2,3 }, { 3,2,3 }, { 4,2,3 }, { 2,4,3 }, { 0,2,4 }, { 1,2,4 }, { 0,3,3 }, { 1,3,3 }, { 2,3,3 }, { 3,3,3 }, { 4,3,3 }, { 3,4,3 }, { 0,3,4 }, { 1,3,4 } + }; + + const deUint32 (& quints)[3] = quintsFromQ[Q]; + for (int i = 0; i < numValues; i++) + { + dst[i].m = m[i]; + dst[i].tq = quints[i]; + dst[i].v = (quints[i] << numBits) + m[i]; + } +} + +inline void decodeISEBitBlock (ISEDecodedResult* dst, BitAccessStream& data, int numBits) +{ + dst[0].m = data.getNext(numBits); + dst[0].v = dst[0].m; +} + +void decodeISE (ISEDecodedResult* dst, int numValues, BitAccessStream& data, const ISEParams& params) +{ + if (params.mode == ISEMODE_TRIT) + { + const int numBlocks = deDivRoundUp32(numValues, 5); + for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++) + { + const int numValuesInBlock = blockNdx == numBlocks-1 ? numValues - 5*(numBlocks-1) : 5; + decodeISETritBlock(&dst[5*blockNdx], numValuesInBlock, data, params.numBits); + } + } + else if (params.mode == ISEMODE_QUINT) + { + const int numBlocks = deDivRoundUp32(numValues, 3); + for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++) + { + const int numValuesInBlock = blockNdx == numBlocks-1 ? numValues - 3*(numBlocks-1) : 3; + decodeISEQuintBlock(&dst[3*blockNdx], numValuesInBlock, data, params.numBits); + } + } + else + { + DE_ASSERT(params.mode == ISEMODE_PLAIN_BIT); + for (int i = 0; i < numValues; i++) + decodeISEBitBlock(&dst[i], data, params.numBits); + } +} + +void unquantizeColorEndpoints (deUint32* dst, const ISEDecodedResult* iseResults, int numEndpoints, const ISEParams& iseParams) +{ + if ((iseParams.mode == ISEMODE_TRIT) || (iseParams.mode == ISEMODE_QUINT)) + { + const int rangeCase = iseParams.numBits*2 - (iseParams.mode == ISEMODE_TRIT ? 2 : 1); + DE_ASSERT(basisu_astc::inRange(rangeCase, 0, 10)); + + static const deUint32 Ca[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 }; + const deUint32 C = Ca[rangeCase]; + + for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++) + { + const deUint32 a = getBit(iseResults[endpointNdx].m, 0); + const deUint32 b = getBit(iseResults[endpointNdx].m, 1); + const deUint32 c = getBit(iseResults[endpointNdx].m, 2); + const deUint32 d = getBit(iseResults[endpointNdx].m, 3); + const deUint32 e = getBit(iseResults[endpointNdx].m, 4); + const deUint32 f = getBit(iseResults[endpointNdx].m, 5); + const deUint32 A = (a == 0) ? 0 : (1<<9)-1; + + const deUint32 B = (rangeCase == 0) ? 0 + : (rangeCase == 1) ? 0 + : (rangeCase == 2) ? ((b << 8) | (b << 4) | (b << 2) | (b << 1)) + : (rangeCase == 3) ? ((b << 8) | (b << 3) | (b << 2)) + : (rangeCase == 4) ? ((c << 8) | (b << 7) | (c << 3) | (b << 2) | (c << 1) | (b << 0)) + : (rangeCase == 5) ? ((c << 8) | (b << 7) | (c << 2) | (b << 1) | (c << 0)) + : (rangeCase == 6) ? ((d << 8) | (c << 7) | (b << 6) | (d << 2) | (c << 1) | (b << 0)) + : (rangeCase == 7) ? ((d << 8) | (c << 7) | (b << 6) | (d << 1) | (c << 0)) + : (rangeCase == 8) ? ((e << 8) | (d << 7) | (c << 6) | (b << 5) | (e << 1) | (d << 0)) + : (rangeCase == 9) ? ((e << 8) | (d << 7) | (c << 6) | (b << 5) | (e << 0)) + : (rangeCase == 10) ? ((f << 8) | (e << 7) | (d << 6) | (c << 5) | (b << 4) | (f << 0)) + : (deUint32)-1; + + DE_ASSERT(B != (deUint32)-1); + dst[endpointNdx] = (((iseResults[endpointNdx].tq*C + B) ^ A) >> 2) | (A & 0x80); + } + } + else + { + DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT); + for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++) + dst[endpointNdx] = bitReplicationScale(iseResults[endpointNdx].v, iseParams.numBits, 8); + } +} + +inline void bitTransferSigned (deInt32& a, deInt32& b) +{ + b >>= 1; + b |= a & 0x80; + a >>= 1; + a &= 0x3f; + if (isBitSet(a, 5)) + a -= 0x40; +} + +inline UVec4 clampedRGBA (const IVec4& rgba) +{ + return UVec4(basisu_astc::clamp(rgba.x(), 0, 0xff), + basisu_astc::clamp(rgba.y(), 0, 0xff), + basisu_astc::clamp(rgba.z(), 0, 0xff), + basisu_astc::clamp(rgba.w(), 0, 0xff)); +} + +inline IVec4 blueContract (int r, int g, int b, int a) +{ + return IVec4((r+b)>>1, (g+b)>>1, b, a); +} + +inline bool isColorEndpointModeHDR (deUint32 mode) +{ + return (mode == 2) || + (mode == 3) || + (mode == 7) || + (mode == 11) || + (mode == 14) || + (mode == 15); +} + +void decodeHDREndpointMode7 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3) +{ + const deUint32 m10 = getBit(v1, 7) | (getBit(v2, 7) << 1); + const deUint32 m23 = getBits(v0, 6, 7); + + const deUint32 majComp = (m10 != 3) ? m10 + : (m23 != 3) ? m23 + : 0; + + const deUint32 mode = (m10 != 3) ? m23 + : (m23 != 3) ? 4 + : 5; + + deInt32 red = (deInt32)getBits(v0, 0, 5); + deInt32 green = (deInt32)getBits(v1, 0, 4); + deInt32 blue = (deInt32)getBits(v2, 0, 4); + deInt32 scale = (deInt32)getBits(v3, 0, 4); + + { +#define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT) +#define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5, V6,S6) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); SHOR(V6,S6,x6); } while (false) + + const deUint32 x0 = getBit(v1, 6); + const deUint32 x1 = getBit(v1, 5); + const deUint32 x2 = getBit(v2, 6); + const deUint32 x3 = getBit(v2, 5); + const deUint32 x4 = getBit(v3, 7); + const deUint32 x5 = getBit(v3, 6); + const deUint32 x6 = getBit(v3, 5); + + deInt32& R = red; + deInt32& G = green; + deInt32& B = blue; + deInt32& S = scale; + + switch (mode) + { + case 0: ASSIGN_X_BITS(R,9, R,8, R,7, R,10, R,6, S,6, S,5); break; + case 1: ASSIGN_X_BITS(R,8, G,5, R,7, B,5, R,6, R,10, R,9); break; + case 2: ASSIGN_X_BITS(R,9, R,8, R,7, R,6, S,7, S,6, S,5); break; + case 3: ASSIGN_X_BITS(R,8, G,5, R,7, B,5, R,6, S,6, S,5); break; + case 4: ASSIGN_X_BITS(G,6, G,5, B,6, B,5, R,6, R,7, S,5); break; + case 5: ASSIGN_X_BITS(G,6, G,5, B,6, B,5, R,6, S,6, S,5); break; + default: + DE_ASSERT(false); + } +#undef ASSIGN_X_BITS +#undef SHOR + } + + static const int shiftAmounts[] = { 1, 1, 2, 3, 4, 5 }; + DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(shiftAmounts)); + + red <<= shiftAmounts[mode]; + green <<= shiftAmounts[mode]; + blue <<= shiftAmounts[mode]; + scale <<= shiftAmounts[mode]; + + if (mode != 5) + { + green = red - green; + blue = red - blue; + } + + if (majComp == 1) + std::swap(red, green); + else if (majComp == 2) + std::swap(red, blue); + + e0 = UVec4(basisu_astc::clamp(red - scale, 0, 0xfff), + basisu_astc::clamp(green - scale, 0, 0xfff), + basisu_astc::clamp(blue - scale, 0, 0xfff), + 0x780); + + e1 = UVec4(basisu_astc::clamp(red, 0, 0xfff), + basisu_astc::clamp(green, 0, 0xfff), + basisu_astc::clamp(blue, 0, 0xfff), + 0x780); +} + +void decodeHDREndpointMode11 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5) +{ + const deUint32 major = (getBit(v5, 7) << 1) | getBit(v4, 7); + + if (major == 3) + { + e0 = UVec4(v0<<4, v2<<4, getBits(v4,0,6)<<5, 0x780); + e1 = UVec4(v1<<4, v3<<4, getBits(v5,0,6)<<5, 0x780); + } + else + { + const deUint32 mode = (getBit(v3, 7) << 2) | (getBit(v2, 7) << 1) | getBit(v1, 7); + + deInt32 a = (deInt32)((getBit(v1, 6) << 8) | v0); + deInt32 c = (deInt32)(getBits(v1, 0, 5)); + deInt32 b0 = (deInt32)(getBits(v2, 0, 5)); + deInt32 b1 = (deInt32)(getBits(v3, 0, 5)); + deInt32 d0 = (deInt32)(getBits(v4, 0, 4)); + deInt32 d1 = (deInt32)(getBits(v5, 0, 4)); + + { +#define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT) +#define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); } while (false) + const deUint32 x0 = getBit(v2, 6); + const deUint32 x1 = getBit(v3, 6); + const deUint32 x2 = getBit(v4, 6); + const deUint32 x3 = getBit(v5, 6); + const deUint32 x4 = getBit(v4, 5); + const deUint32 x5 = getBit(v5, 5); + + switch (mode) + { + case 0: ASSIGN_X_BITS(b0,6, b1,6, d0,6, d1,6, d0,5, d1,5); break; + case 1: ASSIGN_X_BITS(b0,6, b1,6, b0,7, b1,7, d0,5, d1,5); break; + case 2: ASSIGN_X_BITS(a,9, c,6, d0,6, d1,6, d0,5, d1,5); break; + case 3: ASSIGN_X_BITS(b0,6, b1,6, a,9, c,6, d0,5, d1,5); break; + case 4: ASSIGN_X_BITS(b0,6, b1,6, b0,7, b1,7, a,9, a,10); break; + case 5: ASSIGN_X_BITS(a,9, a,10, c,7, c,6, d0,5, d1,5); break; + case 6: ASSIGN_X_BITS(b0,6, b1,6, a,11, c,6, a,9, a,10); break; + case 7: ASSIGN_X_BITS(a,9, a,10, a,11, c,6, d0,5, d1,5); break; + default: + DE_ASSERT(false); + } +#undef ASSIGN_X_BITS +#undef SHOR + } + + static const int numDBits[] = { 7, 6, 7, 6, 5, 6, 5, 6 }; + DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(numDBits)); + d0 = signExtend(d0, numDBits[mode]); + d1 = signExtend(d1, numDBits[mode]); + + const int shiftAmount = (mode >> 1) ^ 3; + a = (uint32_t)a << shiftAmount; + c = (uint32_t)c << shiftAmount; + b0 = (uint32_t)b0 << shiftAmount; + b1 = (uint32_t)b1 << shiftAmount; + d0 = (uint32_t)d0 << shiftAmount; + d1 = (uint32_t)d1 << shiftAmount; + + e0 = UVec4(basisu_astc::clamp(a-c, 0, 0xfff), basisu_astc::clamp(a-b0-c-d0, 0, 0xfff), basisu_astc::clamp(a-b1-c-d1, 0, 0xfff), 0x780); + e1 = UVec4(basisu_astc::clamp(a, 0, 0xfff), basisu_astc::clamp(a-b0, 0, 0xfff), basisu_astc::clamp(a-b1, 0, 0xfff), 0x780); + + if (major == 1) + { + std::swap(e0.x(), e0.y()); + std::swap(e1.x(), e1.y()); + } + else if (major == 2) + { + std::swap(e0.x(), e0.z()); + std::swap(e1.x(), e1.z()); + } + } +} + +void decodeHDREndpointMode15(UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5, deUint32 v6In, deUint32 v7In) +{ + decodeHDREndpointMode11(e0, e1, v0, v1, v2, v3, v4, v5); + + const deUint32 mode = (getBit(v7In, 7) << 1) | getBit(v6In, 7); + deInt32 v6 = (deInt32)getBits(v6In, 0, 6); + deInt32 v7 = (deInt32)getBits(v7In, 0, 6); + + if (mode == 3) + { + e0.w() = v6 << 5; + e1.w() = v7 << 5; + } + else + { + v6 |= (v7 << (mode+1)) & 0x780; + v7 &= (0x3f >> mode); + v7 ^= 0x20 >> mode; + v7 -= 0x20 >> mode; + v6 <<= 4-mode; + v7 <<= 4-mode; + v7 += v6; + v7 = basisu_astc::clamp(v7, 0, 0xfff); + e0.w() = v6; + e1.w() = v7; + } +} + +void decodeColorEndpoints (ColorEndpointPair* dst, const deUint32* unquantizedEndpoints, const deUint32* endpointModes, int numPartitions) +{ + int unquantizedNdx = 0; + + for (int partitionNdx = 0; partitionNdx < numPartitions; partitionNdx++) + { + const deUint32 endpointMode = endpointModes[partitionNdx]; + const deUint32* v = &unquantizedEndpoints[unquantizedNdx]; + + UVec4& e0 = dst[partitionNdx].e0; + UVec4& e1 = dst[partitionNdx].e1; + unquantizedNdx += computeNumColorEndpointValues(endpointMode); + + switch (endpointMode) + { + case 0: + { + e0 = UVec4(v[0], v[0], v[0], 0xff); + e1 = UVec4(v[1], v[1], v[1], 0xff); + break; + } + case 1: + { + const deUint32 L0 = (v[0] >> 2) | (getBits(v[1], 6, 7) << 6); + const deUint32 L1 = basisu_astc::min(0xffu, L0 + getBits(v[1], 0, 5)); + e0 = UVec4(L0, L0, L0, 0xff); + e1 = UVec4(L1, L1, L1, 0xff); + break; + } + case 2: + { + const deUint32 v1Gr = v[1] >= v[0]; + const deUint32 y0 = v1Gr ? v[0]<<4 : (v[1]<<4) + 8; + const deUint32 y1 = v1Gr ? v[1]<<4 : (v[0]<<4) - 8; + e0 = UVec4(y0, y0, y0, 0x780); + e1 = UVec4(y1, y1, y1, 0x780); + break; + } + case 3: + { + const bool m = isBitSet(v[0], 7); + const deUint32 y0 = m ? (getBits(v[1], 5, 7) << 9) | (getBits(v[0], 0, 6) << 2) + : (getBits(v[1], 4, 7) << 8) | (getBits(v[0], 0, 6) << 1); + const deUint32 d = m ? getBits(v[1], 0, 4) << 2 + : getBits(v[1], 0, 3) << 1; + const deUint32 y1 = basisu_astc::min(0xfffu, y0+d); + e0 = UVec4(y0, y0, y0, 0x780); + e1 = UVec4(y1, y1, y1, 0x780); + break; + } + case 4: + { + e0 = UVec4(v[0], v[0], v[0], v[2]); + e1 = UVec4(v[1], v[1], v[1], v[3]); + break; + } + case 5: + { + deInt32 v0 = (deInt32)v[0]; + deInt32 v1 = (deInt32)v[1]; + deInt32 v2 = (deInt32)v[2]; + deInt32 v3 = (deInt32)v[3]; + bitTransferSigned(v1, v0); + bitTransferSigned(v3, v2); + e0 = clampedRGBA(IVec4(v0, v0, v0, v2)); + e1 = clampedRGBA(IVec4(v0+v1, v0+v1, v0+v1, v2+v3)); + break; + } + case 6: + e0 = UVec4((v[0]*v[3]) >> 8, (v[1]*v[3]) >> 8, (v[2]*v[3]) >> 8, 0xff); + e1 = UVec4(v[0], v[1], v[2], 0xff); + break; + case 7: + decodeHDREndpointMode7(e0, e1, v[0], v[1], v[2], v[3]); + break; + case 8: + { + if (v[1]+v[3]+v[5] >= v[0]+v[2]+v[4]) + { + e0 = UVec4(v[0], v[2], v[4], 0xff); + e1 = UVec4(v[1], v[3], v[5], 0xff); + } + else + { + e0 = blueContract(v[1], v[3], v[5], 0xff).asUint(); + e1 = blueContract(v[0], v[2], v[4], 0xff).asUint(); + } + break; + } + case 9: + { + deInt32 v0 = (deInt32)v[0]; + deInt32 v1 = (deInt32)v[1]; + deInt32 v2 = (deInt32)v[2]; + deInt32 v3 = (deInt32)v[3]; + deInt32 v4 = (deInt32)v[4]; + deInt32 v5 = (deInt32)v[5]; + bitTransferSigned(v1, v0); + bitTransferSigned(v3, v2); + bitTransferSigned(v5, v4); + if (v1+v3+v5 >= 0) + { + e0 = clampedRGBA(IVec4(v0, v2, v4, 0xff)); + e1 = clampedRGBA(IVec4(v0+v1, v2+v3, v4+v5, 0xff)); + } + else + { + e0 = clampedRGBA(blueContract(v0+v1, v2+v3, v4+v5, 0xff)); + e1 = clampedRGBA(blueContract(v0, v2, v4, 0xff)); + } + break; + } + case 10: + { + e0 = UVec4((v[0]*v[3]) >> 8, (v[1]*v[3]) >> 8, (v[2]*v[3]) >> 8, v[4]); + e1 = UVec4(v[0], v[1], v[2], v[5]); + break; + } + case 11: + { + decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]); + break; + } + case 12: + { + if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) + { + e0 = UVec4(v[0], v[2], v[4], v[6]); + e1 = UVec4(v[1], v[3], v[5], v[7]); + } + else + { + e0 = clampedRGBA(blueContract(v[1], v[3], v[5], v[7])); + e1 = clampedRGBA(blueContract(v[0], v[2], v[4], v[6])); + } + break; + } + case 13: + { + deInt32 v0 = (deInt32)v[0]; + deInt32 v1 = (deInt32)v[1]; + deInt32 v2 = (deInt32)v[2]; + deInt32 v3 = (deInt32)v[3]; + deInt32 v4 = (deInt32)v[4]; + deInt32 v5 = (deInt32)v[5]; + deInt32 v6 = (deInt32)v[6]; + deInt32 v7 = (deInt32)v[7]; + bitTransferSigned(v1, v0); + bitTransferSigned(v3, v2); + bitTransferSigned(v5, v4); + bitTransferSigned(v7, v6); + if (v1+v3+v5 >= 0) + { + e0 = clampedRGBA(IVec4(v0, v2, v4, v6)); + e1 = clampedRGBA(IVec4(v0+v1, v2+v3, v4+v5, v6+v7)); + } + else + { + e0 = clampedRGBA(blueContract(v0+v1, v2+v3, v4+v5, v6+v7)); + e1 = clampedRGBA(blueContract(v0, v2, v4, v6)); + } + break; + } + case 14: + decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]); + e0.w() = v[6]; + e1.w() = v[7]; + break; + case 15: + { + decodeHDREndpointMode15(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]); + break; + } + default: + DE_ASSERT(false); + } + } +} + +void computeColorEndpoints (ColorEndpointPair* dst, const Block128& blockData, const deUint32* endpointModes, int numPartitions, int numColorEndpointValues, const ISEParams& iseParams, int numBitsAvailable) +{ + const int colorEndpointDataStart = (numPartitions == 1) ? 17 : 29; + ISEDecodedResult colorEndpointData[18]; + + { + BitAccessStream dataStream(blockData, colorEndpointDataStart, numBitsAvailable, true); + decodeISE(&colorEndpointData[0], numColorEndpointValues, dataStream, iseParams); + } + + { + deUint32 unquantizedEndpoints[18]; + unquantizeColorEndpoints(&unquantizedEndpoints[0], &colorEndpointData[0], numColorEndpointValues, iseParams); + decodeColorEndpoints(dst, &unquantizedEndpoints[0], &endpointModes[0], numPartitions); + } +} + +void unquantizeWeights (deUint32 dst[64], const ISEDecodedResult* weightGrid, const ASTCBlockMode& blockMode) +{ + const int numWeights = computeNumWeights(blockMode); + const ISEParams& iseParams = blockMode.weightISEParams; + + if ((iseParams.mode == ISEMODE_TRIT) || (iseParams.mode == ISEMODE_QUINT)) + { + const int rangeCase = iseParams.numBits*2 + (iseParams.mode == ISEMODE_QUINT ? 1 : 0); + + if ((rangeCase == 0) || (rangeCase == 1)) + { + static const deUint32 map0[3] = { 0, 32, 63 }; + static const deUint32 map1[5] = { 0, 16, 32, 47, 63 }; + const deUint32* const map = (rangeCase == 0) ? &map0[0] : &map1[0]; + + for (int i = 0; i < numWeights; i++) + { + DE_ASSERT(weightGrid[i].v < (rangeCase == 0 ? 3u : 5u)); + dst[i] = map[weightGrid[i].v]; + } + } + else + { + DE_ASSERT(rangeCase <= 6); + static const deUint32 Ca[5] = { 50, 28, 23, 13, 11 }; + const deUint32 C = Ca[rangeCase-2]; + + for (int weightNdx = 0; weightNdx < numWeights; weightNdx++) + { + const deUint32 a = getBit(weightGrid[weightNdx].m, 0); + const deUint32 b = getBit(weightGrid[weightNdx].m, 1); + const deUint32 c = getBit(weightGrid[weightNdx].m, 2); + + const deUint32 A = (a == 0) ? 0 : (1<<7)-1; + const deUint32 B = (rangeCase == 2) ? 0 + : (rangeCase == 3) ? 0 + : (rangeCase == 4) ? (b << 6) | (b << 2) | (b << 0) + : (rangeCase == 5) ? (b << 6) | (b << 1) + : (rangeCase == 6) ? (c << 6) | (b << 5) | (c << 1) | (b << 0) + : (deUint32)-1; + + dst[weightNdx] = (((weightGrid[weightNdx].tq*C + B) ^ A) >> 2) | (A & 0x20); + } + } + } + else + { + DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT); + for (int weightNdx = 0; weightNdx < numWeights; weightNdx++) + dst[weightNdx] = bitReplicationScale(weightGrid[weightNdx].v, iseParams.numBits, 6); + } + + for (int weightNdx = 0; weightNdx < numWeights; weightNdx++) + dst[weightNdx] += dst[weightNdx] > 32 ? 1 : 0; + + // Initialize nonexistent weights to poison values + for (int weightNdx = numWeights; weightNdx < 64; weightNdx++) + dst[weightNdx] = ~0u; +} + +void interpolateWeights (TexelWeightPair* dst, const deUint32 (&unquantizedWeights) [64], int blockWidth, int blockHeight, const ASTCBlockMode& blockMode) +{ + const int numWeightsPerTexel = blockMode.isDualPlane ? 2 : 1; + const deUint32 scaleX = (1024 + blockWidth/2) / (blockWidth-1); + const deUint32 scaleY = (1024 + blockHeight/2) / (blockHeight-1); + DE_ASSERT(blockMode.weightGridWidth*blockMode.weightGridHeight*numWeightsPerTexel <= (int)DE_LENGTH_OF_ARRAY(unquantizedWeights)); + + for (int texelY = 0; texelY < blockHeight; texelY++) + { + for (int texelX = 0; texelX < blockWidth; texelX++) + { + const deUint32 gX = (scaleX*texelX*(blockMode.weightGridWidth-1) + 32) >> 6; + const deUint32 gY = (scaleY*texelY*(blockMode.weightGridHeight-1) + 32) >> 6; + const deUint32 jX = gX >> 4; + const deUint32 jY = gY >> 4; + const deUint32 fX = gX & 0xf; + const deUint32 fY = gY & 0xf; + const deUint32 w11 = (fX*fY + 8) >> 4; + const deUint32 w10 = fY - w11; + const deUint32 w01 = fX - w11; + const deUint32 w00 = 16 - fX - fY + w11; + const deUint32 i00 = jY*blockMode.weightGridWidth + jX; + const deUint32 i01 = i00 + 1; + const deUint32 i10 = i00 + blockMode.weightGridWidth; + const deUint32 i11 = i00 + blockMode.weightGridWidth + 1; + + // These addresses can be out of bounds, but respective weights will be 0 then. + DE_ASSERT(deInBounds32(i00, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w00 == 0); + DE_ASSERT(deInBounds32(i01, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w01 == 0); + DE_ASSERT(deInBounds32(i10, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w10 == 0); + DE_ASSERT(deInBounds32(i11, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w11 == 0); + + for (int texelWeightNdx = 0; texelWeightNdx < numWeightsPerTexel; texelWeightNdx++) + { + // & 0x3f clamps address to bounds of unquantizedWeights + const deUint32 p00 = unquantizedWeights[(i00 * numWeightsPerTexel + texelWeightNdx) & 0x3f]; + const deUint32 p01 = unquantizedWeights[(i01 * numWeightsPerTexel + texelWeightNdx) & 0x3f]; + const deUint32 p10 = unquantizedWeights[(i10 * numWeightsPerTexel + texelWeightNdx) & 0x3f]; + const deUint32 p11 = unquantizedWeights[(i11 * numWeightsPerTexel + texelWeightNdx) & 0x3f]; + + dst[texelY*blockWidth + texelX].w[texelWeightNdx] = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; + } + } + } +} + +void computeTexelWeights (TexelWeightPair* dst, const Block128& blockData, int blockWidth, int blockHeight, const ASTCBlockMode& blockMode) +{ + ISEDecodedResult weightGrid[64]; + + { + BitAccessStream dataStream(blockData, 127, computeNumRequiredBits(blockMode.weightISEParams, computeNumWeights(blockMode)), false); + decodeISE(&weightGrid[0], computeNumWeights(blockMode), dataStream, blockMode.weightISEParams); + } + + { + deUint32 unquantizedWeights[64]; + unquantizeWeights(&unquantizedWeights[0], &weightGrid[0], blockMode); + + interpolateWeights(dst, unquantizedWeights, blockWidth, blockHeight, blockMode); + } +} + +inline deUint32 hash52 (deUint32 v) +{ + deUint32 p = v; + p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4; + p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3; + p ^= p << 6; p ^= p >> 17; + return p; +} + +int computeTexelPartition (deUint32 seedIn, deUint32 xIn, deUint32 yIn, deUint32 zIn, int numPartitions, bool smallBlock) +{ + DE_ASSERT(zIn == 0); + + const deUint32 x = smallBlock ? xIn << 1 : xIn; + const deUint32 y = smallBlock ? yIn << 1 : yIn; + const deUint32 z = smallBlock ? zIn << 1 : zIn; + const deUint32 seed = seedIn + 1024*(numPartitions-1); + const deUint32 rnum = hash52(seed); + + deUint8 seed1 = (deUint8)( rnum & 0xf); + deUint8 seed2 = (deUint8)((rnum >> 4) & 0xf); + deUint8 seed3 = (deUint8)((rnum >> 8) & 0xf); + deUint8 seed4 = (deUint8)((rnum >> 12) & 0xf); + deUint8 seed5 = (deUint8)((rnum >> 16) & 0xf); + deUint8 seed6 = (deUint8)((rnum >> 20) & 0xf); + deUint8 seed7 = (deUint8)((rnum >> 24) & 0xf); + deUint8 seed8 = (deUint8)((rnum >> 28) & 0xf); + deUint8 seed9 = (deUint8)((rnum >> 18) & 0xf); + deUint8 seed10 = (deUint8)((rnum >> 22) & 0xf); + deUint8 seed11 = (deUint8)((rnum >> 26) & 0xf); + deUint8 seed12 = (deUint8)(((rnum >> 30) | (rnum << 2)) & 0xf); + + seed1 = (deUint8)(seed1 * seed1 ); + seed2 = (deUint8)(seed2 * seed2 ); + seed3 = (deUint8)(seed3 * seed3 ); + seed4 = (deUint8)(seed4 * seed4 ); + seed5 = (deUint8)(seed5 * seed5 ); + seed6 = (deUint8)(seed6 * seed6 ); + seed7 = (deUint8)(seed7 * seed7 ); + seed8 = (deUint8)(seed8 * seed8 ); + seed9 = (deUint8)(seed9 * seed9 ); + seed10 = (deUint8)(seed10 * seed10); + seed11 = (deUint8)(seed11 * seed11); + seed12 = (deUint8)(seed12 * seed12); + + const int shA = (seed & 2) != 0 ? 4 : 5; + const int shB = numPartitions == 3 ? 6 : 5; + const int sh1 = (seed & 1) != 0 ? shA : shB; + const int sh2 = (seed & 1) != 0 ? shB : shA; + const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2; + + seed1 = (deUint8)(seed1 >> sh1); + seed2 = (deUint8)(seed2 >> sh2); + seed3 = (deUint8)(seed3 >> sh1); + seed4 = (deUint8)(seed4 >> sh2); + seed5 = (deUint8)(seed5 >> sh1); + seed6 = (deUint8)(seed6 >> sh2); + seed7 = (deUint8)(seed7 >> sh1); + seed8 = (deUint8)(seed8 >> sh2); + seed9 = (deUint8)(seed9 >> sh3); + seed10 = (deUint8)(seed10 >> sh3); + seed11 = (deUint8)(seed11 >> sh3); + seed12 = (deUint8)(seed12 >> sh3); + + const int a = 0x3f & (seed1*x + seed2*y + seed11*z + (rnum >> 14)); + const int b = 0x3f & (seed3*x + seed4*y + seed12*z + (rnum >> 10)); + const int c = (numPartitions >= 3) ? 0x3f & (seed5*x + seed6*y + seed9*z + (rnum >> 6)) : 0; + const int d = (numPartitions >= 4) ? 0x3f & (seed7*x + seed8*y + seed10*z + (rnum >> 2)) : 0; + + return (a >= b && a >= c && a >= d) ? 0 + : (b >= c && b >= d) ? 1 + : (c >= d) ? 2 + : 3; +} + +DecompressResult setTexelColors (void* dst, ColorEndpointPair* colorEndpoints, TexelWeightPair* texelWeights, int ccs, deUint32 partitionIndexSeed, + int numPartitions, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode, const deUint32* colorEndpointModes) +{ + const bool smallBlock = blockWidth*blockHeight < 31; + DecompressResult result = DECOMPRESS_RESULT_VALID_BLOCK; + bool isHDREndpoint[4]; + + for (int i = 0; i < numPartitions; i++) + { + isHDREndpoint[i] = isColorEndpointModeHDR(colorEndpointModes[i]); + } + + for (int texelY = 0; texelY < blockHeight; texelY++) + { + for (int texelX = 0; texelX < blockWidth; texelX++) + { + const int texelNdx = texelY * blockWidth + texelX; + const int colorEndpointNdx = (numPartitions == 1) ? 0 : computeTexelPartition(partitionIndexSeed, texelX, texelY, 0, numPartitions, smallBlock); + + DE_ASSERT(colorEndpointNdx < numPartitions); + const UVec4& e0 = colorEndpoints[colorEndpointNdx].e0; + const UVec4& e1 = colorEndpoints[colorEndpointNdx].e1; + const TexelWeightPair& weight = texelWeights[texelNdx]; + + if (isLDRMode && isHDREndpoint[colorEndpointNdx]) + { + if (isSRGB) + { + ((deUint8*)dst)[texelNdx * 4 + 0] = 0xff; + ((deUint8*)dst)[texelNdx * 4 + 1] = 0; + ((deUint8*)dst)[texelNdx * 4 + 2] = 0xff; + ((deUint8*)dst)[texelNdx * 4 + 3] = 0xff; + } + else + { + ((float*)dst)[texelNdx * 4 + 0] = 1.0f; + ((float*)dst)[texelNdx * 4 + 1] = 0; + ((float*)dst)[texelNdx * 4 + 2] = 1.0f; + ((float*)dst)[texelNdx * 4 + 3] = 1.0f; + } + result = DECOMPRESS_RESULT_ERROR; + } + else + { + for (int channelNdx = 0; channelNdx < 4; channelNdx++) + { + if (!isHDREndpoint[colorEndpointNdx] || (channelNdx == 3 && colorEndpointModes[colorEndpointNdx] == 14)) // \note Alpha for mode 14 is treated the same as LDR. + { + const deUint32 c0 = (e0[channelNdx] << 8) | (isSRGB ? 0x80 : e0[channelNdx]); + const deUint32 c1 = (e1[channelNdx] << 8) | (isSRGB ? 0x80 : e1[channelNdx]); + const deUint32 w = weight.w[ccs == channelNdx ? 1 : 0]; + const deUint32 c = (c0 * (64 - w) + c1 * w + 32) / 64; + + if (isSRGB) + ((deUint8*)dst)[texelNdx * 4 + channelNdx] = (deUint8)((c & 0xff00) >> 8); + else + ((float*)dst)[texelNdx * 4 + channelNdx] = (c == 65535) ? 1.0f : (float)c / 65536.0f; + } + else + { + DE_ASSERT(!isSRGB); + //DE_STATIC_ASSERT((basisu_astc::meta::TypesSame<deFloat16, deUint16>::Value)); + + const deUint32 c0 = e0[channelNdx] << 4; + const deUint32 c1 = e1[channelNdx] << 4; + const deUint32 w = weight.w[(ccs == channelNdx) ? 1 : 0]; + const deUint32 c = (c0 * (64 - w) + c1 * w + 32) / 64; + const deUint32 e = getBits(c, 11, 15); + const deUint32 m = getBits(c, 0, 10); + const deUint32 mt = (m < 512) ? (3 * m) + : (m >= 1536) ? (5 * m - 2048) + : (4 * m - 512); + + const deFloat16 cf = (deFloat16)((e << 10) + (mt >> 3)); + + ((float*)dst)[texelNdx * 4 + channelNdx] = deFloat16To32(isFloat16InfOrNan(cf) ? 0x7bff : cf); + } + + } // channelNdx + } + } // texelX + } // texelY + + return result; +} + +DecompressResult decompressBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDR) +{ + DE_ASSERT(isLDR || !isSRGB); + + // Decode block mode. + const ASTCBlockMode blockMode = getASTCBlockMode(blockData.getBits(0, 10)); + + // Check for block mode errors. + if (blockMode.isError) + { + setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB); + return DECOMPRESS_RESULT_ERROR; + } + + // Separate path for void-extent. + if (blockMode.isVoidExtent) + return decodeVoidExtentBlock(dst, blockData, blockWidth, blockHeight, isSRGB, isLDR); + + // Compute weight grid values. + const int numWeights = computeNumWeights(blockMode); + const int numWeightDataBits = computeNumRequiredBits(blockMode.weightISEParams, numWeights); + const int numPartitions = (int)blockData.getBits(11, 12) + 1; + + // Check for errors in weight grid, partition and dual-plane parameters. + if ((numWeights > 64) || + (numWeightDataBits > 96) || + (numWeightDataBits < 24) || + (blockMode.weightGridWidth > blockWidth) || + (blockMode.weightGridHeight > blockHeight) || + ((numPartitions == 4) && blockMode.isDualPlane)) + { + setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB); + return DECOMPRESS_RESULT_ERROR; + } + + // Compute number of bits available for color endpoint data. + const bool isSingleUniqueCem = (numPartitions == 1) || (blockData.getBits(23, 24) == 0); + + const int numConfigDataBits = ((numPartitions == 1) ? 17 : isSingleUniqueCem ? 29 : 25 + 3*numPartitions) + + (blockMode.isDualPlane ? 2 : 0); + + const int numBitsForColorEndpoints = 128 - numWeightDataBits - numConfigDataBits; + + const int extraCemBitsStart = 127 - numWeightDataBits - (isSingleUniqueCem ? -1 + : (numPartitions == 4) ? 7 + : (numPartitions == 3) ? 4 + : (numPartitions == 2) ? 1 + : 0); + + // Decode color endpoint modes. + deUint32 colorEndpointModes[4]; + decodeColorEndpointModes(&colorEndpointModes[0], blockData, numPartitions, extraCemBitsStart); + const int numColorEndpointValues = computeNumColorEndpointValues(colorEndpointModes, numPartitions); + + // Check for errors in color endpoint value count. + if ((numColorEndpointValues > 18) || (numBitsForColorEndpoints < (int)deDivRoundUp32(13*numColorEndpointValues, 5))) + { + setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB); + return DECOMPRESS_RESULT_ERROR; + } + + // Compute color endpoints. + ColorEndpointPair colorEndpoints[4]; + computeColorEndpoints(&colorEndpoints[0], blockData, &colorEndpointModes[0], numPartitions, numColorEndpointValues, + computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues), numBitsForColorEndpoints); + + // Compute texel weights. + TexelWeightPair texelWeights[MAX_BLOCK_WIDTH*MAX_BLOCK_HEIGHT]; + computeTexelWeights(&texelWeights[0], blockData, blockWidth, blockHeight, blockMode); + + // Set texel colors. + const int ccs = blockMode.isDualPlane ? (int)blockData.getBits(extraCemBitsStart-2, extraCemBitsStart-1) : -1; + const deUint32 partitionIndexSeed = (numPartitions > 1) ? blockData.getBits(13, 22) : (deUint32)-1; + + return setTexelColors(dst, &colorEndpoints[0], &texelWeights[0], ccs, partitionIndexSeed, numPartitions, blockWidth, blockHeight, isSRGB, isLDR, &colorEndpointModes[0]); +} + +// Returns -1 on error, 0 if LDR, 1 if HDR +int isHDR(const Block128& blockData, int blockWidth, int blockHeight) +{ + // Decode block mode. + const ASTCBlockMode blockMode = getASTCBlockMode(blockData.getBits(0, 10)); + + // Check for block mode errors. + if (blockMode.isError) + return -1; + + // Separate path for void-extent. + if (blockMode.isVoidExtent) + { + const bool isHDRBlock = blockData.isBitSet(9); + return isHDRBlock ? 1 : 0; + } + + // Compute weight grid values. + const int numWeights = computeNumWeights(blockMode); + const int numWeightDataBits = computeNumRequiredBits(blockMode.weightISEParams, numWeights); + const int numPartitions = (int)blockData.getBits(11, 12) + 1; + + // Check for errors in weight grid, partition and dual-plane parameters. + if ((numWeights > 64) || + (numWeightDataBits > 96) || + (numWeightDataBits < 24) || + (blockMode.weightGridWidth > blockWidth) || + (blockMode.weightGridHeight > blockHeight) || + ((numPartitions == 4) && blockMode.isDualPlane)) + { + return -1; + } + + // Compute number of bits available for color endpoint data. + const bool isSingleUniqueCem = (numPartitions == 1) || (blockData.getBits(23, 24) == 0); + + const int extraCemBitsStart = 127 - numWeightDataBits - (isSingleUniqueCem ? -1 + : (numPartitions == 4) ? 7 + : (numPartitions == 3) ? 4 + : (numPartitions == 2) ? 1 + : 0); + + // Decode color endpoint modes. + deUint32 colorEndpointModes[4]; + decodeColorEndpointModes(&colorEndpointModes[0], blockData, numPartitions, extraCemBitsStart); + + for (int i = 0; i < numPartitions; i++) + { + if (isColorEndpointModeHDR(colorEndpointModes[i])) + return 1; + } + + return 0; +} + +typedef uint16_t half_float; + +half_float float_to_half(float val, bool toward_zero) +{ + union { float f; int32_t i; uint32_t u; } fi = { val }; + const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1; + int s = flt_s, e = 0, m = 0; + + // inf/NaN + if (flt_e == 0xff) + { + e = 31; + if (flt_m != 0) // NaN + m = 1; + } + // not zero or denormal + else if (flt_e != 0) + { + int new_exp = flt_e - 127; + if (new_exp > 15) + e = 31; + else if (new_exp < -14) + { + if (toward_zero) + m = (int)truncf((1 << 24) * fabsf(fi.f)); + else + m = lrintf((1 << 24) * fabsf(fi.f)); + } + else + { + e = new_exp + 15; + if (toward_zero) + m = (int)truncf((float)flt_m * (1.0f / (float)(1 << 13))); + else + m = lrintf((float)flt_m * (1.0f / (float)(1 << 13))); + } + } + + assert((0 <= m) && (m <= 1024)); + if (m == 1024) + { + e++; + m = 0; + } + + assert((s >= 0) && (s <= 1)); + assert((e >= 0) && (e <= 31)); + assert((m >= 0) && (m <= 1023)); + + half_float result = (half_float)((s << 15) | (e << 10) | m); + return result; +} + +float half_to_float(half_float hval) +{ + union { float f; uint32_t u; } x = { 0 }; + + uint32_t s = ((uint32_t)hval >> 15) & 1; + uint32_t e = ((uint32_t)hval >> 10) & 0x1F; + uint32_t m = (uint32_t)hval & 0x3FF; + + if (!e) + { + if (!m) + { + // +- 0 + x.u = s << 31; + return x.f; + } + else + { + // denormalized + while (!(m & 0x00000400)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x00000400; + } + } + else if (e == 31) + { + if (m == 0) + { + // +/- INF + x.u = (s << 31) | 0x7f800000; + return x.f; + } + else + { + // +/- NaN + x.u = (s << 31) | 0x7f800000 | (m << 13); + return x.f; + } + } + + e = e + (127 - 15); + m = m << 13; + + assert(s <= 1); + assert(m <= 0x7FFFFF); + assert(e <= 255); + + x.u = m | (e << 23) | (s << 31); + return x.f; +} + +} // anonymous + +// See https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.inline.html#_hdr_endpoint_decoding +static void convert_to_half_prec(uint32_t n, float* pVals) +{ +#if 0 + const int prev_dir = fesetround(FE_TOWARDZERO); + + for (uint32_t i = 0; i < n; i++) + pVals[i] = half_to_float(float_to_half(pVals[i])); + + fesetround(prev_dir); + + for (uint32_t i = 0; i < n; i++) + { + assert(pVals[i] == half_to_float(float_to_half(pVals[i], true))); + } +#else + // This ensures the values are rounded towards zero as half floats. + for (uint32_t i = 0; i < n; i++) + { + pVals[i] = half_to_float(float_to_half(pVals[i], true)); + } +#endif +} + +bool decompress_ldr(uint8_t *pDst, const uint8_t * data, bool isSRGB, int blockWidth, int blockHeight) +{ + float linear[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4]; + + const Block128 blockData(data); + + // isSRGB is true, this writes uint8_t's. Otherwise it writes floats. + if (decompressBlock(isSRGB ? (void*)pDst : (void*)&linear[0], blockData, blockWidth, blockHeight, isSRGB, true) != DECOMPRESS_RESULT_VALID_BLOCK) + { + return false; + } + + if (!isSRGB) + { + // Convert the floats to 8-bits with rounding. + int pix = 0; + for (int i = 0; i < blockHeight; i++) + { + for (int j = 0; j < blockWidth; j++, pix++) + { + pDst[4 * pix + 0] = (uint8_t)(basisu_astc::clamp<int>((int)(linear[pix * 4 + 0] * 65536.0f + .5f), 0, 65535) >> 8); + pDst[4 * pix + 1] = (uint8_t)(basisu_astc::clamp<int>((int)(linear[pix * 4 + 1] * 65536.0f + .5f), 0, 65535) >> 8); + pDst[4 * pix + 2] = (uint8_t)(basisu_astc::clamp<int>((int)(linear[pix * 4 + 2] * 65536.0f + .5f), 0, 65535) >> 8); + pDst[4 * pix + 3] = (uint8_t)(basisu_astc::clamp<int>((int)(linear[pix * 4 + 3] * 65536.0f + .5f), 0, 65535) >> 8); + } + } + } + + return true; +} + +bool decompress_hdr(float* pDstRGBA, const uint8_t* data, int blockWidth, int blockHeight) +{ + const Block128 blockData(data); + + if (decompressBlock(pDstRGBA, blockData, blockWidth, blockHeight, false, false) != DECOMPRESS_RESULT_VALID_BLOCK) + { + return false; + } + + convert_to_half_prec(blockWidth * blockHeight * 4, pDstRGBA); + + return true; +} + +bool is_hdr(const uint8_t* data, int blockWidth, int blockHeight, bool &is_hdr) +{ + is_hdr = false; + + const Block128 blockData(data); + + int status = isHDR(blockData, blockWidth, blockHeight); + if (status < 0) + { + return false; + } + + is_hdr = (status == 1); + + return true; +} + +} // astc + +} // basisu_astc + +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif diff --git a/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.h b/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.h new file mode 100644 index 0000000000..ad13093a6c --- /dev/null +++ b/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.h @@ -0,0 +1,45 @@ +// File: android_astc_decomp.h +#ifndef _TCUASTCUTIL_HPP +#define _TCUASTCUTIL_HPP +/*------------------------------------------------------------------------- + * drawElements Quality Program Tester Core + * ---------------------------------------- + * + * Copyright 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + *//*! + * \file + * \brief ASTC Utilities. + *//*--------------------------------------------------------------------*/ + +#include <vector> +#include <stdint.h> + +namespace basisu_astc +{ +namespace astc +{ + +// Unpacks a single ASTC block to pDst +// If isSRGB is true, the spec requires the decoder to scale the LDR 8-bit endpoints to 16-bit before interpolation slightly differently, +// which will lead to different outputs. So be sure to set it correctly (ideally it should match whatever the encoder did). +bool decompress_ldr(uint8_t* pDst, const uint8_t* data, bool isSRGB, int blockWidth, int blockHeight); +bool decompress_hdr(float* pDstRGBA, const uint8_t* data, int blockWidth, int blockHeight); +bool is_hdr(const uint8_t* data, int blockWidth, int blockHeight, bool& is_hdr); + +} // astc +} // basisu + +#endif diff --git a/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.cpp b/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.cpp new file mode 100644 index 0000000000..d698a7ff87 --- /dev/null +++ b/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.cpp @@ -0,0 +1,3310 @@ +// basisu_astc_hdr_enc.cpp +#include "basisu_astc_hdr_enc.h" +#include "../transcoder/basisu_transcoder.h" + +using namespace basist; + +namespace basisu +{ + +const float DEF_R_ERROR_SCALE = 2.0f; +const float DEF_G_ERROR_SCALE = 3.0f; + +static inline uint32_t get_max_qlog(uint32_t bits) +{ + switch (bits) + { + case 7: return MAX_QLOG7; + case 8: return MAX_QLOG8; + case 9: return MAX_QLOG9; + case 10: return MAX_QLOG10; + case 11: return MAX_QLOG11; + case 12: return MAX_QLOG12; + case 16: return MAX_QLOG16; + default: assert(0); break; + } + return 0; +} + +#if 0 +static inline float get_max_qlog_val(uint32_t bits) +{ + switch (bits) + { + case 7: return MAX_QLOG7_VAL; + case 8: return MAX_QLOG8_VAL; + case 9: return MAX_QLOG9_VAL; + case 10: return MAX_QLOG10_VAL; + case 11: return MAX_QLOG11_VAL; + case 12: return MAX_QLOG12_VAL; + case 16: return MAX_QLOG16_VAL; + default: assert(0); break; + } + return 0; +} +#endif + +static inline int get_bit( + int src_val, int src_bit) +{ + assert(src_bit >= 0 && src_bit <= 31); + int bit = (src_val >> src_bit) & 1; + return bit; +} + +static inline void pack_bit( + int& dst, int dst_bit, + int src_val, int src_bit = 0) +{ + assert(dst_bit >= 0 && dst_bit <= 31); + int bit = get_bit(src_val, src_bit); + dst |= (bit << dst_bit); +} + +//-------------------------------------------------------------------------------------------------------------------------- + +astc_hdr_codec_options::astc_hdr_codec_options() +{ + init(); +} + +void astc_hdr_codec_options::init() +{ + m_bc6h_err_weight = .85f; + m_r_err_scale = DEF_R_ERROR_SCALE; + m_g_err_scale = DEF_G_ERROR_SCALE; + + // Disabling by default to avoid transcoding outliers (try kodim26). The quality lost is very low. TODO: Could include the uber result in the output. + m_allow_uber_mode = false; + + // Must set best quality level first to set defaults. + set_quality_best(); + + set_quality_level(cDefaultLevel); +} + +void astc_hdr_codec_options::set_quality_best() +{ + m_mode11_direct_only = false; + + // highest achievable quality + m_use_solid = true; + + m_use_mode11 = true; + m_mode11_uber_mode = true; + m_first_mode11_weight_ise_range = MODE11_FIRST_ISE_RANGE; + m_last_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE; + m_first_mode11_submode = -1; + m_last_mode11_submode = 7; + + m_use_mode7_part1 = true; + m_first_mode7_part1_weight_ise_range = MODE7_PART1_FIRST_ISE_RANGE; + m_last_mode7_part1_weight_ise_range = MODE7_PART1_LAST_ISE_RANGE; + + m_use_mode7_part2 = true; + m_mode7_part2_part_masks = UINT32_MAX; + m_first_mode7_part2_weight_ise_range = MODE7_PART2_FIRST_ISE_RANGE; + m_last_mode7_part2_weight_ise_range = MODE7_PART2_LAST_ISE_RANGE; + + m_use_mode11_part2 = true; + m_mode11_part2_part_masks = UINT32_MAX; + m_first_mode11_part2_weight_ise_range = MODE11_PART2_FIRST_ISE_RANGE; + m_last_mode11_part2_weight_ise_range = MODE11_PART2_LAST_ISE_RANGE; + + m_refine_weights = true; + + m_use_estimated_partitions = false; + m_max_estimated_partitions = 0; +} + +void astc_hdr_codec_options::set_quality_normal() +{ + m_use_solid = true; + + // We'll allow uber mode in normal if the user allows it. + m_use_mode11 = true; + m_mode11_uber_mode = true; + m_first_mode11_weight_ise_range = 6; + m_last_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE; + + m_use_mode7_part1 = true; + m_first_mode7_part1_weight_ise_range = MODE7_PART1_LAST_ISE_RANGE; + m_last_mode7_part1_weight_ise_range = MODE7_PART1_LAST_ISE_RANGE; + + m_use_mode7_part2 = true; + m_mode7_part2_part_masks = UINT32_MAX; + m_first_mode7_part2_weight_ise_range = MODE7_PART2_LAST_ISE_RANGE; + m_last_mode7_part2_weight_ise_range = MODE7_PART2_LAST_ISE_RANGE; + + m_use_mode11_part2 = true; + m_mode11_part2_part_masks = UINT32_MAX; + m_first_mode11_part2_weight_ise_range = MODE11_PART2_LAST_ISE_RANGE; + m_last_mode11_part2_weight_ise_range = MODE11_PART2_LAST_ISE_RANGE; + + m_refine_weights = true; +} + +void astc_hdr_codec_options::set_quality_fastest() +{ + m_use_solid = true; + + m_use_mode11 = true; + m_mode11_uber_mode = false; + m_first_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE; + m_last_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE; + + m_use_mode7_part1 = false; + m_use_mode7_part2 = false; + m_use_mode11_part2 = false; + + m_refine_weights = false; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +void astc_hdr_codec_options::set_quality_level(int level) +{ + level = clamp(level, cMinLevel, cMaxLevel); + + m_level = level; + + switch (level) + { + case 0: + { + set_quality_fastest(); + break; + } + case 1: + { + set_quality_normal(); + + m_first_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE - 1; + m_last_mode11_weight_ise_range = MODE11_LAST_ISE_RANGE; + + m_use_mode7_part1 = false; + m_use_mode7_part2 = false; + + m_use_estimated_partitions = true; + m_max_estimated_partitions = 1; + + m_mode11_part2_part_masks = 1 | 2; + m_mode7_part2_part_masks = 1 | 2; + break; + } + case 2: + { + set_quality_normal(); + + m_use_estimated_partitions = true; + m_max_estimated_partitions = 2; + + m_mode11_part2_part_masks = 1 | 2; + m_mode7_part2_part_masks = 1 | 2; + + break; + } + case 3: + { + set_quality_best(); + + m_use_estimated_partitions = true; + m_max_estimated_partitions = 2; + + m_mode11_part2_part_masks = 1 | 2 | 4 | 8; + m_mode7_part2_part_masks = 1 | 2 | 4 | 8; + + break; + } + case 4: + { + set_quality_best(); + + break; + } + } +} + +//-------------------------------------------------------------------------------------------------------------------------- + +#if 0 +static inline half_float qlog12_to_half_slow(uint32_t qlog12) +{ + return qlog_to_half_slow(qlog12, 12); +} +#endif + +// max usable qlog8 value is 247, 248=inf, >=249 is nan +// max usable qlog7 value is 123, 124=inf, >=125 is nan + +// To go from a smaller qlog to an larger one, shift left by X bits. + +//const uint32_t TOTAL_USABLE_QLOG8 = 248; // 0-247 are usable, 0=0, 247=60416.0, 246=55296.0 + +// for qlog7's shift left by 1 +//half_float g_qlog8_to_half[256]; +//float g_qlog8_to_float[256]; + +//half_float g_qlog12_to_half[4096]; +//float g_qlog12_to_float[4096]; + +static half_float g_qlog16_to_half[65536]; + +inline half_float qlog_to_half(uint32_t val, uint32_t bits) +{ + assert((bits >= 5) && (bits <= 16)); + assert(val < (1U << bits)); + return g_qlog16_to_half[val << (16 - bits)]; +} + +// nearest values given a positive half float value (only) +static uint16_t g_half_to_qlog7[32768], g_half_to_qlog8[32768], g_half_to_qlog9[32768], g_half_to_qlog10[32768], g_half_to_qlog11[32768], g_half_to_qlog12[32768]; + +const uint32_t HALF_TO_QLOG_TABS_BASE = 7; +static uint16_t* g_pHalf_to_qlog_tabs[8] = +{ + g_half_to_qlog7, + g_half_to_qlog8, + + g_half_to_qlog9, + g_half_to_qlog10, + + g_half_to_qlog11, + g_half_to_qlog12 +}; + +static inline uint32_t half_to_qlog7_12(half_float h, uint32_t bits) +{ + assert((bits >= HALF_TO_QLOG_TABS_BASE) && (bits <= 12)); + assert(h < 32768); + + return g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_BASE][h]; +} + +#if 0 +// Input is the low 11 bits of the qlog +// Returns the 10-bit mantissa of the half float value +static int qlog11_to_half_float_mantissa(int M) +{ + assert(M <= 0x7FF); + int Mt; + if (M < 512) + Mt = 3 * M; + else if (M >= 1536) + Mt = 5 * M - 2048; + else + Mt = 4 * M - 512; + return (Mt >> 3); +} +#endif + +// Input is the 10-bit mantissa of the half float value +// Output is the 11-bit qlog value +// Inverse of qlog11_to_half_float_mantissa() +static inline int half_float_mantissa_to_qlog11(int hf) +{ + int q0 = (hf * 8 + 2) / 3; + int q1 = (hf * 8 + 2048 + 4) / 5; + + if (q0 < 512) + return q0; + else if (q1 >= 1536) + return q1; + + int q2 = (hf * 8 + 512 + 2) / 4; + return q2; +} + +static inline int half_to_qlog16(int hf) +{ + // extract 5 bits exponent, which is carried through to qlog16 unchanged + const int exp = (hf >> 10) & 0x1F; + + // extract and invert the 10 bit mantissa to nearest qlog11 (should be lossless) + const int mantissa = half_float_mantissa_to_qlog11(hf & 0x3FF); + assert(mantissa <= 0x7FF); + + // Now combine to qlog16, which is what ASTC HDR interpolates using the [0-64] weights. + uint32_t qlog16 = (exp << 11) | mantissa; + + // should be a lossless operation + assert(qlog16_to_half_slow(qlog16) == hf); + + return qlog16; +} + +static inline uint32_t quant_qlog16(uint32_t q16, uint32_t desired_bits) +{ + assert((desired_bits >= 7) && (desired_bits <= 12)); + assert(q16 <= 65535); + + const uint32_t shift = 16 - desired_bits; + uint32_t e = (q16 + (1U << (shift - 1U)) - 1U) >> shift; + + uint32_t max_val = (1U << desired_bits) - 1U; + e = minimum<uint32_t>(e, max_val); + + return e; +} + +static void compute_half_to_qlog_table(uint32_t bits, uint16_t* pTable, const basisu::vector<float> &qlog16_to_float) +{ + assert(bits >= 5 && bits <= 12); + const uint32_t max_val = (1 << bits) - 1; + + // For all positive half-floats + for (uint32_t h = 0; h < 32768; h++) + { + // Skip invalid values + if (is_half_inf_or_nan((half_float)h)) + continue; + const float desired_val = half_to_float((half_float)h); + + float best_err = 1e+30f; + uint32_t best_qlog = 0; + + // For all possible qlog's + for (uint32_t i = 0; i <= max_val; i++) + { + // Skip invalid values + float v = qlog16_to_float[i << (16 - bits)]; + if (std::isnan(v)) + continue; + + // Compute error + float err = fabs(v - desired_val); + + // Find best + if (err < best_err) + { + best_err = err; + best_qlog = i; + } + } + + pTable[h] = (uint16_t)best_qlog; + } + +#if 0 + uint32_t t = 0; + + const uint32_t nb = 12; + int nb_shift = 16 - nb; + + for (uint32_t q16 = 0; q16 < 65536; q16++) + { + half_float h = qlog16_to_half_slow(q16); + if (is_half_inf_or_nan(h)) + continue; + + int q7 = half_to_qlog7_12(h, nb); + + uint32_t best_err = UINT32_MAX, best_l = 0; + for (int l = 0; l < (1 << nb); l++) + { + int dec_q16 = l << nb_shift; + int err = iabs(dec_q16 - q16); + if (err < best_err) + { + best_err = err; + best_l = l; + } + } + + //int e = (q16 + 253) >> 9; // 345 + + int e = (q16 + (1 << (nb_shift - 1)) - 1) >> nb_shift; // 285 + if (best_l != e) + //if (q7 != best_l) + { + printf("q16=%u, h=%u, q7=%u, e=%u, best_l=%u\n", q16, h, q7, e, best_l); + t++; + } + } + + printf("Mismatches: %u\n", t); + exit(0); +#endif +} + +static void init_qlog_tables() +{ + basisu::vector<float> qlog16_to_float(65536); + + // for all possible qlog16, compute the corresponding half float + for (uint32_t i = 0; i <= 65535; i++) + { + half_float h = qlog16_to_half_slow(i); + g_qlog16_to_half[i] = h; + + qlog16_to_float[i] = half_to_float(h); + } + + // for all possible half floats, find the nearest qlog5-12 float + for (uint32_t bits = HALF_TO_QLOG_TABS_BASE; bits <= 12; bits++) + { + compute_half_to_qlog_table(bits, g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_BASE], qlog16_to_float); + } +} + +// [ise_range][0] = # levels +// [ise_range][1...] = lerp value [0,64] +// in ASTC order +// Supported ISE weight ranges: 0 to 10, 11 total +const uint32_t MIN_SUPPORTED_ISE_WEIGHT_INDEX = 1; // ISE 1=3 levels +const uint32_t MAX_SUPPORTED_ISE_WEIGHT_INDEX = 10; // ISE 10=24 levels + +static const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][32] = +{ + { 0 }, // ise range=0 is invalid for 4x4 block sizes (<24 weight bits in the block) + { 3, 0, 32, 64 }, // 1 + { 4, 0, 21, 43, 64 }, // 2 + { 5, 0, 16, 32, 48, 64 }, // 3 + { 6, 0, 64, 12, 52, 25, 39 }, // 4 + { 8, 0, 9, 18, 27, 37, 46, 55, 64 }, // 5 + { 10, 0, 64, 7, 57, 14, 50, 21, 43, 28, 36 }, // 6 + { 12, 0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36 }, // 7 + { 16, 0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64 }, // 8 + { 20, 0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38, 13, 51, 29, 35 }, // 9 + { 24, 0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37, 5, 59, 13, 51, 22, 42, 30, 34 } // 10 +}; + +//{ 12, 0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36 }, // 7 +//static const uint8_t g_weight_order_7[12] = { 0, 4, 8, 2, 6, 10, 11, 7, 3, 9, 5, 1 }; + +static vec3F calc_mean(uint32_t num_pixels, const vec4F* pPixels) +{ + vec3F mean(0.0f); + + for (uint32_t i = 0; i < num_pixels; i++) + { + const vec4F& p = pPixels[i]; + + mean[0] += p[0]; + mean[1] += p[1]; + mean[2] += p[2]; + } + + return mean / static_cast<float>(num_pixels); +} + +static vec3F calc_rgb_pca(uint32_t num_pixels, const vec4F* pPixels, const vec3F& mean_color) +{ + float cov[6] = { 0, 0, 0, 0, 0, 0 }; + + for (uint32_t i = 0; i < num_pixels; i++) + { + const vec4F& v = pPixels[i]; + + float r = v[0] - mean_color[0]; + float g = v[1] - mean_color[1]; + float b = v[2] - mean_color[2]; + + cov[0] += r * r; + cov[1] += r * g; + cov[2] += r * b; + cov[3] += g * g; + cov[4] += g * b; + cov[5] += b * b; + } + + float xr = .9f, xg = 1.0f, xb = .7f; + for (uint32_t iter = 0; iter < 3; iter++) + { + float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; + float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; + float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; + + float m = maximumf(maximumf(fabsf(r), fabsf(g)), fabsf(b)); + + if (m > 1e-10f) + { + m = 1.0f / m; + + r *= m; + g *= m; + b *= m; + } + + xr = r; + xg = g; + xb = b; + } + + float len = xr * xr + xg * xg + xb * xb; + + vec3F axis; + if (len < 1e-10f) + axis.set(0.0f); + else + { + len = 1.0f / sqrtf(len); + + xr *= len; + xg *= len; + xb *= len; + + axis.set(xr, xg, xb, 0); + } + + if (axis.dot(axis) < .5f) + { + axis.set(1.0f, 1.0f, 1.0f, 0.0f); + axis.normalize_in_place(); + } + + return axis; +} + +static vec3F interp_color(const vec3F& mean, const vec3F& dir, float df, const aabb3F& colorspace_box, const aabb3F& input_box, bool* pInside = nullptr) +{ +#if 0 + assert(mean[0] >= input_box[0][0]); + assert(mean[1] >= input_box[0][1]); + assert(mean[2] >= input_box[0][2]); + assert(mean[0] <= input_box[1][0]); + assert(mean[1] <= input_box[1][1]); + assert(mean[2] <= input_box[1][2]); +#endif + + if (pInside) + *pInside = false; + + vec3F k(mean + dir * df); + if (colorspace_box.contains(k)) + { + if (pInside) + *pInside = true; + + return k; + } + + // starts inside + vec3F s(mean); + + // ends outside + vec3F e(mean + dir * df); + + // a ray guaranteed to go from the outside to inside + ray3F r(e, (s - e).normalize_in_place()); + vec3F c; + float t = 0.0f; + + intersection::result res = intersection::ray_aabb(c, t, r, input_box); + if (res != intersection::cSuccess) + c = k; + + return c; +} + +// all in Q16 space, 0-65535 +static bool compute_least_squares_endpoints_rgb( + uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights, + vec3F* pXl, vec3F* pXh, const vec4F* pColors, const aabb3F& input_box) +{ + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // I did this in matrix form first, expanded out all the ops, then optimized it a bit. + float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; + float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; + float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; + float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t sel = pSelectors[i]; + z00 += pSelector_weights[sel][0]; + z10 += pSelector_weights[sel][1]; + z11 += pSelector_weights[sel][2]; + + float w = pSelector_weights[sel][3]; + q00_r += w * pColors[i][0]; + t_r += pColors[i][0]; + + q00_g += w * pColors[i][1]; + t_g += pColors[i][1]; + + q00_b += w * pColors[i][2]; + t_b += pColors[i][2]; + } + + q10_r = t_r - q00_r; + q10_g = t_g - q00_g; + q10_b = t_b - q00_b; + + z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (det == 0.0f) + return false; + + det = 1.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + (*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r); + (*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r); + + (*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g); + (*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g); + + (*pXl)[2] = (float)(iz00 * q00_b + iz01 * q10_b); + (*pXh)[2] = (float)(iz10 * q00_b + iz11 * q10_b); + + for (uint32_t c = 0; c < 3; c++) + { + float l = (*pXl)[c], h = (*pXh)[c]; + + if (input_box.get_dim(c) < .0000125f) + { + l = input_box[0][c]; + h = input_box[1][c]; + } + + (*pXl)[c] = l; + (*pXh)[c] = h; + } + + vec3F mean((*pXl + *pXh) * .5f); + vec3F dir(*pXh - *pXl); + + float ln = dir.length(); + if (ln) + { + dir /= ln; + + float ld = (*pXl - mean).dot(dir); + float hd = (*pXh - mean).dot(dir); + + aabb3F colorspace_box(vec3F(0.0f), vec3F(MAX_QLOG16_VAL)); + + bool was_inside1 = false; + + vec3F l = interp_color(mean, dir, ld, colorspace_box, input_box, &was_inside1); + if (!was_inside1) + *pXl = l; + + bool was_inside2 = false; + vec3F h = interp_color(mean, dir, hd, colorspace_box, input_box, &was_inside2); + if (!was_inside2) + *pXh = h; + } + + pXl->clamp(0.0f, MAX_QLOG16_VAL); + pXh->clamp(0.0f, MAX_QLOG16_VAL); + + return true; +} + +static vec4F g_astc_ls_weights_ise[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][24]; + +static uint8_t g_map_astc_to_linear_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][24]; // [ise_range][astc_index] -> linear index +static uint8_t g_map_linear_to_astc_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][24]; // [ise_range][linear_index] -> astc_index + +static void encode_astc_hdr_init() +{ + // Precomputed weight constants used during least fit determination. For each entry: w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w + for (uint32_t range = MIN_SUPPORTED_ISE_WEIGHT_INDEX; range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX; range++) + { + const uint32_t num_levels = g_ise_weight_lerps[range][0]; + assert((num_levels >= 3) && (num_levels <= 24)); + + for (uint32_t i = 0; i < num_levels; i++) + { + float w = g_ise_weight_lerps[range][1 + i] * (1.0f / 64.0f); + + g_astc_ls_weights_ise[range][i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w); + } + } + + for (uint32_t ise_range = MIN_SUPPORTED_ISE_WEIGHT_INDEX; ise_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX; ise_range++) + { + const uint32_t num_levels = g_ise_weight_lerps[ise_range][0]; + assert((num_levels >= 3) && (num_levels <= 24)); + + uint32_t s[32]; + for (uint32_t i = 0; i < num_levels; i++) + s[i] = (g_ise_weight_lerps[ise_range][1 + i] << 8) + i; + + std::sort(s, s + num_levels); + + for (uint32_t i = 0; i < num_levels; i++) + g_map_linear_to_astc_order[ise_range][i] = (uint8_t)(s[i] & 0xFF); + + for (uint32_t i = 0; i < num_levels; i++) + g_map_astc_to_linear_order[ise_range][g_map_linear_to_astc_order[ise_range][i]] = (uint8_t)i; + } +} + +void interpolate_qlog12_colors( + const int e[2][3], + half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range) +{ + assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + + for (uint32_t i = 0; i < 2; i++) + { + for (uint32_t j = 0; j < 3; j++) + { + assert(in_range(e[i][j], 0, 0xFFF)); + } + } + + for (uint32_t i = 0; i < n; i++) + { + const int c = g_ise_weight_lerps[ise_weight_range][1 + i]; + assert(c == (int)astc_helpers::dequant_bise_weight(i, ise_weight_range)); + + half_float rf, gf, bf; + + { + uint32_t r0 = e[0][0] << 4; + uint32_t r1 = e[1][0] << 4; + int ri = (r0 * (64 - c) + r1 * c + 32) / 64; + rf = qlog16_to_half_slow(ri); + } + + { + uint32_t g0 = e[0][1] << 4; + uint32_t g1 = e[1][1] << 4; + int gi = (g0 * (64 - c) + g1 * c + 32) / 64; + gf = qlog16_to_half_slow(gi); + } + + { + uint32_t b0 = e[0][2] << 4; + uint32_t b1 = e[1][2] << 4; + int bi = (b0 * (64 - c) + b1 * c + 32) / 64; + bf = qlog16_to_half_slow(bi); + } + + if (pDecoded_half) + { + pDecoded_half[i * 3 + 0] = rf; + pDecoded_half[i * 3 + 1] = gf; + pDecoded_half[i * 3 + 2] = bf; + } + + if (pDecoded_float) + { + pDecoded_float[i][0] = half_to_float(rf); + pDecoded_float[i][1] = half_to_float(gf); + pDecoded_float[i][2] = half_to_float(bf); + } + } +} + +// decoded in ASTC order, not linear order +// return false if the ISE endpoint quantization leads to non-valid endpoints being decoded +bool get_astc_hdr_mode_11_block_colors( + const uint8_t* pEndpoints, + half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range) +{ + assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + + int e[2][3]; + if (!decode_mode11_to_qlog12(pEndpoints, e, ise_endpoint_range)) + return false; + + interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range); + + return true; +} + +// decoded in ASTC order, not linear order +// return false if the ISE endpoint quantization leads to non-valid endpoints being decoded +bool get_astc_hdr_mode_7_block_colors( + const uint8_t* pEndpoints, + half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range) +{ + assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + + int e[2][3]; + if (!decode_mode7_to_qlog12(pEndpoints, e, nullptr, ise_endpoint_range)) + return false; + + interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range); + + return true; +} + +// Fast high precision piecewise linear approximation of log2(bias+x). +// Half may be zero, positive or denormal. No NaN/Inf/negative. +static inline double q(half_float x) +{ + union { float f; int32_t i; uint32_t u; } fi; + + fi.f = fast_half_to_float_pos_not_inf_or_nan(x); + + assert(fi.f >= 0.0f); + + fi.f += .125f; + + return (double)fi.u; // approx log2f(fi.f), need to return double for the precision +} + +double eval_selectors( + uint32_t num_pixels, + uint8_t* pWeights, + const half_float* pBlock_pixels_half, + uint32_t num_weight_levels, + const half_float* pDecoded_half, + const astc_hdr_codec_options& coptions, + uint32_t usable_selector_bitmask) +{ + assert((num_pixels >= 1) && (num_pixels <= 16)); + assert(usable_selector_bitmask); + + const float R_WEIGHT = coptions.m_r_err_scale; + const float G_WEIGHT = coptions.m_g_err_scale; + + double total_error = 0; + +#ifdef _DEBUG + for (uint32_t i = 0; i < num_weight_levels; i++) + { + assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0])); + assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1])); + assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2])); + } +#endif + + for (uint32_t p = 0; p < num_pixels; p++) + { + const half_float* pDesired_half = &pBlock_pixels_half[p * 3]; + + double lowest_e = 1e+30f; + + // this is an approximation of MSLE + for (uint32_t i = 0; i < num_weight_levels; i++) + { + if (((1 << i) & usable_selector_bitmask) == 0) + continue; + + // compute piecewise linear approximation of log2(a+eps)-log2(b+eps), for each component, then MSLE + double rd = q(pDecoded_half[i * 3 + 0]) - q(pDesired_half[0]); + double gd = q(pDecoded_half[i * 3 + 1]) - q(pDesired_half[1]); + double bd = q(pDecoded_half[i * 3 + 2]) - q(pDesired_half[2]); + + double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd; + + if (e < lowest_e) + { + lowest_e = e; + pWeights[p] = (uint8_t)i; + } + } + + total_error += lowest_e; + + } // p + + return total_error; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +double compute_block_error(const half_float* pOrig_block, const half_float* pPacked_block, const astc_hdr_codec_options& coptions) +{ + const float R_WEIGHT = coptions.m_r_err_scale; + const float G_WEIGHT = coptions.m_g_err_scale; + + double total_error = 0; + + for (uint32_t p = 0; p < 16; p++) + { + double rd = q(pOrig_block[p * 3 + 0]) - q(pPacked_block[p * 3 + 0]); + double gd = q(pOrig_block[p * 3 + 1]) - q(pPacked_block[p * 3 + 1]); + double bd = q(pOrig_block[p * 3 + 2]) - q(pPacked_block[p * 3 + 2]); + + double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd; + + total_error += e; + } + + return total_error; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static inline int compute_clamped_val(int v, int l, int h, bool& did_clamp, int& max_clamp_mag) +{ + assert(l < h); + + if (v < l) + { + max_clamp_mag = basisu::maximum<int>(max_clamp_mag, l - v); + + v = l; + did_clamp = true; + } + else if (v > h) + { + max_clamp_mag = basisu::maximum<int>(max_clamp_mag, v - h); + + v = h; + did_clamp = true; + } + + return v; +} + +static bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag) +{ + assert(submode <= 7); + + const uint8_t s_b_bits[8] = { 7, 8, 6, 7, 8, 6, 7, 6 }; + const uint8_t s_c_bits[8] = { 6, 6, 7, 7, 6, 7, 7, 7 }; + const uint8_t s_d_bits[8] = { 7, 6, 7, 6, 5, 6, 5, 6 }; + + const uint32_t a_bits = 9 + (submode >> 1); + const uint32_t b_bits = s_b_bits[submode]; + const uint32_t c_bits = s_c_bits[submode]; + const uint32_t d_bits = s_d_bits[submode]; + + const int max_a_val = (1 << a_bits) - 1; + const int max_b_val = (1 << b_bits) - 1; + const int max_c_val = (1 << c_bits) - 1; + + // The maximum usable value before it turns to NaN/Inf + const int max_a_qlog = get_max_qlog(a_bits); + + const int min_d_val = -(1 << (d_bits - 1)); + const int max_d_val = -min_d_val - 1; + assert((max_d_val - min_d_val + 1) == (1 << d_bits)); + + int val_q[2][3]; + + for (uint32_t c = 0; c < 3; c++) + { +#if 1 + // this is better + const half_float l = qlog16_to_half_slow((uint32_t)std::round(low_q16[c])); + val_q[0][c] = half_to_qlog7_12(l, a_bits); + + const half_float h = qlog16_to_half_slow((uint32_t)std::round(high_q16[c])); + val_q[1][c] = half_to_qlog7_12(h, a_bits); +#else + val_q[0][c] = quant_qlog16((uint32_t)std::round(low_q16[c]), a_bits); + val_q[1][c] = quant_qlog16((uint32_t)std::round(high_q16[c]), a_bits); +#endif + +#if 1 + if (val_q[0][c] == val_q[1][c]) + { +#if 0 + if (l <= h) +#else + if (low_q16[c] < high_q16[c]) +#endif + { + if (val_q[0][c]) + val_q[0][c]--; + + if (val_q[1][c] != max_a_val) + val_q[1][c]++; + } + else + { + if (val_q[0][c] != max_a_val) + val_q[0][c]++; + + if (val_q[1][c]) + val_q[1][c]--; + } + } +#endif + + val_q[0][c] = minimum<uint32_t>(val_q[0][c], max_a_qlog); + val_q[1][c] = minimum<uint32_t>(val_q[1][c], max_a_qlog); + } + + int highest_q = -1, highest_val = 0, highest_comp = 0; + + for (uint32_t v = 0; v < 2; v++) + { + for (uint32_t c = 0; c < 3; c++) + { + assert(val_q[v][c] >= 0 && val_q[v][c] <= max_a_val); + + if (val_q[v][c] > highest_q) + { + highest_q = val_q[v][c]; + highest_val = v; + highest_comp = c; + } + } + } + + const bool had_tie = (val_q[highest_val ^ 1][highest_comp] == highest_q); + + if (highest_val != 1) + { + for (uint32_t c = 0; c < 3; c++) + { + std::swap(val_q[0][c], val_q[1][c]); + } + } + + if (highest_comp) + { + std::swap(val_q[0][0], val_q[0][highest_comp]); + std::swap(val_q[1][0], val_q[1][highest_comp]); + } + + int orig_q[2][3]; + memcpy(orig_q, val_q, sizeof(val_q)); + + // val[1][0] is now guaranteed to be highest + int best_va = 0, best_vb0 = 0, best_vb1 = 0, best_vc = 0, best_vd0 = 0, best_vd1 = 0; + int best_max_clamp_mag = 0; + bool best_did_clamp = false; + int best_q[2][3] = { { 0, 0, 0}, { 0, 0, 0 } }; + BASISU_NOTE_UNUSED(best_q); + uint32_t best_dist = UINT_MAX; + + for (uint32_t pass = 0; pass < 2; pass++) + { + int trial_va = val_q[1][0]; + + assert(trial_va <= max_a_val); + assert(trial_va >= val_q[1][1]); + assert(trial_va >= val_q[1][2]); + + assert(trial_va >= val_q[0][0]); + assert(trial_va >= val_q[0][1]); + assert(trial_va >= val_q[0][2]); + + bool did_clamp = false; + int trial_max_clamp_mag = 0; + + int trial_vb0 = compute_clamped_val(trial_va - val_q[1][1], 0, max_b_val, did_clamp, trial_max_clamp_mag); + int trial_vb1 = compute_clamped_val(trial_va - val_q[1][2], 0, max_b_val, did_clamp, trial_max_clamp_mag); + int trial_vc = compute_clamped_val(trial_va - val_q[0][0], 0, max_c_val, did_clamp, trial_max_clamp_mag); + int trial_vd0 = compute_clamped_val((trial_va - trial_vb0 - trial_vc) - val_q[0][1], min_d_val, max_d_val, did_clamp, trial_max_clamp_mag); + int trial_vd1 = compute_clamped_val((trial_va - trial_vb1 - trial_vc) - val_q[0][2], min_d_val, max_d_val, did_clamp, trial_max_clamp_mag); + + if (!did_clamp) + { + // Make sure decoder gets the expected values + assert(trial_va == val_q[1][0]); + assert(trial_va - trial_vb0 == val_q[1][1]); + assert(trial_va - trial_vb1 == val_q[1][2]); + + assert((trial_va - trial_vc) == val_q[0][0]); + assert((trial_va - trial_vb0 - trial_vc - trial_vd0) == val_q[0][1]); + assert((trial_va - trial_vb1 - trial_vc - trial_vd1) == val_q[0][2]); + } + + const int r_e0 = clamp<int>(trial_va, 0, max_a_val); + const int r_e1 = clamp<int>(trial_va - trial_vb0, 0, max_a_val); + const int r_e2 = clamp<int>(trial_va - trial_vb1, 0, max_a_val); + + const int r_f0 = clamp<int>(trial_va - trial_vc, 0, max_a_val); + const int r_f1 = clamp<int>(trial_va - trial_vb0 - trial_vc - trial_vd0, 0, max_a_val); + const int r_f2 = clamp<int>(trial_va - trial_vb1 - trial_vc - trial_vd1, 0, max_a_val); + + assert(r_e0 <= max_a_qlog); + assert(r_e1 <= max_a_qlog); + assert(r_e2 <= max_a_qlog); + + assert(r_f0 <= max_a_qlog); + assert(r_f1 <= max_a_qlog); + assert(r_f2 <= max_a_qlog); + + if ((!did_clamp) || (!had_tie)) + { + best_va = trial_va; + best_vb0 = trial_vb0; + best_vb1 = trial_vb1; + best_vc = trial_vc; + best_vd0 = trial_vd0; + best_vd1 = trial_vd1; + best_max_clamp_mag = trial_max_clamp_mag; + best_did_clamp = did_clamp; + + best_q[1][0] = r_e0; + best_q[1][1] = r_e1; + best_q[1][2] = r_e2; + best_q[0][0] = r_f0; + best_q[0][1] = r_f1; + best_q[0][2] = r_f2; + break; + } + + // we had a tie and it did clamp, try swapping L/H for a potential slight gain + + const uint32_t r_dist1 = basisu::square<int>(r_e0 - val_q[1][0]) + basisu::square<int>(r_e1 - val_q[1][1]) + basisu::square<int>(r_e2 - val_q[1][2]); + const uint32_t r_dist0 = basisu::square<int>(r_f0 - val_q[0][0]) + basisu::square<int>(r_f1 - val_q[0][1]) + basisu::square<int>(r_f2 - val_q[0][2]); + + const uint32_t total_dist = r_dist1 + r_dist0; + + if (total_dist < best_dist) + { + best_dist = total_dist; + + best_va = trial_va; + best_vb0 = trial_vb0; + best_vb1 = trial_vb1; + best_vc = trial_vc; + best_vd0 = trial_vd0; + best_vd1 = trial_vd1; + best_did_clamp = did_clamp; + + best_q[1][0] = r_e0; + best_q[1][1] = r_e1; + best_q[1][2] = r_e2; + best_q[0][0] = r_f0; + best_q[0][1] = r_f1; + best_q[0][2] = r_f2; + } + + for (uint32_t c = 0; c < 3; c++) + std::swap(val_q[0][c], val_q[1][c]); + } + + // pack bits now + int v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0; + + int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0; + switch (submode) + { + case 0: + x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5); + break; + case 1: + x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5); + break; + case 2: + x0 = get_bit(best_va, 9); x1 = get_bit(best_vc, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5); + break; + case 3: + x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_va, 9); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5); + break; + case 4: + x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10); + break; + case 5: + x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_vc, 7); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5); + break; + case 6: + x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10); + break; + case 7: + x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5); + break; + default: + break; + } + + // write mode + pack_bit(v1, 7, submode, 0); + pack_bit(v2, 7, submode, 1); + pack_bit(v3, 7, submode, 2); + + // highest component + pack_bit(v4, 7, highest_comp, 0); + pack_bit(v5, 7, highest_comp, 1); + + // write bit 8 of va + pack_bit(v1, 6, best_va, 8); + + // extra bits + pack_bit(v2, 6, x0); + pack_bit(v3, 6, x1); + pack_bit(v4, 6, x2); + pack_bit(v5, 6, x3); + pack_bit(v4, 5, x4); + pack_bit(v5, 5, x5); + + v0 = best_va & 0xFF; + v1 |= (best_vc & 63); + v2 |= (best_vb0 & 63); + v3 |= (best_vb1 & 63); + v4 |= (best_vd0 & 31); + v5 |= (best_vd1 & 31); + + assert(in_range(v0, 0, 255) && in_range(v1, 0, 255) && in_range(v2, 0, 255) && in_range(v3, 0, 255) && in_range(v4, 0, 255) && in_range(v5, 0, 255)); + + pEndpoints[0] = (uint8_t)v0; + pEndpoints[1] = (uint8_t)v1; + pEndpoints[2] = (uint8_t)v2; + pEndpoints[3] = (uint8_t)v3; + pEndpoints[4] = (uint8_t)v4; + pEndpoints[5] = (uint8_t)v5; + +#ifdef _DEBUG + // Test for valid pack by unpacking + { + if (highest_comp) + { + std::swap(best_q[0][0], best_q[0][highest_comp]); + std::swap(best_q[1][0], best_q[1][highest_comp]); + + std::swap(orig_q[0][0], orig_q[0][highest_comp]); + std::swap(orig_q[1][0], orig_q[1][highest_comp]); + } + + int test_e[2][3]; + decode_mode11_to_qlog12(pEndpoints, test_e, astc_helpers::BISE_256_LEVELS); + for (uint32_t i = 0; i < 2; i++) + { + for (uint32_t j = 0; j < 3; j++) + { + assert(best_q[i][j] == test_e[i][j] >> (12 - a_bits)); + + if (!best_did_clamp) + { + assert((orig_q[i][j] == test_e[i][j] >> (12 - a_bits)) || + (orig_q[1 - i][j] == test_e[i][j] >> (12 - a_bits))); + } + } + } + } +#endif + + max_clamp_mag = best_max_clamp_mag; + + return best_did_clamp; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static void pack_astc_mode11_direct(uint8_t* pEndpoints, const vec3F& l_q16, const vec3F& h_q16) +{ + for (uint32_t i = 0; i < 3; i++) + { + // TODO: This goes from QLOG16->HALF->QLOG8/7 + half_float l_half = qlog16_to_half_slow(clamp((int)std::round(l_q16[i]), 0, 65535)); + half_float h_half = qlog16_to_half_slow(clamp((int)std::round(h_q16[i]), 0, 65535)); + + int l_q, h_q; + + if (i == 2) + { + l_q = g_half_to_qlog7[bounds_check((uint32_t)l_half, 0U, 32768U)]; + h_q = g_half_to_qlog7[bounds_check((uint32_t)h_half, 0U, 32768U)]; + + l_q = minimum<uint32_t>(l_q, MAX_QLOG7); + h_q = minimum<uint32_t>(h_q, MAX_QLOG7); + } + else + { + l_q = g_half_to_qlog8[bounds_check((uint32_t)l_half, 0U, 32768U)]; + h_q = g_half_to_qlog8[bounds_check((uint32_t)h_half, 0U, 32768U)]; + + l_q = minimum<uint32_t>(l_q, MAX_QLOG8); + h_q = minimum<uint32_t>(h_q, MAX_QLOG8); + } + +#if 1 + if (l_q == h_q) + { + const int m = (i == 2) ? MAX_QLOG7 : MAX_QLOG8; + + if (l_q16[i] <= h_q16[i]) + { + if (l_q) + l_q--; + + if (h_q != m) + h_q++; + } + else + { + if (h_q) + h_q--; + + if (l_q != m) + l_q++; + } + } +#endif + + if (i == 2) + { + assert(l_q <= (int)MAX_QLOG7 && h_q <= (int)MAX_QLOG7); + l_q |= 128; + h_q |= 128; + } + else + { + assert(l_q <= (int)MAX_QLOG8 && h_q <= (int)MAX_QLOG8); + } + + pEndpoints[2 * i + 0] = (uint8_t)l_q; + pEndpoints[2 * i + 1] = (uint8_t)h_q; + } +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& rgb_q16, float s_q16, int& max_clamp_mag, uint32_t ise_weight_range) +{ + assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + + assert(submode <= 5); + max_clamp_mag = 0; + + static const uint8_t s_r_bits[6] = { 11, 11, 10, 9, 8, 7 }; + static const uint8_t s_g_b_bits[6] = { 5, 6, 5, 6, 7, 7 }; + static const uint8_t s_s_bits[6] = { 7, 5, 8, 7, 6, 7 }; + + // The precision of the components + const uint32_t prec_bits = s_r_bits[submode]; + + int qlog[4], pack_bits[4]; + + for (uint32_t i = 0; i < 4; i++) + { + const float f = (i == 3) ? s_q16 : rgb_q16[i]; + + // The # of bits the component is packed into + if (i == 0) + pack_bits[i] = s_r_bits[submode]; + else if (i == 3) + pack_bits[i] = s_s_bits[submode]; + else + pack_bits[i] = s_g_b_bits[submode]; + +#if 0 + // this is slightly worse + // TODO: going from qlog16 to half loses some precision. Then going from half to qlog 7-12 will have extra error. + half_float h = qlog_to_half(clamp((int)std::round(f), 0, MAX_QLOG16), 16); + qlog[i] = half_to_qlog7_12((half_float)bounds_check((uint32_t)h, 0U, 32768U), prec_bits); +#else + qlog[i] = quant_qlog16(clamp<int>((int)std::round(f), 0, MAX_QLOG16), prec_bits); + + // Only bias if there are enough texel weights, 4=6 weights + if (ise_weight_range >= 4) + { + // Explictly bias the high color, and the scale up, to better exploit the weights. + // The quantized range also then encompases the complete input range. + const uint32_t max_val = (1 << prec_bits) - 1; + const uint32_t K = 3; + if (i == 3) + { + qlog[i] = minimum<uint32_t>(qlog[i] + K * 2, max_val); + } + else + { + qlog[i] = minimum<uint32_t>(qlog[i] + K, max_val); + } + } +#endif + + if (i != 3) + qlog[i] = minimum<uint32_t>(qlog[i], get_max_qlog(prec_bits)); + + // If S=0, we lose freedom for the texel weights to add any value. + if ((i == 3) && (qlog[i] == 0)) + qlog[i] = 1; + } + + uint32_t maj_index = 0; + + bool did_clamp = false; + + if (submode != 5) + { + int largest_qlog = 0; + for (uint32_t i = 0; i < 3; i++) + { + if (qlog[i] > largest_qlog) + { + largest_qlog = qlog[i]; + maj_index = i; + } + } + + if (maj_index) + { + std::swap(qlog[0], qlog[maj_index]); + } + + assert(qlog[0] >= qlog[1]); + assert(qlog[0] >= qlog[2]); + + qlog[1] = qlog[0] - qlog[1]; + qlog[2] = qlog[0] - qlog[2]; + + for (uint32_t i = 1; i < 4; i++) + { + const int max_val = (1 << pack_bits[i]) - 1; + + if (qlog[i] > max_val) + { + max_clamp_mag = maximum<int>(max_clamp_mag, qlog[i] - max_val); + qlog[i] = max_val; + did_clamp = true; + } + } + } + + for (uint32_t i = 0; i < 4; i++) + { + const int max_val = (1 << pack_bits[i]) - 1; (void)max_val; + + assert(qlog[i] <= max_val); + } + + int mode = 0; + + int r = qlog[0] & 63; // 6-bits + int g = qlog[1] & 31; // 5-bits + int b = qlog[2] & 31; // 5-bits + int s = qlog[3] & 31; // 5-bits + + int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0, x6 = 0; + + switch (submode) + { + case 0: + { + mode = (maj_index << 2) | 0; + assert((mode & 0xC) != 0xC); + + x0 = get_bit(qlog[0], 9); // R9 + x1 = get_bit(qlog[0], 8); // R8 + x2 = get_bit(qlog[0], 7); // R7 + x3 = get_bit(qlog[0], 10); // R10 + x4 = get_bit(qlog[0], 6); // R6 + x5 = get_bit(qlog[3], 6); // S6 + x6 = get_bit(qlog[3], 5); // S5 + break; + } + case 1: + { + mode = (maj_index << 2) | 1; + assert((mode & 0xC) != 0xC); + + x0 = get_bit(qlog[0], 8); // R8 + x1 = get_bit(qlog[1], 5); // G5 + x2 = get_bit(qlog[0], 7); // R7 + x3 = get_bit(qlog[2], 5); // B5 + x4 = get_bit(qlog[0], 6); // R6 + x5 = get_bit(qlog[0], 10); // R10 + x6 = get_bit(qlog[0], 9); // R9 + break; + } + case 2: + { + mode = (maj_index << 2) | 2; + assert((mode & 0xC) != 0xC); + + x0 = get_bit(qlog[0], 9); // R9 + x1 = get_bit(qlog[0], 8); // R8 + x2 = get_bit(qlog[0], 7); // R7 + x3 = get_bit(qlog[0], 6); // R6 + x4 = get_bit(qlog[3], 7); // S7 + x5 = get_bit(qlog[3], 6); // S6 + x6 = get_bit(qlog[3], 5); // S5 + break; + } + case 3: + { + mode = (maj_index << 2) | 3; + assert((mode & 0xC) != 0xC); + + x0 = get_bit(qlog[0], 8); // R8 + x1 = get_bit(qlog[1], 5); // G5 + x2 = get_bit(qlog[0], 7); // R7 + x3 = get_bit(qlog[2], 5); // B5 + x4 = get_bit(qlog[0], 6); // R6 + x5 = get_bit(qlog[3], 6); // S6 + x6 = get_bit(qlog[3], 5); // S5 + break; + } + case 4: + { + mode = maj_index | 0xC; // 0b1100 + assert((mode & 0xC) == 0xC); + assert(mode != 0xF); + + x0 = get_bit(qlog[1], 6); // G6 + x1 = get_bit(qlog[1], 5); // G5 + x2 = get_bit(qlog[2], 6); // B6 + x3 = get_bit(qlog[2], 5); // B5 + x4 = get_bit(qlog[0], 6); // R6 + x5 = get_bit(qlog[0], 7); // R7 + x6 = get_bit(qlog[3], 5); // S5 + break; + } + case 5: + { + mode = 0xF; + + x0 = get_bit(qlog[1], 6); // G6 + x1 = get_bit(qlog[1], 5); // G5 + x2 = get_bit(qlog[2], 6); // B6 + x3 = get_bit(qlog[2], 5); // B5 + x4 = get_bit(qlog[0], 6); // R6 + x5 = get_bit(qlog[3], 6); // S6 + x6 = get_bit(qlog[3], 5); // S5 + break; + } + default: + { + assert(0); + break; + } + } + + pEndpoints[0] = (uint8_t)((get_bit(mode, 1) << 7) | (get_bit(mode, 0) << 6) | r); + pEndpoints[1] = (uint8_t)((get_bit(mode, 2) << 7) | (x0 << 6) | (x1 << 5) | g); + pEndpoints[2] = (uint8_t)((get_bit(mode, 3) << 7) | (x2 << 6) | (x3 << 5) | b); + pEndpoints[3] = (uint8_t)((x4 << 7) | (x5 << 6) | (x6 << 5) | s); + +#ifdef _DEBUG + // Test for valid pack by unpacking + { + const int inv_shift = 12 - prec_bits; + + int unpacked_e[2][3]; + if (submode != 5) + { + unpacked_e[1][0] = left_shift32(qlog[0], inv_shift); + unpacked_e[1][1] = clamp(left_shift32((qlog[0] - qlog[1]), inv_shift), 0, 0xFFF); + unpacked_e[1][2] = clamp(left_shift32((qlog[0] - qlog[2]), inv_shift), 0, 0xFFF); + + unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF); + unpacked_e[0][1] = clamp(left_shift32(((qlog[0] - qlog[1]) - qlog[3]), inv_shift), 0, 0xFFF); + unpacked_e[0][2] = clamp(left_shift32(((qlog[0] - qlog[2]) - qlog[3]), inv_shift), 0, 0xFFF); + } + else + { + unpacked_e[1][0] = left_shift32(qlog[0], inv_shift); + unpacked_e[1][1] = left_shift32(qlog[1], inv_shift); + unpacked_e[1][2] = left_shift32(qlog[2], inv_shift); + + unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF); + unpacked_e[0][1] = clamp(left_shift32((qlog[1] - qlog[3]), inv_shift), 0, 0xFFF); + unpacked_e[0][2] = clamp(left_shift32((qlog[2] - qlog[3]), inv_shift), 0, 0xFFF); + } + + if (maj_index) + { + std::swap(unpacked_e[0][0], unpacked_e[0][maj_index]); + std::swap(unpacked_e[1][0], unpacked_e[1][maj_index]); + } + + int e[2][3]; + decode_mode7_to_qlog12_ise20(pEndpoints, e, nullptr); + + for (uint32_t i = 0; i < 3; i++) + { + assert(unpacked_e[0][i] == e[0][i]); + assert(unpacked_e[1][i] == e[1][i]); + } + } +#endif + + return did_clamp; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static void quantize_ise_endpoints(uint32_t ise_endpoint_range, const uint8_t* pSrc_endpoints, uint8_t *pDst_endpoints, uint32_t n) +{ + assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + + if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS) + { + memcpy(pDst_endpoints, pSrc_endpoints, n); + } + else + { + for (uint32_t i = 0; i < n; i++) + { + uint32_t v = pSrc_endpoints[i]; + assert(v <= 255); + + pDst_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_val_to_ise[v]; + } + } +} + +//-------------------------------------------------------------------------------------------------------------------------- + +// Note this could fail to find any valid solution if use_endpoint_range!=20. +// Returns true if improved. +static bool try_mode11(uint32_t num_pixels, + uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used, + vec3F& low_color_q16, const vec3F& high_color_q16, + half_float block_pixels_half[16][3], + uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_options& coptions, bool direct_only, uint32_t ise_endpoint_range, + bool constrain_ise_weight8_selectors, + int32_t first_submode, int32_t last_submode) // -1, 7 +{ + assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + assert((num_weight_levels >= 3) && (num_weight_levels <= 32)); + assert((num_pixels >= 1) && (num_pixels <= 16)); + + bool improved_flag = false; + + half_float decoded_half[32][3]; + vec3F decoded_float[32]; + uint8_t orig_trial_endpoints[NUM_MODE11_ENDPOINTS], trial_endpoints[NUM_MODE11_ENDPOINTS], trial_weights[16]; + + if (direct_only) + { + first_submode = -1; + last_submode = -1; + } + + assert(first_submode <= last_submode); + assert((first_submode >= -1) && (first_submode <= 7)); + assert((last_submode >= -1) && (last_submode <= 7)); + + // TODO: First determine if a submode doesn't clamp first. If one is found, encode to that and we're done. + for (int submode = last_submode; submode >= first_submode; submode--) + { + bool did_clamp = false; + int max_clamp_mag = 0; + if (submode == -1) + { + // If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision. + pack_astc_mode11_direct(orig_trial_endpoints, low_color_q16, high_color_q16); + } + else + { + did_clamp = pack_astc_mode11_submode(submode, orig_trial_endpoints, low_color_q16, high_color_q16, max_clamp_mag); + + // If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts. + const int MAX_CLAMP_MAG_ACCEPT_THRESH = 4; + if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH)) + continue; + } + + // This will distort the endpoints if the ISE endpoint range isn't 256 levels (20). + // It could massively distort the endpoints, but still result in a valid encoding. + quantize_ise_endpoints(ise_endpoint_range, orig_trial_endpoints, trial_endpoints, NUM_MODE11_ENDPOINTS); + + if (!get_astc_hdr_mode_11_block_colors(trial_endpoints, &decoded_half[0][0], decoded_float, num_weight_levels, ise_weight_range, ise_endpoint_range)) + continue; + + uint32_t usable_selector_bitmask = UINT32_MAX; + if ((constrain_ise_weight8_selectors) && (ise_weight_range == astc_helpers::BISE_16_LEVELS)) + usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 10) | (1 << 11) | (1 << 14) | (1 << 15); + + double trial_blk_error = eval_selectors(num_pixels, trial_weights, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions, usable_selector_bitmask); + if (trial_blk_error < cur_block_error) + { + cur_block_error = trial_blk_error; + memcpy(pEndpoints, trial_endpoints, NUM_MODE11_ENDPOINTS); + memcpy(pWeights, trial_weights, num_pixels); + submode_used = submode + 1; + improved_flag = true; + } + + // If it didn't clamp it was a lossless encode at this precision, so we can stop early as there's probably no use trying lower precision submodes. + // (Although it may be, because a lower precision pack could try nearby voxel coords.) + // However, at lower levels quantization may cause the decoded endpoints to be very distorted, so we need to evaluate up to direct. + if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS) + { + if (!did_clamp) + break; + } + } + + return improved_flag; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static bool try_mode7( + uint32_t num_pixels, + uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used, + vec3F& high_color_q16, const float s_q16, + half_float block_pixels_half[16][3], + uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_options& coptions, + uint32_t ise_endpoint_range) +{ + assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + assert((num_pixels >= 1) && (num_pixels <= 16)); + + bool improved_flag = false; + + half_float decoded_half[24][3]; + vec3F decoded_float[24]; + + uint8_t orig_trial_endpoints[NUM_MODE7_ENDPOINTS], trial_endpoints[NUM_MODE7_ENDPOINTS], trial_weights[16]; + + // TODO: First determine if a submode doesn't clamp first. If one is found, encode to that and we're done. + for (int submode = 0; submode <= 5; submode++) + { + int max_clamp_mag = 0; + const bool did_clamp = pack_astc_mode7_submode(submode, orig_trial_endpoints, high_color_q16, s_q16, max_clamp_mag, ise_weight_range); + + if (submode < 5) + { + const int MAX_CLAMP_MAG_ACCEPT_THRESH = 4; + if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH)) + continue; + } + + // This will distort the endpoints if the ISE endpoint range isn't 256 levels (20). + // It could massively distort the endpoints, but still result in a valid encoding. + quantize_ise_endpoints(ise_endpoint_range, orig_trial_endpoints, trial_endpoints, NUM_MODE7_ENDPOINTS); + + if (!get_astc_hdr_mode_7_block_colors(trial_endpoints, &decoded_half[0][0], decoded_float, num_weight_levels, ise_weight_range, ise_endpoint_range)) + continue; + + double trial_blk_error = eval_selectors(num_pixels, trial_weights, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions); + if (trial_blk_error < cur_block_error) + { + cur_block_error = trial_blk_error; + memcpy(pEndpoints, trial_endpoints, NUM_MODE7_ENDPOINTS); + memcpy(pWeights, trial_weights, num_pixels); + submode_used = submode; + improved_flag = true; + } + + if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS) + { + if (!did_clamp) + break; + } + } + + return improved_flag; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static double encode_astc_hdr_block_mode_11( + uint32_t num_pixels, + const vec4F* pBlock_pixels, + uint32_t ise_weight_range, + uint32_t& best_submode, + double cur_block_error, + uint8_t* blk_endpoints, uint8_t* blk_weights, + const astc_hdr_codec_options& coptions, + bool direct_only, + uint32_t ise_endpoint_range, + bool uber_mode, + bool constrain_ise_weight8_selectors, + int32_t first_submode, int32_t last_submode) +{ + assert((ise_weight_range >= 1) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert((num_pixels >= 1) && (num_pixels <= 16)); + + best_submode = 0; + + half_float block_pixels_half[16][3]; + vec4F block_pixels_q16[16]; + + // TODO: This is done redundantly. + for (uint32_t i = 0; i < num_pixels; i++) + { + block_pixels_half[i][0] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][0]); + block_pixels_q16[i][0] = (float)half_to_qlog16(block_pixels_half[i][0]); + + block_pixels_half[i][1] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][1]); + block_pixels_q16[i][1] = (float)half_to_qlog16(block_pixels_half[i][1]); + + block_pixels_half[i][2] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][2]); + block_pixels_q16[i][2] = (float)half_to_qlog16(block_pixels_half[i][2]); + + block_pixels_q16[i][3] = 0.0f; + } + + const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range); + + // TODO: should match MAX_SUPPORTED_ISE_WEIGHT_INDEX + const uint32_t MAX_WEIGHT_LEVELS = 32; + (void)MAX_WEIGHT_LEVELS; + assert(num_weight_levels <= MAX_WEIGHT_LEVELS); + + vec3F block_mean_color_q16(calc_mean(num_pixels, block_pixels_q16)); + vec3F block_axis_q16(calc_rgb_pca(num_pixels, block_pixels_q16, block_mean_color_q16)); + + aabb3F color_box_q16(cInitExpand); + + float l = 1e+30f, h = -1e+30f; + vec3F low_color_q16, high_color_q16; + + for (uint32_t i = 0; i < num_pixels; i++) + { + color_box_q16.expand(block_pixels_q16[i]); + + vec3F k(vec3F(block_pixels_q16[i]) - block_mean_color_q16); + float kd = k.dot(block_axis_q16); + + if (kd < l) + { + l = kd; + low_color_q16 = block_pixels_q16[i]; + } + + if (kd > h) + { + h = kd; + high_color_q16 = block_pixels_q16[i]; + } + } + + vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16); + for (uint32_t i = 0; i < 3; i++) + { + low_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f); + high_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f); + } + + uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS]; + uint8_t trial_blk_weights[16]; + uint32_t trial_best_submode = 0; + + clear_obj(trial_blk_endpoints); + clear_obj(trial_blk_weights); + + double trial_blk_error = 1e+30f; + + bool did_improve = try_mode11(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode, + low_color_q16, high_color_q16, + block_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight8_selectors, + first_submode, last_submode); + + // If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do. + if (!did_improve) + return cur_block_error; + + // Did the solution improve? + if (trial_blk_error < cur_block_error) + { + cur_block_error = trial_blk_error; + memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS); + memcpy(blk_weights, trial_blk_weights, num_pixels); + best_submode = trial_best_submode; + } + +#define USE_LEAST_SQUARES (1) +#if USE_LEAST_SQUARES + // least squares on the most promising trial weight indices found + const uint32_t NUM_LS_PASSES = 3; + + for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++) + { + vec3F l_q16, h_q16; + if (!compute_least_squares_endpoints_rgb(num_pixels, trial_blk_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, block_pixels_q16, color_box_q16)) + break; + + bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + l_q16, h_q16, + block_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight8_selectors, + first_submode, last_submode); + + if (!was_improved) + break; + + // It's improved, so let's take the new weight indices. + memcpy(trial_blk_weights, blk_weights, num_pixels); + + } // pass +#endif + + if (uber_mode) + { + // Try varying the current best weight indices. This can be expanded/improved, but at potentially great cost. + + uint8_t temp_astc_weights[16]; + memcpy(temp_astc_weights, trial_blk_weights, num_pixels); + + uint32_t min_lin_sel = 256, max_lin_sel = 0; + for (uint32_t i = 0; i < num_pixels; i++) + { + const uint32_t astc_sel = temp_astc_weights[i]; + + const uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel]; + assert(lin_sel < num_weight_levels); + + min_lin_sel = minimumu(min_lin_sel, lin_sel); + max_lin_sel = maximumu(max_lin_sel, lin_sel); + } + + bool was_improved = false; + (void)was_improved; + + { + bool weights_changed = false; + uint8_t trial_weights[16]; + for (uint32_t i = 0; i < num_pixels; i++) + { + uint32_t astc_sel = temp_astc_weights[i]; + uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel]; + + if ((lin_sel == min_lin_sel) && (lin_sel < (num_weight_levels - 1))) + { + lin_sel++; + weights_changed = true; + } + + trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel]; + } + + if (weights_changed) + { + vec3F l_q16, h_q16; + if (compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, block_pixels_q16, color_box_q16)) + { + if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + l_q16, h_q16, + block_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight8_selectors, + first_submode, last_submode)) + { + was_improved = true; + } + } + } + } + + { + bool weights_changed = false; + uint8_t trial_weights[16]; + for (uint32_t i = 0; i < num_pixels; i++) + { + uint32_t astc_sel = temp_astc_weights[i]; + uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel]; + + if ((lin_sel == max_lin_sel) && (lin_sel > 0)) + { + lin_sel--; + weights_changed = true; + } + + trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel]; + } + + if (weights_changed) + { + vec3F l_q16, h_q16; + if (compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, block_pixels_q16, color_box_q16)) + { + if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + l_q16, h_q16, + block_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight8_selectors, + first_submode, last_submode)) + { + was_improved = true; + } + } + } + } + + { + bool weights_changed = false; + uint8_t trial_weights[16]; + for (uint32_t i = 0; i < num_pixels; i++) + { + uint32_t astc_sel = temp_astc_weights[i]; + uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel]; + + if ((lin_sel == max_lin_sel) && (lin_sel > 0)) + { + lin_sel--; + weights_changed = true; + } + else if ((lin_sel == min_lin_sel) && (lin_sel < (num_weight_levels - 1))) + { + lin_sel++; + weights_changed = true; + } + + trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel]; + } + + if (weights_changed) + { + vec3F l_q16, h_q16; + if (compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, block_pixels_q16, color_box_q16)) + { + if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + l_q16, h_q16, + block_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight8_selectors, + first_submode, last_submode)) + { + was_improved = true; + } + } + } + } + } // uber_mode + + return cur_block_error; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static double encode_astc_hdr_block_mode_7( + uint32_t num_pixels, const vec4F* pBlock_pixels, + uint32_t ise_weight_range, + uint32_t& best_submode, + double cur_block_error, + uint8_t* blk_endpoints, //[4] + uint8_t* blk_weights, // [num_pixels] + const astc_hdr_codec_options& coptions, + uint32_t ise_endpoint_range) +{ + assert((num_pixels >= 1) && (num_pixels <= 16)); + assert((ise_weight_range >= 1) && (ise_weight_range <= 10)); + assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range); + + const uint32_t MAX_WEIGHT_LEVELS = 24; + assert(num_weight_levels <= MAX_WEIGHT_LEVELS); + BASISU_NOTE_UNUSED(MAX_WEIGHT_LEVELS); + + best_submode = 0; + + half_float block_pixels_half[16][3]; + + vec4F block_pixels_q16[16]; + for (uint32_t i = 0; i < num_pixels; i++) + { + block_pixels_half[i][0] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][0]); + block_pixels_q16[i][0] = (float)half_to_qlog16(block_pixels_half[i][0]); + + block_pixels_half[i][1] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][1]); + block_pixels_q16[i][1] = (float)half_to_qlog16(block_pixels_half[i][1]); + + block_pixels_half[i][2] = float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][2]); + block_pixels_q16[i][2] = (float)half_to_qlog16(block_pixels_half[i][2]); + + block_pixels_q16[i][3] = 0.0f; + } + + vec3F block_mean_color_q16(calc_mean(num_pixels, block_pixels_q16)); + + vec3F block_axis_q16(0.577350259f); + + aabb3F color_box_q16(cInitExpand); + + float l = 1e+30f, h = -1e+30f; + for (uint32_t i = 0; i < num_pixels; i++) + { + color_box_q16.expand(block_pixels_q16[i]); + + vec3F k(vec3F(block_pixels_q16[i]) - block_mean_color_q16); + float kd = k.dot(block_axis_q16); + + l = basisu::minimum<float>(l, kd); + h = basisu::maximum<float>(h, kd); + } + + vec3F low_color_q16(interp_color(block_mean_color_q16, block_axis_q16, l, color_box_q16, color_box_q16)); + vec3F high_color_q16(interp_color(block_mean_color_q16, block_axis_q16, h, color_box_q16, color_box_q16)); + + low_color_q16.clamp(0.0f, MAX_QLOG16_VAL); + high_color_q16.clamp(0.0f, MAX_QLOG16_VAL); + + vec3F diff(high_color_q16 - low_color_q16); + float s_q16 = diff.dot(block_axis_q16) * block_axis_q16[0]; + + uint8_t trial_blk_endpoints[NUM_MODE7_ENDPOINTS]; + uint8_t trial_blk_weights[16]; + uint32_t trial_best_submode = 0; + + clear_obj(trial_blk_endpoints); + clear_obj(trial_blk_weights); + + double trial_blk_error = 1e+30f; + + bool did_improve = try_mode7(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode, + high_color_q16, ceilf(s_q16), + block_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range); + + // If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do. + if (!did_improve) + { + return cur_block_error; + } + + // Did the solution improve? + if (trial_blk_error < cur_block_error) + { + cur_block_error = trial_blk_error; + memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE7_ENDPOINTS); + memcpy(blk_weights, trial_blk_weights, num_pixels); + best_submode = trial_best_submode; + } + + const float one_over_num_pixels = 1.0f / (float)num_pixels; + + const uint32_t NUM_TRIALS = 2; + for (uint32_t trial = 0; trial < NUM_TRIALS; trial++) + { + // Given a set of selectors and S, try to compute a better high color + vec3F new_high_color_q16(block_mean_color_q16); + + int e[2][3]; + int cur_s = 0; + if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, &cur_s, ise_endpoint_range)) + break; + + cur_s <<= 4; + + for (uint32_t i = 0; i < num_pixels; i++) + { + uint32_t astc_sel = trial_blk_weights[i]; + float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f); + + float k = (float)cur_s * (1.0f - lerp) * one_over_num_pixels; + new_high_color_q16[0] += k; + new_high_color_q16[1] += k; + new_high_color_q16[2] += k; + } + + bool improved = try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + new_high_color_q16, (float)cur_s, + block_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range); + + if (improved) + { + memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS); + memcpy(trial_blk_weights, blk_weights, num_pixels); + } + + // Given a set of selectors and a high color, try to compute a better S. + float t = 0.0f; + + for (uint32_t i = 0; i < num_pixels; i++) + { + uint32_t astc_sel = trial_blk_weights[i]; + float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f); + + t += (1.0f) - lerp; + } + + t *= one_over_num_pixels; + + //int e[2][3]; + if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, nullptr, ise_endpoint_range)) + break; + + vec3F cur_h_q16((float)(e[1][0] << 4), (float)(e[1][1] << 4), (float)(e[1][2] << 4)); + + if (fabs(t) > .0000125f) + { + float s_r = (cur_h_q16[0] - block_mean_color_q16[0]) / t; + float s_g = (cur_h_q16[1] - block_mean_color_q16[1]) / t; + float s_b = (cur_h_q16[2] - block_mean_color_q16[2]) / t; + + // TODO: gather statistics on these + if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + cur_h_q16, ceilf(s_r), + block_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range)) + { + improved = true; + } + + if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + cur_h_q16, ceilf(s_g), + block_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range)) + { + improved = true; + } + + if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + cur_h_q16, ceilf(s_b), + block_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range)) + { + improved = true; + } + + if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + cur_h_q16, ceilf((s_r + s_g + s_b) / 3.0f), + block_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range)) + { + improved = true; + } + } + + if (!improved) + break; + + memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS); + memcpy(trial_blk_weights, blk_weights, num_pixels); + + } // trial + + return cur_block_error; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static bool pack_solid(const vec4F* pBlock_linear_colors, basisu::vector<astc_hdr_pack_results>& all_results, const astc_hdr_codec_options& coptions) +{ + float r = 0.0f, g = 0.0f, b = 0.0f; + + const float LOG_BIAS = .125f; + + bool solid_block = true; + for (uint32_t i = 0; i < 16; i++) + { + if ((pBlock_linear_colors[0][0] != pBlock_linear_colors[i][0]) || + (pBlock_linear_colors[0][1] != pBlock_linear_colors[i][1]) || + (pBlock_linear_colors[0][2] != pBlock_linear_colors[i][2])) + { + solid_block = false; + } + + r += log2f(pBlock_linear_colors[i][0] + LOG_BIAS); + g += log2f(pBlock_linear_colors[i][1] + LOG_BIAS); + b += log2f(pBlock_linear_colors[i][2] + LOG_BIAS); + } + + if (solid_block) + { + r = pBlock_linear_colors[0][0]; + g = pBlock_linear_colors[0][1]; + b = pBlock_linear_colors[0][2]; + } + else + { + r = maximum<float>(0.0f, powf(2.0f, r * (1.0f / 16.0f)) - LOG_BIAS); + g = maximum<float>(0.0f, powf(2.0f, g * (1.0f / 16.0f)) - LOG_BIAS); + b = maximum<float>(0.0f, powf(2.0f, b * (1.0f / 16.0f)) - LOG_BIAS); + + // for safety + r = minimum<float>(r, MAX_HALF_FLOAT); + g = minimum<float>(g, MAX_HALF_FLOAT); + b = minimum<float>(b, MAX_HALF_FLOAT); + } + + half_float rh = float_to_half_non_neg_no_nan_inf(r), gh = float_to_half_non_neg_no_nan_inf(g), bh = float_to_half_non_neg_no_nan_inf(b), ah = float_to_half_non_neg_no_nan_inf(1.0f); + + astc_hdr_pack_results results; + results.clear(); + + uint8_t* packed_blk = (uint8_t*)&results.m_solid_blk; + results.m_is_solid = true; + + packed_blk[0] = 0b11111100; + packed_blk[1] = 255; + packed_blk[2] = 255; + packed_blk[3] = 255; + packed_blk[4] = 255; + packed_blk[5] = 255; + packed_blk[6] = 255; + packed_blk[7] = 255; + + packed_blk[8] = (uint8_t)rh; + packed_blk[9] = (uint8_t)(rh >> 8); + packed_blk[10] = (uint8_t)gh; + packed_blk[11] = (uint8_t)(gh >> 8); + packed_blk[12] = (uint8_t)bh; + packed_blk[13] = (uint8_t)(bh >> 8); + packed_blk[14] = (uint8_t)ah; + packed_blk[15] = (uint8_t)(ah >> 8); + + results.m_best_block_error = 0; + + if (!solid_block) + { + const float R_WEIGHT = coptions.m_r_err_scale; + const float G_WEIGHT = coptions.m_g_err_scale; + + // This MUST match how errors are computed in eval_selectors(). + for (uint32_t i = 0; i < 16; i++) + { + half_float dr = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][0]), dg = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][1]), db = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][2]); + double rd = q(rh) - q(dr); + double gd = q(gh) - q(dg); + double bd = q(bh) - q(db); + + double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd; + + results.m_best_block_error += e; + } + } + + const half_float hc[3] = { rh, gh, bh }; + + bc6h_enc_block_solid_color(&results.m_bc6h_block, hc); + + all_results.push_back(results); + + return solid_block; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static void pack_mode11( + const vec4F* pBlock_linear_colors, + basisu::vector<astc_hdr_pack_results>& all_results, + const astc_hdr_codec_options& coptions, + uint32_t first_weight_ise_range, uint32_t last_weight_ise_range, bool constrain_ise_weight8_selectors) +{ + uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS], trial_weights[16]; + uint32_t trial_submode11 = 0; + + clear_obj(trial_endpoints); + clear_obj(trial_weights); + + for (uint32_t weight_ise_range = first_weight_ise_range; weight_ise_range <= last_weight_ise_range; weight_ise_range++) + { + const bool direct_only = coptions.m_mode11_direct_only; + + uint32_t endpoint_ise_range = astc_helpers::BISE_256_LEVELS; + if (weight_ise_range == astc_helpers::BISE_16_LEVELS) + endpoint_ise_range = astc_helpers::BISE_192_LEVELS; + else + { + assert(weight_ise_range < astc_helpers::BISE_16_LEVELS); + } + + double trial_error = encode_astc_hdr_block_mode_11(16, pBlock_linear_colors, weight_ise_range, trial_submode11, 1e+30f, trial_endpoints, trial_weights, coptions, direct_only, + endpoint_ise_range, coptions.m_mode11_uber_mode && (weight_ise_range >= astc_helpers::BISE_4_LEVELS) && coptions.m_allow_uber_mode, constrain_ise_weight8_selectors, coptions.m_first_mode11_submode, coptions.m_last_mode11_submode); + + if (trial_error < 1e+30f) + { + astc_hdr_pack_results results; + results.clear(); + + results.m_best_block_error = trial_error; + + results.m_best_submodes[0] = trial_submode11; + results.m_constrained_weights = constrain_ise_weight8_selectors; + + results.m_best_blk.m_num_partitions = 1; + results.m_best_blk.m_color_endpoint_modes[0] = 11; + results.m_best_blk.m_weight_ise_range = weight_ise_range; + results.m_best_blk.m_endpoint_ise_range = endpoint_ise_range; + + memcpy(results.m_best_blk.m_endpoints, trial_endpoints, NUM_MODE11_ENDPOINTS); + memcpy(results.m_best_blk.m_weights, trial_weights, 16); + +#ifdef _DEBUG + { + half_float block_pixels_half[16][3]; + + vec4F block_pixels_q16[16]; + for (uint32_t i = 0; i < 16; i++) + { + block_pixels_half[i][0] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][0]); + block_pixels_half[i][1] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][1]); + block_pixels_half[i][2] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][2]); + } + + half_float unpacked_astc_blk_rgba[4][4][4]; + bool res = astc_helpers::decode_block(results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16); + assert(res); + + half_float unpacked_astc_blk_rgb[4][4][3]; + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + for (uint32_t c = 0; c < 3; c++) + unpacked_astc_blk_rgb[y][x][c] = unpacked_astc_blk_rgba[y][x][c]; + + double cmp_err = compute_block_error(&block_pixels_half[0][0], &unpacked_astc_blk_rgb[0][0][0], coptions); + assert(results.m_best_block_error == cmp_err); + } +#endif + + // transcode to BC6H + assert(results.m_best_blk.m_color_endpoint_modes[0] == 11); + + // Get qlog12 endpoints + int e[2][3]; + bool success = decode_mode11_to_qlog12(results.m_best_blk.m_endpoints, e, results.m_best_blk.m_endpoint_ise_range); + assert(success); + BASISU_NOTE_UNUSED(success); + + // Transform endpoints to half float + half_float h_e[3][2] = + { + { qlog_to_half(e[0][0], 12), qlog_to_half(e[1][0], 12) }, + { qlog_to_half(e[0][1], 12), qlog_to_half(e[1][1], 12) }, + { qlog_to_half(e[0][2], 12), qlog_to_half(e[1][2], 12) } + }; + + // Transcode to bc6h + success = transcode_bc6h_1subset(h_e, results.m_best_blk, results.m_bc6h_block); + assert(success); + + all_results.push_back(results); + } + } +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static void pack_mode7_single_part(const vec4F* pBlock_linear_colors, basisu::vector<astc_hdr_pack_results>& all_results, const astc_hdr_codec_options& coptions) +{ + uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS], trial_weights[16]; + uint32_t trial_submode7 = 0; + + clear_obj(trial_endpoints); + clear_obj(trial_weights); + + for (uint32_t weight_ise_range = coptions.m_first_mode7_part1_weight_ise_range; weight_ise_range <= coptions.m_last_mode7_part1_weight_ise_range; weight_ise_range++) + { + const uint32_t ise_endpoint_range = astc_helpers::BISE_256_LEVELS; + + double trial_error = encode_astc_hdr_block_mode_7(16, pBlock_linear_colors, weight_ise_range, trial_submode7, 1e+30f, trial_endpoints, trial_weights, coptions, ise_endpoint_range); + + if (trial_error < 1e+30f) + { + astc_hdr_pack_results results; + results.clear(); + + results.m_best_block_error = trial_error; + + results.m_best_submodes[0] = trial_submode7; + + results.m_best_blk.m_num_partitions = 1; + results.m_best_blk.m_color_endpoint_modes[0] = 7; + results.m_best_blk.m_weight_ise_range = weight_ise_range; + results.m_best_blk.m_endpoint_ise_range = ise_endpoint_range; + + memcpy(results.m_best_blk.m_endpoints, trial_endpoints, NUM_MODE7_ENDPOINTS); + memcpy(results.m_best_blk.m_weights, trial_weights, 16); + + // transcode to BC6H + assert(results.m_best_blk.m_color_endpoint_modes[0] == 7); + + // Get qlog12 endpoints + int e[2][3]; + if (!decode_mode7_to_qlog12(results.m_best_blk.m_endpoints, e, nullptr, results.m_best_blk.m_endpoint_ise_range)) + continue; + + // Transform endpoints to half float + half_float h_e[3][2] = + { + { qlog_to_half(e[0][0], 12), qlog_to_half(e[1][0], 12) }, + { qlog_to_half(e[0][1], 12), qlog_to_half(e[1][1], 12) }, + { qlog_to_half(e[0][2], 12), qlog_to_half(e[1][2], 12) } + }; + + // Transcode to bc6h + bool status = transcode_bc6h_1subset(h_e, results.m_best_blk, results.m_bc6h_block); + assert(status); + (void)status; + + all_results.push_back(results); + } + } +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static bool estimate_partition2(const vec4F* pBlock_pixels, int* pBest_parts, uint32_t num_best_parts) +{ + assert(num_best_parts <= basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); + + vec3F training_vecs[16], mean(0.0f); + + for (uint32_t i = 0; i < 16; i++) + { + vec3F& v = training_vecs[i]; + + v[0] = (float)float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][0]); + v[1] = (float)float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][1]); + v[2] = (float)float_to_half_non_neg_no_nan_inf(pBlock_pixels[i][2]); + + mean += v; + } + mean *= (1.0f / 16.0f); + + vec3F cluster_centroids[2] = { mean - vec3F(.1f), mean + vec3F(.1f) }; + + uint32_t cluster_pixels[2][16]; + uint32_t num_cluster_pixels[2]; + vec3F new_cluster_means[2]; + + for (uint32_t s = 0; s < 4; s++) + { + num_cluster_pixels[0] = 0; + num_cluster_pixels[1] = 0; + + new_cluster_means[0].clear(); + new_cluster_means[1].clear(); + + for (uint32_t i = 0; i < 16; i++) + { + float d0 = training_vecs[i].squared_distance(cluster_centroids[0]); + float d1 = training_vecs[i].squared_distance(cluster_centroids[1]); + + if (d0 < d1) + { + cluster_pixels[0][num_cluster_pixels[0]] = i; + new_cluster_means[0] += training_vecs[i]; + num_cluster_pixels[0]++; + } + else + { + cluster_pixels[1][num_cluster_pixels[1]] = i; + new_cluster_means[1] += training_vecs[i]; + num_cluster_pixels[1]++; + } + } + + if (!num_cluster_pixels[0] || !num_cluster_pixels[1]) + return false; + + cluster_centroids[0] = new_cluster_means[0] / (float)num_cluster_pixels[0]; + cluster_centroids[1] = new_cluster_means[1] / (float)num_cluster_pixels[1]; + } + + int desired_parts[4][4]; // [y][x] + for (uint32_t p = 0; p < 2; p++) + { + for (uint32_t i = 0; i < num_cluster_pixels[p]; i++) + { + const uint32_t pix_index = cluster_pixels[p][i]; + + desired_parts[pix_index >> 2][pix_index & 3] = p; + } + } + + uint32_t part_similarity[basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2]; + + for (uint32_t part_index = 0; part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; part_index++) + { + const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7; + + int total_sim_non_inv = 0; + int total_sim_inv = 0; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + int part = basist::g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + + if (part == desired_parts[y][x]) + total_sim_non_inv++; + + if ((part ^ 1) == desired_parts[y][x]) + total_sim_inv++; + } + } + + int total_sim = maximum(total_sim_non_inv, total_sim_inv); + + part_similarity[part_index] = (total_sim << 8) | part_index; + + } // part_index; + + std::sort(part_similarity, part_similarity + basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); + + for (uint32_t i = 0; i < num_best_parts; i++) + pBest_parts[i] = part_similarity[(basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 - 1) - i] & 0xFF; + + return true; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static void pack_mode7_2part(const vec4F* pBlock_linear_colors, basisu::vector<astc_hdr_pack_results>& all_results, const astc_hdr_codec_options& coptions, + int num_estimated_partitions, const int *pEstimated_partitions, + uint32_t first_weight_ise_range, uint32_t last_weight_ise_range) +{ + assert(coptions.m_mode7_part2_part_masks); + + astc_helpers::log_astc_block trial_blk; + clear_obj(trial_blk); + trial_blk.m_grid_width = 4; + trial_blk.m_grid_height = 4; + + trial_blk.m_num_partitions = 2; + trial_blk.m_color_endpoint_modes[0] = 7; + trial_blk.m_color_endpoint_modes[1] = 7; + + uint32_t first_part_index = 0, last_part_index = basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; + + if (num_estimated_partitions) + { + first_part_index = 0; + last_part_index = num_estimated_partitions; + } + + for (uint32_t part_index_iter = first_part_index; part_index_iter < last_part_index; ++part_index_iter) + { + uint32_t part_index; + if (num_estimated_partitions) + { + part_index = pEstimated_partitions[part_index_iter]; + assert(part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); + } + else + { + part_index = part_index_iter; + if (((1U << part_index) & coptions.m_mode7_part2_part_masks) == 0) + continue; + } + + const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc; + const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7; + const bool invert_flag = basist::g_astc_bc7_common_partitions2[part_index].m_invert; + + vec4F part_pixels[2][16]; + uint32_t pixel_part_index[4][4]; // [y][x] + uint32_t num_part_pixels[2] = { 0, 0 }; + + // Extract each subset's texels for this partition pattern + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t part = basist::g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + if (invert_flag) + part = 1 - part; + + pixel_part_index[y][x] = part; + part_pixels[part][num_part_pixels[part]] = pBlock_linear_colors[x + y * 4]; + + num_part_pixels[part]++; + } + } + + trial_blk.m_partition_id = astc_pattern; + + for (uint32_t weight_ise_range = first_weight_ise_range; weight_ise_range <= last_weight_ise_range; weight_ise_range++) + { + assert(weight_ise_range <= astc_helpers::BISE_8_LEVELS); + + uint32_t ise_endpoint_range = astc_helpers::BISE_256_LEVELS; + if (weight_ise_range == astc_helpers::BISE_5_LEVELS) + ise_endpoint_range = astc_helpers::BISE_192_LEVELS; + else if (weight_ise_range == astc_helpers::BISE_6_LEVELS) + ise_endpoint_range = astc_helpers::BISE_128_LEVELS; + else if (weight_ise_range == astc_helpers::BISE_8_LEVELS) + ise_endpoint_range = astc_helpers::BISE_80_LEVELS; + + uint8_t trial_endpoints[2][NUM_MODE7_ENDPOINTS], trial_weights[2][16]; + uint32_t trial_submode7[2]; + + clear_obj(trial_endpoints); + clear_obj(trial_weights); + clear_obj(trial_submode7); + + double total_trial_err = 0; + for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++) + { + total_trial_err += encode_astc_hdr_block_mode_7( + num_part_pixels[pack_part_index], &part_pixels[pack_part_index][0], + weight_ise_range, trial_submode7[pack_part_index], 1e+30f, + &trial_endpoints[pack_part_index][0], &trial_weights[pack_part_index][0], coptions, ise_endpoint_range); + + } // pack_part_index + + if (total_trial_err < 1e+30f) + { + trial_blk.m_weight_ise_range = weight_ise_range; + trial_blk.m_endpoint_ise_range = ise_endpoint_range; + + for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++) + memcpy(&trial_blk.m_endpoints[pack_part_index * NUM_MODE7_ENDPOINTS], &trial_endpoints[pack_part_index][0], NUM_MODE7_ENDPOINTS); + + uint32_t src_pixel_index[2] = { 0, 0 }; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t p = pixel_part_index[y][x]; + trial_blk.m_weights[x + y * 4] = trial_weights[p][src_pixel_index[p]++]; + } + } + + astc_hdr_pack_results results; + results.clear(); + + results.m_best_block_error = total_trial_err; + results.m_best_submodes[0] = trial_submode7[0]; + results.m_best_submodes[1] = trial_submode7[1]; + results.m_best_pat_index = part_index; + + results.m_best_blk = trial_blk; + + bool status = transcode_bc6h_2subsets(part_index, results.m_best_blk, results.m_bc6h_block); + assert(status); + BASISU_NOTE_UNUSED(status); + + all_results.push_back(results); + } + + } // weight_ise_range + + } // part_index +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static void pack_mode11_2part(const vec4F* pBlock_linear_colors, basisu::vector<astc_hdr_pack_results>& all_results, const astc_hdr_codec_options& coptions, + int num_estimated_partitions, const int* pEstimated_partitions) +{ + assert(coptions.m_mode11_part2_part_masks); + + astc_helpers::log_astc_block trial_blk; + clear_obj(trial_blk); + trial_blk.m_grid_width = 4; + trial_blk.m_grid_height = 4; + + trial_blk.m_num_partitions = 2; + trial_blk.m_color_endpoint_modes[0] = 11; + trial_blk.m_color_endpoint_modes[1] = 11; + + uint32_t first_part_index = 0, last_part_index = basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; + + if (num_estimated_partitions) + { + first_part_index = 0; + last_part_index = num_estimated_partitions; + } + + for (uint32_t part_index_iter = first_part_index; part_index_iter < last_part_index; ++part_index_iter) + { + uint32_t part_index; + if (num_estimated_partitions) + { + part_index = pEstimated_partitions[part_index_iter]; + assert(part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); + } + else + { + part_index = part_index_iter; + if (((1U << part_index) & coptions.m_mode11_part2_part_masks) == 0) + continue; + } + + const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc; + const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7; + const bool invert_flag = basist::g_astc_bc7_common_partitions2[part_index].m_invert; + + vec4F part_pixels[2][16]; + uint32_t pixel_part_index[4][4]; // [y][x] + uint32_t num_part_pixels[2] = { 0, 0 }; + + // Extract each subset's texels for this partition pattern + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t part = basist::g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + if (invert_flag) + part = 1 - part; + + pixel_part_index[y][x] = part; + part_pixels[part][num_part_pixels[part]] = pBlock_linear_colors[x + y * 4]; + + num_part_pixels[part]++; + } + } + + trial_blk.m_partition_id = astc_pattern; + + for (uint32_t weight_ise_range = coptions.m_first_mode11_part2_weight_ise_range; weight_ise_range <= coptions.m_last_mode11_part2_weight_ise_range; weight_ise_range++) + { + bool direct_only = false; + uint32_t ise_endpoint_range = astc_helpers::BISE_64_LEVELS; + if (weight_ise_range == astc_helpers::BISE_4_LEVELS) + ise_endpoint_range = astc_helpers::BISE_40_LEVELS; + + uint8_t trial_endpoints[2][NUM_MODE11_ENDPOINTS], trial_weights[2][16]; + uint32_t trial_submode11[2]; + + clear_obj(trial_endpoints); + clear_obj(trial_weights); + clear_obj(trial_submode11); + + double total_trial_err = 0; + for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++) + { + total_trial_err += encode_astc_hdr_block_mode_11( + num_part_pixels[pack_part_index], &part_pixels[pack_part_index][0], + weight_ise_range, trial_submode11[pack_part_index], 1e+30f, + &trial_endpoints[pack_part_index][0], &trial_weights[pack_part_index][0], coptions, + direct_only, ise_endpoint_range, coptions.m_mode11_uber_mode && (weight_ise_range >= astc_helpers::BISE_4_LEVELS) && coptions.m_allow_uber_mode, false, + coptions.m_first_mode11_submode, coptions.m_last_mode11_submode); + + } // pack_part_index + + if (total_trial_err < 1e+30f) + { + trial_blk.m_weight_ise_range = weight_ise_range; + trial_blk.m_endpoint_ise_range = ise_endpoint_range; + + for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++) + memcpy(&trial_blk.m_endpoints[pack_part_index * NUM_MODE11_ENDPOINTS], &trial_endpoints[pack_part_index][0], NUM_MODE11_ENDPOINTS); + + uint32_t src_pixel_index[2] = { 0, 0 }; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t p = pixel_part_index[y][x]; + trial_blk.m_weights[x + y * 4] = trial_weights[p][src_pixel_index[p]++]; + } + } + + astc_hdr_pack_results results; + results.clear(); + + results.m_best_block_error = total_trial_err; + results.m_best_submodes[0] = trial_submode11[0]; + results.m_best_submodes[1] = trial_submode11[1]; + results.m_best_pat_index = part_index; + + results.m_best_blk = trial_blk; + + bool status = transcode_bc6h_2subsets(part_index, results.m_best_blk, results.m_bc6h_block); + assert(status); + BASISU_NOTE_UNUSED(status); + + all_results.push_back(results); + } + + } // weight_ise_range + + } // part_index +} + +//-------------------------------------------------------------------------------------------------------------------------- + +bool g_astc_hdr_enc_initialized; + +void astc_hdr_enc_init() +{ + if (g_astc_hdr_enc_initialized) + return; + + astc_hdr_core_init(); + + astc_helpers::init_tables(true); + + init_qlog_tables(); + + encode_astc_hdr_init(); + + g_astc_hdr_enc_initialized = true; +} + +bool astc_hdr_enc_block( + const float* pRGBPixels, + const astc_hdr_codec_options& coptions, + basisu::vector<astc_hdr_pack_results>& all_results) +{ + assert(g_astc_hdr_enc_initialized); + if (!g_astc_hdr_enc_initialized) + { + // astc_hdr_enc_init() MUST be called first. + assert(0); + return false; + } + + all_results.resize(0); + + vec4F block_linear_colors[16]; + + // Sanity check the input block. + for (uint32_t i = 0; i < 16; i++) + { + for (uint32_t j = 0; j < 3; j++) + { + float v = pRGBPixels[i * 3 + j]; + + if (std::isinf(v) || std::isnan(v)) + { + // Input pixels cannot be NaN or +-Inf. + assert(0); + return false; + } + + if (v < 0.0f) + { + // Input pixels cannot be signed. + assert(0); + return false; + } + + if (v > MAX_HALF_FLOAT) + { + // Too large for half float. + assert(0); + return false; + } + + block_linear_colors[i][j] = v; + } + + block_linear_colors[i][3] = 1.0f; + } + + assert(coptions.m_use_solid || coptions.m_use_mode11 || coptions.m_use_mode7_part2 || coptions.m_use_mode7_part1 || coptions.m_use_mode11_part2); + + bool is_solid = false; + if (coptions.m_use_solid) + is_solid = pack_solid(block_linear_colors, all_results, coptions); + + if (!is_solid) + { + if (coptions.m_use_mode11) + { + const size_t cur_num_results = all_results.size(); + + pack_mode11(block_linear_colors, all_results, coptions, coptions.m_first_mode11_weight_ise_range, coptions.m_last_mode11_weight_ise_range, false); + + if (coptions.m_last_mode11_weight_ise_range == astc_helpers::BISE_16_LEVELS) + { + pack_mode11(block_linear_colors, all_results, coptions, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_16_LEVELS, true); + } + + // If we couldn't get any mode 11 results at all, and we were restricted to just trying weight ISE range 8 (which required endpoint quantization) then + // fall back to weight ISE range 7 (which doesn't need any endpoint quantization). + // This is to guarantee we always get at least 1 non-solid result. + if (all_results.size() == cur_num_results) + { + if (coptions.m_first_mode11_weight_ise_range == astc_helpers::BISE_16_LEVELS) + { + pack_mode11(block_linear_colors, all_results, coptions, astc_helpers::BISE_12_LEVELS, astc_helpers::BISE_12_LEVELS, false); + } + } + } + + if (coptions.m_use_mode7_part1) + { + // Mode 7 1-subset never requires endpoint quantization, so it cannot fail to find at least one usable solution. + pack_mode7_single_part(block_linear_colors, all_results, coptions); + } + + bool have_est = false; + int best_parts[basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2]; + + if ((coptions.m_use_mode7_part2) || (coptions.m_use_mode11_part2)) + { + if (coptions.m_use_estimated_partitions) + have_est = estimate_partition2(block_linear_colors, best_parts, coptions.m_max_estimated_partitions); + } + + if (coptions.m_use_mode7_part2) + { + const size_t cur_num_results = all_results.size(); + + pack_mode7_2part(block_linear_colors, all_results, coptions, have_est ? coptions.m_max_estimated_partitions : 0, best_parts, + coptions.m_first_mode7_part2_weight_ise_range, coptions.m_last_mode7_part2_weight_ise_range); + + // If we couldn't find any packable 2-subset mode 7 results at weight levels >= 5 levels (which always requires endpoint quant), then try falling back to + // 5 levels which doesn't require endpoint quantization. + if (all_results.size() == cur_num_results) + { + if (coptions.m_first_mode7_part2_weight_ise_range >= astc_helpers::BISE_5_LEVELS) + { + pack_mode7_2part(block_linear_colors, all_results, coptions, have_est ? coptions.m_max_estimated_partitions : 0, best_parts, + astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_4_LEVELS); + } + } + } + + if (coptions.m_use_mode11_part2) + { + // This always requires endpoint quant, so it could fail to find any usable solutions. + pack_mode11_2part(block_linear_colors, all_results, coptions, have_est ? coptions.m_max_estimated_partitions : 0, best_parts); + } + } + + if (coptions.m_refine_weights) + { + // TODO: Move this above, do it once only. + basist::half_float rgb_pixels_half[16 * 3]; + for (uint32_t i = 0; i < 16; i++) + { + rgb_pixels_half[i * 3 + 0] = float_to_half_non_neg_no_nan_inf(pRGBPixels[i * 3 + 0]); + rgb_pixels_half[i * 3 + 1] = float_to_half_non_neg_no_nan_inf(pRGBPixels[i * 3 + 1]); + rgb_pixels_half[i * 3 + 2] = float_to_half_non_neg_no_nan_inf(pRGBPixels[i * 3 + 2]); + } + + for (uint32_t i = 0; i < all_results.size(); i++) + { + bool status = astc_hdr_refine_weights(rgb_pixels_half, all_results[i], coptions, coptions.m_bc6h_err_weight, &all_results[i].m_improved_via_refinement_flag); + assert(status); + BASISU_NOTE_UNUSED(status); + } + } + + return true; +} + +bool astc_hdr_pack_results_to_block(astc_blk& dst_blk, const astc_hdr_pack_results& results) +{ + assert(g_astc_hdr_enc_initialized); + if (!g_astc_hdr_enc_initialized) + return false; + + if (results.m_is_solid) + { + memcpy(&dst_blk, &results.m_solid_blk, sizeof(results.m_solid_blk)); + } + else + { + bool status = astc_helpers::pack_astc_block((astc_helpers::astc_block&)dst_blk, results.m_best_blk); + if (!status) + { + assert(0); + return false; + } + } + + return true; +} + +// Refines a block's chosen weight indices, balancing BC6H and ASTC HDR error. +bool astc_hdr_refine_weights(const half_float *pSource_block, astc_hdr_pack_results& cur_results, const astc_hdr_codec_options& coptions, float bc6h_weight, bool *pImproved_flag) +{ + if (pImproved_flag) + *pImproved_flag = false; + + if (cur_results.m_is_solid) + return true; + + const uint32_t total_weights = astc_helpers::get_ise_levels(cur_results.m_best_blk.m_weight_ise_range); + + assert((total_weights >= 3) && (total_weights <= 16)); + + double best_err[4][4]; + uint8_t best_weight[4][4]; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + best_err[y][x] = 1e+30f; + best_weight[y][x] = 0; + } + } + + astc_hdr_pack_results temp_results; + + const float c_weights[3] = { coptions.m_r_err_scale, coptions.m_g_err_scale, 1.0f }; + + for (uint32_t weight_index = 0; weight_index < total_weights; weight_index++) + { + temp_results = cur_results; + for (uint32_t i = 0; i < 16; i++) + temp_results.m_best_blk.m_weights[i] = (uint8_t)weight_index; + + half_float unpacked_astc_blk_rgba[4][4][4]; + bool res = astc_helpers::decode_block(temp_results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16); + assert(res); + + basist::bc6h_block trial_bc6h_blk; + res = basist::astc_hdr_transcode_to_bc6h(temp_results.m_best_blk, trial_bc6h_blk); + assert(res); + + half_float unpacked_bc6h_blk[4][4][3]; + res = unpack_bc6h(&trial_bc6h_blk, unpacked_bc6h_blk, false); + assert(res); + BASISU_NOTE_UNUSED(res); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + double total_err = 0.0f; + + for (uint32_t c = 0; c < 3; c++) + { + const half_float orig_c = pSource_block[(x + y * 4) * 3 + c]; + const double orig_c_q = q(orig_c); + + const half_float astc_c = unpacked_astc_blk_rgba[y][x][c]; + const double astc_c_q = q(astc_c); + const double astc_e = square(astc_c_q - orig_c_q) * c_weights[c]; + + const half_float bc6h_c = unpacked_bc6h_blk[y][x][c]; + const double bc6h_c_q = q(bc6h_c); + const double bc6h_e = square(bc6h_c_q - orig_c_q) * c_weights[c]; + + const double overall_err = astc_e * (1.0f - bc6h_weight) + bc6h_e * bc6h_weight; + + total_err += overall_err; + + } // c + + if (total_err < best_err[y][x]) + { + best_err[y][x] = total_err; + best_weight[y][x] = (uint8_t)weight_index; + } + + } // x + } // y + + } // weight_index + + bool any_changed = false; + for (uint32_t i = 0; i < 16; i++) + { + if (cur_results.m_best_blk.m_weights[i] != best_weight[i >> 2][i & 3]) + { + any_changed = true; + break; + } + } + + if (any_changed) + { + memcpy(cur_results.m_best_blk.m_weights, best_weight, 16); + + { + bool res = basist::astc_hdr_transcode_to_bc6h(cur_results.m_best_blk, cur_results.m_bc6h_block); + assert(res); + BASISU_NOTE_UNUSED(res); + + half_float unpacked_astc_blk_rgba[4][4][4]; + res = astc_helpers::decode_block(cur_results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16); + assert(res); + + half_float unpacked_astc_blk_rgb[4][4][3]; + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + for (uint32_t c = 0; c < 3; c++) + unpacked_astc_blk_rgb[y][x][c] = unpacked_astc_blk_rgba[y][x][c]; + + cur_results.m_best_block_error = compute_block_error(pSource_block, &unpacked_astc_blk_rgb[0][0][0], coptions); + } + + if (pImproved_flag) + *pImproved_flag = true; + } + + return true; +} + +void astc_hdr_block_stats::update(const astc_hdr_pack_results& log_blk) +{ + std::lock_guard<std::mutex> lck(m_mutex); + + m_total_blocks++; + + if (log_blk.m_improved_via_refinement_flag) + m_total_refined++; + + if (log_blk.m_is_solid) + { + m_total_solid++; + } + else + { + int best_weight_range = log_blk.m_best_blk.m_weight_ise_range; + + if (log_blk.m_best_blk.m_color_endpoint_modes[0] == 7) + { + m_mode7_submode_hist[bounds_check(log_blk.m_best_submodes[0], 0U, 6U)]++; + + if (log_blk.m_best_blk.m_num_partitions == 2) + { + m_total_mode7_2part++; + + m_mode7_submode_hist[bounds_check(log_blk.m_best_submodes[1], 0U, 6U)]++; + m_total_2part++; + + m_weight_range_hist_7_2part[bounds_check(best_weight_range, 0, 11)]++; + + m_part_hist[bounds_check(log_blk.m_best_pat_index, 0U, 32U)]++; + } + else + { + m_total_mode7_1part++; + + m_weight_range_hist_7[bounds_check(best_weight_range, 0, 11)]++; + } + } + else + { + m_mode11_submode_hist[bounds_check(log_blk.m_best_submodes[0], 0U, 9U)]++; + if (log_blk.m_constrained_weights) + m_total_mode11_1part_constrained_weights++; + + if (log_blk.m_best_blk.m_num_partitions == 2) + { + m_total_mode11_2part++; + + m_mode11_submode_hist[bounds_check(log_blk.m_best_submodes[1], 0U, 9U)]++; + m_total_2part++; + + m_weight_range_hist_11_2part[bounds_check(best_weight_range, 0, 11)]++; + + m_part_hist[bounds_check(log_blk.m_best_pat_index, 0U, 32U)]++; + } + else + { + m_total_mode11_1part++; + + m_weight_range_hist_11[bounds_check(best_weight_range, 0, 11)]++; + } + } + } +} + +void astc_hdr_block_stats::print() +{ + std::lock_guard<std::mutex> lck(m_mutex); + + assert(m_total_blocks); + if (!m_total_blocks) + return; + + printf("\nLow-level ASTC Encoder Statistics:\n"); + printf("Total blocks: %u\n", m_total_blocks); + printf("Total solid: %u %3.2f%%\n", m_total_solid, (m_total_solid * 100.0f) / m_total_blocks); + printf("Total refined: %u %3.2f%%\n", m_total_refined, (m_total_refined * 100.0f) / m_total_blocks); + + printf("Total mode 11, 1 partition: %u %3.2f%%\n", m_total_mode11_1part, (m_total_mode11_1part * 100.0f) / m_total_blocks); + printf("Total mode 11, 1 partition, constrained weights: %u %3.2f%%\n", m_total_mode11_1part_constrained_weights, (m_total_mode11_1part_constrained_weights * 100.0f) / m_total_blocks); + printf("Total mode 11, 2 partition: %u %3.2f%%\n", m_total_mode11_2part, (m_total_mode11_2part * 100.0f) / m_total_blocks); + + printf("Total mode 7, 1 partition: %u %3.2f%%\n", m_total_mode7_1part, (m_total_mode7_1part * 100.0f) / m_total_blocks); + printf("Total mode 7, 2 partition: %u %3.2f%%\n", m_total_mode7_2part, (m_total_mode7_2part * 100.0f) / m_total_blocks); + + printf("Total 2 partitions: %u %3.2f%%\n", m_total_2part, (m_total_2part * 100.0f) / m_total_blocks); + printf("\n"); + + printf("ISE texel weight range histogram mode 11:\n"); + for (uint32_t i = 1; i <= MODE11_LAST_ISE_RANGE; i++) + printf("%u %u\n", i, m_weight_range_hist_11[i]); + printf("\n"); + + printf("ISE texel weight range histogram mode 11, 2 partition:\n"); + for (uint32_t i = 1; i <= MODE11_PART2_LAST_ISE_RANGE; i++) + printf("%u %u\n", i, m_weight_range_hist_11_2part[i]); + printf("\n"); + + printf("ISE texel weight range histogram mode 7:\n"); + for (uint32_t i = 1; i <= MODE7_PART1_LAST_ISE_RANGE; i++) + printf("%u %u\n", i, m_weight_range_hist_7[i]); + printf("\n"); + + printf("ISE texel weight range histogram mode 7, 2 partition:\n"); + for (uint32_t i = 1; i <= MODE7_PART2_LAST_ISE_RANGE; i++) + printf("%u %u\n", i, m_weight_range_hist_7_2part[i]); + printf("\n"); + + printf("Mode 11 submode histogram:\n"); + for (uint32_t i = 0; i <= MODE11_TOTAL_SUBMODES; i++) // +1 because of the extra direct encoding + printf("%u %u\n", i, m_mode11_submode_hist[i]); + printf("\n"); + + printf("Mode 7 submode histogram:\n"); + for (uint32_t i = 0; i < MODE7_TOTAL_SUBMODES; i++) + printf("%u %u\n", i, m_mode7_submode_hist[i]); + printf("\n"); + + printf("Partition pattern table usage histogram:\n"); + for (uint32_t i = 0; i < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2; i++) + printf("%u:%u ", i, m_part_hist[i]); + printf("\n\n"); +} + +} // namespace basisu + diff --git a/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.h b/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.h new file mode 100644 index 0000000000..ee122ff7ce --- /dev/null +++ b/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.h @@ -0,0 +1,224 @@ +// basisu_astc_hdr_enc.h +#pragma once +#include "basisu_enc.h" +#include "basisu_gpu_texture.h" +#include "../transcoder/basisu_astc_helpers.h" +#include "../transcoder/basisu_astc_hdr_core.h" + +namespace basisu +{ + // This MUST be called before encoding any blocks. + void astc_hdr_enc_init(); + + const uint32_t MODE11_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE11_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS; + const uint32_t MODE7_PART1_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE7_PART1_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS; + const uint32_t MODE7_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE7_PART2_LAST_ISE_RANGE = astc_helpers::BISE_8_LEVELS; + const uint32_t MODE11_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE11_PART2_LAST_ISE_RANGE = astc_helpers::BISE_4_LEVELS; + const uint32_t MODE11_TOTAL_SUBMODES = 8; // plus an extra hidden submode, directly encoded, for direct, so really 9 (see tables 99/100 of the ASTC spec) + const uint32_t MODE7_TOTAL_SUBMODES = 6; + + struct astc_hdr_codec_options + { + float m_bc6h_err_weight; + + bool m_use_solid; + + bool m_use_mode11; + bool m_mode11_uber_mode; + uint32_t m_first_mode11_weight_ise_range; + uint32_t m_last_mode11_weight_ise_range; + bool m_mode11_direct_only; + int32_t m_first_mode11_submode; + int32_t m_last_mode11_submode; + + bool m_use_mode7_part1; + uint32_t m_first_mode7_part1_weight_ise_range; + uint32_t m_last_mode7_part1_weight_ise_range; + + bool m_use_mode7_part2; + uint32_t m_mode7_part2_part_masks; + uint32_t m_first_mode7_part2_weight_ise_range; + uint32_t m_last_mode7_part2_weight_ise_range; + + bool m_use_mode11_part2; + uint32_t m_mode11_part2_part_masks; + uint32_t m_first_mode11_part2_weight_ise_range; + uint32_t m_last_mode11_part2_weight_ise_range; + + float m_r_err_scale, m_g_err_scale; + + bool m_refine_weights; + + uint32_t m_level; + + bool m_use_estimated_partitions; + uint32_t m_max_estimated_partitions; + + // If true, the ASTC HDR compressor is allowed to more aggressively vary weight indices for slightly higher compression in non-fastest mode. This will hurt BC6H quality, however. + bool m_allow_uber_mode; + + astc_hdr_codec_options(); + + void init(); + + // TODO: set_quality_level() is preferred to configure the codec for transcoding purposes. + static const int cMinLevel = 0; + static const int cMaxLevel = 4; + static const int cDefaultLevel = 1; + void set_quality_level(int level); + + private: + void set_quality_best(); + void set_quality_normal(); + void set_quality_fastest(); + }; + + struct astc_hdr_pack_results + { + double m_best_block_error; + double m_bc6h_block_error; // note this is not used/set by the encoder, here for convienance + + // Encoder results (logical ASTC block) + astc_helpers::log_astc_block m_best_blk; + + // For statistical use + uint32_t m_best_submodes[2]; + uint32_t m_best_pat_index; + bool m_constrained_weights; + + bool m_improved_via_refinement_flag; + + // Only valid if the block is solid + basist::astc_blk m_solid_blk; + + // The BC6H transcoded block + basist::bc6h_block m_bc6h_block; + + // Solid color/void extent flag + bool m_is_solid; + + void clear() + { + m_best_block_error = 1e+30f; + m_bc6h_block_error = 1e+30f; + + m_best_blk.clear(); + m_best_blk.m_grid_width = 4; + m_best_blk.m_grid_height = 4; + m_best_blk.m_endpoint_ise_range = 20; // 0-255 + + clear_obj(m_best_submodes); + + m_best_pat_index = 0; + m_constrained_weights = false; + + clear_obj(m_bc6h_block); + + m_is_solid = false; + m_improved_via_refinement_flag = false; + } + }; + + void interpolate_qlog12_colors( + const int e[2][3], + basist::half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range); + + bool get_astc_hdr_mode_11_block_colors( + const uint8_t* pEndpoints, + basist::half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range); + + bool get_astc_hdr_mode_7_block_colors( + const uint8_t* pEndpoints, + basist::half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range); + + double eval_selectors( + uint32_t num_pixels, + uint8_t* pWeights, + const basist::half_float* pBlock_pixels_half, + uint32_t num_weight_levels, + const basist::half_float* pDecoded_half, + const astc_hdr_codec_options& coptions, + uint32_t usable_selector_bitmask = UINT32_MAX); + + double compute_block_error(const basist::half_float* pOrig_block, const basist::half_float* pPacked_block, const astc_hdr_codec_options& coptions); + + // Encodes a 4x4 ASTC HDR block given a 4x4 array of source block pixels/texels. + // Supports solid color blocks, mode 11 (all submodes), mode 7/1 partition (all submodes), + // and mode 7/2 partitions (all submodes) - 30 patterns, only the ones also in common with the BC6H format. + // The packed ASTC weight grid dimensions are currently always 4x4 texels, but may be also 3x3 in the future. + // This function is thread safe, i.e. it may be called from multiple encoding threads simultanously with different blocks. + // + // Parameters: + // pRGBPixels - An array of 48 (16 RGB) floats: the 4x4 block to pack + // pPacked_block - A pointer to the packed ASTC HDR block + // coptions - Codec options + // pInternal_results - An optional pointer to details about how the block was packed, for statistics/debugging purposes. May be nullptr. + // + // Requirements: + // astc_hdr_enc_init() MUST have been called first to initialized the codec. + // Input pixels are checked and cannot be NaN's, Inf's, signed, or too large (greater than MAX_HALF_FLOAT, or 65504). + // Normal values and denormals are okay. + bool astc_hdr_enc_block( + const float* pRGBPixels, + const astc_hdr_codec_options& coptions, + basisu::vector<astc_hdr_pack_results> &all_results); + + bool astc_hdr_pack_results_to_block(basist::astc_blk& dst_blk, const astc_hdr_pack_results& results); + + bool astc_hdr_refine_weights(const basist::half_float* pSource_block, astc_hdr_pack_results& cur_results, const astc_hdr_codec_options& coptions, float bc6h_weight, bool* pImproved_flag); + + struct astc_hdr_block_stats + { + std::mutex m_mutex; + + uint32_t m_total_blocks; + uint32_t m_total_2part, m_total_solid; + uint32_t m_total_mode7_1part, m_total_mode7_2part; + uint32_t m_total_mode11_1part, m_total_mode11_2part; + uint32_t m_total_mode11_1part_constrained_weights; + + uint32_t m_weight_range_hist_7[11]; + uint32_t m_weight_range_hist_7_2part[11]; + uint32_t m_mode7_submode_hist[6]; + + uint32_t m_weight_range_hist_11[11]; + uint32_t m_weight_range_hist_11_2part[11]; + uint32_t m_mode11_submode_hist[9]; + + uint32_t m_part_hist[32]; + + uint32_t m_total_refined; + + astc_hdr_block_stats() { clear(); } + + void clear() + { + std::lock_guard<std::mutex> lck(m_mutex); + + m_total_blocks = 0; + m_total_mode7_1part = 0, m_total_mode7_2part = 0, m_total_mode11_1part = 0, m_total_2part = 0, m_total_solid = 0, m_total_mode11_2part = 0; + m_total_mode11_1part_constrained_weights = 0; + m_total_refined = 0; + + clear_obj(m_weight_range_hist_11); + clear_obj(m_weight_range_hist_11_2part); + clear_obj(m_weight_range_hist_7); + clear_obj(m_weight_range_hist_7_2part); + clear_obj(m_mode7_submode_hist); + clear_obj(m_mode11_submode_hist); + clear_obj(m_part_hist); + } + + void update(const astc_hdr_pack_results& log_blk); + + void print(); + }; + +} // namespace basisu + diff --git a/thirdparty/basis_universal/encoder/basisu_backend.cpp b/thirdparty/basis_universal/encoder/basisu_backend.cpp index abb61750a6..3fa3d8892f 100644 --- a/thirdparty/basis_universal/encoder/basisu_backend.cpp +++ b/thirdparty/basis_universal/encoder/basisu_backend.cpp @@ -1,5 +1,5 @@ // basisu_backend.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_backend.h b/thirdparty/basis_universal/encoder/basisu_backend.h index 07778aeb9b..58a9a8aa0e 100644 --- a/thirdparty/basis_universal/encoder/basisu_backend.h +++ b/thirdparty/basis_universal/encoder/basisu_backend.h @@ -1,5 +1,5 @@ // basisu_backend.h -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_basis_file.cpp b/thirdparty/basis_universal/encoder/basisu_basis_file.cpp index f4c77bef23..77f467f670 100644 --- a/thirdparty/basis_universal/encoder/basisu_basis_file.cpp +++ b/thirdparty/basis_universal/encoder/basisu_basis_file.cpp @@ -1,5 +1,5 @@ // basisu_basis_file.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_basis_file.h b/thirdparty/basis_universal/encoder/basisu_basis_file.h index 98498a0121..57448bccb1 100644 --- a/thirdparty/basis_universal/encoder/basisu_basis_file.h +++ b/thirdparty/basis_universal/encoder/basisu_basis_file.h @@ -1,5 +1,5 @@ // basisu_basis_file.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp b/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp index 22fdfa603f..914e7fbbb9 100644 --- a/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp +++ b/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp @@ -1,5 +1,5 @@ // File: basisu_bc7enc.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -394,6 +394,7 @@ void bc7enc_compress_block_init() static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSelectors, const bc7enc_vec4F* pSelector_weights, bc7enc_vec4F* pXl, bc7enc_vec4F* pXh, const color_quad_u8 *pColors) { // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. double z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; double q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; @@ -1301,6 +1302,7 @@ void check_best_overall_error(const color_cell_compressor_params *pParams, color for (uint32_t c = 0; c < 4; c++) colors[i].m_c[c] = (uint8_t)astc_interpolate_linear(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]); +#ifdef _DEBUG uint64_t total_err = 0; for (uint32_t p = 0; p < pParams->m_num_pixels; p++) { @@ -1313,6 +1315,7 @@ void check_best_overall_error(const color_cell_compressor_params *pParams, color total_err += compute_color_distance_rgb(&orig, &packed, pParams->m_perceptual, pParams->m_weights); } assert(total_err == pResults->m_best_overall_err); +#endif // HACK HACK //if (total_err != pResults->m_best_overall_err) diff --git a/thirdparty/basis_universal/encoder/basisu_bc7enc.h b/thirdparty/basis_universal/encoder/basisu_bc7enc.h index 8d8b7888ca..925d6b2e8d 100644 --- a/thirdparty/basis_universal/encoder/basisu_bc7enc.h +++ b/thirdparty/basis_universal/encoder/basisu_bc7enc.h @@ -1,5 +1,5 @@ // File: basisu_bc7enc.h -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_comp.cpp b/thirdparty/basis_universal/encoder/basisu_comp.cpp index 4e69e9e2ee..81813257cd 100644 --- a/thirdparty/basis_universal/encoder/basisu_comp.cpp +++ b/thirdparty/basis_universal/encoder/basisu_comp.cpp @@ -1,5 +1,5 @@ // basisu_comp.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,6 +16,9 @@ #include "basisu_enc.h" #include <unordered_set> #include <atomic> +#include <map> + +//#define UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS // basisu_transcoder.cpp is where basisu_miniz lives now, we just need the declarations here. #define MINIZ_NO_ZLIB_COMPATIBLE_NAMES @@ -23,6 +26,8 @@ #include "basisu_opencl.h" +#include "../transcoder/basisu_astc_hdr_core.h" + #if !BASISD_SUPPORT_KTX2 #error BASISD_SUPPORT_KTX2 must be enabled (set to 1). #endif @@ -34,7 +39,7 @@ // Set to 1 to disable the mipPadding alignment workaround (which only seems to be needed when no key-values are written at all) #define BASISU_DISABLE_KTX2_ALIGNMENT_WORKAROUND (0) -// Set to 1 to disable writing all KTX2 key values, triggering the validator bug. +// Set to 1 to disable writing all KTX2 key values, triggering an early validator bug. #define BASISU_DISABLE_KTX2_KEY_VALUES (0) using namespace buminiz; @@ -46,27 +51,143 @@ using namespace buminiz; namespace basisu { - basis_compressor::basis_compressor() : - m_pOpenCL_context(nullptr), + basis_compressor::basis_compressor() : + m_pOpenCL_context(nullptr), m_basis_file_size(0), m_basis_bits_per_texel(0.0f), m_total_blocks(0), m_any_source_image_has_alpha(false), - m_opencl_failed(false) + m_opencl_failed(false) { debug_printf("basis_compressor::basis_compressor\n"); assert(g_library_initialized); } - basis_compressor::~basis_compressor() - { - if (m_pOpenCL_context) - { - opencl_destroy_context(m_pOpenCL_context); - m_pOpenCL_context = nullptr; - } - } + basis_compressor::~basis_compressor() + { + if (m_pOpenCL_context) + { + opencl_destroy_context(m_pOpenCL_context); + m_pOpenCL_context = nullptr; + } + } + + void basis_compressor::check_for_hdr_inputs() + { + if ((!m_params.m_source_filenames.size()) && (!m_params.m_source_images.size())) + { + if (m_params.m_source_images_hdr.size()) + { + // Assume they want UASTC HDR if they've specified any HDR source images. + m_params.m_hdr = true; + } + } + + if (!m_params.m_hdr) + { + // See if any files are .EXR or .HDR, if so switch the compressor to UASTC HDR mode. + for (uint32_t i = 0; i < m_params.m_source_filenames.size(); i++) + { + std::string filename; + string_get_filename(m_params.m_source_filenames[i].c_str(), filename); + + std::string ext(string_get_extension(filename)); + string_tolower(ext); + + if ((ext == "exr") || (ext == "hdr")) + { + m_params.m_hdr = true; + break; + } + } + } + + if (m_params.m_hdr) + { + if (m_params.m_source_alpha_filenames.size()) + { + debug_printf("Warning: Alpha channel image filenames are not supported in UASTC HDR mode.\n"); + m_params.m_source_alpha_filenames.clear(); + } + } + + if (m_params.m_hdr) + m_params.m_uastc = true; + } + + bool basis_compressor::sanity_check_input_params() + { + // Check for no source filenames specified. + if ((m_params.m_read_source_images) && (!m_params.m_source_filenames.size())) + { + assert(0); + return false; + } + + // See if they've specified any source filenames, but didn't tell us to read them. + if ((!m_params.m_read_source_images) && (m_params.m_source_filenames.size())) + { + assert(0); + return false; + } + + // Sanity check the input image parameters. + if (m_params.m_read_source_images) + { + // Caller can't specify their own images if they want us to read source images from files. + if (m_params.m_source_images.size() || m_params.m_source_images_hdr.size()) + { + assert(0); + return false; + } + + if (m_params.m_source_mipmap_images.size() || m_params.m_source_mipmap_images_hdr.size()) + { + assert(0); + return false; + } + } + else + { + // They didn't tell us to read any source files, so check for no LDR/HDR source images. + if (!m_params.m_source_images.size() && !m_params.m_source_images_hdr.size()) + { + assert(0); + return false; + } + + // Now we know we've been supplied LDR and/or HDR source images, check for LDR vs. HDR conflicts. + + if (m_params.m_source_images.size()) + { + // They've supplied LDR images, so make sure they also haven't specified HDR input images. + if (m_params.m_source_images_hdr.size() || m_params.m_source_mipmap_images_hdr.size()) + { + assert(0); + return false; + } + } + else + { + // No LDR images, so make sure they haven't specified any LDR mipmaps. + if (m_params.m_source_mipmap_images.size()) + { + assert(0); + return false; + } + + // No LDR images, so ensure they've supplied some HDR images to process. + if (!m_params.m_source_images_hdr.size()) + { + assert(0); + return false; + } + } + } + + return true; + } bool basis_compressor::init(const basis_compressor_params ¶ms) { @@ -85,7 +206,12 @@ namespace basisu } m_params = params; - + + if ((m_params.m_compute_stats) && (!m_params.m_validate_output_data)) + m_params.m_validate_output_data = true; + + check_for_hdr_inputs(); + if (m_params.m_debug) { debug_printf("basis_compressor::init:\n"); @@ -95,8 +221,10 @@ namespace basisu #define PRINT_UINT_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast<uint32_t>(m_params.v), m_params.v.was_changed()); #define PRINT_FLOAT_VALUE(v) debug_printf("%s: %f %u\n", BASISU_STRINGIZE2(v), static_cast<float>(m_params.v), m_params.v.was_changed()); - debug_printf("Source images: %u, source filenames: %u, source alpha filenames: %i, Source mipmap images: %u\n", - m_params.m_source_images.size(), m_params.m_source_filenames.size(), m_params.m_source_alpha_filenames.size(), m_params.m_source_mipmap_images.size()); + debug_printf("Source LDR images: %u, HDR images: %u, filenames: %u, alpha filenames: %i, LDR mipmap images: %u, HDR mipmap images: %u\n", + m_params.m_source_images.size(), m_params.m_source_images_hdr.size(), + m_params.m_source_filenames.size(), m_params.m_source_alpha_filenames.size(), + m_params.m_source_mipmap_images.size(), m_params.m_source_mipmap_images_hdr.size()); if (m_params.m_source_mipmap_images.size()) { @@ -106,6 +234,15 @@ namespace basisu debug_printf("\n"); } + if (m_params.m_source_mipmap_images_hdr.size()) + { + debug_printf("m_source_mipmap_images_hdr array sizes:\n"); + for (uint32_t i = 0; i < m_params.m_source_mipmap_images_hdr.size(); i++) + debug_printf("%u ", m_params.m_source_mipmap_images_hdr[i].size()); + debug_printf("\n"); + } + + PRINT_BOOL_VALUE(m_hdr); PRINT_BOOL_VALUE(m_uastc); PRINT_BOOL_VALUE(m_use_opencl); PRINT_BOOL_VALUE(m_y_flip); @@ -117,7 +254,7 @@ namespace basisu PRINT_BOOL_VALUE(m_no_endpoint_rdo); PRINT_BOOL_VALUE(m_no_selector_rdo); PRINT_BOOL_VALUE(m_read_source_images); - PRINT_BOOL_VALUE(m_write_output_basis_files); + PRINT_BOOL_VALUE(m_write_output_basis_or_ktx2_files); PRINT_BOOL_VALUE(m_compute_stats); PRINT_BOOL_VALUE(m_check_for_alpha); PRINT_BOOL_VALUE(m_force_alpha); @@ -146,6 +283,7 @@ namespace basisu debug_printf("m_max_endpoint_clusters: %u\n", m_params.m_max_endpoint_clusters); debug_printf("m_max_selector_clusters: %u\n", m_params.m_max_selector_clusters); debug_printf("m_quality_level: %i\n", m_params.m_quality_level); + debug_printf("UASTC HDR quality level: %u\n", m_params.m_uastc_hdr_options.m_level); debug_printf("m_tex_type: %u\n", m_params.m_tex_type); debug_printf("m_userdata0: 0x%X, m_userdata1: 0x%X\n", m_params.m_userdata0, m_params.m_userdata1); @@ -185,6 +323,9 @@ namespace basisu } PRINT_BOOL_VALUE(m_validate_output_data); + PRINT_BOOL_VALUE(m_hdr_ldr_srgb_to_linear_conversion); + debug_printf("Allow UASTC HDR uber mode: %u\n", m_params.m_uastc_hdr_options.m_allow_uber_mode); + PRINT_BOOL_VALUE(m_hdr_favor_astc); #undef PRINT_BOOL_VALUE #undef PRINT_INT_VALUE @@ -192,19 +333,9 @@ namespace basisu #undef PRINT_FLOAT_VALUE } - if ((m_params.m_read_source_images) && (!m_params.m_source_filenames.size())) - { - assert(0); + if (!sanity_check_input_params()) return false; - } - - if ((m_params.m_compute_stats) && (!m_params.m_validate_output_data)) - { - m_params.m_validate_output_data = true; - - debug_printf("Note: m_compute_stats is true, so forcing m_validate_output_data to true as well\n"); - } - + if ((m_params.m_use_opencl) && opencl_is_available() && !m_pOpenCL_context && !m_opencl_failed) { m_pOpenCL_context = opencl_create_context(); @@ -219,6 +350,9 @@ namespace basisu { debug_printf("basis_compressor::process\n"); + if (!read_dds_source_images()) + return cECFailedReadingSourceImages; + if (!read_source_images()) return cECFailedReadingSourceImages; @@ -228,20 +362,38 @@ namespace basisu if (m_params.m_create_ktx2_file) { if (!validate_ktx2_constraints()) + { + error_printf("Inputs do not satisfy .KTX2 texture constraints: all source images must be the same resolution and have the same number of mipmap levels.\n"); return cECFailedValidating; + } } if (!extract_source_blocks()) return cECFailedFrontEnd; - if (m_params.m_uastc) + if (m_params.m_hdr) + { + // UASTC HDR + printf("Mode: UASTC HDR Level %u\n", m_params.m_uastc_hdr_options.m_level); + + error_code ec = encode_slices_to_uastc_hdr(); + if (ec != cECSuccess) + return ec; + } + else if (m_params.m_uastc) { + // UASTC + printf("Mode: UASTC LDR Level %u\n", m_params.m_pack_uastc_flags & cPackUASTCLevelMask); + error_code ec = encode_slices_to_uastc(); if (ec != cECSuccess) return ec; } else { + // ETC1S + printf("Mode: ETC1S Quality %i, Level %i\n", m_params.m_quality_level, (int)m_params.m_compression_level); + if (!process_frontend()) return cECFailedFrontEnd; @@ -254,7 +406,7 @@ namespace basisu if (!create_basis_file_and_transcode()) return cECFailedCreateBasisFile; - + if (m_params.m_create_ktx2_file) { if (!create_ktx2_file()) @@ -267,6 +419,309 @@ namespace basisu return cECSuccess; } + basis_compressor::error_code basis_compressor::encode_slices_to_uastc_hdr() + { + debug_printf("basis_compressor::encode_slices_to_uastc_hdr\n"); + + interval_timer tm; + tm.start(); + + m_uastc_slice_textures.resize(m_slice_descs.size()); + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + m_uastc_slice_textures[slice_index].init(texture_format::cUASTC_HDR_4x4, m_slice_descs[slice_index].m_orig_width, m_slice_descs[slice_index].m_orig_height); + + m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cUASTC_HDR_4x4; + m_uastc_backend_output.m_etc1s = false; + m_uastc_backend_output.m_slice_desc = m_slice_descs; + m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size()); + m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size()); + + if (!m_params.m_perceptual) + { + m_params.m_uastc_hdr_options.m_r_err_scale = 1.0f; + m_params.m_uastc_hdr_options.m_g_err_scale = 1.0f; + } + + const float DEFAULT_BC6H_ERROR_WEIGHT = .85f; + const float LOWEST_BC6H_ERROR_WEIGHT = .1f; + m_params.m_uastc_hdr_options.m_bc6h_err_weight = m_params.m_hdr_favor_astc ? LOWEST_BC6H_ERROR_WEIGHT : DEFAULT_BC6H_ERROR_WEIGHT; + + std::atomic<bool> any_failures; + any_failures = false; + + astc_hdr_block_stats enc_stats; + + struct uastc_blk_desc + { + uint32_t m_solid_flag; + uint32_t m_num_partitions; + uint32_t m_cem_index; + uint32_t m_weight_ise_range; + uint32_t m_endpoint_ise_range; + + bool operator< (const uastc_blk_desc& desc) const + { + if (this == &desc) + return false; + +#define COMP(XX) if (XX < desc.XX) return true; else if (XX != desc.XX) return false; + COMP(m_solid_flag) + COMP(m_num_partitions) + COMP(m_cem_index) + COMP(m_weight_ise_range) + COMP(m_endpoint_ise_range) +#undef COMP + + return false; + } + + bool operator== (const uastc_blk_desc& desc) const + { + if (this == &desc) + return true; + if ((*this < desc) || (desc < *this)) + return false; + return true; + } + + bool operator!= (const uastc_blk_desc& desc) const + { + return !(*this == desc); + } + }; + + struct uastc_blk_desc_stats + { + uastc_blk_desc_stats() : m_count(0) { } + uint32_t m_count; +#ifdef UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS + basisu::vector<basist::astc_blk> m_blks; +#endif + }; + + std::map<uastc_blk_desc, uastc_blk_desc_stats> unique_block_descs; + std::mutex unique_block_desc_mutex; + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + gpu_image& tex = m_uastc_slice_textures[slice_index]; + basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + (void)slice_desc; + + const uint32_t num_blocks_x = tex.get_blocks_x(); + const uint32_t num_blocks_y = tex.get_blocks_y(); + const uint32_t total_blocks = tex.get_total_blocks(); + const imagef& source_image = m_slice_images_hdr[slice_index]; + + std::atomic<uint32_t> total_blocks_processed; + total_blocks_processed = 0; + + const uint32_t N = 256; + for (uint32_t block_index_iter = 0; block_index_iter < total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum<uint32_t>(total_blocks, block_index_iter + N); + + // FIXME: This sucks, but we're having a stack size related problem with std::function with emscripten. +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->add_job([this, first_index, last_index, num_blocks_x, num_blocks_y, total_blocks, &source_image, + &tex, &total_blocks_processed, &any_failures, &enc_stats, &unique_block_descs, &unique_block_desc_mutex] + { +#endif + BASISU_NOTE_UNUSED(num_blocks_y); + + basisu::vector<astc_hdr_pack_results> all_results; + all_results.reserve(256); + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const uint32_t block_x = block_index % num_blocks_x; + const uint32_t block_y = block_index / num_blocks_x; + + vec4F block_pixels[16]; + + source_image.extract_block_clamped(&block_pixels[0], block_x * 4, block_y * 4, 4, 4); + + basist::astc_blk& dest_block = *(basist::astc_blk*)tex.get_block_ptr(block_x, block_y); + + float rgb_pixels[16 * 3]; + basist::half_float rgb_pixels_half[16 * 3]; + for (uint32_t i = 0; i < 16; i++) + { + rgb_pixels[i * 3 + 0] = block_pixels[i][0]; + rgb_pixels_half[i * 3 + 0] = float_to_half_non_neg_no_nan_inf(block_pixels[i][0]); + + rgb_pixels[i * 3 + 1] = block_pixels[i][1]; + rgb_pixels_half[i * 3 + 1] = float_to_half_non_neg_no_nan_inf(block_pixels[i][1]); + + rgb_pixels[i * 3 + 2] = block_pixels[i][2]; + rgb_pixels_half[i * 3 + 2] = float_to_half_non_neg_no_nan_inf(block_pixels[i][2]); + } + + bool status = astc_hdr_enc_block(&rgb_pixels[0], m_params.m_uastc_hdr_options, all_results); + if (!status) + { + any_failures = true; + continue; + } + + double best_err = 1e+30f; + int best_result_index = -1; + + const double bc6h_err_weight = m_params.m_uastc_hdr_options.m_bc6h_err_weight; + const double astc_err_weight = (1.0f - bc6h_err_weight); + + for (uint32_t i = 0; i < all_results.size(); i++) + { + basist::half_float unpacked_bc6h_block[4 * 4 * 3]; + unpack_bc6h(&all_results[i].m_bc6h_block, unpacked_bc6h_block, false); + + all_results[i].m_bc6h_block_error = compute_block_error(rgb_pixels_half, unpacked_bc6h_block, m_params.m_uastc_hdr_options); + + double overall_err = (all_results[i].m_bc6h_block_error * bc6h_err_weight) + (all_results[i].m_best_block_error * astc_err_weight); + + if ((!i) || (overall_err < best_err)) + { + best_err = overall_err; + best_result_index = i; + } + } + + const astc_hdr_pack_results& best_results = all_results[best_result_index]; + + astc_hdr_pack_results_to_block(dest_block, best_results); + + // Verify that this block is valid UASTC HDR and we can successfully transcode it to BC6H. + // (Well, except in fastest mode.) + if (m_params.m_uastc_hdr_options.m_level > 0) + { + basist::bc6h_block transcoded_bc6h_blk; + bool transcode_results = astc_hdr_transcode_to_bc6h(dest_block, transcoded_bc6h_blk); + assert(transcode_results); + if ((!transcode_results) && (!any_failures)) + { + error_printf("basis_compressor::encode_slices_to_uastc_hdr: UASTC HDR block transcode check failed!\n"); + + any_failures = true; + continue; + } + } + + if (m_params.m_debug) + { + // enc_stats has its own mutex + enc_stats.update(best_results); + + uastc_blk_desc blk_desc; + clear_obj(blk_desc); + + blk_desc.m_solid_flag = best_results.m_is_solid; + if (!blk_desc.m_solid_flag) + { + blk_desc.m_num_partitions = best_results.m_best_blk.m_num_partitions; + blk_desc.m_cem_index = best_results.m_best_blk.m_color_endpoint_modes[0]; + blk_desc.m_weight_ise_range = best_results.m_best_blk.m_weight_ise_range; + blk_desc.m_endpoint_ise_range = best_results.m_best_blk.m_endpoint_ise_range; + } + + { + std::lock_guard<std::mutex> lck(unique_block_desc_mutex); + + auto res = unique_block_descs.insert(std::make_pair(blk_desc, uastc_blk_desc_stats())); + + (res.first)->second.m_count++; +#ifdef UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS + (res.first)->second.m_blks.push_back(dest_block); +#endif + } + } + + total_blocks_processed++; + + uint32_t val = total_blocks_processed; + if (((val & 1023) == 1023) && m_params.m_status_output) + { + debug_printf("basis_compressor::encode_slices_to_uastc_hdr: %3.1f%% done\n", static_cast<float>(val) * 100.0f / total_blocks); + } + } + +#ifndef __EMSCRIPTEN__ + }); +#endif + + } // block_index_iter + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->wait_for_all(); +#endif + + if (any_failures) + return cECFailedEncodeUASTC; + + m_uastc_backend_output.m_slice_image_data[slice_index].resize(tex.get_size_in_bytes()); + memcpy(&m_uastc_backend_output.m_slice_image_data[slice_index][0], tex.get_ptr(), tex.get_size_in_bytes()); + + m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(tex.get_ptr(), tex.get_size_in_bytes(), 0); + + } // slice_index + + debug_printf("basis_compressor::encode_slices_to_uastc_hdr: Total time: %3.3f secs\n", tm.get_elapsed_secs()); + + if (m_params.m_debug) + { + debug_printf("\n----- Total unique UASTC block descs: %u\n", (uint32_t)unique_block_descs.size()); + + uint32_t c = 0; + for (auto it = unique_block_descs.begin(); it != unique_block_descs.end(); ++it) + { + debug_printf("%u. Total uses: %u %3.2f%%, solid color: %u\n", c, it->second.m_count, + ((float)it->second.m_count * 100.0f) / enc_stats.m_total_blocks, it->first.m_solid_flag); + + if (!it->first.m_solid_flag) + { + debug_printf(" Num partitions: %u\n", it->first.m_num_partitions); + debug_printf(" CEM index: %u\n", it->first.m_cem_index); + debug_printf(" Weight ISE range: %u (%u levels)\n", it->first.m_weight_ise_range, astc_helpers::get_ise_levels(it->first.m_weight_ise_range)); + debug_printf(" Endpoint ISE range: %u (%u levels)\n", it->first.m_endpoint_ise_range, astc_helpers::get_ise_levels(it->first.m_endpoint_ise_range)); + } + +#ifdef UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS + debug_printf(" -- UASTC HDR block bytes:\n"); + for (uint32_t j = 0; j < minimum<uint32_t>(4, it->second.m_blks.size()); j++) + { + basist::astc_blk& blk = it->second.m_blks[j]; + + debug_printf(" - UASTC HDR: { "); + for (uint32_t k = 0; k < 16; k++) + debug_printf("%u%s", ((const uint8_t*)&blk)[k], (k != 15) ? ", " : ""); + debug_printf(" }\n"); + + basist::bc6h_block bc6h_blk; + bool res = astc_hdr_transcode_to_bc6h(blk, bc6h_blk); + assert(res); + if (!res) + { + error_printf("astc_hdr_transcode_to_bc6h() failed!\n"); + return cECFailedEncodeUASTC; + } + + debug_printf(" - BC6H: { "); + for (uint32_t k = 0; k < 16; k++) + debug_printf("%u%s", ((const uint8_t*)&bc6h_blk)[k], (k != 15) ? ", " : ""); + debug_printf(" }\n"); + } +#endif + + c++; + } + printf("\n"); + + enc_stats.print(); + } + + return cECSuccess; + } + basis_compressor::error_code basis_compressor::encode_slices_to_uastc() { debug_printf("basis_compressor::encode_slices_to_uastc\n"); @@ -328,7 +783,7 @@ namespace basisu total_blocks_processed++; uint32_t val = total_blocks_processed; - if ((val & 16383) == 16383) + if (((val & 16383) == 16383) && m_params.m_status_output) { debug_printf("basis_compressor::encode_slices_to_uastc: %3.1f%% done\n", static_cast<float>(val) * 100.0f / total_blocks); } @@ -374,6 +829,57 @@ namespace basisu return cECSuccess; } + bool basis_compressor::generate_mipmaps(const imagef& img, basisu::vector<imagef>& mips, bool has_alpha) + { + debug_printf("basis_compressor::generate_mipmaps\n"); + + interval_timer tm; + tm.start(); + + uint32_t total_levels = 1; + uint32_t w = img.get_width(), h = img.get_height(); + while (maximum<uint32_t>(w, h) > (uint32_t)m_params.m_mip_smallest_dimension) + { + w = maximum(w >> 1U, 1U); + h = maximum(h >> 1U, 1U); + total_levels++; + } + + for (uint32_t level = 1; level < total_levels; level++) + { + const uint32_t level_width = maximum<uint32_t>(1, img.get_width() >> level); + const uint32_t level_height = maximum<uint32_t>(1, img.get_height() >> level); + + imagef& level_img = *enlarge_vector(mips, 1); + level_img.resize(level_width, level_height); + + const imagef* pSource_image = &img; + + if (m_params.m_mip_fast) + { + if (level > 1) + pSource_image = &mips[level - 1]; + } + + bool status = image_resample(*pSource_image, level_img, + //m_params.m_mip_filter.c_str(), + "box", // TODO: negative lobes in the filter are causing negative colors, try Mitchell + m_params.m_mip_scale, m_params.m_mip_wrapping, 0, has_alpha ? 4 : 3); + if (!status) + { + error_printf("basis_compressor::generate_mipmaps: image_resample() failed!\n"); + return false; + } + + clean_hdr_image(level_img); + } + + if (m_params.m_debug) + debug_printf("Total mipmap generation time: %3.3f secs\n", tm.get_elapsed_secs()); + + return true; + } + bool basis_compressor::generate_mipmaps(const image &img, basisu::vector<image> &mips, bool has_alpha) { debug_printf("basis_compressor::generate_mipmaps\n"); @@ -463,17 +969,224 @@ namespace basisu return true; } + void basis_compressor::clean_hdr_image(imagef& src_img) + { + const uint32_t width = src_img.get_width(); + const uint32_t height = src_img.get_height(); + + float max_used_val = 0.0f; + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + vec4F& c = src_img(x, y); + for (uint32_t i = 0; i < 3; i++) + max_used_val = maximum(max_used_val, c[i]); + } + } + + double hdr_image_scale = 1.0f; + if (max_used_val > basist::ASTC_HDR_MAX_VAL) + { + hdr_image_scale = max_used_val / basist::ASTC_HDR_MAX_VAL; + + const double inv_hdr_image_scale = basist::ASTC_HDR_MAX_VAL / max_used_val; + + for (uint32_t y = 0; y < src_img.get_height(); y++) + { + for (uint32_t x = 0; x < src_img.get_width(); x++) + { + vec4F& c = src_img(x, y); + + for (uint32_t i = 0; i < 3; i++) + c[i] = (float)minimum<double>(c[i] * inv_hdr_image_scale, basist::ASTC_HDR_MAX_VAL); + } + } + + printf("Warning: The input HDR image's maximum used float value was %f, which is too high to encode as ASTC HDR. The image's components have been linearly scaled so the maximum used value is %f, by multiplying by %f.\n", + max_used_val, basist::ASTC_HDR_MAX_VAL, inv_hdr_image_scale); + + printf("The decoded ASTC HDR texture will have to be scaled up by %f.\n", hdr_image_scale); + } + + // TODO: Determine a constant scale factor, apply if > MAX_HALF_FLOAT + if (!src_img.clean_astc_hdr_pixels(basist::ASTC_HDR_MAX_VAL)) + printf("Warning: clean_astc_hdr_pixels() had to modify the input image to encode to ASTC HDR - see previous warning(s).\n"); + + float lowest_nonzero_val = 1e+30f; + float lowest_val = 1e+30f; + float highest_val = -1e+30f; + + for (uint32_t y = 0; y < src_img.get_height(); y++) + { + for (uint32_t x = 0; x < src_img.get_width(); x++) + { + const vec4F& c = src_img(x, y); + + for (uint32_t i = 0; i < 3; i++) + { + lowest_val = basisu::minimum(lowest_val, c[i]); + + if (c[i] != 0.0f) + lowest_nonzero_val = basisu::minimum(lowest_nonzero_val, c[i]); + + highest_val = basisu::maximum(highest_val, c[i]); + } + } + } + + debug_printf("Lowest image value: %e, lowest non-zero value: %e, highest value: %e, dynamic range: %e\n", lowest_val, lowest_nonzero_val, highest_val, highest_val / lowest_nonzero_val); + } + + bool basis_compressor::read_dds_source_images() + { + debug_printf("basis_compressor::read_dds_source_images\n"); + + // Nothing to do if the caller doesn't want us reading source images. + if ((!m_params.m_read_source_images) || (!m_params.m_source_filenames.size())) + return true; + + // Just bail of the caller has specified their own source images. + if (m_params.m_source_images.size() || m_params.m_source_images_hdr.size()) + return true; + + if (m_params.m_source_mipmap_images.size() || m_params.m_source_mipmap_images_hdr.size()) + return true; + + // See if any input filenames are .DDS + bool any_dds = false, all_dds = true; + for (uint32_t i = 0; i < m_params.m_source_filenames.size(); i++) + { + std::string ext(string_get_extension(m_params.m_source_filenames[i])); + if (strcasecmp(ext.c_str(), "dds") == 0) + any_dds = true; + else + all_dds = false; + } + + // Bail if no .DDS files specified. + if (!any_dds) + return true; + + // If any input is .DDS they all must be .DDS, for simplicity. + if (!all_dds) + { + error_printf("If any filename is DDS, all filenames must be DDS.\n"); + return false; + } + + // Can't jam in alpha channel images if any .DDS files specified. + if (m_params.m_source_alpha_filenames.size()) + { + error_printf("Source alpha filenames are not supported in DDS mode.\n"); + return false; + } + + bool any_mipmaps = false; + + // Read each .DDS texture file + for (uint32_t i = 0; i < m_params.m_source_filenames.size(); i++) + { + basisu::vector<image> ldr_mips; + basisu::vector<imagef> hdr_mips; + bool status = read_uncompressed_dds_file(m_params.m_source_filenames[i].c_str(), ldr_mips, hdr_mips); + if (!status) + return false; + + assert(ldr_mips.size() || hdr_mips.size()); + + if (m_params.m_status_output) + { + printf("Read DDS file \"%s\", %s, %ux%u, %u mipmap levels\n", + m_params.m_source_filenames[i].c_str(), + ldr_mips.size() ? "LDR" : "HDR", + ldr_mips.size() ? ldr_mips[0].get_width() : hdr_mips[0].get_width(), + ldr_mips.size() ? ldr_mips[0].get_height() : hdr_mips[0].get_height(), + ldr_mips.size() ? ldr_mips.size() : hdr_mips.size()); + } + + if (ldr_mips.size()) + { + if (m_params.m_source_images_hdr.size()) + { + error_printf("All DDS files must be of the same type (all LDR, or all HDR)\n"); + return false; + } + + m_params.m_source_images.push_back(ldr_mips[0]); + m_params.m_source_mipmap_images.resize(m_params.m_source_mipmap_images.size() + 1); + + if (ldr_mips.size() > 1) + { + ldr_mips.erase(0U); + + m_params.m_source_mipmap_images.back().swap(ldr_mips); + + any_mipmaps = true; + } + } + else + { + if (m_params.m_source_images.size()) + { + error_printf("All DDS files must be of the same type (all LDR, or all HDR)\n"); + return false; + } + + m_params.m_source_images_hdr.push_back(hdr_mips[0]); + m_params.m_source_mipmap_images_hdr.resize(m_params.m_source_mipmap_images_hdr.size() + 1); + + if (hdr_mips.size() > 1) + { + hdr_mips.erase(0U); + + m_params.m_source_mipmap_images_hdr.back().swap(hdr_mips); + + any_mipmaps = true; + } + + m_params.m_hdr = true; + m_params.m_uastc = true; + } + } + + m_params.m_read_source_images = false; + m_params.m_source_filenames.clear(); + m_params.m_source_alpha_filenames.clear(); + + if (!any_mipmaps) + { + m_params.m_source_mipmap_images.clear(); + m_params.m_source_mipmap_images_hdr.clear(); + } + + if ((m_params.m_hdr) && (!m_params.m_source_images_hdr.size())) + { + error_printf("HDR mode enabled, but only LDR .DDS files were loaded. HDR mode requires half or float (HDR) .DDS inputs.\n"); + return false; + } + + return true; + } + bool basis_compressor::read_source_images() { debug_printf("basis_compressor::read_source_images\n"); - const uint32_t total_source_files = m_params.m_read_source_images ? (uint32_t)m_params.m_source_filenames.size() : (uint32_t)m_params.m_source_images.size(); + const uint32_t total_source_files = m_params.m_read_source_images ? (uint32_t)m_params.m_source_filenames.size() : + (m_params.m_hdr ? (uint32_t)m_params.m_source_images_hdr.size() : (uint32_t)m_params.m_source_images.size()); + if (!total_source_files) + { + debug_printf("basis_compressor::read_source_images: No source images to process\n"); + return false; + } m_stats.resize(0); m_slice_descs.resize(0); m_slice_images.resize(0); + m_slice_images_hdr.resize(0); m_total_blocks = 0; uint32_t total_macroblocks = 0; @@ -481,106 +1194,196 @@ namespace basisu m_any_source_image_has_alpha = false; basisu::vector<image> source_images; + basisu::vector<imagef> source_images_hdr; + basisu::vector<std::string> source_filenames; + // TODO: Note HDR images don't support alpha here, currently. + // First load all source images, and determine if any have an alpha channel. for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) { - const char *pSource_filename = ""; + const char* pSource_filename = ""; image file_image; - + imagef file_image_hdr; + if (m_params.m_read_source_images) { pSource_filename = m_params.m_source_filenames[source_file_index].c_str(); // Load the source image - if (!load_image(pSource_filename, file_image)) + if (m_params.m_hdr) { - error_printf("Failed reading source image: %s\n", pSource_filename); - return false; + if (!load_image_hdr(pSource_filename, file_image_hdr, m_params.m_hdr_ldr_srgb_to_linear_conversion)) + { + error_printf("Failed reading source image: %s\n", pSource_filename); + return false; + } + + // For now, just slam alpha to 1.0f. UASTC HDR doesn't support alpha yet. + for (uint32_t y = 0; y < file_image_hdr.get_height(); y++) + for (uint32_t x = 0; x < file_image_hdr.get_width(); x++) + file_image_hdr(x, y)[3] = 1.0f; } + else + { + if (!load_image(pSource_filename, file_image)) + { + error_printf("Failed reading source image: %s\n", pSource_filename); + return false; + } + } + + const uint32_t width = m_params.m_hdr ? file_image_hdr.get_width() : file_image.get_width(); + const uint32_t height = m_params.m_hdr ? file_image_hdr.get_height() : file_image.get_height(); if (m_params.m_status_output) { - printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height()); + printf("Read source image \"%s\", %ux%u\n", pSource_filename, width, height); } - // Optionally load another image and put a grayscale version of it into the alpha channel. - if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size())) + if (m_params.m_hdr) + { + clean_hdr_image(file_image_hdr); + } + else { - const char *pSource_alpha_image = m_params.m_source_alpha_filenames[source_file_index].c_str(); + // Optionally load another image and put a grayscale version of it into the alpha channel. + if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size())) + { + const char* pSource_alpha_image = m_params.m_source_alpha_filenames[source_file_index].c_str(); - image alpha_data; + image alpha_data; - if (!load_image(pSource_alpha_image, alpha_data)) - { - error_printf("Failed reading source image: %s\n", pSource_alpha_image); - return false; - } + if (!load_image(pSource_alpha_image, alpha_data)) + { + error_printf("Failed reading source image: %s\n", pSource_alpha_image); + return false; + } - printf("Read source alpha image \"%s\", %ux%u\n", pSource_alpha_image, alpha_data.get_width(), alpha_data.get_height()); + printf("Read source alpha image \"%s\", %ux%u\n", pSource_alpha_image, alpha_data.get_width(), alpha_data.get_height()); - alpha_data.crop(file_image.get_width(), file_image.get_height()); + alpha_data.crop(width, height); - for (uint32_t y = 0; y < file_image.get_height(); y++) - for (uint32_t x = 0; x < file_image.get_width(); x++) - file_image(x, y).a = (uint8_t)alpha_data(x, y).get_709_luma(); + for (uint32_t y = 0; y < height; y++) + for (uint32_t x = 0; x < width; x++) + file_image(x, y).a = (uint8_t)alpha_data(x, y).get_709_luma(); + } } } else { - file_image = m_params.m_source_images[source_file_index]; + if (m_params.m_hdr) + { + file_image_hdr = m_params.m_source_images_hdr[source_file_index]; + clean_hdr_image(file_image_hdr); + } + else + { + file_image = m_params.m_source_images[source_file_index]; + } } - if (m_params.m_renormalize) - file_image.renormalize_normal_map(); + if (!m_params.m_hdr) + { + if (m_params.m_renormalize) + file_image.renormalize_normal_map(); + } bool alpha_swizzled = false; + if (m_params.m_swizzle[0] != 0 || m_params.m_swizzle[1] != 1 || m_params.m_swizzle[2] != 2 || m_params.m_swizzle[3] != 3) { - // Used for XY normal maps in RG - puts X in color, Y in alpha - for (uint32_t y = 0; y < file_image.get_height(); y++) - for (uint32_t x = 0; x < file_image.get_width(); x++) + if (!m_params.m_hdr) + { + // Used for XY normal maps in RG - puts X in color, Y in alpha + for (uint32_t y = 0; y < file_image.get_height(); y++) + { + for (uint32_t x = 0; x < file_image.get_width(); x++) + { + const color_rgba& c = file_image(x, y); + file_image(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]); + } + } + + alpha_swizzled = (m_params.m_swizzle[3] != 3); + } + else + { + // Used for XY normal maps in RG - puts X in color, Y in alpha + for (uint32_t y = 0; y < file_image_hdr.get_height(); y++) { - const color_rgba &c = file_image(x, y); - file_image(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]); + for (uint32_t x = 0; x < file_image_hdr.get_width(); x++) + { + const vec4F& c = file_image_hdr(x, y); + + // For now, alpha is always 1.0f in UASTC HDR. + file_image_hdr(x, y).set(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], 1.0f); // c[m_params.m_swizzle[3]]); + } } - alpha_swizzled = m_params.m_swizzle[3] != 3; + } } - + bool has_alpha = false; - if (m_params.m_force_alpha || alpha_swizzled) - has_alpha = true; - else if (!m_params.m_check_for_alpha) - file_image.set_alpha(255); - else if (file_image.has_alpha()) - has_alpha = true; - if (has_alpha) - m_any_source_image_has_alpha = true; + if (!m_params.m_hdr) + { + if (m_params.m_force_alpha || alpha_swizzled) + has_alpha = true; + else if (!m_params.m_check_for_alpha) + file_image.set_alpha(255); + else if (file_image.has_alpha()) + has_alpha = true; + + if (has_alpha) + m_any_source_image_has_alpha = true; + } + + { + const uint32_t width = m_params.m_hdr ? file_image_hdr.get_width() : file_image.get_width(); + const uint32_t height = m_params.m_hdr ? file_image_hdr.get_height() : file_image.get_height(); + + debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, width, height, has_alpha); + } - debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, file_image.get_width(), file_image.get_height(), has_alpha); - if (m_params.m_y_flip) - file_image.flip_y(); + { + if (m_params.m_hdr) + file_image_hdr.flip_y(); + else + file_image.flip_y(); + } #if DEBUG_EXTRACT_SINGLE_BLOCK - image block_image(4, 4); const uint32_t block_x = 0; const uint32_t block_y = 0; - block_image.blit(block_x * 4, block_y * 4, 4, 4, 0, 0, file_image, 0); - file_image = block_image; + + if (m_params.m_hdr) + { + imagef block_image(4, 4); + block_image_hdr.blit(block_x * 4, block_y * 4, 4, 4, 0, 0, file_image_hdr, 0); + file_image_hdr = block_image; + } + else + { + image block_image(4, 4); + block_image.blit(block_x * 4, block_y * 4, 4, 4, 0, 0, file_image, 0); + file_image = block_image; + } #endif #if DEBUG_CROP_TEXTURE_TO_64x64 - file_image.resize(64, 64); + if (m_params.m_hdr) + file_image_hdr.resize(64, 64); + else + file_image.resize(64, 64); #endif - if (m_params.m_resample_width > 0 && m_params.m_resample_height > 0) + if ((m_params.m_resample_width > 0) && (m_params.m_resample_height > 0)) { int new_width = basisu::minimum<int>(m_params.m_resample_width, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); int new_height = basisu::minimum<int>(m_params.m_resample_height, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); @@ -588,129 +1391,225 @@ namespace basisu debug_printf("Resampling to %ix%i\n", new_width, new_height); // TODO: A box filter - kaiser looks too sharp on video. Let the caller control this. - image temp_img(new_width, new_height); - image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser"); - temp_img.swap(file_image); + if (m_params.m_hdr) + { + imagef temp_img(new_width, new_height); + image_resample(file_image_hdr, temp_img, "box"); // "kaiser"); + clean_hdr_image(temp_img); + temp_img.swap(file_image_hdr); + } + else + { + image temp_img(new_width, new_height); + image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser"); + temp_img.swap(file_image); + } } else if (m_params.m_resample_factor > 0.0f) { - int new_width = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_width() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); - int new_height = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_height() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + // TODO: A box filter - kaiser looks too sharp on video. Let the caller control this. + if (m_params.m_hdr) + { + int new_width = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image_hdr.get_width() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + int new_height = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image_hdr.get_height() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); - debug_printf("Resampling to %ix%i\n", new_width, new_height); + debug_printf("Resampling to %ix%i\n", new_width, new_height); - // TODO: A box filter - kaiser looks too sharp on video. Let the caller control this. - image temp_img(new_width, new_height); - image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser"); - temp_img.swap(file_image); + imagef temp_img(new_width, new_height); + image_resample(file_image_hdr, temp_img, "box"); // "kaiser"); + clean_hdr_image(temp_img); + temp_img.swap(file_image_hdr); + } + else + { + int new_width = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_width() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + int new_height = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_height() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + + debug_printf("Resampling to %ix%i\n", new_width, new_height); + + image temp_img(new_width, new_height); + image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser"); + temp_img.swap(file_image); + } } - if ((!file_image.get_width()) || (!file_image.get_height())) + const uint32_t width = m_params.m_hdr ? file_image_hdr.get_width() : file_image.get_width(); + const uint32_t height = m_params.m_hdr ? file_image_hdr.get_height() : file_image.get_height(); + + if ((!width) || (!height)) { error_printf("basis_compressor::read_source_images: Source image has a zero width and/or height!\n"); return false; } - if ((file_image.get_width() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (file_image.get_height() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION)) + if ((width > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (height > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION)) { error_printf("basis_compressor::read_source_images: Source image \"%s\" is too large!\n", pSource_filename); return false; } - source_images.enlarge(1)->swap(file_image); + if (!m_params.m_hdr) + source_images.enlarge(1)->swap(file_image); + else + source_images_hdr.enlarge(1)->swap(file_image_hdr); + source_filenames.push_back(pSource_filename); } // Check if the caller has generated their own mipmaps. - if (m_params.m_source_mipmap_images.size()) + if (m_params.m_hdr) { - // Make sure they've passed us enough mipmap chains. - if ((m_params.m_source_images.size() != m_params.m_source_mipmap_images.size()) || (total_source_files != m_params.m_source_images.size())) + if (m_params.m_source_mipmap_images_hdr.size()) { - error_printf("basis_compressor::read_source_images(): m_params.m_source_mipmap_images.size() must equal m_params.m_source_images.size()!\n"); - return false; + // Make sure they've passed us enough mipmap chains. + if ((m_params.m_source_images_hdr.size() != m_params.m_source_mipmap_images_hdr.size()) || (total_source_files != m_params.m_source_images_hdr.size())) + { + error_printf("basis_compressor::read_source_images(): m_params.m_source_mipmap_images_hdr.size() must equal m_params.m_source_images_hdr.size()!\n"); + return false; + } } - - // Check if any of the user-supplied mipmap levels has alpha. - // We're assuming the user has already preswizzled their mipmap source images. - if (!m_any_source_image_has_alpha) + } + else + { + if (m_params.m_source_mipmap_images.size()) { - for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) + // Make sure they've passed us enough mipmap chains. + if ((m_params.m_source_images.size() != m_params.m_source_mipmap_images.size()) || (total_source_files != m_params.m_source_images.size())) { - for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++) - { - const image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index]; + error_printf("basis_compressor::read_source_images(): m_params.m_source_mipmap_images.size() must equal m_params.m_source_images.size()!\n"); + return false; + } - if (mip_img.has_alpha()) + // Check if any of the user-supplied mipmap levels has alpha. + if (!m_any_source_image_has_alpha) + { + for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) + { + for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++) { - m_any_source_image_has_alpha = true; - break; + const image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index]; + + // Be sure to take into account any swizzling which will be applied. + if (mip_img.has_alpha(m_params.m_swizzle[3])) + { + m_any_source_image_has_alpha = true; + break; + } } - } - if (m_any_source_image_has_alpha) - break; + if (m_any_source_image_has_alpha) + break; + } } } } debug_printf("Any source image has alpha: %u\n", m_any_source_image_has_alpha); + // Now, for each source image, create the slices corresponding to that image. for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) { const std::string &source_filename = source_filenames[source_file_index]; - - // Now, for each source image, create the slices corresponding to that image. + basisu::vector<image> slices; + basisu::vector<imagef> slices_hdr; slices.reserve(32); + slices_hdr.reserve(32); // The first (largest) mipmap level. - image& file_image = source_images[source_file_index]; - + image *pFile_image = source_images.size() ? &source_images[source_file_index] : nullptr; + imagef *pFile_image_hdr = source_images_hdr.size() ? &source_images_hdr[source_file_index] : nullptr; + // Reserve a slot for mip0. - slices.resize(1); - - if (m_params.m_source_mipmap_images.size()) + if (m_params.m_hdr) + slices_hdr.resize(1); + else + slices.resize(1); + + if ((!m_params.m_hdr) && (m_params.m_source_mipmap_images.size())) { // User-provided mipmaps for each layer or image in the texture array. for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++) { image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index]; - if (m_params.m_swizzle[0] != 0 || - m_params.m_swizzle[1] != 1 || - m_params.m_swizzle[2] != 2 || - m_params.m_swizzle[3] != 3) + if ((m_params.m_swizzle[0] != 0) || + (m_params.m_swizzle[1] != 1) || + (m_params.m_swizzle[2] != 2) || + (m_params.m_swizzle[3] != 3)) { // Used for XY normal maps in RG - puts X in color, Y in alpha for (uint32_t y = 0; y < mip_img.get_height(); y++) + { for (uint32_t x = 0; x < mip_img.get_width(); x++) { - const color_rgba &c = mip_img(x, y); + const color_rgba& c = mip_img(x, y); mip_img(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]); } + } } slices.push_back(mip_img); } } + else if ((m_params.m_hdr) && (m_params.m_source_mipmap_images_hdr.size())) + { + // User-provided mipmaps for each layer or image in the texture array. + for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images_hdr[source_file_index].size(); mip_index++) + { + imagef& mip_img = m_params.m_source_mipmap_images_hdr[source_file_index][mip_index]; + + if ((m_params.m_swizzle[0] != 0) || + (m_params.m_swizzle[1] != 1) || + (m_params.m_swizzle[2] != 2) || + (m_params.m_swizzle[3] != 3)) + { + // Used for XY normal maps in RG - puts X in color, Y in alpha + for (uint32_t y = 0; y < mip_img.get_height(); y++) + { + for (uint32_t x = 0; x < mip_img.get_width(); x++) + { + const vec4F& c = mip_img(x, y); + + // For now, HDR alpha is always 1.0f. + mip_img(x, y).set(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], 1.0f); // c[m_params.m_swizzle[3]]); + } + } + } + + clean_hdr_image(mip_img); + + slices_hdr.push_back(mip_img); + } + } else if (m_params.m_mip_gen) { // Automatically generate mipmaps. - if (!generate_mipmaps(file_image, slices, m_any_source_image_has_alpha)) - return false; + if (m_params.m_hdr) + { + if (!generate_mipmaps(*pFile_image_hdr, slices_hdr, m_any_source_image_has_alpha)) + return false; + } + else + { + if (!generate_mipmaps(*pFile_image, slices, m_any_source_image_has_alpha)) + return false; + } } // Swap in the largest mipmap level here to avoid copying it, because generate_mips() will change the array. // NOTE: file_image is now blank. - slices[0].swap(file_image); + if (m_params.m_hdr) + slices_hdr[0].swap(*pFile_image_hdr); + else + slices[0].swap(*pFile_image); - uint_vec mip_indices(slices.size()); - for (uint32_t i = 0; i < slices.size(); i++) + uint_vec mip_indices(m_params.m_hdr ? slices_hdr.size() : slices.size()); + for (uint32_t i = 0; i < (m_params.m_hdr ? slices_hdr.size() : slices.size()); i++) mip_indices[i] = i; - if ((m_any_source_image_has_alpha) && (!m_params.m_uastc)) + if ((!m_params.m_hdr) && (m_any_source_image_has_alpha) && (!m_params.m_uastc)) { // For ETC1S, if source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB. basisu::vector<image> alpha_slices; @@ -745,20 +1644,29 @@ namespace basisu mip_indices.swap(new_mip_indices); } - assert(slices.size() == mip_indices.size()); - - for (uint32_t slice_index = 0; slice_index < slices.size(); slice_index++) + if (m_params.m_hdr) + { + assert(slices_hdr.size() == mip_indices.size()); + } + else + { + assert(slices.size() == mip_indices.size()); + } + + for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? slices_hdr.size() : slices.size()); slice_index++) { - image& slice_image = slices[slice_index]; - const uint32_t orig_width = slice_image.get_width(); - const uint32_t orig_height = slice_image.get_height(); + image *pSlice_image = m_params.m_hdr ? nullptr : &slices[slice_index]; + imagef *pSlice_image_hdr = m_params.m_hdr ? &slices_hdr[slice_index] : nullptr; + + const uint32_t orig_width = m_params.m_hdr ? pSlice_image_hdr->get_width() : pSlice_image->get_width(); + const uint32_t orig_height = m_params.m_hdr ? pSlice_image_hdr->get_height() : pSlice_image->get_height(); bool is_alpha_slice = false; - if (m_any_source_image_has_alpha) + if ((!m_params.m_hdr) && (m_any_source_image_has_alpha)) { if (m_params.m_uastc) { - is_alpha_slice = slice_image.has_alpha(); + is_alpha_slice = pSlice_image->has_alpha(); } else { @@ -767,43 +1675,69 @@ namespace basisu } // Enlarge the source image to 4x4 block boundaries, duplicating edge pixels if necessary to avoid introducing extra colors into blocks. - slice_image.crop_dup_borders(slice_image.get_block_width(4) * 4, slice_image.get_block_height(4) * 4); + if (m_params.m_hdr) + pSlice_image_hdr->crop_dup_borders(pSlice_image_hdr->get_block_width(4) * 4, pSlice_image_hdr->get_block_height(4) * 4); + else + pSlice_image->crop_dup_borders(pSlice_image->get_block_width(4) * 4, pSlice_image->get_block_height(4) * 4); if (m_params.m_debug_images) { - save_png(string_format("basis_debug_source_image_%u_slice_%u.png", source_file_index, slice_index).c_str(), slice_image); + if (m_params.m_hdr) + write_exr(string_format("basis_debug_source_image_%u_slice_%u.exr", source_file_index, slice_index).c_str(), *pSlice_image_hdr, 3, 0); + else + save_png(string_format("basis_debug_source_image_%u_slice_%u.png", source_file_index, slice_index).c_str(), *pSlice_image); } - const uint32_t dest_image_index = m_slice_images.size(); + const uint32_t dest_image_index = (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size()); enlarge_vector(m_stats, 1); - enlarge_vector(m_slice_images, 1); + + if (m_params.m_hdr) + enlarge_vector(m_slice_images_hdr, 1); + else + enlarge_vector(m_slice_images, 1); + enlarge_vector(m_slice_descs, 1); - + m_stats[dest_image_index].m_filename = source_filename.c_str(); m_stats[dest_image_index].m_width = orig_width; m_stats[dest_image_index].m_height = orig_height; - - debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n", m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(), orig_width, orig_height, slice_image.get_width(), slice_image.get_height()); - basisu_backend_slice_desc &slice_desc = m_slice_descs[dest_image_index]; + debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n", + m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(), + orig_width, orig_height, + m_params.m_hdr ? pSlice_image_hdr->get_width() : pSlice_image->get_width(), + m_params.m_hdr ? pSlice_image_hdr->get_height() : pSlice_image->get_height()); + + basisu_backend_slice_desc& slice_desc = m_slice_descs[dest_image_index]; slice_desc.m_first_block_index = m_total_blocks; slice_desc.m_orig_width = orig_width; slice_desc.m_orig_height = orig_height; - slice_desc.m_width = slice_image.get_width(); - slice_desc.m_height = slice_image.get_height(); + if (m_params.m_hdr) + { + slice_desc.m_width = pSlice_image_hdr->get_width(); + slice_desc.m_height = pSlice_image_hdr->get_height(); - slice_desc.m_num_blocks_x = slice_image.get_block_width(4); - slice_desc.m_num_blocks_y = slice_image.get_block_height(4); + slice_desc.m_num_blocks_x = pSlice_image_hdr->get_block_width(4); + slice_desc.m_num_blocks_y = pSlice_image_hdr->get_block_height(4); + } + else + { + slice_desc.m_width = pSlice_image->get_width(); + slice_desc.m_height = pSlice_image->get_height(); + + slice_desc.m_num_blocks_x = pSlice_image->get_block_width(4); + slice_desc.m_num_blocks_y = pSlice_image->get_block_height(4); + } slice_desc.m_num_macroblocks_x = (slice_desc.m_num_blocks_x + 1) >> 1; slice_desc.m_num_macroblocks_y = (slice_desc.m_num_blocks_y + 1) >> 1; slice_desc.m_source_file_index = source_file_index; - + slice_desc.m_mip_index = mip_indices[slice_index]; slice_desc.m_alpha = is_alpha_slice; @@ -818,8 +1752,11 @@ namespace basisu // Finally, swap in the slice's image to avoid copying it. // NOTE: slice_image is now blank. - m_slice_images[dest_image_index].swap(slice_image); - + if (m_params.m_hdr) + m_slice_images_hdr[dest_image_index].swap(*pSlice_image_hdr); + else + m_slice_images[dest_image_index].swap(*pSlice_image); + } // slice_index } // source_file_index @@ -855,7 +1792,7 @@ namespace basisu if (m_params.m_status_output) { - printf("Total basis file slices: %u\n", (uint32_t)m_slice_descs.size()); + printf("Total slices: %u\n", (uint32_t)m_slice_descs.size()); } for (uint32_t i = 0; i < m_slice_descs.size(); i++) @@ -865,11 +1802,17 @@ namespace basisu if (m_params.m_status_output) { printf("Slice: %u, alpha: %u, orig width/height: %ux%u, width/height: %ux%u, first_block: %u, image_index: %u, mip_level: %u, iframe: %u\n", - i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height, slice_desc.m_width, slice_desc.m_height, slice_desc.m_first_block_index, slice_desc.m_source_file_index, slice_desc.m_mip_index, slice_desc.m_iframe); + i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height, + slice_desc.m_width, slice_desc.m_height, + slice_desc.m_first_block_index, slice_desc.m_source_file_index, slice_desc.m_mip_index, slice_desc.m_iframe); } if (m_any_source_image_has_alpha) { + // HDR doesn't support alpha yet + if (m_params.m_hdr) + return false; + if (!m_params.m_uastc) { // For ETC1S, alpha slices must be at odd slice indices. @@ -903,6 +1846,7 @@ namespace basisu if ((slice_desc.m_orig_width > slice_desc.m_width) || (slice_desc.m_orig_height > slice_desc.m_height)) return false; + if ((slice_desc.m_source_file_index == 0) && (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)) { if (!slice_desc.m_iframe) @@ -924,7 +1868,7 @@ namespace basisu uint32_t total_basis_images = 0; - for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) + for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size()); slice_index++) { const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; @@ -945,7 +1889,7 @@ namespace basisu uint_vec image_mipmap_levels(total_basis_images); int width = -1, height = -1; - for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) + for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size()); slice_index++) { const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; @@ -982,20 +1926,52 @@ namespace basisu { debug_printf("basis_compressor::extract_source_blocks\n"); - m_source_blocks.resize(m_total_blocks); + if (m_params.m_hdr) + m_source_blocks_hdr.resize(m_total_blocks); + else + m_source_blocks.resize(m_total_blocks); - for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) + for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size()); slice_index++) { const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; const uint32_t num_blocks_x = slice_desc.m_num_blocks_x; const uint32_t num_blocks_y = slice_desc.m_num_blocks_y; - const image& source_image = m_slice_images[slice_index]; + const image *pSource_image = m_params.m_hdr ? nullptr : &m_slice_images[slice_index]; + const imagef *pSource_image_hdr = m_params.m_hdr ? &m_slice_images_hdr[slice_index] : nullptr; for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) - source_image.extract_block_clamped(m_source_blocks[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(), block_x * 4, block_y * 4, 4, 4); + { + if (m_params.m_hdr) + { + vec4F* pBlock = m_source_blocks_hdr[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(); + + pSource_image_hdr->extract_block_clamped(pBlock, block_x * 4, block_y * 4, 4, 4); + + // Additional (technically optional) early sanity checking of the block texels. + for (uint32_t i = 0; i < 16; i++) + { + for (uint32_t c = 0; c < 3; c++) + { + float v = pBlock[i][c]; + + if (std::isnan(v) || std::isinf(v) || (v < 0.0f) || (v > basist::MAX_HALF_FLOAT)) + { + error_printf("basis_compressor::extract_source_blocks: invalid float component\n"); + return false; + } + } + } + } + else + { + pSource_image->extract_block_clamped(m_source_blocks[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(), block_x * 4, block_y * 4, 4, 4); + } + } + } } return true; @@ -1304,6 +2280,8 @@ namespace basisu m_output_basis_file = comp_data; uint32_t total_orig_pixels = 0, total_texels = 0, total_orig_texels = 0; + (void)total_texels; + for (uint32_t i = 0; i < m_slice_descs.size(); i++) { const basisu_backend_slice_desc& slice_desc = m_slice_descs[i]; @@ -1335,10 +2313,21 @@ namespace basisu } m_decoded_output_textures.resize(m_slice_descs.size()); - m_decoded_output_textures_unpacked.resize(m_slice_descs.size()); - m_decoded_output_textures_bc7.resize(m_slice_descs.size()); - m_decoded_output_textures_unpacked_bc7.resize(m_slice_descs.size()); + if (m_params.m_hdr) + { + m_decoded_output_textures_bc6h_hdr_unpacked.resize(m_slice_descs.size()); + + m_decoded_output_textures_astc_hdr.resize(m_slice_descs.size()); + m_decoded_output_textures_astc_hdr_unpacked.resize(m_slice_descs.size()); + } + else + { + m_decoded_output_textures_unpacked.resize(m_slice_descs.size()); + + m_decoded_output_textures_bc7.resize(m_slice_descs.size()); + m_decoded_output_textures_unpacked_bc7.resize(m_slice_descs.size()); + } tm.start(); if (m_params.m_pGlobal_codebooks) @@ -1360,12 +2349,16 @@ namespace basisu for (uint32_t i = 0; i < m_slice_descs.size(); i++) { + basisu::texture_format tex_format = m_params.m_hdr ? texture_format::cBC6HUnsigned : (m_params.m_uastc ? texture_format::cUASTC4x4 : texture_format::cETC1); + basist::block_format format = m_params.m_hdr ? basist::block_format::cBC6H : (m_params.m_uastc ? basist::block_format::cUASTC_4x4 : basist::block_format::cETC1); + gpu_image decoded_texture; - decoded_texture.init(m_params.m_uastc ? texture_format::cUASTC4x4 : texture_format::cETC1, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + decoded_texture.init( + tex_format, + m_slice_descs[i].m_width, m_slice_descs[i].m_height); tm.start(); - - basist::block_format format = m_params.m_uastc ? basist::block_format::cUASTC_4x4 : basist::block_format::cETC1; + uint32_t bytes_per_block = m_params.m_uastc ? 16 : 8; if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, @@ -1391,43 +2384,87 @@ namespace basisu m_decoded_output_textures[i] = decoded_texture; } - double total_time_bc7 = 0; + double total_alt_transcode_time = 0; + tm.start(); - if (basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cUASTC4x4) && - basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cETC1S)) + if (m_params.m_hdr) { + assert(basist::basis_is_format_supported(basist::transcoder_texture_format::cTFASTC_HDR_4x4_RGBA, basist::basis_tex_format::cUASTC_HDR_4x4)); + for (uint32_t i = 0; i < m_slice_descs.size(); i++) { gpu_image decoded_texture; - decoded_texture.init(texture_format::cBC7, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + decoded_texture.init(texture_format::cASTC_HDR_4x4, m_slice_descs[i].m_width, m_slice_descs[i].m_height); tm.start(); if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, - reinterpret_cast<etc_block*>(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cBC7, 16)) + reinterpret_cast<basist::astc_blk*>(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cASTC_HDR_4x4, 16)) { - error_printf("Transcoding failed to BC7 on slice %u!\n", i); + error_printf("Transcoding failed to ASTC HDR on slice %u!\n", i); return false; } - - total_time_bc7 += tm.get_elapsed_secs(); - - m_decoded_output_textures_bc7[i] = decoded_texture; + + m_decoded_output_textures_astc_hdr[i] = decoded_texture; + } + } + else + { + if (basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cUASTC4x4) && + basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cETC1S)) + { + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + gpu_image decoded_texture; + decoded_texture.init(texture_format::cBC7, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + + if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, + reinterpret_cast<etc_block*>(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cBC7, 16)) + { + error_printf("Transcoding failed to BC7 on slice %u!\n", i); + return false; + } + + m_decoded_output_textures_bc7[i] = decoded_texture; + } } } + total_alt_transcode_time = tm.get_elapsed_secs(); + for (uint32_t i = 0; i < m_slice_descs.size(); i++) { - m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i]); + if (m_params.m_hdr) + { + // BC6H + bool status = m_decoded_output_textures[i].unpack_hdr(m_decoded_output_textures_bc6h_hdr_unpacked[i]); + assert(status); + BASISU_NOTE_UNUSED(status); + + // ASTC HDR + status = m_decoded_output_textures_astc_hdr[i].unpack_hdr(m_decoded_output_textures_astc_hdr_unpacked[i]); + assert(status); + } + else + { + bool status = m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i]); + assert(status); + BASISU_NOTE_UNUSED(status); - if (m_decoded_output_textures_bc7[i].get_pixel_width()) - m_decoded_output_textures_bc7[i].unpack(m_decoded_output_textures_unpacked_bc7[i]); + if (m_decoded_output_textures_bc7[i].get_pixel_width()) + { + status = m_decoded_output_textures_bc7[i].unpack(m_decoded_output_textures_unpacked_bc7[i]); + assert(status); + } + } } - debug_printf("Transcoded to %s in %3.3fms, %f texels/sec\n", m_params.m_uastc ? "ASTC" : "ETC1", total_time_etc1s_or_astc * 1000.0f, total_orig_pixels / total_time_etc1s_or_astc); + debug_printf("Transcoded to %s in %3.3fms, %f texels/sec\n", + m_params.m_hdr ? "BC6H" : (m_params.m_uastc ? "ASTC" : "ETC1"), + total_time_etc1s_or_astc * 1000.0f, total_orig_pixels / total_time_etc1s_or_astc); - if (total_time_bc7 != 0) - debug_printf("Transcoded to BC7 in %3.3fms, %f texels/sec\n", total_time_bc7 * 1000.0f, total_orig_pixels / total_time_bc7); + if (total_alt_transcode_time != 0) + debug_printf("Alternate transcode in %3.3fms, %f texels/sec\n", total_alt_transcode_time * 1000.0f, total_orig_pixels / total_alt_transcode_time); for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) { @@ -1438,17 +2475,82 @@ namespace basisu assert(m_decoded_output_textures[slice_index].get_total_blocks() == total_blocks); } + } // if (m_params.m_validate_output_data) return true; } + bool basis_compressor::write_hdr_debug_images(const char* pBasename, const imagef& orig_hdr_img, uint32_t width, uint32_t height) + { + // Copy image to account for 4x4 block expansion + imagef hdr_img(orig_hdr_img); + hdr_img.resize(width, height); + + image srgb_img(width, height); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + vec4F p(hdr_img(x, y)); + + p[0] = clamp(p[0], 0.0f, 1.0f); + p[1] = clamp(p[1], 0.0f, 1.0f); + p[2] = clamp(p[2], 0.0f, 1.0f); + + int rc = (int)std::round(linear_to_srgb(p[0]) * 255.0f); + int gc = (int)std::round(linear_to_srgb(p[1]) * 255.0f); + int bc = (int)std::round(linear_to_srgb(p[2]) * 255.0f); + + srgb_img.set_clipped(x, y, color_rgba(rc, gc, bc, 255)); + } + } + + { + const std::string filename(string_format("%s_linear_clamped_to_srgb.png", pBasename)); + save_png(filename.c_str(), srgb_img); + printf("Wrote .PNG file %s\n", filename.c_str()); + } + + { + const std::string filename(string_format("%s_compressive_tonemapped.png", pBasename)); + image compressive_tonemapped_img; + + bool status = tonemap_image_compressive(compressive_tonemapped_img, hdr_img); + if (!status) + { + error_printf("basis_compressor::write_hdr_debug_images: tonemap_image_compressive() failed (invalid half-float input)\n"); + } + else + { + save_png(filename.c_str(), compressive_tonemapped_img); + printf("Wrote .PNG file %s\n", filename.c_str()); + } + } + + image tonemapped_img; + + for (int e = -5; e <= 5; e++) + { + const float scale = powf(2.0f, (float)e); + + tonemap_image_reinhard(tonemapped_img, hdr_img, scale); + + std::string filename(string_format("%s_reinhard_tonemapped_scale_%f.png", pBasename, scale)); + save_png(filename.c_str(), tonemapped_img, cImageSaveIgnoreAlpha); + printf("Wrote .PNG file %s\n", filename.c_str()); + } + + return true; + } + bool basis_compressor::write_output_files_and_compute_stats() { debug_printf("basis_compressor::write_output_files_and_compute_stats\n"); const uint8_vec& comp_data = m_params.m_create_ktx2_file ? m_output_ktx2_file : m_basis_file.get_compressed_data(); - if (m_params.m_write_output_basis_files) + if (m_params.m_write_output_basis_or_ktx2_files) { const std::string& output_filename = m_params.m_out_filename; @@ -1458,7 +2560,7 @@ namespace basisu return false; } - if (m_params.m_status_output) + //if (m_params.m_status_output) { printf("Wrote output .basis/.ktx2 file \"%s\"\n", output_filename.c_str()); } @@ -1485,7 +2587,7 @@ namespace basisu m_basis_bits_per_texel = comp_size * 8.0f / total_texels; - debug_printf(".basis file size: %u, LZ compressed file size: %u, %3.2f bits/texel\n", + debug_printf("Output file size: %u, LZ compressed file size: %u, %3.2f bits/texel\n", (uint32_t)comp_data.size(), (uint32_t)comp_size, m_basis_bits_per_texel); @@ -1495,191 +2597,324 @@ namespace basisu if (m_params.m_validate_output_data) { - for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + if (m_params.m_hdr) { - const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + if (m_params.m_print_stats) + { + printf("ASTC/BC6H half float space error metrics (a piecewise linear approximation of log2 error):\n"); + } - if (m_params.m_compute_stats) + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) { - if (m_params.m_print_stats) - printf("Slice: %u\n", slice_index); + const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; - image_stats& s = m_stats[slice_index]; + if (m_params.m_compute_stats) + { + image_stats& s = m_stats[slice_index]; - // TODO: We used to output SSIM (during heavy encoder development), but this slowed down compression too much. We'll be adding it back. + if (m_params.m_print_stats) + { + printf("Slice: %u\n", slice_index); + } - image_metrics em; + image_metrics im; - // ---- .basis stats - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3); - if (m_params.m_print_stats) - em.print(".basis RGB Avg: "); - s.m_basis_rgb_avg_psnr = em.m_psnr; + if (m_params.m_print_stats) + { + printf("\nASTC channels:\n"); + for (uint32_t i = 0; i < 3; i++) + { + im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], i, 1, true); - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 4); - if (m_params.m_print_stats) - em.print(".basis RGBA Avg: "); - s.m_basis_rgba_avg_psnr = em.m_psnr; + printf("%c: ", "RGB"[i]); + im.print_hp(); + } - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 1); - if (m_params.m_print_stats) - em.print(".basis R Avg: "); + printf("BC6H channels:\n"); + for (uint32_t i = 0; i < 3; i++) + { + im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], i, 1, true); - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 1, 1); - if (m_params.m_print_stats) - em.print(".basis G Avg: "); + printf("%c: ", "RGB"[i]); + im.print_hp(); + } + } - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 2, 1); - if (m_params.m_print_stats) - em.print(".basis B Avg: "); + im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], 0, 3, true); + s.m_basis_rgb_avg_psnr = (float)im.m_psnr; - if (m_params.m_uastc) - { - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 3, 1); if (m_params.m_print_stats) - em.print(".basis A Avg: "); + { + printf("\nASTC RGB: "); + im.print_hp(); +#if 0 + // Validation + im.calc_half2(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], 0, 3, true); + printf("\nASTC RGB (Alt): "); + im.print_hp(); +#endif + } - s.m_basis_a_avg_psnr = em.m_psnr; + im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], 0, 3, true); + s.m_basis_rgb_avg_bc6h_psnr = (float)im.m_psnr; + + if (m_params.m_print_stats) + { + printf("BC6H RGB: "); + im.print_hp(); + printf("\n"); + } } + + if (m_params.m_debug_images) + { + std::string out_basename; + if (m_params.m_out_filename.size()) + string_get_filename(m_params.m_out_filename.c_str(), out_basename); + else if (m_params.m_source_filenames.size()) + string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename); - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0); - if (m_params.m_print_stats) - em.print(".basis 709 Luma: "); - s.m_basis_luma_709_psnr = static_cast<float>(em.m_psnr); - s.m_basis_luma_709_ssim = static_cast<float>(em.m_ssim); + string_remove_extension(out_basename); + out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index); - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0, true, true); - if (m_params.m_print_stats) - em.print(".basis 601 Luma: "); - s.m_basis_luma_601_psnr = static_cast<float>(em.m_psnr); + // Write BC6H .DDS file. + { + gpu_image bc6h_tex(m_decoded_output_textures[slice_index]); + bc6h_tex.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + + std::string filename(out_basename + "_bc6h.dds"); + write_compressed_texture_file(filename.c_str(), bc6h_tex, true); + printf("Wrote .DDS file %s\n", filename.c_str()); + } - if (m_slice_descs.size() == 1) - { - const uint32_t output_size = comp_size ? (uint32_t)comp_size : (uint32_t)comp_data.size(); - if (m_params.m_print_stats) + // Write ASTC .KTX/.astc files. ("astcenc -dh input.astc output.exr" to decode the astc file.) + { + gpu_image astc_tex(m_decoded_output_textures_astc_hdr[slice_index]); + astc_tex.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + + std::string filename1(out_basename + "_astc.astc"); + write_astc_file(filename1.c_str(), astc_tex.get_ptr(), 4, 4, slice_desc.m_orig_width, slice_desc.m_orig_height); + printf("Wrote .ASTC file %s\n", filename1.c_str()); + + std::string filename2(out_basename + "_astc.ktx"); + write_compressed_texture_file(filename2.c_str(), astc_tex, true); + printf("Wrote .KTX file %s\n", filename2.c_str()); + } + + // Write unpacked ASTC image to .EXR { - debug_printf(".basis RGB PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_rgb_avg_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); - debug_printf(".basis Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_luma_709_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); + imagef astc_img(m_decoded_output_textures_astc_hdr_unpacked[slice_index]); + astc_img.resize(slice_desc.m_orig_width, slice_desc.m_orig_height); + + std::string filename(out_basename + "_unpacked_astc.exr"); + write_exr(filename.c_str(), astc_img, 3, 0); + printf("Wrote .EXR file %s\n", filename.c_str()); } + + // Write unpacked BC6H image to .EXR + { + imagef bc6h_img(m_decoded_output_textures_bc6h_hdr_unpacked[slice_index]); + bc6h_img.resize(slice_desc.m_orig_width, slice_desc.m_orig_height); + + std::string filename(out_basename + "_unpacked_bc6h.exr"); + write_exr(filename.c_str(), bc6h_img, 3, 0); + printf("Wrote .EXR file %s\n", filename.c_str()); + } + + // Write tonemapped/srgb images + write_hdr_debug_images((out_basename + "_source").c_str(), m_slice_images_hdr[slice_index], slice_desc.m_orig_width, slice_desc.m_orig_height); + write_hdr_debug_images((out_basename + "_unpacked_astc").c_str(), m_decoded_output_textures_astc_hdr_unpacked[slice_index], slice_desc.m_orig_width, slice_desc.m_orig_height); + write_hdr_debug_images((out_basename + "_unpacked_bc6h").c_str(), m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], slice_desc.m_orig_width, slice_desc.m_orig_height); } + } + } + else + { + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; - if (m_decoded_output_textures_unpacked_bc7[slice_index].get_width()) + if (m_params.m_compute_stats) { - // ---- BC7 stats - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 3); if (m_params.m_print_stats) - em.print("BC7 RGB Avg: "); - s.m_bc7_rgb_avg_psnr = em.m_psnr; + printf("Slice: %u\n", slice_index); + + image_stats& s = m_stats[slice_index]; + + image_metrics em; - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 4); + // ---- .basis stats + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3); if (m_params.m_print_stats) - em.print("BC7 RGBA Avg: "); - s.m_bc7_rgba_avg_psnr = em.m_psnr; + em.print(".basis RGB Avg: "); + s.m_basis_rgb_avg_psnr = (float)em.m_psnr; - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 1); + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 4); if (m_params.m_print_stats) - em.print("BC7 R Avg: "); + em.print(".basis RGBA Avg: "); + s.m_basis_rgba_avg_psnr = (float)em.m_psnr; - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 1, 1); + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 1); if (m_params.m_print_stats) - em.print("BC7 G Avg: "); + em.print(".basis R Avg: "); - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 2, 1); + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 1, 1); if (m_params.m_print_stats) - em.print("BC7 B Avg: "); + em.print(".basis G Avg: "); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 2, 1); + if (m_params.m_print_stats) + em.print(".basis B Avg: "); if (m_params.m_uastc) { - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 3, 1); + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 3, 1); if (m_params.m_print_stats) - em.print("BC7 A Avg: "); + em.print(".basis A Avg: "); - s.m_bc7_a_avg_psnr = em.m_psnr; + s.m_basis_a_avg_psnr = (float)em.m_psnr; } - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0); + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0); if (m_params.m_print_stats) - em.print("BC7 709 Luma: "); - s.m_bc7_luma_709_psnr = static_cast<float>(em.m_psnr); - s.m_bc7_luma_709_ssim = static_cast<float>(em.m_ssim); + em.print(".basis 709 Luma: "); + s.m_basis_luma_709_psnr = static_cast<float>(em.m_psnr); + s.m_basis_luma_709_ssim = static_cast<float>(em.m_ssim); - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0, true, true); + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0, true, true); if (m_params.m_print_stats) - em.print("BC7 601 Luma: "); - s.m_bc7_luma_601_psnr = static_cast<float>(em.m_psnr); - } + em.print(".basis 601 Luma: "); + s.m_basis_luma_601_psnr = static_cast<float>(em.m_psnr); - if (!m_params.m_uastc) - { - // ---- Nearly best possible ETC1S stats - em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 3); - if (m_params.m_print_stats) - em.print("Unquantized ETC1S RGB Avg: "); - s.m_best_etc1s_rgb_avg_psnr = static_cast<float>(em.m_psnr); + if (m_slice_descs.size() == 1) + { + const uint32_t output_size = comp_size ? (uint32_t)comp_size : (uint32_t)comp_data.size(); + if (m_params.m_print_stats) + { + debug_printf(".basis RGB PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_rgb_avg_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); + debug_printf(".basis Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_luma_709_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); + } + } - em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0); - if (m_params.m_print_stats) - em.print("Unquantized ETC1S 709 Luma: "); - s.m_best_etc1s_luma_709_psnr = static_cast<float>(em.m_psnr); - s.m_best_etc1s_luma_709_ssim = static_cast<float>(em.m_ssim); + if (m_decoded_output_textures_unpacked_bc7[slice_index].get_width()) + { + // ---- BC7 stats + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 3); + //if (m_params.m_print_stats) + // em.print("BC7 RGB Avg: "); + s.m_bc7_rgb_avg_psnr = (float)em.m_psnr; + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 4); + //if (m_params.m_print_stats) + // em.print("BC7 RGBA Avg: "); + s.m_bc7_rgba_avg_psnr = (float)em.m_psnr; + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 1); + //if (m_params.m_print_stats) + // em.print("BC7 R Avg: "); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 1, 1); + //if (m_params.m_print_stats) + // em.print("BC7 G Avg: "); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 2, 1); + //if (m_params.m_print_stats) + // em.print("BC7 B Avg: "); + + if (m_params.m_uastc) + { + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 3, 1); + //if (m_params.m_print_stats) + // em.print("BC7 A Avg: "); - em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0, true, true); - if (m_params.m_print_stats) - em.print("Unquantized ETC1S 601 Luma: "); - s.m_best_etc1s_luma_601_psnr = static_cast<float>(em.m_psnr); + s.m_bc7_a_avg_psnr = (float)em.m_psnr; + } + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0); + //if (m_params.m_print_stats) + // em.print("BC7 709 Luma: "); + s.m_bc7_luma_709_psnr = static_cast<float>(em.m_psnr); + s.m_bc7_luma_709_ssim = static_cast<float>(em.m_ssim); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0, true, true); + //if (m_params.m_print_stats) + // em.print("BC7 601 Luma: "); + s.m_bc7_luma_601_psnr = static_cast<float>(em.m_psnr); + } + + if (!m_params.m_uastc) + { + // ---- Nearly best possible ETC1S stats + em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 3); + //if (m_params.m_print_stats) + // em.print("Unquantized ETC1S RGB Avg: "); + s.m_best_etc1s_rgb_avg_psnr = static_cast<float>(em.m_psnr); + + em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0); + //if (m_params.m_print_stats) + // em.print("Unquantized ETC1S 709 Luma: "); + s.m_best_etc1s_luma_709_psnr = static_cast<float>(em.m_psnr); + s.m_best_etc1s_luma_709_ssim = static_cast<float>(em.m_ssim); + + em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0, true, true); + //if (m_params.m_print_stats) + // em.print("Unquantized ETC1S 601 Luma: "); + s.m_best_etc1s_luma_601_psnr = static_cast<float>(em.m_psnr); + } } - } - std::string out_basename; - if (m_params.m_out_filename.size()) - string_get_filename(m_params.m_out_filename.c_str(), out_basename); - else if (m_params.m_source_filenames.size()) - string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename); + std::string out_basename; + if (m_params.m_out_filename.size()) + string_get_filename(m_params.m_out_filename.c_str(), out_basename); + else if (m_params.m_source_filenames.size()) + string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename); - string_remove_extension(out_basename); - out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index); + string_remove_extension(out_basename); + out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index); - if ((!m_params.m_uastc) && (m_frontend.get_params().m_debug_images)) - { - // Write "best" ETC1S debug images - if (!m_params.m_uastc) + if ((!m_params.m_uastc) && (m_frontend.get_params().m_debug_images)) { - gpu_image best_etc1s_gpu_image(m_best_etc1s_images[slice_index]); - best_etc1s_gpu_image.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); - write_compressed_texture_file((out_basename + "_best_etc1s.ktx").c_str(), best_etc1s_gpu_image); + // Write "best" ETC1S debug images + if (!m_params.m_uastc) + { + gpu_image best_etc1s_gpu_image(m_best_etc1s_images[slice_index]); + best_etc1s_gpu_image.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file((out_basename + "_best_etc1s.ktx").c_str(), best_etc1s_gpu_image, true); - image best_etc1s_unpacked; - best_etc1s_gpu_image.unpack(best_etc1s_unpacked); - save_png(out_basename + "_best_etc1s.png", best_etc1s_unpacked); + image best_etc1s_unpacked; + best_etc1s_gpu_image.unpack(best_etc1s_unpacked); + save_png(out_basename + "_best_etc1s.png", best_etc1s_unpacked); + } } - } - if (m_params.m_debug_images) - { - // Write decoded ETC1S/ASTC debug images + if (m_params.m_debug_images) { - gpu_image decoded_etc1s_or_astc(m_decoded_output_textures[slice_index]); - decoded_etc1s_or_astc.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); - write_compressed_texture_file((out_basename + "_transcoded_etc1s_or_astc.ktx").c_str(), decoded_etc1s_or_astc); + // Write decoded ETC1S/ASTC debug images + { + gpu_image decoded_etc1s_or_astc(m_decoded_output_textures[slice_index]); + decoded_etc1s_or_astc.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file((out_basename + "_transcoded_etc1s_or_astc.ktx").c_str(), decoded_etc1s_or_astc, true); - image temp(m_decoded_output_textures_unpacked[slice_index]); - temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); - save_png(out_basename + "_transcoded_etc1s_or_astc.png", temp); - } + image temp(m_decoded_output_textures_unpacked[slice_index]); + temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); + save_png(out_basename + "_transcoded_etc1s_or_astc.png", temp); + } - // Write decoded BC7 debug images - if (m_decoded_output_textures_bc7[slice_index].get_pixel_width()) - { - gpu_image decoded_bc7(m_decoded_output_textures_bc7[slice_index]); - decoded_bc7.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); - write_compressed_texture_file((out_basename + "_transcoded_bc7.ktx").c_str(), decoded_bc7); + // Write decoded BC7 debug images + if (m_decoded_output_textures_bc7[slice_index].get_pixel_width()) + { + gpu_image decoded_bc7(m_decoded_output_textures_bc7[slice_index]); + decoded_bc7.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file((out_basename + "_transcoded_bc7.ktx").c_str(), decoded_bc7, true); - image temp(m_decoded_output_textures_unpacked_bc7[slice_index]); - temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); - save_png(out_basename + "_transcoded_bc7.png", temp); + image temp(m_decoded_output_textures_unpacked_bc7[slice_index]); + temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); + save_png(out_basename + "_transcoded_bc7.png", temp); + } } } - } + } // if (m_params.m_hdr) + } // if (m_params.m_validate_output_data) return true; @@ -1727,10 +2962,27 @@ namespace basisu } static uint8_t g_ktx2_etc1s_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; - static uint8_t g_ktx2_etc1s_alpha_dfd[60] = { 0x3C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x38,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF,0x40,0x0,0x3F,0xF,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + static uint8_t g_ktx2_etc1s_alpha_dfd[60] = { 0x3C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x38,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF,0x40,0x0,0x3F,0xF,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + static uint8_t g_ktx2_uastc_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x4,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; - static uint8_t g_ktx2_uastc_alpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; - + static uint8_t g_ktx2_uastc_alpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + + // HDR TODO - what is the best Khronos DFD to use for UASTC HDR? + static uint8_t g_ktx2_uastc_hdr_nonalpha_dfd[44] = + { + 0x2C,0x0,0x0,0x0, // 0 totalSize + 0x0,0x0,0x0,0x0, // 1 descriptorType/vendorId + 0x2,0x0,0x28,0x0, // 2 descriptorBlockSize/versionNumber + 0xA7,0x1,0x1,0x0, // 3 flags, transferFunction, colorPrimaries, colorModel + 0x3,0x3,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension3 + 0x10,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane3 + 0x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane7 + 0x0,0x0,0x7F,0x80, // 7 bitLength/bitOffset/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.) + 0x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition3 + 0x0,0x0,0x0,0x0, // 9 sampleLower (0.0) + 0x00, 0x00, 0x80, 0x3F // 10 sampleHigher (1.0) + }; + void basis_compressor::get_dfd(uint8_vec &dfd, const basist::ktx2_header &header) { const uint8_t* pDFD; @@ -1738,7 +2990,12 @@ namespace basisu if (m_params.m_uastc) { - if (m_any_source_image_has_alpha) + if (m_params.m_hdr) + { + pDFD = g_ktx2_uastc_hdr_nonalpha_dfd; + dfd_len = sizeof(g_ktx2_uastc_hdr_nonalpha_dfd); + } + else if (m_any_source_image_has_alpha) { pDFD = g_ktx2_uastc_alpha_dfd; dfd_len = sizeof(g_ktx2_uastc_alpha_dfd); @@ -1772,10 +3029,18 @@ namespace basisu dfd_bits &= ~(0xFF << 16); - if (m_params.m_ktx2_srgb_transfer_func) - dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_SRGB << 16); - else + if (m_params.m_hdr) + { + // TODO: In HDR mode, always write linear for now. dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_LINEAR << 16); + } + else + { + if (m_params.m_ktx2_srgb_transfer_func) + dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_SRGB << 16); + else + dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_LINEAR << 16); + } basisu::write_le_dword(dfd.data() + 3 * sizeof(uint32_t), dfd_bits); @@ -1850,7 +3115,12 @@ namespace basisu header.m_pixel_width = base_width; header.m_pixel_height = base_height; header.m_face_count = total_faces; - header.m_vk_format = basist::KTX2_VK_FORMAT_UNDEFINED; + + if (m_params.m_hdr) + header.m_vk_format = basist::KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK; + else + header.m_vk_format = basist::KTX2_VK_FORMAT_UNDEFINED; + header.m_type_size = 1; header.m_level_count = total_levels; header.m_layer_count = (total_layers > 1) ? total_layers : 0; @@ -2061,7 +3331,8 @@ namespace basisu if (bytes_needed_to_pad < 6) bytes_needed_to_pad += 16; - printf("WARNING: Due to a KTX2 validator bug related to mipPadding, we must insert a dummy key into the KTX2 file of %u bytes\n", bytes_needed_to_pad); + // Just add the padding. It's likely not necessary anymore, but can't really hurt. + //printf("WARNING: Due to a KTX2 validator bug related to mipPadding, we must insert a dummy key into the KTX2 file of %u bytes\n", bytes_needed_to_pad); // We're not good - need to add a dummy key large enough to force file alignment so the mip level array gets aligned. // We can't just add some bytes before the mip level array because ktx2check will see that as extra data in the file that shouldn't be there in ktxValidator::validateDataSize(). @@ -2258,18 +3529,34 @@ namespace basisu return result; } - void* basis_compress( - const basisu::vector<image>& source_images, + static void* basis_compress( + const basisu::vector<image> *pSource_images, + const basisu::vector<imagef> *pSource_images_hdr, uint32_t flags_and_quality, float uastc_rdo_quality, size_t* pSize, image_stats* pStats) { + assert((pSource_images != nullptr) || (pSource_images_hdr != nullptr)); + assert(!((pSource_images != nullptr) && (pSource_images_hdr != nullptr))); + // Check input parameters - if ((!source_images.size()) || (!pSize)) + if (pSource_images) { - error_printf("basis_compress: Invalid parameter\n"); - assert(0); - return nullptr; + if ((!pSource_images->size()) || (!pSize)) + { + error_printf("basis_compress: Invalid parameter\n"); + assert(0); + return nullptr; + } + } + else + { + if ((!pSource_images_hdr->size()) || (!pSize)) + { + error_printf("basis_compress: Invalid parameter\n"); + assert(0); + return nullptr; + } } *pSize = 0; @@ -2287,40 +3574,70 @@ namespace basisu comp_params.m_y_flip = (flags_and_quality & cFlagYFlip) != 0; comp_params.m_debug = (flags_and_quality & cFlagDebug) != 0; - + comp_params.m_debug_images = (flags_and_quality & cFlagDebugImages) != 0; + // Copy the largest mipmap level - comp_params.m_source_images.resize(1); - comp_params.m_source_images[0] = source_images[0]; + if (pSource_images) + { + comp_params.m_source_images.resize(1); + comp_params.m_source_images[0] = (*pSource_images)[0]; + + // Copy the smaller mipmap levels, if any + if (pSource_images->size() > 1) + { + comp_params.m_source_mipmap_images.resize(1); + comp_params.m_source_mipmap_images[0].resize(pSource_images->size() - 1); - // Copy the smaller mipmap levels, if any - if (source_images.size() > 1) + for (uint32_t i = 1; i < pSource_images->size(); i++) + comp_params.m_source_mipmap_images[0][i - 1] = (*pSource_images)[i]; + } + } + else { - comp_params.m_source_mipmap_images.resize(1); - comp_params.m_source_mipmap_images[0].resize(source_images.size() - 1); + comp_params.m_source_images_hdr.resize(1); + comp_params.m_source_images_hdr[0] = (*pSource_images_hdr)[0]; - for (uint32_t i = 1; i < source_images.size(); i++) - comp_params.m_source_mipmap_images[0][i - 1] = source_images[i]; + // Copy the smaller mipmap levels, if any + if (pSource_images_hdr->size() > 1) + { + comp_params.m_source_mipmap_images_hdr.resize(1); + comp_params.m_source_mipmap_images_hdr[0].resize(pSource_images_hdr->size() - 1); + + for (uint32_t i = 1; i < pSource_images->size(); i++) + comp_params.m_source_mipmap_images_hdr[0][i - 1] = (*pSource_images_hdr)[i]; + } } comp_params.m_multithreading = (flags_and_quality & cFlagThreaded) != 0; comp_params.m_use_opencl = (flags_and_quality & cFlagUseOpenCL) != 0; - comp_params.m_write_output_basis_files = false; + comp_params.m_write_output_basis_or_ktx2_files = false; comp_params.m_perceptual = (flags_and_quality & cFlagSRGB) != 0; comp_params.m_mip_srgb = comp_params.m_perceptual; comp_params.m_mip_gen = (flags_and_quality & (cFlagGenMipsWrap | cFlagGenMipsClamp)) != 0; comp_params.m_mip_wrapping = (flags_and_quality & cFlagGenMipsWrap) != 0; - comp_params.m_uastc = (flags_and_quality & cFlagUASTC) != 0; - if (comp_params.m_uastc) + if ((pSource_images_hdr) || (flags_and_quality & cFlagHDR)) { - comp_params.m_pack_uastc_flags = flags_and_quality & cPackUASTCLevelMask; - comp_params.m_rdo_uastc = (flags_and_quality & cFlagUASTCRDO) != 0; - comp_params.m_rdo_uastc_quality_scalar = uastc_rdo_quality; + // In UASTC HDR mode, the compressor will jam this to true anyway. + // And there's no need to set UASTC LDR or ETC1S options. + comp_params.m_uastc = true; } else - comp_params.m_quality_level = basisu::maximum<uint32_t>(1, flags_and_quality & 255); + { + comp_params.m_uastc = (flags_and_quality & cFlagUASTC) != 0; + if (comp_params.m_uastc) + { + comp_params.m_pack_uastc_flags = flags_and_quality & cPackUASTCLevelMask; + comp_params.m_rdo_uastc = (flags_and_quality & cFlagUASTCRDO) != 0; + comp_params.m_rdo_uastc_quality_scalar = uastc_rdo_quality; + } + else + { + comp_params.m_quality_level = basisu::maximum<uint32_t>(1, flags_and_quality & 255); + } + } comp_params.m_create_ktx2_file = (flags_and_quality & cFlagKTX2) != 0; @@ -2337,6 +3654,15 @@ namespace basisu comp_params.m_print_stats = (flags_and_quality & cFlagPrintStats) != 0; comp_params.m_status_output = (flags_and_quality & cFlagPrintStatus) != 0; + if ((flags_and_quality & cFlagHDR) || (pSource_images_hdr)) + { + comp_params.m_hdr = true; + comp_params.m_uastc_hdr_options.set_quality_level(flags_and_quality & cPackUASTCLevelMask); + } + + if (flags_and_quality & cFlagHDRLDRImageSRGBToLinearConversion) + comp_params.m_hdr_ldr_srgb_to_linear_conversion = true; + // Create the compressor, initialize it, and process the input basis_compressor comp; if (!comp.init(comp_params)) @@ -2381,6 +3707,24 @@ namespace basisu } void* basis_compress( + const basisu::vector<image>& source_images, + uint32_t flags_and_quality, float uastc_rdo_quality, + size_t* pSize, + image_stats* pStats) + { + return basis_compress(&source_images, nullptr, flags_and_quality, uastc_rdo_quality, pSize, pStats); + } + + void* basis_compress( + const basisu::vector<imagef>& source_images_hdr, + uint32_t flags_and_quality, + size_t* pSize, + image_stats* pStats) + { + return basis_compress(nullptr, &source_images_hdr, flags_and_quality, 0.0f, pSize, pStats); + } + + void* basis_compress( const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels, uint32_t flags_and_quality, float uastc_rdo_quality, size_t* pSize, diff --git a/thirdparty/basis_universal/encoder/basisu_comp.h b/thirdparty/basis_universal/encoder/basisu_comp.h index b6c9fef9e2..1cc75fc8a3 100644 --- a/thirdparty/basis_universal/encoder/basisu_comp.h +++ b/thirdparty/basis_universal/encoder/basisu_comp.h @@ -1,5 +1,5 @@ // basisu_comp.h -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,9 +18,10 @@ #include "basisu_basis_file.h" #include "../transcoder/basisu_transcoder.h" #include "basisu_uastc_enc.h" +#include "basisu_astc_hdr_enc.h" -#define BASISU_LIB_VERSION 116 -#define BASISU_LIB_VERSION_STRING "1.16" +#define BASISU_LIB_VERSION 150 +#define BASISU_LIB_VERSION_STRING "1.50" #ifndef BASISD_SUPPORT_KTX2 #error BASISD_SUPPORT_KTX2 is undefined @@ -81,6 +82,8 @@ namespace basisu m_basis_luma_601_psnr = 0.0f; m_basis_luma_709_ssim = 0.0f; + m_basis_rgb_avg_bc6h_psnr = 0.0f; + m_bc7_rgb_avg_psnr = 0.0f; m_bc7_rgba_avg_psnr = 0.0f; m_bc7_a_avg_psnr = 0.0f; @@ -100,7 +103,7 @@ namespace basisu uint32_t m_width; uint32_t m_height; - // .basis compressed (ETC1S or UASTC statistics) + // .basis/.ktx2 compressed (LDR: ETC1S or UASTC statistics, HDR: transcoded BC6H statistics) float m_basis_rgb_avg_psnr; float m_basis_rgba_avg_psnr; float m_basis_a_avg_psnr; @@ -108,7 +111,10 @@ namespace basisu float m_basis_luma_601_psnr; float m_basis_luma_709_ssim; - // BC7 statistics + // UASTC HDR only. + float m_basis_rgb_avg_bc6h_psnr; + + // LDR: BC7 statistics float m_bc7_rgb_avg_psnr; float m_bc7_rgba_avg_psnr; float m_bc7_a_avg_psnr; @@ -116,7 +122,7 @@ namespace basisu float m_bc7_luma_601_psnr; float m_bc7_luma_709_ssim; - // Highest achievable quality ETC1S statistics + // LDR: Highest achievable quality ETC1S statistics float m_best_etc1s_rgb_avg_psnr; float m_best_etc1s_luma_709_psnr; float m_best_etc1s_luma_601_psnr; @@ -256,7 +262,7 @@ namespace basisu m_no_selector_rdo.clear(); m_selector_rdo_thresh.clear(); m_read_source_images.clear(); - m_write_output_basis_files.clear(); + m_write_output_basis_or_ktx2_files.clear(); m_compression_level.clear(); m_compute_stats.clear(); m_print_stats.clear(); @@ -317,27 +323,38 @@ namespace basisu m_validate_output_data.clear(); + m_hdr_ldr_srgb_to_linear_conversion.clear(); + + m_hdr_favor_astc.clear(); + m_pJob_pool = nullptr; } - // True to generate UASTC .basis file data, otherwise ETC1S. + // True to generate UASTC .basis/.KTX2 file data, otherwise ETC1S. bool_param<false> m_uastc; + // Set m_hdr to true to switch to UASTC HDR mode. + bool_param<false> m_hdr; + bool_param<false> m_use_opencl; - // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG images to read. - // Otherwise, the compressor processes the images in m_source_images. + // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG etc. images to read. + // Otherwise, the compressor processes the images in m_source_images or m_source_images_hdr. basisu::vector<std::string> m_source_filenames; basisu::vector<std::string> m_source_alpha_filenames; basisu::vector<image> m_source_images; + basisu::vector<imagef> m_source_images_hdr; + // Stores mipmaps starting from level 1. Level 0 is still stored in m_source_images, as usual. // If m_source_mipmaps isn't empty, automatic mipmap generation isn't done. m_source_mipmaps.size() MUST equal m_source_images.size() or the compressor returns an error. // The compressor applies the user-provided swizzling (in m_swizzle) to these images. basisu::vector< basisu::vector<image> > m_source_mipmap_images; + + basisu::vector< basisu::vector<imagef> > m_source_mipmap_images_hdr; - // Filename of the output basis file + // Filename of the output basis/ktx2 file std::string m_out_filename; // The params are done this way so we can detect when the user has explictly changed them. @@ -373,8 +390,8 @@ namespace basisu // Read source images from m_source_filenames/m_source_alpha_filenames bool_param<false> m_read_source_images; - // Write the output basis file to disk using m_out_filename - bool_param<false> m_write_output_basis_files; + // Write the output basis/ktx2 file to disk using m_out_filename + bool_param<false> m_write_output_basis_or_ktx2_files; // Compute and display image metrics bool_param<false> m_compute_stats; @@ -382,15 +399,15 @@ namespace basisu // Print stats to stdout, if m_compute_stats is true. bool_param<true> m_print_stats; - // Check to see if any input image has an alpha channel, if so then the output basis file will have alpha channels + // Check to see if any input image has an alpha channel, if so then the output basis/ktx2 file will have alpha channels bool_param<true> m_check_for_alpha; - // Always put alpha slices in the output basis file, even when the input doesn't have alpha + // Always put alpha slices in the output basis/ktx2 file, even when the input doesn't have alpha bool_param<false> m_force_alpha; bool_param<true> m_multithreading; - // Split the R channel to RGB and the G channel to alpha, then write a basis file with alpha channels - char m_swizzle[4]; + // Split the R channel to RGB and the G channel to alpha, then write a basis/ktx2 file with alpha channels + uint8_t m_swizzle[4]; bool_param<false> m_renormalize; @@ -448,8 +465,17 @@ namespace basisu param<int> m_ktx2_zstd_supercompression_level; bool_param<false> m_ktx2_srgb_transfer_func; + astc_hdr_codec_options m_uastc_hdr_options; + bool_param<false> m_validate_output_data; + // If true, LDR images (such as PNG) will be converted to normalized [0,1] linear light (via a sRGB->Linear conversion) and then processed as HDR. + // Otherwise, LDR images will be processed as HDR as-is. + bool_param<true> m_hdr_ldr_srgb_to_linear_conversion; + + // If true, ASTC HDR quality is favored more than BC6H quality. Otherwise it's a rough balance. + bool_param<false> m_hdr_favor_astc; + job_pool *m_pJob_pool; }; @@ -504,6 +530,7 @@ namespace basisu opencl_context_ptr m_pOpenCL_context; basisu::vector<image> m_slice_images; + basisu::vector<imagef> m_slice_images_hdr; basisu::vector<image_stats> m_stats; @@ -515,7 +542,9 @@ namespace basisu uint32_t m_total_blocks; basisu_frontend m_frontend; + pixel_block_vec m_source_blocks; + pixel_block_hdr_vec m_source_blocks_hdr; basisu::vector<gpu_image> m_frontend_output_textures; @@ -526,11 +555,17 @@ namespace basisu basisu_file m_basis_file; - basisu::vector<gpu_image> m_decoded_output_textures; + basisu::vector<gpu_image> m_decoded_output_textures; // BC6H in HDR mode basisu::vector<image> m_decoded_output_textures_unpacked; + basisu::vector<gpu_image> m_decoded_output_textures_bc7; basisu::vector<image> m_decoded_output_textures_unpacked_bc7; + basisu::vector<imagef> m_decoded_output_textures_bc6h_hdr_unpacked; // BC6H in HDR mode + + basisu::vector<gpu_image> m_decoded_output_textures_astc_hdr; + basisu::vector<imagef> m_decoded_output_textures_astc_hdr_unpacked; + uint8_vec m_output_basis_file; uint8_vec m_output_ktx2_file; @@ -541,14 +576,21 @@ namespace basisu bool m_opencl_failed; + void check_for_hdr_inputs(); + bool sanity_check_input_params(); + void clean_hdr_image(imagef& src_img); + bool read_dds_source_images(); bool read_source_images(); bool extract_source_blocks(); bool process_frontend(); bool extract_frontend_texture_data(); bool process_backend(); bool create_basis_file_and_transcode(); + bool write_hdr_debug_images(const char* pBasename, const imagef& img, uint32_t width, uint32_t height); bool write_output_files_and_compute_stats(); + error_code encode_slices_to_uastc_hdr(); error_code encode_slices_to_uastc(); + bool generate_mipmaps(const imagef& img, basisu::vector<imagef>& mips, bool has_alpha); bool generate_mipmaps(const image &img, basisu::vector<image> &mips, bool has_alpha); bool validate_texture_type_constraints(); bool validate_ktx2_constraints(); @@ -568,7 +610,8 @@ namespace basisu // // flags_and_quality: Combination of the above flags logically OR'd with the ETC1S or UASTC level, i.e. "cFlagSRGB | cFlagGenMipsClamp | cFlagThreaded | 128" or "cFlagSRGB | cFlagGenMipsClamp | cFlagUASTC | cFlagThreaded | cPackUASTCLevelDefault". // In ETC1S mode, the lower 8-bits are the ETC1S quality level which ranges from [1,255] (higher=better quality/larger files) - // In UASTC mode, the lower 8-bits are the UASTC pack level (see cPackUASTCLevelFastest, etc.). Fastest/lowest quality is 0, so be sure to set it correctly. + // In UASTC mode, the lower 8-bits are the UASTC LDR/HDR pack level (see cPackUASTCLevelFastest, etc.). Fastest/lowest quality is 0, so be sure to set it correctly. Valid values are [0,4] for both LDR/HDR. + // In UASTC mode, be sure to set this, otherwise it defaults to 0 (fastest/lowest quality). // // uastc_rdo_quality: Float UASTC RDO quality level (0=no change, higher values lower quality but increase compressibility, initially try .5-1.5) // @@ -594,20 +637,36 @@ namespace basisu cFlagUASTCRDO = 1 << 18, // use RDO postprocessing when generating UASTC files (must set uastc_rdo_quality to the quality scalar) cFlagPrintStats = 1 << 19, // print image stats to stdout - cFlagPrintStatus = 1 << 20 // print status to stdout + cFlagPrintStatus = 1 << 20, // print status to stdout + + cFlagHDR = 1 << 21, // Force encoder into HDR mode, even if source image is LDR. + cFlagHDRLDRImageSRGBToLinearConversion = 1 << 22, // In HDR mode, convert LDR source images to linear before encoding. + + cFlagDebugImages = 1 << 23 // enable status output }; // This function accepts an array of source images. // If more than one image is provided, it's assumed the images form a mipmap pyramid and automatic mipmap generation is disabled. - // Returns a pointer to the compressed .basis or .ktx2 file data. *pSize is the size of the compressed data. The returned block must be freed using basis_free_data(). + // Returns a pointer to the compressed .basis or .ktx2 file data. *pSize is the size of the compressed data. + // Important: The returned block MUST be manually freed using basis_free_data(). // basisu_encoder_init() MUST be called first! + // LDR version. To compress the LDR source image as HDR: Use the cFlagHDR flag. void* basis_compress( const basisu::vector<image> &source_images, uint32_t flags_and_quality, float uastc_rdo_quality, size_t* pSize, image_stats* pStats = nullptr); - // This function only accepts a single source image. + // HDR-only version. + // Important: The returned block MUST be manually freed using basis_free_data(). + void* basis_compress( + const basisu::vector<imagef>& source_images_hdr, + uint32_t flags_and_quality, + size_t* pSize, + image_stats* pStats = nullptr); + + // This function only accepts a single LDR source image. It's just a wrapper for basis_compress() above. + // Important: The returned block MUST be manually freed using basis_free_data(). void* basis_compress( const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels, uint32_t flags_and_quality, float uastc_rdo_quality, @@ -615,6 +674,7 @@ namespace basisu image_stats* pStats = nullptr); // Frees the dynamically allocated file data returned by basis_compress(). + // This MUST be called on the pointer returned by basis_compress() when you're done with it. void basis_free_data(void* p); // Runs a short benchmark using synthetic image data to time OpenCL encoding vs. CPU encoding, with multithreading enabled. diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp index e87dd636a2..fff98e8301 100644 --- a/thirdparty/basis_universal/encoder/basisu_enc.cpp +++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp @@ -1,5 +1,5 @@ // basisu_enc.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,10 +21,20 @@ #include "jpgd.h" #include "pvpngreader.h" #include "basisu_opencl.h" +#include "basisu_astc_hdr_enc.h" #include <vector> +#ifndef TINYEXR_USE_ZFP +#define TINYEXR_USE_ZFP (1) +#endif +#include <tinyexr.h> + +#ifndef MINIZ_HEADER_FILE_ONLY #define MINIZ_HEADER_FILE_ONLY +#endif +#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES #define MINIZ_NO_ZLIB_COMPATIBLE_NAMES +#endif #include "basisu_miniz.h" #if defined(_WIN32) @@ -165,14 +175,14 @@ namespace basisu bool g_library_initialized; std::mutex g_encoder_init_mutex; - + // Encoder library initialization (just call once at startup) - void basisu_encoder_init(bool use_opencl, bool opencl_force_serialization) + bool basisu_encoder_init(bool use_opencl, bool opencl_force_serialization) { std::lock_guard<std::mutex> lock(g_encoder_init_mutex); if (g_library_initialized) - return; + return true; detect_sse41(); @@ -189,7 +199,11 @@ namespace basisu interval_timer::init(); // make sure interval_timer globals are initialized from main thread to avoid TSAN reports + astc_hdr_enc_init(); + basist::bc6h_enc_init(); + g_library_initialized = true; + return true; } void basisu_encoder_deinit() @@ -316,6 +330,24 @@ namespace basisu init(); return ticks * g_timer_freq; } + + float linear_to_srgb(float l) + { + assert(l >= 0.0f && l <= 1.0f); + if (l < .0031308f) + return saturate(l * 12.92f); + else + return saturate(1.055f * powf(l, 1.0f / 2.4f) - .055f); + } + + float srgb_to_linear(float s) + { + assert(s >= 0.0f && s <= 1.0f); + if (s < .04045f) + return saturate(s * (1.0f / 12.92f)); + else + return saturate(powf((s + .055f) * (1.0f / 1.055f), 2.4f)); + } const uint32_t MAX_32BIT_ALLOC_SIZE = 250000000; @@ -336,7 +368,7 @@ namespace basisu if (sizeof(void *) == sizeof(uint32_t)) { - if ((w * h * n_chans) > MAX_32BIT_ALLOC_SIZE) + if (((uint64_t)w * h * n_chans) > MAX_32BIT_ALLOC_SIZE) { error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h); @@ -371,6 +403,11 @@ namespace basisu return true; } + bool load_qoi(const char* pFilename, image& img) + { + return false; + } + bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename) { interval_timer tm; @@ -433,11 +470,178 @@ namespace basisu return load_png(pFilename, img); if (strcasecmp(pExt, "tga") == 0) return load_tga(pFilename, img); + if (strcasecmp(pExt, "qoi") == 0) + return load_qoi(pFilename, img); if ( (strcasecmp(pExt, "jpg") == 0) || (strcasecmp(pExt, "jfif") == 0) || (strcasecmp(pExt, "jpeg") == 0) ) return load_jpg(pFilename, img); return false; } + + static void convert_ldr_to_hdr_image(imagef &img, const image &ldr_img, bool ldr_srgb_to_linear) + { + img.resize(ldr_img.get_width(), ldr_img.get_height()); + + for (uint32_t y = 0; y < ldr_img.get_height(); y++) + { + for (uint32_t x = 0; x < ldr_img.get_width(); x++) + { + const color_rgba& c = ldr_img(x, y); + + vec4F& d = img(x, y); + if (ldr_srgb_to_linear) + { + // TODO: Multiply by 100-200 nits? + d[0] = srgb_to_linear(c[0] * (1.0f / 255.0f)); + d[1] = srgb_to_linear(c[1] * (1.0f / 255.0f)); + d[2] = srgb_to_linear(c[2] * (1.0f / 255.0f)); + } + else + { + d[0] = c[0] * (1.0f / 255.0f); + d[1] = c[1] * (1.0f / 255.0f); + d[2] = c[2] * (1.0f / 255.0f); + } + d[3] = c[3] * (1.0f / 255.0f); + } + } + } + + bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear) + { + if ((!pMem) || (!mem_size)) + { + assert(0); + return false; + } + + switch (img_type) + { + case hdr_image_type::cHITRGBAHalfFloat: + { + if (mem_size != width * height * sizeof(basist::half_float) * 4) + { + assert(0); + return false; + } + + if ((!width) || (!height)) + { + assert(0); + return false; + } + + const basist::half_float* pSrc_image_h = static_cast<const basist::half_float *>(pMem); + + img.resize(width, height); + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const basist::half_float* pSrc_pixel = &pSrc_image_h[x * 4]; + + vec4F& dst = img(x, y); + dst[0] = basist::half_to_float(pSrc_pixel[0]); + dst[1] = basist::half_to_float(pSrc_pixel[1]); + dst[2] = basist::half_to_float(pSrc_pixel[2]); + dst[3] = basist::half_to_float(pSrc_pixel[3]); + } + + pSrc_image_h += (width * 4); + } + + break; + } + case hdr_image_type::cHITRGBAFloat: + { + if (mem_size != width * height * sizeof(float) * 4) + { + assert(0); + return false; + } + + if ((!width) || (!height)) + { + assert(0); + return false; + } + + img.resize(width, height); + memcpy(img.get_ptr(), pMem, width * height * sizeof(float) * 4); + + break; + } + case hdr_image_type::cHITPNGImage: + { + image ldr_img; + if (!load_png(static_cast<const uint8_t *>(pMem), mem_size, ldr_img)) + return false; + + convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear); + break; + } + case hdr_image_type::cHITEXRImage: + { + if (!read_exr(pMem, mem_size, img)) + return false; + + break; + } + case hdr_image_type::cHITHDRImage: + { + uint8_vec buf(mem_size); + memcpy(buf.get_ptr(), pMem, mem_size); + + rgbe_header_info hdr; + if (!read_rgbe(buf, img, hdr)) + return false; + + break; + } + default: + assert(0); + return false; + } + + return true; + } + + bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear) + { + std::string ext(string_get_extension(std::string(pFilename))); + + if (ext.length() == 0) + return false; + + const char* pExt = ext.c_str(); + + if (strcasecmp(pExt, "hdr") == 0) + { + rgbe_header_info rgbe_info; + if (!read_rgbe(pFilename, img, rgbe_info)) + return false; + return true; + } + + if (strcasecmp(pExt, "exr") == 0) + { + int n_chans = 0; + if (!read_exr(pFilename, img, n_chans)) + return false; + return true; + } + + // Try loading image as LDR, then optionally convert to linear light. + { + image ldr_img; + if (!load_image(pFilename, ldr_img)) + return false; + + convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear); + } + + return true; + } bool save_png(const char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp) { @@ -559,6 +763,45 @@ namespace basisu return true; } + bool read_file_to_data(const char* pFilename, void *pData, size_t len) + { + assert(pData && len); + if ((!pData) || (!len)) + return false; + + FILE* pFile = nullptr; +#ifdef _WIN32 + fopen_s(&pFile, pFilename, "rb"); +#else + pFile = fopen(pFilename, "rb"); +#endif + if (!pFile) + return false; + + fseek(pFile, 0, SEEK_END); +#ifdef _WIN32 + int64_t filesize = _ftelli64(pFile); +#else + int64_t filesize = ftello(pFile); +#endif + + if ((filesize < 0) || ((size_t)filesize < len)) + { + fclose(pFile); + return false; + } + fseek(pFile, 0, SEEK_SET); + + if (fread(pData, 1, (size_t)len, pFile) != (size_t)len) + { + fclose(pFile); + return false; + } + + fclose(pFile); + return true; + } + bool write_data_to_file(const char* pFilename, const void* pData, size_t len) { FILE* pFile = nullptr; @@ -581,25 +824,7 @@ namespace basisu return fclose(pFile) != EOF; } - - float linear_to_srgb(float l) - { - assert(l >= 0.0f && l <= 1.0f); - if (l < .0031308f) - return saturate(l * 12.92f); - else - return saturate(1.055f * powf(l, 1.0f/2.4f) - .055f); - } - - float srgb_to_linear(float s) - { - assert(s >= 0.0f && s <= 1.0f); - if (s < .04045f) - return saturate(s * (1.0f/12.92f)); - else - return saturate(powf((s + .055f) * (1.0f/1.055f), 2.4f)); - } - + bool image_resample(const image &src, image &dst, bool srgb, const char *pFilter, float filter_scale, bool wrapping, @@ -747,6 +972,121 @@ namespace basisu return true; } + bool image_resample(const imagef& src, imagef& dst, + const char* pFilter, float filter_scale, + bool wrapping, + uint32_t first_comp, uint32_t num_comps) + { + assert((first_comp + num_comps) <= 4); + + const int cMaxComps = 4; + + const uint32_t src_w = src.get_width(), src_h = src.get_height(); + const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height(); + + if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION) + { + printf("Image is too large!\n"); + return false; + } + + if (!src_w || !src_h || !dst_w || !dst_h) + return false; + + if ((num_comps < 1) || (num_comps > cMaxComps)) + return false; + + if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION)) + { + printf("Image is too large!\n"); + return false; + } + + if ((src_w == dst_w) && (src_h == dst_h)) + { + dst = src; + return true; + } + + std::vector<float> samples[cMaxComps]; + Resampler* resamplers[cMaxComps]; + + resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h, + wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 1.0f, 0.0f, // no clamping + pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0); + samples[0].resize(src_w); + + for (uint32_t i = 1; i < num_comps; ++i) + { + resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h, + wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 1.0f, 0.0f, // no clamping + pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0); + samples[i].resize(src_w); + } + + uint32_t dst_y = 0; + + for (uint32_t src_y = 0; src_y < src_h; ++src_y) + { + const vec4F* pSrc = &src(0, src_y); + + // Put source lines into resampler(s) + for (uint32_t x = 0; x < src_w; ++x) + { + for (uint32_t c = 0; c < num_comps; ++c) + { + const uint32_t comp_index = first_comp + c; + const float v = (*pSrc)[comp_index]; + + samples[c][x] = v; + } + + pSrc++; + } + + for (uint32_t c = 0; c < num_comps; ++c) + { + if (!resamplers[c]->put_line(&samples[c][0])) + { + for (uint32_t i = 0; i < num_comps; i++) + delete resamplers[i]; + return false; + } + } + + // Now retrieve any output lines + for (;;) + { + uint32_t c; + for (c = 0; c < num_comps; ++c) + { + const uint32_t comp_index = first_comp + c; + + const float* pOutput_samples = resamplers[c]->get_line(); + if (!pOutput_samples) + break; + + vec4F* pDst = &dst(0, dst_y); + + for (uint32_t x = 0; x < dst_w; x++) + { + (*pDst)[comp_index] = pOutput_samples[x]; + pDst++; + } + } + if (c < num_comps) + break; + + ++dst_y; + } + } + + for (uint32_t i = 0; i < num_comps; ++i) + delete resamplers[i]; + + return true; + } + void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms) { // See the paper "In-Place Calculation of Minimum Redundancy Codes" by Moffat and Katajainen @@ -1312,11 +1652,13 @@ namespace basisu uint32_t a = max_index / num_syms, b = max_index % num_syms; + const uint32_t ofs = m_entries_picked.size(); + m_entries_picked.push_back(a); m_entries_picked.push_back(b); for (uint32_t i = 0; i < num_syms; i++) - if ((i != b) && (i != a)) + if ((i != m_entries_picked[ofs + 1]) && (i != m_entries_picked[ofs])) m_entries_to_do.push_back(i); for (uint32_t i = 0; i < m_entries_to_do.size(); i++) @@ -1372,6 +1714,235 @@ namespace basisu } return which_side; } + + void image_metrics::calc(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool log) + { + assert((first_chan < 4U) && (first_chan + total_chans <= 4U)); + + const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); + const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); + + double max_e = -1e+30f; + double sum = 0.0f, sum_sqr = 0.0f; + + m_has_neg = false; + m_any_abnormal = false; + m_hf_mag_overflow = false; + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const vec4F& ca = a(x, y), &cb = b(x, y); + + if (total_chans) + { + for (uint32_t c = 0; c < total_chans; c++) + { + float fa = ca[first_chan + c], fb = cb[first_chan + c]; + + if ((fabs(fa) > basist::MAX_HALF_FLOAT) || (fabs(fb) > basist::MAX_HALF_FLOAT)) + m_hf_mag_overflow = true; + + if ((fa < 0.0f) || (fb < 0.0f)) + m_has_neg = true; + + if (std::isinf(fa) || std::isinf(fb) || std::isnan(fa) || std::isnan(fb)) + m_any_abnormal = true; + + const double delta = fabs(fa - fb); + max_e = basisu::maximum<double>(max_e, delta); + + if (log) + { + double log2_delta = log2f(basisu::maximum(0.0f, fa) + 1.0f) - log2f(basisu::maximum(0.0f, fb) + 1.0f); + + sum += fabs(log2_delta); + sum_sqr += log2_delta * log2_delta; + } + else + { + sum += fabs(delta); + sum_sqr += delta * delta; + } + } + } + else + { + for (uint32_t c = 0; c < 3; c++) + { + float fa = ca[c], fb = cb[c]; + + if ((fabs(fa) > basist::MAX_HALF_FLOAT) || (fabs(fb) > basist::MAX_HALF_FLOAT)) + m_hf_mag_overflow = true; + + if ((fa < 0.0f) || (fb < 0.0f)) + m_has_neg = true; + + if (std::isinf(fa) || std::isinf(fb) || std::isnan(fa) || std::isnan(fb)) + m_any_abnormal = true; + } + + double ca_l = get_luminance(ca), cb_l = get_luminance(cb); + + double delta = fabs(ca_l - cb_l); + max_e = basisu::maximum(max_e, delta); + + if (log) + { + double log2_delta = log2(basisu::maximum<double>(0.0f, ca_l) + 1.0f) - log2(basisu::maximum<double>(0.0f, cb_l) + 1.0f); + + sum += fabs(log2_delta); + sum_sqr += log2_delta * log2_delta; + } + else + { + sum += delta; + sum_sqr += delta * delta; + } + } + } + } + + m_max = (double)(max_e); + + double total_values = (double)width * (double)height; + if (avg_comp_error) + total_values *= (double)clamp<uint32_t>(total_chans, 1, 4); + + m_mean = (float)(sum / total_values); + m_mean_squared = (float)(sum_sqr / total_values); + m_rms = (float)sqrt(sum_sqr / total_values); + + const double max_val = 1.0f; + m_psnr = m_rms ? (float)clamp<double>(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f; + } + + void image_metrics::calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error) + { + assert(total_chans); + assert((first_chan < 4U) && (first_chan + total_chans <= 4U)); + + const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); + const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); + + m_has_neg = false; + m_hf_mag_overflow = false; + m_any_abnormal = false; + + uint_vec hist(65536); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const vec4F& ca = a(x, y), &cb = b(x, y); + + for (uint32_t i = 0; i < 4; i++) + { + if ((ca[i] < 0.0f) || (cb[i] < 0.0f)) + m_has_neg = true; + + if ((fabs(ca[i]) > basist::MAX_HALF_FLOAT) || (fabs(cb[i]) > basist::MAX_HALF_FLOAT)) + m_hf_mag_overflow = true; + + if (std::isnan(ca[i]) || std::isnan(cb[i]) || std::isinf(ca[i]) || std::isinf(cb[i])) + m_any_abnormal = true; + } + + int cah[4] = { basist::float_to_half(ca[0]), basist::float_to_half(ca[1]), basist::float_to_half(ca[2]), basist::float_to_half(ca[3]) }; + int cbh[4] = { basist::float_to_half(cb[0]), basist::float_to_half(cb[1]), basist::float_to_half(cb[2]), basist::float_to_half(cb[3]) }; + + for (uint32_t c = 0; c < total_chans; c++) + hist[iabs(cah[first_chan + c] - cbh[first_chan + c]) & 65535]++; + + } // x + } // y + + m_max = 0; + double sum = 0.0f, sum2 = 0.0f; + for (uint32_t i = 0; i < 65536; i++) + { + if (hist[i]) + { + m_max = basisu::maximum<double>(m_max, (double)i); + double v = (double)i * (double)hist[i]; + sum += v; + sum2 += (double)i * v; + } + } + + double total_values = (double)width * (double)height; + if (avg_comp_error) + total_values *= (double)clamp<uint32_t>(total_chans, 1, 4); + + const float max_val = 65535.0f; + m_mean = (float)clamp<double>(sum / total_values, 0.0f, max_val); + m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, max_val * max_val); + m_rms = (float)sqrt(m_mean_squared); + m_psnr = m_rms ? (float)clamp<double>(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f; + } + + // Alt. variant, same as calc_half(), for validation. + void image_metrics::calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error) + { + assert(total_chans); + assert((first_chan < 4U) && (first_chan + total_chans <= 4U)); + + const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); + const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); + + m_has_neg = false; + m_hf_mag_overflow = false; + m_any_abnormal = false; + + double sum = 0.0f, sum2 = 0.0f; + m_max = 0; + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const vec4F& ca = a(x, y), & cb = b(x, y); + + for (uint32_t i = 0; i < 4; i++) + { + if ((ca[i] < 0.0f) || (cb[i] < 0.0f)) + m_has_neg = true; + + if ((fabs(ca[i]) > basist::MAX_HALF_FLOAT) || (fabs(cb[i]) > basist::MAX_HALF_FLOAT)) + m_hf_mag_overflow = true; + + if (std::isnan(ca[i]) || std::isnan(cb[i]) || std::isinf(ca[i]) || std::isinf(cb[i])) + m_any_abnormal = true; + } + + int cah[4] = { basist::float_to_half(ca[0]), basist::float_to_half(ca[1]), basist::float_to_half(ca[2]), basist::float_to_half(ca[3]) }; + int cbh[4] = { basist::float_to_half(cb[0]), basist::float_to_half(cb[1]), basist::float_to_half(cb[2]), basist::float_to_half(cb[3]) }; + + for (uint32_t c = 0; c < total_chans; c++) + { + int diff = iabs(cah[first_chan + c] - cbh[first_chan + c]); + if (diff) + m_max = std::max<double>(m_max, (double)diff); + + sum += diff; + sum2 += squarei(cah[first_chan + c] - cbh[first_chan + c]); + } + + } // x + } // y + + double total_values = (double)width * (double)height; + if (avg_comp_error) + total_values *= (double)clamp<uint32_t>(total_chans, 1, 4); + + const float max_val = 65535.0f; + m_mean = (float)clamp<double>(sum / total_values, 0.0f, max_val); + m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, max_val * max_val); + m_rms = (float)sqrt(m_mean_squared); + m_psnr = m_rms ? (float)clamp<double>(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f; + } void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma) { @@ -1383,6 +1954,10 @@ namespace basisu double hist[256]; clear_obj(hist); + m_has_neg = false; + m_any_abnormal = false; + m_hf_mag_overflow = false; + for (uint32_t y = 0; y < height; y++) { for (uint32_t x = 0; x < width; x++) @@ -1410,7 +1985,7 @@ namespace basisu { if (hist[i]) { - m_max = basisu::maximum<float>(m_max, (float)i); + m_max = basisu::maximum<double>(m_max, (double)i); double v = i * hist[i]; sum += v; sum2 += i * v; @@ -1922,7 +2497,7 @@ namespace basisu } while (pixels_remaining); - assert((pDst - &input_line_buf[0]) == width * tga_bytes_per_pixel); + assert((pDst - &input_line_buf[0]) == (int)(width * tga_bytes_per_pixel)); pLine_data = &input_line_buf[0]; } @@ -2052,6 +2627,808 @@ namespace basisu return read_tga(&filedata[0], (uint32_t)filedata.size(), width, height, n_chans); } + static inline void hdr_convert(const color_rgba& rgbe, vec4F& c) + { + if (rgbe[3] != 0) + { + float scale = ldexp(1.0f, rgbe[3] - 128 - 8); + c.set((float)rgbe[0] * scale, (float)rgbe[1] * scale, (float)rgbe[2] * scale, 1.0f); + } + else + { + c.set(0.0f, 0.0f, 0.0f, 1.0f); + } + } + + bool string_begins_with(const std::string& str, const char* pPhrase) + { + const size_t str_len = str.size(); + + const size_t phrase_len = strlen(pPhrase); + assert(phrase_len); + + if (str_len >= phrase_len) + { +#ifdef _MSC_VER + if (_strnicmp(pPhrase, str.c_str(), phrase_len) == 0) +#else + if (strncasecmp(pPhrase, str.c_str(), phrase_len) == 0) +#endif + return true; + } + + return false; + } + + // Radiance RGBE (.HDR) image reading. + // This code tries to preserve the original logic in Radiance's ray/src/common/color.c code: + // https://www.radiance-online.org/cgi-bin/viewcvs.cgi/ray/src/common/color.c?revision=2.26&view=markup&sortby=log + // Also see: https://flipcode.com/archives/HDR_Image_Reader.shtml. + // https://github.com/LuminanceHDR/LuminanceHDR/blob/master/src/Libpfs/io/rgbereader.cpp. + // https://radsite.lbl.gov/radiance/refer/filefmts.pdf + // Buggy readers: + // stb_image.h: appears to be a clone of rgbe.c, but with goto's (doesn't support old format files, doesn't support mixture of RLE/non-RLE scanlines) + // http://www.graphics.cornell.edu/~bjw/rgbe.html - rgbe.c/h + // http://www.graphics.cornell.edu/online/formats/rgbe/ - rgbe.c/.h - buggy + bool read_rgbe(const uint8_vec &filedata, imagef& img, rgbe_header_info& hdr_info) + { + hdr_info.clear(); + + const uint32_t MAX_SUPPORTED_DIM = 65536; + + if (filedata.size() < 4) + return false; + + // stb_image.h checks for the string "#?RADIANCE" or "#?RGBE" in the header. + // The original Radiance header code doesn't care about the specific string. + // opencv's reader only checks for "#?", so that's what we're going to do. + if ((filedata[0] != '#') || (filedata[1] != '?')) + return false; + + //uint32_t width = 0, height = 0; + bool is_rgbe = false; + size_t cur_ofs = 0; + + // Parse the lines until we encounter a blank line. + std::string cur_line; + for (; ; ) + { + if (cur_ofs >= filedata.size()) + return false; + + const uint32_t HEADER_TOO_BIG_SIZE = 4096; + if (cur_ofs >= HEADER_TOO_BIG_SIZE) + { + // Header seems too large - something is likely wrong. Return failure. + return false; + } + + uint8_t c = filedata[cur_ofs++]; + + if (c == '\n') + { + if (!cur_line.size()) + break; + + if ((cur_line[0] == '#') && (!string_begins_with(cur_line, "#?")) && (!hdr_info.m_program.size())) + { + cur_line.erase(0, 1); + while (cur_line.size() && (cur_line[0] == ' ')) + cur_line.erase(0, 1); + + hdr_info.m_program = cur_line; + } + else if (string_begins_with(cur_line, "EXPOSURE=") && (cur_line.size() > 9)) + { + hdr_info.m_exposure = atof(cur_line.c_str() + 9); + hdr_info.m_has_exposure = true; + } + else if (string_begins_with(cur_line, "GAMMA=") && (cur_line.size() > 6)) + { + hdr_info.m_exposure = atof(cur_line.c_str() + 6); + hdr_info.m_has_gamma = true; + } + else if (cur_line == "FORMAT=32-bit_rle_rgbe") + { + is_rgbe = true; + } + + cur_line.resize(0); + } + else + cur_line.push_back((char)c); + } + + if (!is_rgbe) + return false; + + // Assume and require the final line to have the image's dimensions. We're not supporting flipping. + for (; ; ) + { + if (cur_ofs >= filedata.size()) + return false; + uint8_t c = filedata[cur_ofs++]; + if (c == '\n') + break; + cur_line.push_back((char)c); + } + + int comp[2] = { 1, 0 }; // y, x (major, minor) + int dir[2] = { -1, 1 }; // -1, 1, (major, minor), for y -1=up + uint32_t major_dim = 0, minor_dim = 0; + + // Parse the dimension string, normally it'll be "-Y # +X #" (major, minor), rarely it differs + for (uint32_t d = 0; d < 2; d++) // 0=major, 1=minor + { + const bool is_neg_x = (strncmp(&cur_line[0], "-X ", 3) == 0); + const bool is_pos_x = (strncmp(&cur_line[0], "+X ", 3) == 0); + const bool is_x = is_neg_x || is_pos_x; + + const bool is_neg_y = (strncmp(&cur_line[0], "-Y ", 3) == 0); + const bool is_pos_y = (strncmp(&cur_line[0], "+Y ", 3) == 0); + const bool is_y = is_neg_y || is_pos_y; + + if (cur_line.size() < 3) + return false; + + if (!is_x && !is_y) + return false; + + comp[d] = is_x ? 0 : 1; + dir[d] = (is_neg_x || is_neg_y) ? -1 : 1; + + uint32_t& dim = d ? minor_dim : major_dim; + + cur_line.erase(0, 3); + + while (cur_line.size()) + { + char c = cur_line[0]; + if (c != ' ') + break; + cur_line.erase(0, 1); + } + + bool has_digits = false; + while (cur_line.size()) + { + char c = cur_line[0]; + cur_line.erase(0, 1); + + if (c == ' ') + break; + + if ((c < '0') || (c > '9')) + return false; + + const uint32_t prev_dim = dim; + dim = dim * 10 + (c - '0'); + if (dim < prev_dim) + return false; + + has_digits = true; + } + if (!has_digits) + return false; + + if ((dim < 1) || (dim > MAX_SUPPORTED_DIM)) + return false; + } + + // temp image: width=minor, height=major + img.resize(minor_dim, major_dim); + + std::vector<color_rgba> temp_scanline(minor_dim); + + // Read the scanlines. + for (uint32_t y = 0; y < major_dim; y++) + { + vec4F* pDst = &img(0, y); + + if ((filedata.size() - cur_ofs) < 4) + return false; + + // Determine if the line uses the new or old format. See the logic in color.c. + bool old_decrunch = false; + if ((minor_dim < 8) || (minor_dim > 0x7FFF)) + { + // Line is too short or long; must be old format. + old_decrunch = true; + } + else if (filedata[cur_ofs] != 2) + { + // R is not 2, must be old format + old_decrunch = true; + } + else + { + // c[0]/red is 2.Check GB and E for validity. + color_rgba c; + memcpy(&c, &filedata[cur_ofs], 4); + + if ((c[1] != 2) || (c[2] & 0x80)) + { + // G isn't 2, or the high bit of B is set which is impossible (image's > 0x7FFF pixels can't get here). Use old format. + old_decrunch = true; + } + else + { + // Check B and E. If this isn't the minor_dim in network order, something is wrong. The pixel would also be denormalized, and invalid. + uint32_t w = (c[2] << 8) | c[3]; + if (w != minor_dim) + return false; + + cur_ofs += 4; + } + } + + if (old_decrunch) + { + uint32_t rshift = 0, x = 0; + + while (x < minor_dim) + { + if ((filedata.size() - cur_ofs) < 4) + return false; + + color_rgba c; + memcpy(&c, &filedata[cur_ofs], 4); + cur_ofs += 4; + + if ((c[0] == 1) && (c[1] == 1) && (c[2] == 1)) + { + // We'll allow RLE matches to cross scanlines, but not on the very first pixel. + if ((!x) && (!y)) + return false; + + const uint32_t run_len = c[3] << rshift; + const vec4F run_color(pDst[-1]); + + if ((x + run_len) > minor_dim) + return false; + + for (uint32_t i = 0; i < run_len; i++) + *pDst++ = run_color; + + rshift += 8; + x += run_len; + } + else + { + rshift = 0; + + hdr_convert(c, *pDst); + pDst++; + x++; + } + } + continue; + } + + // New format + for (uint32_t s = 0; s < 4; s++) + { + uint32_t x_ofs = 0; + while (x_ofs < minor_dim) + { + uint32_t num_remaining = minor_dim - x_ofs; + + if (cur_ofs >= filedata.size()) + return false; + + uint8_t count = filedata[cur_ofs++]; + if (count > 128) + { + count -= 128; + if (count > num_remaining) + return false; + + if (cur_ofs >= filedata.size()) + return false; + const uint8_t val = filedata[cur_ofs++]; + + for (uint32_t i = 0; i < count; i++) + temp_scanline[x_ofs + i][s] = val; + + x_ofs += count; + } + else + { + if ((!count) || (count > num_remaining)) + return false; + + for (uint32_t i = 0; i < count; i++) + { + if (cur_ofs >= filedata.size()) + return false; + const uint8_t val = filedata[cur_ofs++]; + + temp_scanline[x_ofs + i][s] = val; + } + + x_ofs += count; + } + } // while (x_ofs < minor_dim) + } // c + + // Convert all the RGBE pixels to float now + for (uint32_t x = 0; x < minor_dim; x++, pDst++) + hdr_convert(temp_scanline[x], *pDst); + + assert((pDst - &img(0, y)) == (int)minor_dim); + + } // y + + // at here: + // img(width,height)=image pixels as read from file, x=minor axis, y=major axis + // width=minor axis dimension + // height=major axis dimension + // in file, pixels are emitted in minor order, them major (so major=scanlines in the file) + + imagef final_img; + if (comp[0] == 0) // if major axis is X + final_img.resize(major_dim, minor_dim); + else // major axis is Y, minor is X + final_img.resize(minor_dim, major_dim); + + // TODO: optimize the identity case + for (uint32_t major_iter = 0; major_iter < major_dim; major_iter++) + { + for (uint32_t minor_iter = 0; minor_iter < minor_dim; minor_iter++) + { + const vec4F& p = img(minor_iter, major_iter); + + uint32_t dst_x = 0, dst_y = 0; + + // is the minor dim output x? + if (comp[1] == 0) + { + // minor axis is x, major is y + + // is minor axis (which is output x) flipped? + if (dir[1] < 0) + dst_x = minor_dim - 1 - minor_iter; + else + dst_x = minor_iter; + + // is major axis (which is output y) flipped? -1=down in raster order, 1=up + if (dir[0] < 0) + dst_y = major_iter; + else + dst_y = major_dim - 1 - major_iter; + } + else + { + // minor axis is output y, major is output x + + // is minor axis (which is output y) flipped? + if (dir[1] < 0) + dst_y = minor_iter; + else + dst_y = minor_dim - 1 - minor_iter; + + // is major axis (which is output x) flipped? + if (dir[0] < 0) + dst_x = major_dim - 1 - major_iter; + else + dst_x = major_iter; + } + + final_img(dst_x, dst_y) = p; + } + } + + final_img.swap(img); + + return true; + } + + bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info) + { + uint8_vec filedata; + if (!read_file_to_vec(pFilename, filedata)) + return false; + return read_rgbe(filedata, img, hdr_info); + } + + static uint8_vec& append_string(uint8_vec& buf, const char* pStr) + { + const size_t str_len = strlen(pStr); + if (!str_len) + return buf; + + const size_t ofs = buf.size(); + buf.resize(ofs + str_len); + memcpy(&buf[ofs], pStr, str_len); + + return buf; + } + + static uint8_vec& append_string(uint8_vec& buf, const std::string& str) + { + if (!str.size()) + return buf; + return append_string(buf, str.c_str()); + } + + static inline void float2rgbe(color_rgba &rgbe, const vec4F &c) + { + const float red = c[0], green = c[1], blue = c[2]; + assert(red >= 0.0f && green >= 0.0f && blue >= 0.0f); + + const float max_v = basisu::maximumf(basisu::maximumf(red, green), blue); + + if (max_v < 1e-32f) + rgbe.clear(); + else + { + int e; + const float scale = frexp(max_v, &e) * 256.0f / max_v; + rgbe[0] = (uint8_t)(clamp<int>((int)(red * scale), 0, 255)); + rgbe[1] = (uint8_t)(clamp<int>((int)(green * scale), 0, 255)); + rgbe[2] = (uint8_t)(clamp<int>((int)(blue * scale), 0, 255)); + rgbe[3] = (uint8_t)(e + 128); + } + } + + const bool RGBE_FORCE_RAW = false; + const bool RGBE_FORCE_OLD_CRUNCH = false; // note must readers (particularly stb_image.h's) don't properly support this, when they should + + bool write_rgbe(uint8_vec &file_data, imagef& img, rgbe_header_info& hdr_info) + { + if (!img.get_width() || !img.get_height()) + return false; + + const uint32_t width = img.get_width(), height = img.get_height(); + + file_data.resize(0); + file_data.reserve(1024 + img.get_width() * img.get_height() * 4); + + append_string(file_data, "#?RADIANCE\n"); + + if (hdr_info.m_has_exposure) + append_string(file_data, string_format("EXPOSURE=%g\n", hdr_info.m_exposure)); + + if (hdr_info.m_has_gamma) + append_string(file_data, string_format("GAMMA=%g\n", hdr_info.m_gamma)); + + append_string(file_data, "FORMAT=32-bit_rle_rgbe\n\n"); + append_string(file_data, string_format("-Y %u +X %u\n", height, width)); + + if (((width < 8) || (width > 0x7FFF)) || (RGBE_FORCE_RAW)) + { + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + color_rgba rgbe; + float2rgbe(rgbe, img(x, y)); + append_vector(file_data, (const uint8_t *)&rgbe, sizeof(rgbe)); + } + } + } + else if (RGBE_FORCE_OLD_CRUNCH) + { + for (uint32_t y = 0; y < height; y++) + { + int prev_r = -1, prev_g = -1, prev_b = -1, prev_e = -1; + uint32_t cur_run_len = 0; + + for (uint32_t x = 0; x < width; x++) + { + color_rgba rgbe; + float2rgbe(rgbe, img(x, y)); + + if ((rgbe[0] == prev_r) && (rgbe[1] == prev_g) && (rgbe[2] == prev_b) && (rgbe[3] == prev_e)) + { + if (++cur_run_len == 255) + { + // this ensures rshift stays 0, it's lame but this path is only for testing readers + color_rgba f(1, 1, 1, cur_run_len - 1); + append_vector(file_data, (const uint8_t*)&f, sizeof(f)); + append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe)); + cur_run_len = 0; + } + } + else + { + if (cur_run_len > 0) + { + color_rgba f(1, 1, 1, cur_run_len); + append_vector(file_data, (const uint8_t*)&f, sizeof(f)); + + cur_run_len = 0; + } + + append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe)); + + prev_r = rgbe[0]; + prev_g = rgbe[1]; + prev_b = rgbe[2]; + prev_e = rgbe[3]; + } + } // x + + if (cur_run_len > 0) + { + color_rgba f(1, 1, 1, cur_run_len); + append_vector(file_data, (const uint8_t*)&f, sizeof(f)); + } + } // y + } + else + { + uint8_vec temp[4]; + for (uint32_t c = 0; c < 4; c++) + temp[c].resize(width); + + for (uint32_t y = 0; y < height; y++) + { + color_rgba rgbe(2, 2, width >> 8, width & 0xFF); + append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe)); + + for (uint32_t x = 0; x < width; x++) + { + float2rgbe(rgbe, img(x, y)); + + for (uint32_t c = 0; c < 4; c++) + temp[c][x] = rgbe[c]; + } + + for (uint32_t c = 0; c < 4; c++) + { + int raw_ofs = -1; + + uint32_t x = 0; + while (x < width) + { + const uint32_t num_bytes_remaining = width - x; + const uint32_t max_run_len = basisu::minimum<uint32_t>(num_bytes_remaining, 127); + const uint8_t cur_byte = temp[c][x]; + + uint32_t run_len = 1; + while (run_len < max_run_len) + { + if (temp[c][x + run_len] != cur_byte) + break; + run_len++; + } + + const uint32_t cost_to_keep_raw = ((raw_ofs != -1) ? 0 : 1) + run_len; // 0 or 1 bytes to start a raw run, then the repeated bytes issued as raw + const uint32_t cost_to_take_run = 2 + 1; // 2 bytes to issue the RLE, then 1 bytes to start whatever follows it (raw or RLE) + + if ((run_len >= 3) && (cost_to_take_run < cost_to_keep_raw)) + { + file_data.push_back((uint8_t)(128 + run_len)); + file_data.push_back(cur_byte); + + x += run_len; + raw_ofs = -1; + } + else + { + if (raw_ofs < 0) + { + raw_ofs = (int)file_data.size(); + file_data.push_back(0); + } + + if (++file_data[raw_ofs] == 128) + raw_ofs = -1; + + file_data.push_back(cur_byte); + + x++; + } + } // x + + } // c + } // y + } + + return true; + } + + bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info) + { + uint8_vec file_data; + if (!write_rgbe(file_data, img, hdr_info)) + return false; + return write_vec_to_file(pFilename, file_data); + } + + bool read_exr(const char* pFilename, imagef& img, int& n_chans) + { + n_chans = 0; + + int width = 0, height = 0; + float* out_rgba = nullptr; + const char* err = nullptr; + + int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err); + n_chans = 4; + if (status != 0) + { + error_printf("Failed loading .EXR image \"%s\"! (TinyEXR error: %s)\n", pFilename, err ? err : "?"); + FreeEXRErrorMessage(err); + free(out_rgba); + return false; + } + + const uint32_t MAX_SUPPORTED_DIM = 65536; + if ((width < 1) || (height < 1) || (width > (int)MAX_SUPPORTED_DIM) || (height > (int)MAX_SUPPORTED_DIM)) + { + error_printf("Invalid dimensions of .EXR image \"%s\"!\n", pFilename); + free(out_rgba); + return false; + } + + img.resize(width, height); + + if (n_chans == 1) + { + const float* pSrc = out_rgba; + vec4F* pDst = img.get_ptr(); + + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + (*pDst)[0] = pSrc[0]; + (*pDst)[1] = pSrc[1]; + (*pDst)[2] = pSrc[2]; + (*pDst)[3] = 1.0f; + + pSrc += 4; + ++pDst; + } + } + } + else + { + memcpy(img.get_ptr(), out_rgba, sizeof(float) * 4 * img.get_total_pixels()); + } + + free(out_rgba); + return true; + } + + bool read_exr(const void* pMem, size_t mem_size, imagef& img) + { + float* out_rgba = nullptr; + int width = 0, height = 0; + const char* pErr = nullptr; + int res = LoadEXRFromMemory(&out_rgba, &width, &height, (const uint8_t*)pMem, mem_size, &pErr); + if (res < 0) + { + error_printf("Failed loading .EXR image from memory! (TinyEXR error: %s)\n", pErr ? pErr : "?"); + FreeEXRErrorMessage(pErr); + free(out_rgba); + return false; + } + + img.resize(width, height); + memcpy(img.get_ptr(), out_rgba, width * height * sizeof(float) * 4); + free(out_rgba); + + return true; + } + + bool write_exr(const char* pFilename, imagef& img, uint32_t n_chans, uint32_t flags) + { + assert((n_chans == 1) || (n_chans == 3) || (n_chans == 4)); + + const bool linear_hint = (flags & WRITE_EXR_LINEAR_HINT) != 0, + store_float = (flags & WRITE_EXR_STORE_FLOATS) != 0, + no_compression = (flags & WRITE_EXR_NO_COMPRESSION) != 0; + + const uint32_t width = img.get_width(), height = img.get_height(); + assert(width && height); + + if (!width || !height) + return false; + + float_vec layers[4]; + float* image_ptrs[4]; + for (uint32_t c = 0; c < n_chans; c++) + { + layers[c].resize(width * height); + image_ptrs[c] = layers[c].get_ptr(); + } + + // ABGR + int chan_order[4] = { 3, 2, 1, 0 }; + + if (n_chans == 1) + { + // Y + chan_order[0] = 0; + } + else if (n_chans == 3) + { + // BGR + chan_order[0] = 2; + chan_order[1] = 1; + chan_order[2] = 0; + } + else if (n_chans != 4) + { + assert(0); + return false; + } + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const vec4F& p = img(x, y); + + for (uint32_t c = 0; c < n_chans; c++) + layers[c][x + y * width] = p[chan_order[c]]; + } // x + } // y + + EXRHeader header; + InitEXRHeader(&header); + + EXRImage image; + InitEXRImage(&image); + + image.num_channels = n_chans; + image.images = (unsigned char**)image_ptrs; + image.width = width; + image.height = height; + + header.num_channels = n_chans; + + header.channels = (EXRChannelInfo*)calloc(header.num_channels, sizeof(EXRChannelInfo)); + + // Must be (A)BGR order, since most of EXR viewers expect this channel order. + for (uint32_t i = 0; i < n_chans; i++) + { + char c = 'Y'; + if (n_chans == 3) + c = "BGR"[i]; + else if (n_chans == 4) + c = "ABGR"[i]; + + header.channels[i].name[0] = c; + header.channels[i].name[1] = '\0'; + + header.channels[i].p_linear = linear_hint; + } + + header.pixel_types = (int*)calloc(header.num_channels, sizeof(int)); + header.requested_pixel_types = (int*)calloc(header.num_channels, sizeof(int)); + + if (!no_compression) + header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP; + + for (int i = 0; i < header.num_channels; i++) + { + // pixel type of input image + header.pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; + + // pixel type of output image to be stored in .EXR + header.requested_pixel_types[i] = store_float ? TINYEXR_PIXELTYPE_FLOAT : TINYEXR_PIXELTYPE_HALF; + } + + const char* pErr_msg = nullptr; + + int ret = SaveEXRImageToFile(&image, &header, pFilename, &pErr_msg); + if (ret != TINYEXR_SUCCESS) + { + error_printf("Save EXR err: %s\n", pErr_msg); + FreeEXRErrorMessage(pErr_msg); + } + + free(header.channels); + free(header.pixel_types); + free(header.requested_pixel_types); + + return (ret == TINYEXR_SUCCESS); + } + void image::debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t scale_x, uint32_t scale_y, const color_rgba& fg, const color_rgba* pBG, bool alpha_only, const char* pFmt, ...) { char buf[2048]; @@ -2103,5 +3480,206 @@ namespace basisu } } } - + + // Very basic global Reinhard tone mapping, output converted to sRGB with no dithering, alpha is carried through unchanged. + // Only used for debugging/development. + void tonemap_image_reinhard(image &ldr_img, const imagef &hdr_img, float exposure) + { + uint32_t width = hdr_img.get_width(), height = hdr_img.get_height(); + + ldr_img.resize(width, height); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + vec4F c(hdr_img(x, y)); + + for (uint32_t t = 0; t < 3; t++) + { + if (c[t] <= 0.0f) + { + c[t] = 0.0f; + } + else + { + c[t] *= exposure; + c[t] = c[t] / (1.0f + c[t]); + } + } + + c.clamp(0.0f, 1.0f); + + c[0] = linear_to_srgb(c[0]) * 255.0f; + c[1] = linear_to_srgb(c[1]) * 255.0f; + c[2] = linear_to_srgb(c[2]) * 255.0f; + c[3] = c[3] * 255.0f; + + color_rgba& o = ldr_img(x, y); + + o[0] = (uint8_t)std::round(c[0]); + o[1] = (uint8_t)std::round(c[1]); + o[2] = (uint8_t)std::round(c[2]); + o[3] = (uint8_t)std::round(c[3]); + } + } + } + + bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img) + { + const uint32_t width = hdr_test_img.get_width(); + const uint32_t height = hdr_test_img.get_height(); + + uint16_vec orig_half_img(width * 3 * height); + uint16_vec half_img(width * 3 * height); + + int max_shift = 32; + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const vec4F& p = hdr_test_img(x, y); + + for (uint32_t i = 0; i < 3; i++) + { + if (p[i] < 0.0f) + return false; + if (p[i] > basist::MAX_HALF_FLOAT) + return false; + + uint32_t h = basist::float_to_half(p[i]); + //uint32_t orig_h = h; + + orig_half_img[(x + y * width) * 3 + i] = (uint16_t)h; + + // Rotate sign bit into LSB + //h = rot_left16((uint16_t)h, 1); + //assert(rot_right16((uint16_t)h, 1) == orig_h); + h <<= 1; + + half_img[(x + y * width) * 3 + i] = (uint16_t)h; + + // Determine # of leading zero bits, ignoring the sign bit + if (h) + { + int lz = clz(h) - 16; + assert(lz >= 0 && lz <= 16); + + assert((h << lz) <= 0xFFFF); + + max_shift = basisu::minimum<int>(max_shift, lz); + } + } // i + } // x + } // y + + //printf("tonemap_image_compressive: Max leading zeros: %i\n", max_shift); + + uint32_t high_hist[256]; + clear_obj(high_hist); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + for (uint32_t i = 0; i < 3; i++) + { + uint16_t& hf = half_img[(x + y * width) * 3 + i]; + + assert(((uint32_t)hf << max_shift) <= 65535); + + hf <<= max_shift; + + uint32_t h = (uint8_t)(hf >> 8); + high_hist[h]++; + } + } // x + } // y + + uint32_t total_vals_used = 0; + int remap_old_to_new[256]; + for (uint32_t i = 0; i < 256; i++) + remap_old_to_new[i] = -1; + + for (uint32_t i = 0; i < 256; i++) + { + if (high_hist[i] != 0) + { + remap_old_to_new[i] = total_vals_used; + total_vals_used++; + } + } + + assert(total_vals_used >= 1); + + //printf("tonemap_image_compressive: Total used high byte values: %u, unused: %u\n", total_vals_used, 256 - total_vals_used); + + bool val_used[256]; + clear_obj(val_used); + + int remap_new_to_old[256]; + for (uint32_t i = 0; i < 256; i++) + remap_new_to_old[i] = -1; + BASISU_NOTE_UNUSED(remap_new_to_old); + + int prev_c = -1; + BASISU_NOTE_UNUSED(prev_c); + for (uint32_t i = 0; i < 256; i++) + { + if (remap_old_to_new[i] >= 0) + { + int c; + if (total_vals_used <= 1) + c = remap_old_to_new[i]; + else + { + c = (remap_old_to_new[i] * 255 + ((total_vals_used - 1) / 2)) / (total_vals_used - 1); + + assert(c > prev_c); + } + + assert(!val_used[c]); + + remap_new_to_old[c] = i; + + remap_old_to_new[i] = c; + prev_c = c; + + //printf("%u ", c); + + val_used[c] = true; + } + } // i + //printf("\n"); + + dst_img.resize(width, height); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + for (uint32_t c = 0; c < 3; c++) + { + uint16_t& v16 = half_img[(x + y * width) * 3 + c]; + + uint32_t hb = v16 >> 8; + //uint32_t lb = v16 & 0xFF; + + assert(remap_old_to_new[hb] != -1); + assert(remap_old_to_new[hb] <= 255); + assert(remap_new_to_old[remap_old_to_new[hb]] == (int)hb); + + hb = remap_old_to_new[hb]; + + //v16 = (uint16_t)((hb << 8) | lb); + + dst_img(x, y)[c] = (uint8_t)hb; + } + } // x + } // y + + return true; + } + } // namespace basisu diff --git a/thirdparty/basis_universal/encoder/basisu_enc.h b/thirdparty/basis_universal/encoder/basisu_enc.h index 0efeaa461f..780605e7b8 100644 --- a/thirdparty/basis_universal/encoder/basisu_enc.h +++ b/thirdparty/basis_universal/encoder/basisu_enc.h @@ -1,5 +1,5 @@ // basisu_enc.h -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -48,7 +48,8 @@ namespace basisu // Encoder library initialization. // This function MUST be called before encoding anything! - void basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false); + // Returns false if library initialization fails. + bool basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false); void basisu_encoder_deinit(); // basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=1 @@ -70,6 +71,18 @@ namespace basisu return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } + inline int left_shift32(int val, int shift) + { + assert((shift >= 0) && (shift < 32)); + return static_cast<int>(static_cast<uint32_t>(val) << shift); + } + + inline uint32_t left_shift32(uint32_t val, int shift) + { + assert((shift >= 0) && (shift < 32)); + return val << shift; + } + inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) @@ -130,6 +143,31 @@ namespace basisu return bits; } + + // Open interval + inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } + inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } + + // Closed interval + inline int bounds_check_incl(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; } + inline uint32_t bounds_check_incl(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; } + + inline uint32_t clz(uint32_t x) + { + if (!x) + return 32; + + uint32_t n = 0; + while ((x & 0x80000000) == 0) + { + x <<= 1u; + n++; + } + + return n; + } + + bool string_begins_with(const std::string& str, const char* pPhrase); // Hashing @@ -268,6 +306,7 @@ namespace basisu public: enum { num_elements = N }; + typedef T scalar_type; inline vec() { } inline vec(eZero) { set_zero(); } @@ -291,6 +330,7 @@ namespace basisu inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; } inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; } + inline void clear() { set_zero(); } template <uint32_t OtherN, typename OtherT> inline vec &set(const vec<OtherN, OtherT> &other) @@ -391,7 +431,7 @@ namespace basisu inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); } inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); } - inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; } + inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; } inline vec &clamp(T l, T h) { @@ -722,7 +762,7 @@ namespace basisu void job_thread(uint32_t index); }; - // Simple 32-bit color class + // Simple 64-bit color class class color_rgba_i16 { @@ -1116,7 +1156,9 @@ namespace basisu { std::string result(s); for (size_t i = 0; i < result.size(); i++) - result[i] = (char)tolower((int)result[i]); + { + result[i] = (char)tolower((uint8_t)(result[i])); + } return result; } @@ -1408,7 +1450,7 @@ namespace basisu size_t get_total_training_vecs() const { return m_training_vecs.size(); } const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; } - array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; } + array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; } void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const { @@ -1437,36 +1479,36 @@ namespace basisu } void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const - { + { uint_vec node_stack; - node_stack.reserve(512); + node_stack.reserve(512); - codebook.resize(0); - codebook.reserve(max_clusters); + codebook.resize(0); + codebook.reserve(max_clusters); - uint32_t node_index = 0; + uint32_t node_index = 0; - while (true) - { - const tsvq_node& cur = m_nodes[node_index]; + while (true) + { + const tsvq_node& cur = m_nodes[node_index]; - if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters)) - { - codebook.resize(codebook.size() + 1); - codebook.back() = cur.m_training_vecs; + if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters)) + { + codebook.resize(codebook.size() + 1); + codebook.back() = cur.m_training_vecs; - if (node_stack.empty()) - break; + if (node_stack.empty()) + break; - node_index = node_stack.back(); - node_stack.pop_back(); - continue; - } + node_index = node_stack.back(); + node_stack.pop_back(); + continue; + } - node_stack.push_back(cur.m_right_index); - node_index = cur.m_left_index; - } - } + node_stack.push_back(cur.m_right_index); + node_index = cur.m_left_index; + } + } bool generate(uint32_t max_size) { @@ -2319,6 +2361,14 @@ namespace basisu m_total_bits = 0; } + inline void restart() + { + m_bytes.resize(0); + m_bit_buffer = 0; + m_bit_buffer_size = 0; + m_total_bits = 0; + } + inline const uint8_vec &get_bytes() const { return m_bytes; } inline uint64_t get_total_bits() const { return m_total_bits; } @@ -2920,11 +2970,11 @@ namespace basisu inline const color_rgba *get_ptr() const { return &m_pixels[0]; } inline color_rgba *get_ptr() { return &m_pixels[0]; } - bool has_alpha() const + bool has_alpha(uint32_t channel = 3) const { for (uint32_t y = 0; y < m_height; ++y) for (uint32_t x = 0; x < m_width; ++x) - if ((*this)(x, y).a < 255) + if ((*this)(x, y)[channel] < 255) return true; return false; @@ -3130,6 +3180,31 @@ namespace basisu return *this; } + imagef& crop_dup_borders(uint32_t w, uint32_t h) + { + const uint32_t orig_w = m_width, orig_h = m_height; + + crop(w, h); + + if (orig_w && orig_h) + { + if (m_width > orig_w) + { + for (uint32_t x = orig_w; x < m_width; x++) + for (uint32_t y = 0; y < m_height; y++) + set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); + } + + if (m_height > orig_h) + { + for (uint32_t y = orig_h; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); + } + } + return *this; + } + inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } @@ -3213,19 +3288,128 @@ namespace basisu inline const vec4F *get_ptr() const { return &m_pixels[0]; } inline vec4F *get_ptr() { return &m_pixels[0]; } + + bool clean_astc_hdr_pixels(float highest_mag) + { + bool status = true; + bool nan_msg = false; + bool inf_msg = false; + bool neg_zero_msg = false; + bool neg_msg = false; + bool clamp_msg = false; + + for (uint32_t iy = 0; iy < m_height; iy++) + { + for (uint32_t ix = 0; ix < m_width; ix++) + { + vec4F& c = (*this)(ix, iy); + + for (uint32_t s = 0; s < 4; s++) + { + float &p = c[s]; + union { float f; uint32_t u; } x; x.f = p; + + if ((std::isnan(p)) || (std::isinf(p)) || (x.u == 0x80000000)) + { + if (std::isnan(p)) + { + if (!nan_msg) + { + fprintf(stderr, "One or more pixels was NaN, setting to 0.\n"); + nan_msg = true; + } + } + + if (std::isinf(p)) + { + if (!inf_msg) + { + fprintf(stderr, "One or more pixels was INF, setting to 0.\n"); + inf_msg = true; + } + } + + if (x.u == 0x80000000) + { + if (!neg_zero_msg) + { + fprintf(stderr, "One or more pixels was -0, setting them to 0.\n"); + neg_zero_msg = true; + } + } + + p = 0.0f; + status = false; + } + else + { + //const float o = p; + if (p < 0.0f) + { + p = 0.0f; + + if (!neg_msg) + { + fprintf(stderr, "One or more pixels was negative -- setting these pixel components to 0 because ASTC HDR doesn't support signed values.\n"); + neg_msg = true; + } + + status = false; + } + + if (p > highest_mag) + { + p = highest_mag; + + if (!clamp_msg) + { + fprintf(stderr, "One or more pixels had to be clamped to %f.\n", highest_mag); + clamp_msg = true; + } + + status = false; + } + } + } + } + } + + return status; + } + + imagef& flip_y() + { + for (uint32_t y = 0; y < m_height / 2; ++y) + for (uint32_t x = 0; x < m_width; ++x) + std::swap((*this)(x, y), (*this)(x, m_height - 1 - y)); + + return *this; + } private: uint32_t m_width, m_height, m_pitch; // all in pixels vec4F_vec m_pixels; }; + // REC 709 coefficients + const float REC_709_R = 0.212656f, REC_709_G = 0.715158f, REC_709_B = 0.072186f; + + inline float get_luminance(const vec4F &c) + { + return c[0] * REC_709_R + c[1] * REC_709_G + c[2] * REC_709_B; + } + + float linear_to_srgb(float l); + float srgb_to_linear(float s); + // Image metrics class image_metrics { public: // TODO: Add ssim - float m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim; + double m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim; + bool m_has_neg, m_hf_mag_overflow, m_any_abnormal; image_metrics() { @@ -3240,10 +3424,17 @@ namespace basisu m_rms = 0; m_psnr = 0; m_ssim = 0; + m_has_neg = false; + m_hf_mag_overflow = false; + m_any_abnormal = false; } - void print(const char *pPrefix = nullptr) { printf("%sMax: %3.0f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); } + void print(const char *pPrefix = nullptr) { printf("%sMax: %3.3f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); } + void print_hp(const char* pPrefix = nullptr) { printf("%sMax: %3.6f Mean: %3.6f RMS: %3.6f PSNR: %2.6f dB, Any Neg: %u, Half float overflow: %u, Any NaN/Inf: %u\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal); } + void calc(const imagef& a, const imagef& b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool log = false); + void calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error); + void calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error); void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false); }; @@ -3256,6 +3447,8 @@ namespace basisu bool load_tga(const char* pFilename, image& img); inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); } + bool load_qoi(const char* pFilename, image& img); + bool load_jpg(const char *pFilename, image& img); inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); } @@ -3263,9 +3456,64 @@ namespace basisu bool load_image(const char* pFilename, image& img); inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); } + // Supports .HDR and most (but not all) .EXR's (see TinyEXR). + bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear = true); + inline bool load_image_hdr(const std::string& filename, imagef& img, bool ldr_srgb_to_linear = true) { return load_image_hdr(filename.c_str(), img, ldr_srgb_to_linear); } + + enum class hdr_image_type + { + cHITRGBAHalfFloat = 0, + cHITRGBAFloat = 1, + cHITPNGImage = 2, + cHITEXRImage = 3, + cHITHDRImage = 4 + }; + + bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear); + uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans); uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans); + struct rgbe_header_info + { + std::string m_program; + + // Note no validation is done, either gamma or exposure may be 0. + double m_gamma; + bool m_has_gamma; + + double m_exposure; // watts/steradian/m^2. + bool m_has_exposure; + + void clear() + { + m_program.clear(); + m_gamma = 1.0f; + m_has_gamma = false; + m_exposure = 1.0f; + m_has_exposure = false; + } + }; + + bool read_rgbe(const uint8_vec& filedata, imagef& img, rgbe_header_info& hdr_info); + bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info &hdr_info); + + bool write_rgbe(uint8_vec& file_data, imagef& img, rgbe_header_info& hdr_info); + bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info); + + bool read_exr(const char* pFilename, imagef& img, int& n_chans); + bool read_exr(const void* pMem, size_t mem_size, imagef& img); + + enum + { + WRITE_EXR_LINEAR_HINT = 1, // hint for lossy comp. methods: exr_perceptual_treatment_t, logarithmic or linear, defaults to logarithmic + WRITE_EXR_STORE_FLOATS = 2, // use 32-bit floats, otherwise it uses half floats + WRITE_EXR_NO_COMPRESSION = 4 // no compression, otherwise it uses ZIP compression (16 scanlines per block) + }; + + // Supports 1 (Y), 3 (RGB), or 4 (RGBA) channel images. + bool write_exr(const char* pFilename, imagef& img, uint32_t n_chans, uint32_t flags); + enum { cImageSaveGrayscale = 1, @@ -3276,19 +3524,22 @@ namespace basisu inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); } bool read_file_to_vec(const char* pFilename, uint8_vec& data); - + bool read_file_to_data(const char* pFilename, void *pData, size_t len); + bool write_data_to_file(const char* pFilename, const void* pData, size_t len); inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); } - - float linear_to_srgb(float l); - float srgb_to_linear(float s); - + bool image_resample(const image &src, image &dst, bool srgb = false, const char *pFilter = "lanczos4", float filter_scale = 1.0f, bool wrapping = false, uint32_t first_comp = 0, uint32_t num_comps = 4); + bool image_resample(const imagef& src, imagef& dst, + const char* pFilter = "lanczos4", float filter_scale = 1.0f, + bool wrapping = false, + uint32_t first_comp = 0, uint32_t num_comps = 4); + // Timing typedef uint64_t timer_ticks; @@ -3319,6 +3570,8 @@ namespace basisu bool m_started, m_stopped; }; + inline double get_interval_timer() { return interval_timer::ticks_to_secs(interval_timer::get_ticks()); } + // 2D array template<typename T> @@ -3372,8 +3625,8 @@ namespace basisu inline const T &operator[] (uint32_t i) const { return m_values[i]; } inline T &operator[] (uint32_t i) { return m_values[i]; } - inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); } - inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); } + inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } + inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } void clear() { @@ -3450,7 +3703,327 @@ namespace basisu } }; typedef basisu::vector<pixel_block> pixel_block_vec; - + + struct pixel_block_hdr + { + vec4F m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x] + + inline const vec4F& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } + inline vec4F& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } + + inline const vec4F* get_ptr() const { return &m_pixels[0][0]; } + inline vec4F* get_ptr() { return &m_pixels[0][0]; } + + inline void clear() { clear_obj(*this); } + + inline bool operator== (const pixel_block& rhs) const + { + return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0; + } + }; + typedef basisu::vector<pixel_block_hdr> pixel_block_hdr_vec; + + void tonemap_image_reinhard(image& ldr_img, const imagef& hdr_img, float exposure); + bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img); + + // Intersection + enum eClear { cClear = 0 }; + enum eInitExpand { cInitExpand = 0 }; + + template<typename vector_type> + class ray + { + public: + typedef vector_type vector_t; + typedef typename vector_type::scalar_type scalar_type; + + inline ray() { } + inline ray(eClear) { clear(); } + inline ray(const vector_type& origin, const vector_type& direction) : m_origin(origin), m_direction(direction) { } + + inline void clear() + { + m_origin.clear(); + m_direction.clear(); + } + + inline const vector_type& get_origin(void) const { return m_origin; } + inline void set_origin(const vector_type& origin) { m_origin = origin; } + + inline const vector_type& get_direction(void) const { return m_direction; } + inline void set_direction(const vector_type& direction) { m_direction = direction; } + + inline void set_endpoints(const vector_type& start, const vector_type& end) + { + m_origin = start; + + m_direction = end - start; + m_direction.normalize_in_place(); + } + + inline vector_type eval(scalar_type t) const + { + return m_origin + m_direction * t; + } + + private: + vector_type m_origin; + vector_type m_direction; + }; + + typedef ray<vec2F> ray2F; + typedef ray<vec3F> ray3F; + + template<typename T> + class vec_interval + { + public: + enum { N = T::num_elements }; + typedef typename T::scalar_type scalar_type; + + inline vec_interval(const T& v) { m_bounds[0] = v; m_bounds[1] = v; } + inline vec_interval(const T& low, const T& high) { m_bounds[0] = low; m_bounds[1] = high; } + + inline vec_interval() { } + inline vec_interval(eClear) { clear(); } + inline vec_interval(eInitExpand) { init_expand(); } + + inline void clear() { m_bounds[0].clear(); m_bounds[1].clear(); } + + inline void init_expand() + { + m_bounds[0].set(1e+30f, 1e+30f, 1e+30f); + m_bounds[1].set(-1e+30f, -1e+30f, -1e+30f); + } + + inline vec_interval expand(const T& p) + { + for (uint32_t c = 0; c < N; c++) + { + if (p[c] < m_bounds[0][c]) + m_bounds[0][c] = p[c]; + + if (p[c] > m_bounds[1][c]) + m_bounds[1][c] = p[c]; + } + + return *this; + } + + inline const T& operator[] (uint32_t i) const { assert(i < 2); return m_bounds[i]; } + inline T& operator[] (uint32_t i) { assert(i < 2); return m_bounds[i]; } + + const T& get_low() const { return m_bounds[0]; } + T& get_low() { return m_bounds[0]; } + + const T& get_high() const { return m_bounds[1]; } + T& get_high() { return m_bounds[1]; } + + scalar_type get_dim(uint32_t axis) const { return m_bounds[1][axis] - m_bounds[0][axis]; } + + bool contains(const T& p) const + { + const T& low = get_low(), high = get_high(); + + for (uint32_t i = 0; i < N; i++) + { + if (p[i] < low[i]) + return false; + + if (p[i] > high[i]) + return false; + } + return true; + } + + private: + T m_bounds[2]; + }; + + typedef vec_interval<vec1F> vec_interval1F; + typedef vec_interval<vec2F> vec_interval2F; + typedef vec_interval<vec3F> vec_interval3F; + typedef vec_interval<vec4F> vec_interval4F; + + typedef vec_interval2F aabb2F; + typedef vec_interval3F aabb3F; + + namespace intersection + { + enum result + { + cBackfacing = -1, + cFailure = 0, + cSuccess, + cParallel, + cInside, + }; + + // Returns cInside, cSuccess, or cFailure. + // Algorithm: Graphics Gems 1 + template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type> + result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) + { + enum + { + cNumDim = vector_type::num_elements, + cRight = 0, + cLeft = 1, + cMiddle = 2 + }; + + bool inside = true; + int quadrant[cNumDim]; + scalar_type candidate_plane[cNumDim]; + + for (int i = 0; i < cNumDim; i++) + { + if (ray.get_origin()[i] < box[0][i]) + { + quadrant[i] = cLeft; + candidate_plane[i] = box[0][i]; + inside = false; + } + else if (ray.get_origin()[i] > box[1][i]) + { + quadrant[i] = cRight; + candidate_plane[i] = box[1][i]; + inside = false; + } + else + { + quadrant[i] = cMiddle; + } + } + + if (inside) + { + coord = ray.get_origin(); + t = 0.0f; + return cInside; + } + + scalar_type max_t[cNumDim]; + for (int i = 0; i < cNumDim; i++) + { + if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f)) + max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i]; + else + max_t[i] = -1.0f; + } + + int which_plane = 0; + for (int i = 1; i < cNumDim; i++) + if (max_t[which_plane] < max_t[i]) + which_plane = i; + + if (max_t[which_plane] < 0.0f) + return cFailure; + + for (int i = 0; i < cNumDim; i++) + { + if (i != which_plane) + { + coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i]; + + if ((coord[i] < box[0][i]) || (coord[i] > box[1][i])) + return cFailure; + } + else + { + coord[i] = candidate_plane[i]; + } + + assert(coord[i] >= box[0][i] && coord[i] <= box[1][i]); + } + + t = max_t[which_plane]; + return cSuccess; + } + + template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type> + result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) + { + if (!box.contains(ray.get_origin())) + { + started_within = false; + return ray_aabb(coord, t, ray, box); + } + + started_within = true; + + typename vector_type::T diag_dist = box.diagonal_length() * 1.5f; + ray_type outside_ray(ray.eval(diag_dist), -ray.get_direction()); + + result res(ray_aabb(coord, t, outside_ray, box)); + if (res != cSuccess) + return res; + + t = basisu::maximum(0.0f, diag_dist - t); + return cSuccess; + } + + } // intersect + + // This float->half conversion matches how "F32TO16" works on Intel GPU's. + // Input cannot be negative, Inf or Nan. + inline basist::half_float float_to_half_non_neg_no_nan_inf(float val) + { + union { float f; int32_t i; uint32_t u; } fi = { val }; + const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF; + int e = 0, m = 0; + + assert(((fi.i >> 31) == 0) && (flt_e != 0xFF)); + + // not zero or denormal + if (flt_e != 0) + { + int new_exp = flt_e - 127; + if (new_exp > 15) + e = 31; + else if (new_exp < -14) + m = lrintf((1 << 24) * fabsf(fi.f)); + else + { + e = new_exp + 15; + m = lrintf(flt_m * (1.0f / ((float)(1 << 13)))); + } + } + + assert((0 <= m) && (m <= 1024)); + if (m == 1024) + { + e++; + m = 0; + } + + assert((e >= 0) && (e <= 31)); + assert((m >= 0) && (m <= 1023)); + + basist::half_float result = (basist::half_float)((e << 10) | m); + return result; + } + + // Supports positive and denormals only. No NaN or Inf. + inline float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h) + { + assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h)); + + union fu32 + { + uint32_t u; + float f; + }; + + static const fu32 K = { 0x77800000 }; + + fu32 o; + o.u = h << 13; + o.f *= K.f; + + return o.f; + } + } // namespace basisu diff --git a/thirdparty/basis_universal/encoder/basisu_etc.cpp b/thirdparty/basis_universal/encoder/basisu_etc.cpp index f8bd0f12e5..ba1c14231d 100644 --- a/thirdparty/basis_universal/encoder/basisu_etc.cpp +++ b/thirdparty/basis_universal/encoder/basisu_etc.cpp @@ -1,5 +1,5 @@ // basis_etc.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_etc.h b/thirdparty/basis_universal/encoder/basisu_etc.h index 208f2aac1b..5c44bd4812 100644 --- a/thirdparty/basis_universal/encoder/basisu_etc.h +++ b/thirdparty/basis_universal/encoder/basisu_etc.h @@ -1,5 +1,5 @@ // basis_etc.h -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_frontend.cpp b/thirdparty/basis_universal/encoder/basisu_frontend.cpp index 1f30a33c70..750f706aa5 100644 --- a/thirdparty/basis_universal/encoder/basisu_frontend.cpp +++ b/thirdparty/basis_universal/encoder/basisu_frontend.cpp @@ -1,5 +1,5 @@ // basisu_frontend.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -2347,6 +2347,7 @@ namespace basisu continue; uint64_t overall_best_err = 0; + (void)overall_best_err; uint64_t total_err[4][4][4]; clear_obj(total_err); diff --git a/thirdparty/basis_universal/encoder/basisu_frontend.h b/thirdparty/basis_universal/encoder/basisu_frontend.h index cda73f3984..69fc8d8ec5 100644 --- a/thirdparty/basis_universal/encoder/basisu_frontend.h +++ b/thirdparty/basis_universal/encoder/basisu_frontend.h @@ -1,5 +1,5 @@ // basisu_frontend.h -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp index dec769d5ac..342446b8fd 100644 --- a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp +++ b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp @@ -1,5 +1,5 @@ // basisu_gpu_texture.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,13 +15,15 @@ #include "basisu_gpu_texture.h" #include "basisu_enc.h" #include "basisu_pvrtc1_4.h" -#if BASISU_USE_ASTC_DECOMPRESS -#include "basisu_astc_decomp.h" -#endif +#include "3rdparty/android_astc_decomp.h" #include "basisu_bc7enc.h" +#include "../transcoder/basisu_astc_hdr_core.h" namespace basisu { + //------------------------------------------------------------------------------------------------ + // ETC2 EAC + void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels) { static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8"); @@ -56,6 +58,8 @@ namespace basisu pPixels[15].a = clamp255(base + pTable[pBlock->get_selector(3, 3, selector_bits)] * mul); } + //------------------------------------------------------------------------------------------------ + // BC1 struct bc1_block { enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 }; @@ -274,6 +278,9 @@ namespace basisu return used_punchthrough; } + //------------------------------------------------------------------------------------------------ + // BC3-5 + struct bc4_block { enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 }; @@ -372,7 +379,8 @@ namespace basisu unpack_bc4(pBlock_bits, &pPixels[0].r, sizeof(color_rgba)); unpack_bc4((const uint8_t *)pBlock_bits + sizeof(bc4_block), &pPixels[0].g, sizeof(color_rgba)); } - + + //------------------------------------------------------------------------------------------------ // ATC isn't officially documented, so I'm assuming these references: // http://www.guildsoftware.com/papers/2012.Converting.DXTC.to.ATC.pdf // https://github.com/Triang3l/S3TConv/blob/master/s3tconv_atitc.c @@ -426,6 +434,7 @@ namespace basisu } } + //------------------------------------------------------------------------------------------------ // BC7 mode 0-7 decompression. // Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines. @@ -742,6 +751,255 @@ namespace basisu return false; } + static inline int bc6h_sign_extend(int val, int bits) + { + assert((bits >= 1) && (bits < 32)); + assert((val >= 0) && (val < (1 << bits))); + return (val << (32 - bits)) >> (32 - bits); + } + + static inline int bc6h_apply_delta(int base, int delta, int num_bits, int is_signed) + { + int bitmask = ((1 << num_bits) - 1); + int v = (base + delta) & bitmask; + return is_signed ? bc6h_sign_extend(v, num_bits) : v; + } + + static int bc6h_dequantize(int val, int bits, int is_signed) + { + int result; + if (is_signed) + { + if (bits >= 16) + result = val; + else + { + int s_flag = 0; + if (val < 0) + { + s_flag = 1; + val = -val; + } + + if (val == 0) + result = 0; + else if (val >= ((1 << (bits - 1)) - 1)) + result = 0x7FFF; + else + result = ((val << 15) + 0x4000) >> (bits - 1); + + if (s_flag) + result = -result; + } + } + else + { + if (bits >= 15) + result = val; + else if (!val) + result = 0; + else if (val == ((1 << bits) - 1)) + result = 0xFFFF; + else + result = ((val << 16) + 0x8000) >> bits; + } + return result; + } + + static inline int bc6h_interpolate(int a, int b, const uint8_t* pWeights, int index) + { + return (a * (64 - (int)pWeights[index]) + b * (int)pWeights[index] + 32) >> 6; + } + + static inline basist::half_float bc6h_convert_to_half(int val, int is_signed) + { + if (!is_signed) + { + // scale by 31/64 + return (basist::half_float)((val * 31) >> 6); + } + + // scale by 31/32 + val = (val < 0) ? -(((-val) * 31) >> 5) : (val * 31) >> 5; + + int s = 0; + if (val < 0) + { + s = 0x8000; + val = -val; + } + + return (basist::half_float)(s | val); + } + + static inline uint32_t bc6h_get_bits(uint32_t num_bits, uint64_t& l, uint64_t& h, uint32_t& total_bits) + { + assert((num_bits) && (num_bits <= 63)); + + uint32_t v = (uint32_t)(l & ((1U << num_bits) - 1U)); + + l >>= num_bits; + l |= (h << (64U - num_bits)); + h >>= num_bits; + + total_bits += num_bits; + assert(total_bits <= 128); + + return v; + } + + static inline uint32_t bc6h_reverse_bits(uint32_t v, uint32_t num_bits) + { + uint32_t res = 0; + for (uint32_t i = 0; i < num_bits; i++) + { + uint32_t bit = (v & (1u << i)) != 0u; + res |= (bit << (num_bits - 1u - i)); + } + return res; + } + + static inline uint64_t bc6h_read_le_qword(const void* p) + { + const uint8_t* pSrc = static_cast<const uint8_t*>(p); + return ((uint64_t)read_le_dword(pSrc)) | (((uint64_t)read_le_dword(pSrc + sizeof(uint32_t))) << 32U); + } + + bool unpack_bc6h(const void* pSrc_block, void* pDst_block, bool is_signed, uint32_t dest_pitch_in_halfs) + { + assert(dest_pitch_in_halfs >= 4 * 3); + + const uint32_t MAX_SUBSETS = 2, MAX_COMPS = 3; + + const uint8_t* pSrc = static_cast<const uint8_t*>(pSrc_block); + basist::half_float* pDst = static_cast<basist::half_float*>(pDst_block); + + uint64_t blo = bc6h_read_le_qword(pSrc), bhi = bc6h_read_le_qword(pSrc + sizeof(uint64_t)); + + // Unpack mode + const int mode = basist::g_bc6h_mode_lookup[blo & 31]; + if (mode < 0) + { + for (int y = 0; y < 4; y++) + { + memset(pDst, 0, sizeof(basist::half_float) * 4); + pDst += dest_pitch_in_halfs; + } + return false; + } + + // Skip mode bits + uint32_t total_bits_read = 0; + bc6h_get_bits((mode < 2) ? 2 : 5, blo, bhi, total_bits_read); + + assert(mode < (int)basist::NUM_BC6H_MODES); + + const uint32_t num_subsets = (mode >= 10) ? 1 : 2; + const bool is_mode_9_or_10 = (mode == 9) || (mode == 10); + + // Unpack endpoint components + int comps[MAX_SUBSETS][MAX_COMPS][2] = { { { 0 } } }; // [subset][comp][l/h] + int part_index = 0; + + uint32_t layout_index = 0; + while (layout_index < basist::MAX_BC6H_LAYOUT_INDEX) + { + const basist::bc6h_bit_layout& layout = basist::g_bc6h_bit_layouts[mode][layout_index]; + + if (layout.m_comp < 0) + break; + + const int subset = layout.m_index >> 1, lh_index = layout.m_index & 1; + assert((layout.m_comp == 3) || ((subset >= 0) && (subset < (int)MAX_SUBSETS))); + + const int last_bit = layout.m_last_bit, first_bit = layout.m_first_bit; + assert(last_bit >= 0); + + int& res = (layout.m_comp == 3) ? part_index : comps[subset][layout.m_comp][lh_index]; + + if (first_bit < 0) + { + res |= (bc6h_get_bits(1, blo, bhi, total_bits_read) << last_bit); + } + else + { + const int total_bits = iabs(last_bit - first_bit) + 1; + const int bit_shift = basisu::minimum(first_bit, last_bit); + + int b = bc6h_get_bits(total_bits, blo, bhi, total_bits_read); + + if (last_bit < first_bit) + b = bc6h_reverse_bits(b, total_bits); + + res |= (b << bit_shift); + } + + layout_index++; + } + assert(layout_index != basist::MAX_BC6H_LAYOUT_INDEX); + + // Sign extend/dequantize endpoints + const int num_sig_bits = basist::g_bc6h_mode_sig_bits[mode][0]; + if (is_signed) + { + for (uint32_t comp = 0; comp < 3; comp++) + comps[0][comp][0] = bc6h_sign_extend(comps[0][comp][0], num_sig_bits); + } + + if (is_signed || !is_mode_9_or_10) + { + for (uint32_t subset = 0; subset < num_subsets; subset++) + for (uint32_t comp = 0; comp < 3; comp++) + for (uint32_t lh = (subset ? 0 : 1); lh < 2; lh++) + comps[subset][comp][lh] = bc6h_sign_extend(comps[subset][comp][lh], basist::g_bc6h_mode_sig_bits[mode][1 + comp]); + } + + if (!is_mode_9_or_10) + { + for (uint32_t subset = 0; subset < num_subsets; subset++) + for (uint32_t comp = 0; comp < 3; comp++) + for (uint32_t lh = (subset ? 0 : 1); lh < 2; lh++) + comps[subset][comp][lh] = bc6h_apply_delta(comps[0][comp][0], comps[subset][comp][lh], num_sig_bits, is_signed); + } + + for (uint32_t subset = 0; subset < num_subsets; subset++) + for (uint32_t comp = 0; comp < 3; comp++) + for (uint32_t lh = 0; lh < 2; lh++) + comps[subset][comp][lh] = bc6h_dequantize(comps[subset][comp][lh], num_sig_bits, is_signed); + + // Now unpack weights and output texels + const int weight_bits = (mode >= 10) ? 4 : 3; + const uint8_t* pWeights = (mode >= 10) ? basist::g_bc6h_weight4 : basist::g_bc6h_weight3; + + dest_pitch_in_halfs -= 4 * 3; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + int subset = (num_subsets == 1) ? ((x | y) ? 0 : 0x80) : basist::g_bc6h_2subset_patterns[part_index][y][x]; + const int num_bits = weight_bits + ((subset & 0x80) ? -1 : 0); + + subset &= 1; + + const int weight_index = bc6h_get_bits(num_bits, blo, bhi, total_bits_read); + + pDst[0] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][0][0], comps[subset][0][1], pWeights, weight_index), is_signed); + pDst[1] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][1][0], comps[subset][1][1], pWeights, weight_index), is_signed); + pDst[2] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][2][0], comps[subset][2][1], pWeights, weight_index), is_signed); + + pDst += 3; + } + + pDst += dest_pitch_in_halfs; + } + + assert(total_bits_read == 128); + return true; + } + //------------------------------------------------------------------------------------------------ + // FXT1 (for fun, and because some modern Intel parts support it, and because a subset is like BC1) + struct fxt1_block { union @@ -901,6 +1159,9 @@ namespace basisu return true; } + //------------------------------------------------------------------------------------------------ + // PVRTC2 (non-interpolated, hard_flag=1 modulation=0 subset only!) + struct pvrtc2_block { uint8_t m_modulation[4]; @@ -1015,6 +1276,9 @@ namespace basisu return true; } + //------------------------------------------------------------------------------------------------ + // ETC2 EAC R11 or RG11 + struct etc2_eac_r11 { uint64_t m_base : 8; @@ -1085,13 +1349,16 @@ namespace basisu unpack_etc2_eac_r(pBlock, pPixels, c); } } - + + //------------------------------------------------------------------------------------------------ + // UASTC + void unpack_uastc(const void* p, color_rgba* pPixels) { basist::unpack_uastc(*static_cast<const basist::uastc_block*>(p), (basist::color32 *)pPixels, false); } - - // Unpacks to RGBA, R, RG, or A + + // Unpacks to RGBA, R, RG, or A. LDR GPU texture formats only. bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels) { switch (fmt) @@ -1150,14 +1417,24 @@ namespace basisu unpack_etc2_eac(pBlock, pPixels); break; } - case texture_format::cASTC4x4: + case texture_format::cBC6HSigned: + case texture_format::cBC6HUnsigned: + case texture_format::cASTC_HDR_4x4: + case texture_format::cUASTC_HDR_4x4: + { + // Can't unpack HDR blocks in unpack_block() because it returns 32bpp pixel data. + assert(0); + return false; + } + case texture_format::cASTC_LDR_4x4: { -#if BASISU_USE_ASTC_DECOMPRESS const bool astc_srgb = false; - basisu_astc::astc::decompress(reinterpret_cast<uint8_t*>(pPixels), static_cast<const uint8_t*>(pBlock), astc_srgb, 4, 4); -#else - memset(pPixels, 255, 16 * sizeof(color_rgba)); -#endif + bool status = basisu_astc::astc::decompress_ldr(reinterpret_cast<uint8_t*>(pPixels), static_cast<const uint8_t*>(pBlock), astc_srgb, 4, 4); + assert(status); + + if (!status) + return false; + break; } case texture_format::cATC_RGB: @@ -1206,6 +1483,66 @@ namespace basisu return true; } + bool unpack_block_hdr(texture_format fmt, const void* pBlock, vec4F* pPixels) + { + switch (fmt) + { + case texture_format::cASTC_HDR_4x4: + case texture_format::cUASTC_HDR_4x4: + { +#if 1 + bool status = basisu_astc::astc::decompress_hdr(&pPixels[0][0], (uint8_t*)pBlock, 4, 4); + assert(status); + if (!status) + return false; +#else + basist::half_float half_block[16][4]; + + astc_helpers::log_astc_block log_blk; + if (!astc_helpers::unpack_block(pBlock, log_blk, 4, 4)) + return false; + if (!astc_helpers::decode_block(log_blk, half_block, 4, 4, astc_helpers::cDecodeModeHDR16)) + return false; + + for (uint32_t p = 0; p < 16; p++) + { + pPixels[p][0] = basist::half_to_float(half_block[p][0]); + pPixels[p][1] = basist::half_to_float(half_block[p][1]); + pPixels[p][2] = basist::half_to_float(half_block[p][2]); + pPixels[p][3] = basist::half_to_float(half_block[p][3]); + } + + //memset(pPixels, 0, sizeof(vec4F) * 16); +#endif + return true; + } + case texture_format::cBC6HSigned: + case texture_format::cBC6HUnsigned: + { + basist::half_float half_block[16][3]; + + unpack_bc6h(pBlock, half_block, fmt == texture_format::cBC6HSigned); + + for (uint32_t p = 0; p < 16; p++) + { + pPixels[p][0] = basist::half_to_float(half_block[p][0]); + pPixels[p][1] = basist::half_to_float(half_block[p][1]); + pPixels[p][2] = basist::half_to_float(half_block[p][2]); + pPixels[p][3] = 1.0f; + } + + return true; + } + default: + { + break; + } + } + + assert(0); + return false; + } + bool gpu_image::unpack(image& img) const { img.resize(get_pixel_width(), get_pixel_height()); @@ -1252,7 +1589,48 @@ namespace basisu return success; } + + bool gpu_image::unpack_hdr(imagef& img) const + { + if ((m_fmt != texture_format::cASTC_HDR_4x4) && + (m_fmt != texture_format::cUASTC_HDR_4x4) && + (m_fmt != texture_format::cBC6HUnsigned) && + (m_fmt != texture_format::cBC6HSigned)) + { + // Can't call on LDR images, at least currently. (Could unpack the LDR data and convert to float.) + assert(0); + return false; + } + + img.resize(get_pixel_width(), get_pixel_height()); + img.set_all(vec4F(0.0f)); + + if (!img.get_width() || !img.get_height()) + return true; + + assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize)); + vec4F pixels[cMaxBlockSize * cMaxBlockSize]; + clear_obj(pixels); + + bool success = true; + + for (uint32_t by = 0; by < m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + const void* pBlock = get_block_ptr(bx, by); + + if (!unpack_block_hdr(m_fmt, pBlock, pixels)) + success = false; + + img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height); + } // bx + } // by + + return success; + } + // KTX1 texture file writing static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; // KTX/GL enums @@ -1273,6 +1651,8 @@ namespace basisu KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278, KTX_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C, KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D, + KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT = 0x8E8E, + KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F, KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00, KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02, KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0, @@ -1319,6 +1699,7 @@ namespace basisu uint32_t width = 0, height = 0, total_levels = 0; basisu::texture_format fmt = texture_format::cInvalidTextureFormat; + // Sanity check the input if (cubemap_flag) { if ((gpu_images.size() % 6) != 0) @@ -1327,7 +1708,7 @@ namespace basisu return false; } } - + for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++) { const gpu_image_vec &levels = gpu_images[array_index]; @@ -1426,6 +1807,18 @@ namespace basisu base_internal_fmt = KTX_RGBA; break; } + case texture_format::cBC6HSigned: + { + internal_fmt = KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cBC6HUnsigned: + { + internal_fmt = KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT; + base_internal_fmt = KTX_RGBA; + break; + } case texture_format::cBC7: { internal_fmt = KTX_COMPRESSED_RGBA_BPTC_UNORM; @@ -1443,7 +1836,10 @@ namespace basisu base_internal_fmt = KTX_RGBA; break; } - case texture_format::cASTC4x4: + // We use different enums for HDR vs. LDR ASTC, but internally they are both just ASTC. + case texture_format::cASTC_LDR_4x4: + case texture_format::cASTC_HDR_4x4: + case texture_format::cUASTC_HDR_4x4: // UASTC_HDR is just HDR-only ASTC { internal_fmt = KTX_COMPRESSED_RGBA_ASTC_4x4_KHR; base_internal_fmt = KTX_RGBA; @@ -1496,17 +1892,17 @@ namespace basisu return false; } } - + ktx_header header; header.clear(); memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id)); header.m_endianness = KTX_ENDIAN; - + header.m_pixelWidth = width; header.m_pixelHeight = height; - + header.m_glTypeSize = 1; - + header.m_glInternalFormat = internal_fmt; header.m_glBaseInternalFormat = base_internal_fmt; @@ -1517,12 +1913,12 @@ namespace basisu header.m_numberOfMipmapLevels = total_levels; header.m_numberOfFaces = cubemap_flag ? 6 : 1; - append_vector(ktx_data, (uint8_t *)&header, sizeof(header)); + append_vector(ktx_data, (uint8_t*)&header, sizeof(header)); for (uint32_t level_index = 0; level_index < total_levels; level_index++) { uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes(); - + if ((header.m_numberOfFaces == 1) || (header.m_numberOfArrayElements > 1)) { img_size = img_size * header.m_numberOfFaces * maximum<uint32_t>(1, header.m_numberOfArrayElements); @@ -1531,9 +1927,10 @@ namespace basisu assert(img_size && ((img_size & 3) == 0)); packed_uint<4> packed_img_size(img_size); - append_vector(ktx_data, (uint8_t *)&packed_img_size, sizeof(packed_img_size)); + append_vector(ktx_data, (uint8_t*)&packed_img_size, sizeof(packed_img_size)); uint32_t bytes_written = 0; + (void)bytes_written; for (uint32_t array_index = 0; array_index < maximum<uint32_t>(1, header.m_numberOfArrayElements); array_index++) { @@ -1541,11 +1938,11 @@ namespace basisu { const gpu_image& img = gpu_images[cubemap_flag ? (array_index * 6 + face_index) : array_index][level_index]; - append_vector(ktx_data, (uint8_t *)img.get_ptr(), img.get_size_in_bytes()); - + append_vector(ktx_data, (uint8_t*)img.get_ptr(), img.get_size_in_bytes()); + bytes_written += img.get_size_in_bytes(); } - + } // array_index } // level_index @@ -1553,7 +1950,58 @@ namespace basisu return true; } - bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag) + bool does_dds_support_format(texture_format fmt) + { + switch (fmt) + { + case texture_format::cBC1_NV: + case texture_format::cBC1_AMD: + case texture_format::cBC1: + case texture_format::cBC3: + case texture_format::cBC4: + case texture_format::cBC5: + case texture_format::cBC6HSigned: + case texture_format::cBC6HUnsigned: + case texture_format::cBC7: + return true; + default: + break; + } + return false; + } + + // Only supports the basic DirectX BC texture formats. + // gpu_images array is: [face/layer][mipmap level] + // For cubemap arrays, # of face/layers must be a multiple of 6. + // Accepts 2D, 2D mipmapped, 2D array, 2D array mipmapped + // and cubemap, cubemap mipmapped, and cubemap array mipmapped. + bool write_dds_file(uint8_vec &dds_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format) + { + return false; + } + + bool write_dds_file(const char* pFilename, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format) + { + uint8_vec dds_data; + + if (!write_dds_file(dds_data, gpu_images, cubemap_flag, use_srgb_format)) + return false; + + if (!write_vec_to_file(pFilename, dds_data)) + { + fprintf(stderr, "write_dds_file: Failed writing DDS file data\n"); + return false; + } + + return true; + } + + bool read_uncompressed_dds_file(const char* pFilename, basisu::vector<image> &ldr_mips, basisu::vector<imagef>& hdr_mips) + { + return false; + } + + bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag, bool use_srgb_format) { std::string extension(string_tolower(string_get_extension(pFilename))); @@ -1570,8 +2018,8 @@ namespace basisu } else if (extension == "dds") { - // TODO - return false; + if (!write_dds_file(filedata, g, cubemap_flag, use_srgb_format)) + return false; } else { @@ -1583,11 +2031,18 @@ namespace basisu return basisu::write_vec_to_file(pFilename, filedata); } - bool write_compressed_texture_file(const char* pFilename, const gpu_image& g) + bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g, bool use_srgb_format) + { + basisu::vector<gpu_image_vec> a; + a.push_back(g); + return write_compressed_texture_file(pFilename, a, false, use_srgb_format); + } + + bool write_compressed_texture_file(const char* pFilename, const gpu_image& g, bool use_srgb_format) { basisu::vector<gpu_image_vec> v; enlarge_vector(v, 1)->push_back(g); - return write_compressed_texture_file(pFilename, v, false); + return write_compressed_texture_file(pFilename, v, false, use_srgb_format); } //const uint32_t OUT_FILE_MAGIC = 'TEXC'; @@ -1626,5 +2081,49 @@ namespace basisu return fclose(pFile) != EOF; } + + // The .astc texture format is readable using ARM's astcenc, AMD Compressonator, and other engines/tools. It oddly doesn't support mipmaps, limiting + // its usefulness/relevance. + // https://github.com/ARM-software/astc-encoder/blob/main/Docs/FileFormat.md + bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y) + { + assert(pBlocks && (block_width >= 4) && (block_height >= 4) && (dim_x > 0) && (dim_y > 0)); + + uint8_vec file_data; + file_data.push_back(0x13); + file_data.push_back(0xAB); + file_data.push_back(0xA1); + file_data.push_back(0x5C); + + file_data.push_back((uint8_t)block_width); + file_data.push_back((uint8_t)block_height); + file_data.push_back(1); + + file_data.push_back((uint8_t)dim_x); + file_data.push_back((uint8_t)(dim_x >> 8)); + file_data.push_back((uint8_t)(dim_x >> 16)); + + file_data.push_back((uint8_t)dim_y); + file_data.push_back((uint8_t)(dim_y >> 8)); + file_data.push_back((uint8_t)(dim_y >> 16)); + + file_data.push_back((uint8_t)1); + file_data.push_back((uint8_t)0); + file_data.push_back((uint8_t)0); + + const uint32_t num_blocks_x = (dim_x + block_width - 1) / block_width; + const uint32_t num_blocks_y = (dim_y + block_height - 1) / block_height; + + const uint32_t total_bytes = num_blocks_x * num_blocks_y * 16; + + const size_t cur_size = file_data.size(); + + file_data.resize(cur_size + total_bytes); + + memcpy(&file_data[cur_size], pBlocks, total_bytes); + + return write_vec_to_file(pFilename, file_data); + } + } // basisu diff --git a/thirdparty/basis_universal/encoder/basisu_gpu_texture.h b/thirdparty/basis_universal/encoder/basisu_gpu_texture.h index 619926f5f9..67c2a2bc5e 100644 --- a/thirdparty/basis_universal/encoder/basisu_gpu_texture.h +++ b/thirdparty/basis_universal/encoder/basisu_gpu_texture.h @@ -1,5 +1,5 @@ // basisu_gpu_texture.h -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -48,6 +48,7 @@ namespace basisu } inline texture_format get_format() const { return m_fmt; } + inline bool is_hdr() const { return is_hdr_texture_format(m_fmt); } // Width/height in pixels inline uint32_t get_pixel_width() const { return m_width; } @@ -100,9 +101,13 @@ namespace basisu m_blocks.resize(m_blocks_x * m_blocks_y * m_qwords_per_block); } + // Unpacks LDR textures only. bool unpack(image& img) const; + + // Unpacks HDR textures only. + bool unpack_hdr(imagef& img) const; - void override_dimensions(uint32_t w, uint32_t h) + inline void override_dimensions(uint32_t w, uint32_t h) { m_width = w; m_height = h; @@ -116,39 +121,50 @@ namespace basisu typedef basisu::vector<gpu_image> gpu_image_vec; - // KTX file writing - + // KTX1 file writing bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag); - - bool write_compressed_texture_file(const char *pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag); - inline bool write_compressed_texture_file(const char *pFilename, const gpu_image_vec &g) - { - basisu::vector<gpu_image_vec> a; - a.push_back(g); - return write_compressed_texture_file(pFilename, a, false); - } + bool does_dds_support_format(texture_format fmt); + bool write_dds_file(uint8_vec& dds_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format); + bool write_dds_file(const char* pFilename, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format); + + // Currently reads 2D 32bpp RGBA, 16-bit HALF RGBA, or 32-bit FLOAT RGBA, with or without mipmaps. No tex arrays or cubemaps, yet. + bool read_uncompressed_dds_file(const char* pFilename, basisu::vector<image>& ldr_mips, basisu::vector<imagef>& hdr_mips); - bool write_compressed_texture_file(const char *pFilename, const gpu_image &g); + // Supports DDS and KTX + bool write_compressed_texture_file(const char *pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag, bool use_srgb_format); + bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g, bool use_srgb_format); + bool write_compressed_texture_file(const char *pFilename, const gpu_image &g, bool use_srgb_format); bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi); // GPU texture block unpacking + // For ETC1, use in basisu_etc.h: bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha) void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels); bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha); void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride); bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels); void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels); bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels); - bool unpack_bc7(const void* pBlock_bits, color_rgba* pPixels); + bool unpack_bc7(const void* pBlock_bits, color_rgba* pPixels); // full format + bool unpack_bc6h(const void* pSrc_block, void* pDst_block, bool is_signed, uint32_t dest_pitch_in_halfs = 4 * 3); // full format, outputs HALF values, RGB texels only (not RGBA) void unpack_atc(const void* pBlock_bits, color_rgba* pPixels); + // We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment. bool unpack_fxt1(const void* p, color_rgba* pPixels); + // PVRTC2 is currently limited to only what our transcoder outputs (non-interpolated, hard_flag=1 modulation=0). In this mode, PVRTC2 looks much like BC1/ATC. bool unpack_pvrtc2(const void* p, color_rgba* pPixels); void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c); void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels); - + // unpack_block() is primarily intended to unpack texture data created by the transcoder. - // For some texture formats (like ETC2 RGB, PVRTC2, FXT1) it's not a complete implementation. + // For some texture formats (like ETC2 RGB, PVRTC2, FXT1) it's not yet a complete implementation. + // Unpacks LDR texture formats only. bool unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels); - + + // Unpacks HDR texture formats only. + bool unpack_block_hdr(texture_format fmt, const void* pBlock, vec4F* pPixels); + + bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y); + } // namespace basisu + diff --git a/thirdparty/basis_universal/encoder/basisu_kernels_declares.h b/thirdparty/basis_universal/encoder/basisu_kernels_declares.h index b03e2ea6e8..9b85a594ee 100644 --- a/thirdparty/basis_universal/encoder/basisu_kernels_declares.h +++ b/thirdparty/basis_universal/encoder/basisu_kernels_declares.h @@ -1,5 +1,5 @@ // basisu_kernels_declares.h -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_kernels_imp.h b/thirdparty/basis_universal/encoder/basisu_kernels_imp.h index dcf1ce069a..123862b1dd 100644 --- a/thirdparty/basis_universal/encoder/basisu_kernels_imp.h +++ b/thirdparty/basis_universal/encoder/basisu_kernels_imp.h @@ -1,5 +1,5 @@ // basisu_kernels_imp.h - Do not directly include -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp b/thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp index 4f15a5a12b..36a493d7ed 100644 --- a/thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp +++ b/thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp @@ -1,5 +1,5 @@ // basisu_kernels_sse.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,22 +22,6 @@ #include <intrin.h> #endif -#if !defined(_MSC_VER) - #if __AVX__ || __AVX2__ || __AVX512F__ - #error Please check your compiler options - #endif - - #if CPPSPMD_SSE2 - #if __SSE4_1__ || __SSE3__ || __SSE4_2__ || __SSSE3__ - #error SSE4.1/SSE3/SSE4.2/SSSE3 cannot be enabled to use this file - #endif - #else - #if !__SSE4_1__ || !__SSE3__ || !__SSSE3__ - #error Please check your compiler options - #endif - #endif -#endif - #include "cppspmd_sse.h" #include "cppspmd_type_aliases.h" diff --git a/thirdparty/basis_universal/encoder/basisu_miniz.h b/thirdparty/basis_universal/encoder/basisu_miniz.h index 18de997232..dab38f9f92 100644 --- a/thirdparty/basis_universal/encoder/basisu_miniz.h +++ b/thirdparty/basis_universal/encoder/basisu_miniz.h @@ -3,7 +3,7 @@ Forked from the public domain/unlicense version at: https://code.google.com/archive/p/miniz/ - Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. + Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -1973,7 +1973,7 @@ static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahe (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); if (!probe_len) { - *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); break; + *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, (mz_uint)TDEFL_MAX_MATCH_LEN); break; } else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len) { @@ -2101,7 +2101,7 @@ static mz_bool tdefl_compress_fast(tdefl_compressor *d) total_lz_bytes += cur_match_len; lookahead_pos += cur_match_len; - dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE); + dict_size = MZ_MIN(dict_size + cur_match_len, (mz_uint)TDEFL_LZ_DICT_SIZE); cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; MZ_ASSERT(lookahead_size >= cur_match_len); lookahead_size -= cur_match_len; @@ -2129,7 +2129,7 @@ static mz_bool tdefl_compress_fast(tdefl_compressor *d) d->m_huff_count[0][lit]++; lookahead_pos++; - dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE); + dict_size = MZ_MIN(dict_size + 1, (mz_uint)TDEFL_LZ_DICT_SIZE); cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; lookahead_size--; @@ -2283,7 +2283,7 @@ static mz_bool tdefl_compress_normal(tdefl_compressor *d) d->m_lookahead_pos += len_to_move; MZ_ASSERT(d->m_lookahead_size >= len_to_move); d->m_lookahead_size -= len_to_move; - d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, TDEFL_LZ_DICT_SIZE); + d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE); // Check if it's time to flush the current LZ codes to the internal output buffer. if ( (d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || ( (d->m_total_lz_bytes > 31*1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) ) diff --git a/thirdparty/basis_universal/encoder/basisu_opencl.cpp b/thirdparty/basis_universal/encoder/basisu_opencl.cpp index 81e3090a26..e0611c18ee 100644 --- a/thirdparty/basis_universal/encoder/basisu_opencl.cpp +++ b/thirdparty/basis_universal/encoder/basisu_opencl.cpp @@ -1,5 +1,5 @@ // basisu_opencl.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_opencl.h b/thirdparty/basis_universal/encoder/basisu_opencl.h index 4194a08418..2546a18dab 100644 --- a/thirdparty/basis_universal/encoder/basisu_opencl.h +++ b/thirdparty/basis_universal/encoder/basisu_opencl.h @@ -1,5 +1,5 @@ // basisu_opencl.h -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Note: Undefine or set BASISU_SUPPORT_OPENCL to 0 to completely OpenCL support. // diff --git a/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp index 596fc197e6..4bf9516f90 100644 --- a/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp +++ b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp @@ -1,5 +1,5 @@ // basisu_pvrtc1_4.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h index db6985a439..a9fe6b27aa 100644 --- a/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h +++ b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h @@ -1,5 +1,5 @@ // basisu_pvrtc1_4.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -231,7 +231,18 @@ namespace basisu inline void set_to_black() { +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif +#endif memset(m_blocks.get_ptr(), 0, m_blocks.size_in_bytes()); +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif } inline bool get_block_uses_transparent_modulation(uint32_t bx, uint32_t by) const diff --git a/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp b/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp index 597cb3f618..46cd837376 100644 --- a/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp +++ b/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp @@ -1,5 +1,5 @@ // basisu_resampler_filters.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_resampler.cpp b/thirdparty/basis_universal/encoder/basisu_resampler.cpp index f4cedf0031..a00c63335d 100644 --- a/thirdparty/basis_universal/encoder/basisu_resampler.cpp +++ b/thirdparty/basis_universal/encoder/basisu_resampler.cpp @@ -1,5 +1,5 @@ // basisu_resampler.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_resampler.h b/thirdparty/basis_universal/encoder/basisu_resampler.h index dc0978caeb..ac1ef73d7f 100644 --- a/thirdparty/basis_universal/encoder/basisu_resampler.h +++ b/thirdparty/basis_universal/encoder/basisu_resampler.h @@ -1,5 +1,5 @@ // basisu_resampler.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_resampler_filters.h b/thirdparty/basis_universal/encoder/basisu_resampler_filters.h index 0ebb51c334..4d66ac2c70 100644 --- a/thirdparty/basis_universal/encoder/basisu_resampler_filters.h +++ b/thirdparty/basis_universal/encoder/basisu_resampler_filters.h @@ -1,5 +1,5 @@ // basisu_resampler_filters.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_ssim.cpp b/thirdparty/basis_universal/encoder/basisu_ssim.cpp index cceb400b88..608ce937fc 100644 --- a/thirdparty/basis_universal/encoder/basisu_ssim.cpp +++ b/thirdparty/basis_universal/encoder/basisu_ssim.cpp @@ -1,5 +1,5 @@ // basisu_ssim.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_ssim.h b/thirdparty/basis_universal/encoder/basisu_ssim.h index 986ca3bbdf..51cd2d78fd 100644 --- a/thirdparty/basis_universal/encoder/basisu_ssim.h +++ b/thirdparty/basis_universal/encoder/basisu_ssim.h @@ -1,5 +1,5 @@ // basisu_ssim.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp b/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp index 271bbc6f1d..51f6e979d4 100644 --- a/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp +++ b/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp @@ -1,5 +1,5 @@ // basisu_uastc_enc.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,11 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "basisu_uastc_enc.h" - -#if BASISU_USE_ASTC_DECOMPRESS -#include "basisu_astc_decomp.h" -#endif - +#include "3rdparty/android_astc_decomp.h" #include "basisu_gpu_texture.h" #include "basisu_bc7enc.h" @@ -384,6 +380,7 @@ namespace basisu } uint32_t total_endpoint_bits = 0; + (void)total_endpoint_bits; for (uint32_t i = 0; i < total_tq_values; i++) { @@ -428,6 +425,8 @@ namespace basisu #endif uint32_t total_weight_bits = 0; + (void)total_weight_bits; + const uint32_t plane_shift = (total_planes == 2) ? 1 : 0; for (uint32_t i = 0; i < 16 * total_planes; i++) { @@ -3175,6 +3174,7 @@ namespace basisu const bool favor_bc7_error = !favor_uastc_error && ((flags & cPackUASTCFavorBC7Error) != 0); //const bool etc1_perceptual = true; + // TODO: This uses 64KB of stack space! uastc_encode_results results[MAX_ENCODE_RESULTS]; level = clampi(level, cPackUASTCLevelFastest, cPackUASTCLevelVerySlow); @@ -3567,7 +3567,6 @@ namespace basisu success = basist::unpack_uastc(temp_block, (basist::color32 *)temp_block_unpacked, false); VALIDATE(success); -#if BASISU_USE_ASTC_DECOMPRESS // Now round trip to packed ASTC and back, then decode to pixels. uint32_t astc_data[4]; @@ -3580,7 +3579,7 @@ namespace basisu } color_rgba decoded_astc_block[4][4]; - success = basisu_astc::astc::decompress((uint8_t*)decoded_astc_block, (uint8_t*)&astc_data, false, 4, 4); + success = basisu_astc::astc::decompress_ldr((uint8_t*)decoded_astc_block, (uint8_t*)&astc_data, false, 4, 4); VALIDATE(success); for (uint32_t y = 0; y < 4; y++) @@ -3595,7 +3594,6 @@ namespace basisu VALIDATE(temp_block_unpacked[y][x].c[3] == decoded_uastc_block[y][x].a); } } -#endif } #endif @@ -3789,8 +3787,9 @@ namespace basisu { uint64_t m_sel; uint32_t m_ofs; + uint32_t m_pad; // avoid implicit padding for selector_bitsequence_hash selector_bitsequence() { } - selector_bitsequence(uint32_t bit_ofs, uint64_t sel) : m_sel(sel), m_ofs(bit_ofs) { } + selector_bitsequence(uint32_t bit_ofs, uint64_t sel) : m_sel(sel), m_ofs(bit_ofs), m_pad(0) { } bool operator== (const selector_bitsequence& other) const { return (m_ofs == other.m_ofs) && (m_sel == other.m_sel); @@ -3811,7 +3810,7 @@ namespace basisu { std::size_t operator()(selector_bitsequence const& s) const noexcept { - return static_cast<std::size_t>(hash_hsieh((uint8_t *)&s, sizeof(s)) ^ s.m_sel); + return hash_hsieh((const uint8_t*)&s, sizeof(s)); } }; diff --git a/thirdparty/basis_universal/encoder/basisu_uastc_enc.h b/thirdparty/basis_universal/encoder/basisu_uastc_enc.h index ba39a558b3..54d39380e6 100644 --- a/thirdparty/basis_universal/encoder/basisu_uastc_enc.h +++ b/thirdparty/basis_universal/encoder/basisu_uastc_enc.h @@ -1,5 +1,5 @@ // basisu_uastc_enc.h -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/cppspmd_flow.h b/thirdparty/basis_universal/encoder/cppspmd_flow.h index f6930476aa..93934173c4 100644 --- a/thirdparty/basis_universal/encoder/cppspmd_flow.h +++ b/thirdparty/basis_universal/encoder/cppspmd_flow.h @@ -1,7 +1,7 @@ // Do not include this header directly. // Control flow functionality in common between all the headers. // -// Copyright 2020-2021 Binomial LLC +// Copyright 2020-2024 Binomial LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/cppspmd_math.h b/thirdparty/basis_universal/encoder/cppspmd_math.h index e7b3202b8e..3032df865f 100644 --- a/thirdparty/basis_universal/encoder/cppspmd_math.h +++ b/thirdparty/basis_universal/encoder/cppspmd_math.h @@ -1,6 +1,6 @@ // Do not include this header directly. // -// Copyright 2020-2021 Binomial LLC +// Copyright 2020-2024 Binomial LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -646,7 +646,7 @@ CPPSPMD_FORCE_INLINE vint spmd_kernel::count_set_bits(vint x) { vint v = x - (VUINT_SHIFT_RIGHT(x, 1) & 0x55555555); vint v1 = (v & 0x33333333) + (VUINT_SHIFT_RIGHT(v, 2) & 0x33333333); - return VUINT_SHIFT_RIGHT(((v1 + VUINT_SHIFT_RIGHT(v1, 4) & 0xF0F0F0F) * 0x1010101), 24); + return VUINT_SHIFT_RIGHT(((v1 + (VUINT_SHIFT_RIGHT(v1, 4) & 0xF0F0F0F)) * 0x1010101), 24); } CPPSPMD_FORCE_INLINE vint cmple_epu16(const vint &a, const vint &b) diff --git a/thirdparty/basis_universal/encoder/cppspmd_math_declares.h b/thirdparty/basis_universal/encoder/cppspmd_math_declares.h index cdb6447b62..f76c9b7e38 100644 --- a/thirdparty/basis_universal/encoder/cppspmd_math_declares.h +++ b/thirdparty/basis_universal/encoder/cppspmd_math_declares.h @@ -1,7 +1,7 @@ // Do not include this header directly. // This header defines shared struct spmd_kernel helpers. // -// Copyright 2020-2021 Binomial LLC +// Copyright 2020-2024 Binomial LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/cppspmd_sse.h b/thirdparty/basis_universal/encoder/cppspmd_sse.h index 4c61bab7b1..79dfa1561a 100644 --- a/thirdparty/basis_universal/encoder/cppspmd_sse.h +++ b/thirdparty/basis_universal/encoder/cppspmd_sse.h @@ -450,7 +450,7 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE explicit operator vint() const; private: - vbool& operator=(const vbool&); + //vbool& operator=(const vbool&); }; friend vbool operator!(const vbool& v); @@ -481,7 +481,7 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE explicit vfloat(int value) : m_value(_mm_set1_ps((float)value)) { } private: - vfloat& operator=(const vfloat&); + //vfloat& operator=(const vfloat&); }; CPPSPMD_FORCE_INLINE vfloat& store(vfloat& dst, const vfloat& src) @@ -514,7 +514,7 @@ struct spmd_kernel float* m_pValue; private: - float_lref& operator=(const float_lref&); + //float_lref& operator=(const float_lref&); }; CPPSPMD_FORCE_INLINE const float_lref& store(const float_lref& dst, const vfloat& src) @@ -561,7 +561,7 @@ struct spmd_kernel float* m_pValue; private: - float_vref& operator=(const float_vref&); + //float_vref& operator=(const float_vref&); }; // Varying ref to varying float @@ -571,7 +571,7 @@ struct spmd_kernel vfloat* m_pValue; private: - vfloat_vref& operator=(const vfloat_vref&); + //vfloat_vref& operator=(const vfloat_vref&); }; // Varying ref to varying int @@ -581,7 +581,7 @@ struct spmd_kernel vint* m_pValue; private: - vint_vref& operator=(const vint_vref&); + //vint_vref& operator=(const vint_vref&); }; CPPSPMD_FORCE_INLINE const float_vref& store(const float_vref& dst, const vfloat& src); @@ -624,7 +624,7 @@ struct spmd_kernel int* m_pValue; private: - int_lref& operator=(const int_lref&); + //int_lref& operator=(const int_lref&); }; CPPSPMD_FORCE_INLINE const int_lref& store(const int_lref& dst, const vint& src) @@ -663,7 +663,7 @@ struct spmd_kernel int16_t* m_pValue; private: - int16_lref& operator=(const int16_lref&); + //int16_lref& operator=(const int16_lref&); }; CPPSPMD_FORCE_INLINE const int16_lref& store(const int16_lref& dst, const vint& src) @@ -720,7 +720,7 @@ struct spmd_kernel const int* m_pValue; private: - cint_lref& operator=(const cint_lref&); + //cint_lref& operator=(const cint_lref&); }; CPPSPMD_FORCE_INLINE vint load(const cint_lref& src) @@ -742,7 +742,7 @@ struct spmd_kernel int* m_pValue; private: - int_vref& operator=(const int_vref&); + //int_vref& operator=(const int_vref&); }; // Varying ref to constant ints @@ -752,7 +752,7 @@ struct spmd_kernel const int* m_pValue; private: - cint_vref& operator=(const cint_vref&); + //cint_vref& operator=(const cint_vref&); }; // Varying int @@ -810,7 +810,7 @@ struct spmd_kernel } private: - vint& operator=(const vint&); + //vint& operator=(const vint&); }; // Load/store linear int @@ -1206,7 +1206,7 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE vint load_all(const vint_vref& src) { // TODO: There's surely a better way - __m128i k; + __m128i k = _mm_setzero_si128(); k = insert_x(k, ((int*)(&src.m_pValue[extract_x(src.m_vindex)]))[0]); k = insert_y(k, ((int*)(&src.m_pValue[extract_y(src.m_vindex)]))[1]); @@ -1261,7 +1261,7 @@ struct spmd_kernel } private: - lint& operator=(const lint&); + //lint& operator=(const lint&); }; CPPSPMD_FORCE_INLINE lint& store_all(lint& dst, const lint& src) diff --git a/thirdparty/basis_universal/encoder/cppspmd_type_aliases.h b/thirdparty/basis_universal/encoder/cppspmd_type_aliases.h index 0dfb28b88f..2600481239 100644 --- a/thirdparty/basis_universal/encoder/cppspmd_type_aliases.h +++ b/thirdparty/basis_universal/encoder/cppspmd_type_aliases.h @@ -1,7 +1,7 @@ // cppspmd_type_aliases.h // Do not include this file directly // -// Copyright 2020-2021 Binomial LLC +// Copyright 2020-2024 Binomial LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/encoder/pvpngreader.cpp b/thirdparty/basis_universal/encoder/pvpngreader.cpp index 46639f2796..6b32f66cbe 100644 --- a/thirdparty/basis_universal/encoder/pvpngreader.cpp +++ b/thirdparty/basis_universal/encoder/pvpngreader.cpp @@ -163,7 +163,7 @@ public: { if ((sizeof(size_t) == sizeof(uint32_t)) && (new_size > 0x7FFFFFFFUL)) return 0; - m_buf.resize(new_size); + m_buf.resize((size_t)new_size); } memcpy(&m_buf[(size_t)m_ofs], pBuf, len); @@ -178,11 +178,11 @@ public: return 0; uint64_t max_bytes = minimum<uint64_t>(len, m_buf.size() - m_ofs); - memcpy(pBuf, &m_buf[(size_t)m_ofs], max_bytes); + memcpy(pBuf, &m_buf[(size_t)m_ofs], (size_t)max_bytes); m_ofs += max_bytes; - return max_bytes; + return (size_t)max_bytes; } }; @@ -249,11 +249,11 @@ public: return 0; uint64_t max_bytes = minimum<uint64_t>(len, m_buf_size - m_ofs); - memcpy(pBuf, &m_pBuf[(size_t)m_ofs], max_bytes); + memcpy(pBuf, &m_pBuf[(size_t)m_ofs], (size_t)max_bytes); m_ofs += max_bytes; - return max_bytes; + return (size_t)max_bytes; } }; @@ -1626,8 +1626,8 @@ int png_decoder::png_decode_start() if (m_ihdr.m_ilace_type == 1) { - int i; - uint32_t total_lines, lines_processed; + //int i; + //uint32_t total_lines, lines_processed; m_adam7_pass_size_x[0] = adam7_pass_size(m_ihdr.m_width, 0, 8); m_adam7_pass_size_x[1] = adam7_pass_size(m_ihdr.m_width, 4, 8); @@ -1651,10 +1651,12 @@ int png_decoder::png_decode_start() m_pass_y_left = 0; +#if 0 total_lines = lines_processed = 0; for (i = 0; i < 7; i++) total_lines += m_adam7_pass_size_y[i]; +#endif for (; ; ) { @@ -1675,7 +1677,7 @@ int png_decoder::png_decode_start() } } - lines_processed++; + //lines_processed++; } m_adam7_decoded_flag = TRUE; diff --git a/modules/basis_universal/patches/external-jpgd.patch b/thirdparty/basis_universal/patches/external-jpgd.patch index 7a805d00cb..7a805d00cb 100644 --- a/modules/basis_universal/patches/external-jpgd.patch +++ b/thirdparty/basis_universal/patches/external-jpgd.patch diff --git a/thirdparty/basis_universal/patches/external-tinyexr.patch b/thirdparty/basis_universal/patches/external-tinyexr.patch new file mode 100644 index 0000000000..665af13300 --- /dev/null +++ b/thirdparty/basis_universal/patches/external-tinyexr.patch @@ -0,0 +1,23 @@ +diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp +index 6c0ac0ad370..2bf486a0287 100644 +--- a/thirdparty/basis_universal/encoder/basisu_enc.cpp ++++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp +@@ -27,7 +27,7 @@ + #ifndef TINYEXR_USE_ZFP + #define TINYEXR_USE_ZFP (1) + #endif +-#include "3rdparty/tinyexr.h" ++#include <tinyexr.h> + + #ifndef MINIZ_HEADER_FILE_ONLY + #define MINIZ_HEADER_FILE_ONLY +@@ -3257,7 +3257,8 @@ namespace basisu + float* out_rgba = nullptr; + const char* err = nullptr; + +- int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err, &n_chans); ++ int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err); ++ n_chans = 4; + if (status != 0) + { + error_printf("Failed loading .EXR image \"%s\"! (TinyEXR error: %s)\n", pFilename, err ? err : "?"); diff --git a/thirdparty/basis_universal/patches/remove-tinydds-qoi.patch b/thirdparty/basis_universal/patches/remove-tinydds-qoi.patch new file mode 100644 index 0000000000..a4d176602d --- /dev/null +++ b/thirdparty/basis_universal/patches/remove-tinydds-qoi.patch @@ -0,0 +1,446 @@ +diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp +index 2bf486a0287..fff98e83014 100644 +--- a/thirdparty/basis_universal/encoder/basisu_enc.cpp ++++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp +@@ -37,9 +37,6 @@ + #endif + #include "basisu_miniz.h" + +-#define QOI_IMPLEMENTATION +-#include "3rdparty/qoi.h" +- + #if defined(_WIN32) + // For QueryPerformanceCounter/QueryPerformanceFrequency + #define WIN32_LEAN_AND_MEAN +@@ -408,16 +405,7 @@ namespace basisu + + bool load_qoi(const char* pFilename, image& img) + { +- qoi_desc desc; +- clear_obj(desc); +- +- void* p = qoi_read(pFilename, &desc, 4); +- if (!p) +- return false; +- +- img.grant_ownership(static_cast<color_rgba *>(p), desc.width, desc.height); +- +- return true; ++ return false; + } + + bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename) +diff --git a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp +index 000869a5337..342446b8fd4 100644 +--- a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp ++++ b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp +@@ -19,9 +19,6 @@ + #include "basisu_bc7enc.h" + #include "../transcoder/basisu_astc_hdr_core.h" + +-#define TINYDDS_IMPLEMENTATION +-#include "3rdparty/tinydds.h" +- + namespace basisu + { + //------------------------------------------------------------------------------------------------ +@@ -1979,208 +1976,8 @@ namespace basisu + // Accepts 2D, 2D mipmapped, 2D array, 2D array mipmapped + // and cubemap, cubemap mipmapped, and cubemap array mipmapped. + bool write_dds_file(uint8_vec &dds_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format) +- { +- if (!gpu_images.size()) +- { +- assert(0); +- return false; +- } +- +- // Sanity check the input +- uint32_t slices = 1; +- if (cubemap_flag) +- { +- if ((gpu_images.size() % 6) != 0) +- { +- assert(0); +- return false; +- } +- slices = gpu_images.size() / 6; +- } +- else +- { +- slices = gpu_images.size(); +- } +- +- uint32_t width = 0, height = 0, total_levels = 0; +- basisu::texture_format fmt = texture_format::cInvalidTextureFormat; +- +- // Sanity check the input for consistent # of dimensions and mip levels +- for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++) +- { +- const gpu_image_vec& levels = gpu_images[array_index]; +- +- if (!levels.size()) +- { +- // Empty mip chain +- assert(0); +- return false; +- } +- +- if (!array_index) +- { +- width = levels[0].get_pixel_width(); +- height = levels[0].get_pixel_height(); +- total_levels = (uint32_t)levels.size(); +- fmt = levels[0].get_format(); +- } +- else +- { +- if ((width != levels[0].get_pixel_width()) || +- (height != levels[0].get_pixel_height()) || +- (total_levels != levels.size())) +- { +- // All cubemap/texture array faces must be the same dimension +- assert(0); +- return false; +- } +- } +- +- for (uint32_t level_index = 0; level_index < levels.size(); level_index++) +- { +- if (level_index) +- { +- if ((levels[level_index].get_pixel_width() != maximum<uint32_t>(1, levels[0].get_pixel_width() >> level_index)) || +- (levels[level_index].get_pixel_height() != maximum<uint32_t>(1, levels[0].get_pixel_height() >> level_index))) +- { +- // Malformed mipmap chain +- assert(0); +- return false; +- } +- } +- +- if (fmt != levels[level_index].get_format()) +- { +- // All input textures must use the same GPU format +- assert(0); +- return false; +- } +- } +- } +- +- // No mipmap levels +- if (!total_levels) +- { +- assert(0); +- return false; +- } +- +- // Create the DDS mipmap level data +- uint8_vec mipmaps[32]; +- +- // See https://learn.microsoft.com/en-us/windows/win32/direct3ddds/dds-file-layout-for-cubic-environment-maps +- // DDS cubemap organization is cubemap face 0 followed by all mips, then cubemap face 1 followed by all mips, etc. +- // Unfortunately tinydds.h's writer doesn't handle this case correctly, so we work around it here. +- // This also applies with 2D texture arrays, too. RenderDoc and ddsview (DirectXTex) views each type (cubemap array and 2D texture array) correctly. +- // Also see "Using Texture Arrays in Direct3D 10/11": +- // https://learn.microsoft.com/en-us/windows/win32/direct3ddds/dx-graphics-dds-pguide +- for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++) +- { +- const gpu_image_vec& levels = gpu_images[array_index]; +- +- for (uint32_t level_index = 0; level_index < levels.size(); level_index++) +- { +- append_vector(mipmaps[0], (uint8_t*)levels[level_index].get_ptr(), levels[level_index].get_size_in_bytes()); +- +- } // level_index +- } // array_index +- +-#if 0 +- // This organization, required by tinydds.h's API, is wrong. +- { +- for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++) +- { +- const gpu_image_vec& levels = gpu_images[array_index]; +- +- for (uint32_t level_index = 0; level_index < levels.size(); level_index++) +- { +- append_vector(mipmaps[level_index], (uint8_t*)levels[level_index].get_ptr(), levels[level_index].get_size_in_bytes()); +- +- } // level_index +- } // array_index +- } +-#endif +- +- // Write DDS file using tinydds +- TinyDDS_WriteCallbacks cbs; +- cbs.error = [](void* user, char const* msg) { BASISU_NOTE_UNUSED(user); fprintf(stderr, "tinydds: %s\n", msg); }; +- cbs.alloc = [](void* user, size_t size) -> void* { BASISU_NOTE_UNUSED(user); return malloc(size); }; +- cbs.free = [](void* user, void* memory) { BASISU_NOTE_UNUSED(user); free(memory); }; +- cbs.write = [](void* user, void const* buffer, size_t byteCount) { BASISU_NOTE_UNUSED(user); uint8_vec* pVec = (uint8_vec*)user; append_vector(*pVec, (const uint8_t*)buffer, byteCount); }; +- +- uint32_t mipmap_sizes[32]; +- const void* mipmap_ptrs[32]; +- +- clear_obj(mipmap_sizes); +- clear_obj(mipmap_ptrs); +- +- assert(total_levels < 32); +- for (uint32_t i = 0; i < total_levels; i++) +- { +- mipmap_sizes[i] = mipmaps[i].size_in_bytes(); +- mipmap_ptrs[i] = mipmaps[i].get_ptr(); +- } +- +- // Select tinydds texture format +- uint32_t tinydds_fmt = 0; +- +- switch (fmt) +- { +- case texture_format::cBC1_NV: +- case texture_format::cBC1_AMD: +- case texture_format::cBC1: +- tinydds_fmt = use_srgb_format ? TDDS_BC1_RGBA_SRGB_BLOCK : TDDS_BC1_RGBA_UNORM_BLOCK; +- break; +- case texture_format::cBC3: +- tinydds_fmt = use_srgb_format ? TDDS_BC3_SRGB_BLOCK : TDDS_BC3_UNORM_BLOCK; +- break; +- case texture_format::cBC4: +- tinydds_fmt = TDDS_BC4_UNORM_BLOCK; +- break; +- case texture_format::cBC5: +- tinydds_fmt = TDDS_BC5_UNORM_BLOCK; +- break; +- case texture_format::cBC6HSigned: +- tinydds_fmt = TDDS_BC6H_SFLOAT_BLOCK; +- break; +- case texture_format::cBC6HUnsigned: +- tinydds_fmt = TDDS_BC6H_UFLOAT_BLOCK; +- break; +- case texture_format::cBC7: +- tinydds_fmt = use_srgb_format ? TDDS_BC7_SRGB_BLOCK : TDDS_BC7_UNORM_BLOCK; +- break; +- default: +- { +- fprintf(stderr, "Warning: Unsupported format in write_dds_file().\n"); +- return false; +- } +- } +- +- // DirectXTex's DDSView doesn't handle odd sizes textures correctly. RenderDoc loads them fine, however. +- // Trying to work around this here results in invalid mipmaps. +- //width = (width + 3) & ~3; +- //height = (height + 3) & ~3; +- +- bool status = TinyDDS_WriteImage(&cbs, +- &dds_data, +- width, +- height, +- 1, +- slices, +- total_levels, +- (TinyDDS_Format)tinydds_fmt, +- cubemap_flag, +- true, +- mipmap_sizes, +- mipmap_ptrs); +- +- if (!status) +- { +- fprintf(stderr, "write_dds_file: Failed creating DDS file\n"); +- return false; +- } +- +- return true; ++ { ++ return false; + } + + bool write_dds_file(const char* pFilename, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format) +@@ -2201,188 +1998,6 @@ namespace basisu + + bool read_uncompressed_dds_file(const char* pFilename, basisu::vector<image> &ldr_mips, basisu::vector<imagef>& hdr_mips) + { +- const uint32_t MAX_IMAGE_DIM = 16384; +- +- TinyDDS_Callbacks cbs; +- +- cbs.errorFn = [](void* user, char const* msg) { BASISU_NOTE_UNUSED(user); fprintf(stderr, "tinydds: %s\n", msg); }; +- cbs.allocFn = [](void* user, size_t size) -> void* { BASISU_NOTE_UNUSED(user); return malloc(size); }; +- cbs.freeFn = [](void* user, void* memory) { BASISU_NOTE_UNUSED(user); free(memory); }; +- cbs.readFn = [](void* user, void* buffer, size_t byteCount) -> size_t { return (size_t)fread(buffer, 1, byteCount, (FILE*)user); }; +- +-#ifdef _MSC_VER +- cbs.seekFn = [](void* user, int64_t ofs) -> bool { return _fseeki64((FILE*)user, ofs, SEEK_SET) == 0; }; +- cbs.tellFn = [](void* user) -> int64_t { return _ftelli64((FILE*)user); }; +-#else +- cbs.seekFn = [](void* user, int64_t ofs) -> bool { return fseek((FILE*)user, (long)ofs, SEEK_SET) == 0; }; +- cbs.tellFn = [](void* user) -> int64_t { return (int64_t)ftell((FILE*)user); }; +-#endif +- +- FILE* pFile = fopen_safe(pFilename, "rb"); +- if (!pFile) +- { +- error_printf("Can't open .DDS file \"%s\"\n", pFilename); +- return false; +- } +- +- // These are the formats AMD Compressonator supports in its UI. +- enum dds_fmt +- { +- cRGBA32, +- cRGBA_HALF, +- cRGBA_FLOAT +- }; +- +- bool status = false; +- dds_fmt fmt = cRGBA32; +- uint32_t width = 0, height = 0; +- bool hdr_flag = false; +- TinyDDS_Format tfmt = TDDS_UNDEFINED; +- +- TinyDDS_ContextHandle ctx = TinyDDS_CreateContext(&cbs, pFile); +- if (!ctx) +- goto failure; +- +- status = TinyDDS_ReadHeader(ctx); +- if (!status) +- { +- error_printf("Failed parsing DDS header in file \"%s\"\n", pFilename); +- goto failure; +- } +- +- if ((!TinyDDS_Is2D(ctx)) || (TinyDDS_ArraySlices(ctx) > 1) || (TinyDDS_IsCubemap(ctx))) +- { +- error_printf("Unsupported DDS texture type in file \"%s\"\n", pFilename); +- goto failure; +- } +- +- width = TinyDDS_Width(ctx); +- height = TinyDDS_Height(ctx); +- +- if (!width || !height) +- { +- error_printf("DDS texture dimensions invalid in file \"%s\"\n", pFilename); +- goto failure; +- } +- +- if ((width > MAX_IMAGE_DIM) || (height > MAX_IMAGE_DIM)) +- { +- error_printf("DDS texture dimensions too large in file \"%s\"\n", pFilename); +- goto failure; +- } +- +- tfmt = TinyDDS_GetFormat(ctx); +- switch (tfmt) +- { +- case TDDS_R8G8B8A8_SRGB: +- case TDDS_R8G8B8A8_UNORM: +- case TDDS_B8G8R8A8_SRGB: +- case TDDS_B8G8R8A8_UNORM: +- fmt = cRGBA32; +- break; +- case TDDS_R16G16B16A16_SFLOAT: +- fmt = cRGBA_HALF; +- hdr_flag = true; +- break; +- case TDDS_R32G32B32A32_SFLOAT: +- fmt = cRGBA_FLOAT; +- hdr_flag = true; +- break; +- default: +- error_printf("File \"%s\" has an unsupported DDS texture format (only supports RGBA/BGRA 32bpp, RGBA HALF float, or RGBA FLOAT)\n", pFilename); +- goto failure; +- } +- +- if (hdr_flag) +- hdr_mips.resize(TinyDDS_NumberOfMipmaps(ctx)); +- else +- ldr_mips.resize(TinyDDS_NumberOfMipmaps(ctx)); +- +- for (uint32_t level = 0; level < TinyDDS_NumberOfMipmaps(ctx); level++) +- { +- const uint32_t level_width = TinyDDS_MipMapReduce(width, level); +- const uint32_t level_height = TinyDDS_MipMapReduce(height, level); +- const uint32_t total_level_texels = level_width * level_height; +- +- const void* pImage = TinyDDS_ImageRawData(ctx, level); +- const uint32_t image_size = TinyDDS_ImageSize(ctx, level); +- +- if (fmt == cRGBA32) +- { +- ldr_mips[level].resize(level_width, level_height); +- +- if ((ldr_mips[level].get_total_pixels() * sizeof(uint32_t) != image_size)) +- { +- assert(0); +- goto failure; +- } +- +- memcpy(ldr_mips[level].get_ptr(), pImage, image_size); +- +- if ((tfmt == TDDS_B8G8R8A8_SRGB) || (tfmt == TDDS_B8G8R8A8_UNORM)) +- { +- // Swap R and B components. +- uint32_t *pTexels = (uint32_t *)ldr_mips[level].get_ptr(); +- for (uint32_t i = 0; i < total_level_texels; i++) +- { +- const uint32_t v = pTexels[i]; +- const uint32_t r = (v >> 16) & 0xFF; +- const uint32_t b = v & 0xFF; +- pTexels[i] = r | (b << 16) | (v & 0xFF00FF00); +- } +- } +- } +- else if (fmt == cRGBA_FLOAT) +- { +- hdr_mips[level].resize(level_width, level_height); +- +- if ((hdr_mips[level].get_total_pixels() * sizeof(float) * 4 != image_size)) +- { +- assert(0); +- goto failure; +- } +- +- memcpy(hdr_mips[level].get_ptr(), pImage, image_size); +- } +- else if (fmt == cRGBA_HALF) +- { +- hdr_mips[level].resize(level_width, level_height); +- +- if ((hdr_mips[level].get_total_pixels() * sizeof(basist::half_float) * 4 != image_size)) +- { +- assert(0); +- goto failure; +- } +- +- // Unpack half to float. +- const basist::half_float* pSrc_comps = static_cast<const basist::half_float*>(pImage); +- vec4F* pDst_texels = hdr_mips[level].get_ptr(); +- +- for (uint32_t i = 0; i < total_level_texels; i++) +- { +- (*pDst_texels)[0] = basist::half_to_float(pSrc_comps[0]); +- (*pDst_texels)[1] = basist::half_to_float(pSrc_comps[1]); +- (*pDst_texels)[2] = basist::half_to_float(pSrc_comps[2]); +- (*pDst_texels)[3] = basist::half_to_float(pSrc_comps[3]); +- +- pSrc_comps += 4; +- pDst_texels++; +- } // y +- } +- } // level +- +- TinyDDS_DestroyContext(ctx); +- fclose(pFile); +- +- return true; +- +- failure: +- if (ctx) +- TinyDDS_DestroyContext(ctx); +- +- if (pFile) +- fclose(pFile); +- + return false; + } + diff --git a/thirdparty/basis_universal/transcoder/basisu.h b/thirdparty/basis_universal/transcoder/basisu.h index 1230b59ec6..939ee79e62 100644 --- a/thirdparty/basis_universal/transcoder/basisu.h +++ b/thirdparty/basis_universal/transcoder/basisu.h @@ -1,5 +1,5 @@ // basisu.h -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -117,13 +117,26 @@ namespace basisu typedef basisu::vector<uint64_t> uint64_vec; typedef basisu::vector<int> int_vec; typedef basisu::vector<bool> bool_vec; + typedef basisu::vector<float> float_vec; void enable_debug_printf(bool enabled); void debug_printf(const char *pFmt, ...); - +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif +#endif + template <typename T> inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); } +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif + template <typename T0, typename T1> inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; } template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; } @@ -162,10 +175,45 @@ namespace basisu template<typename T> inline T open_range_check(T v, T minv, T maxv) { assert(v >= minv && v < maxv); BASISU_NOTE_UNUSED(minv); BASISU_NOTE_UNUSED(maxv); return v; } template<typename T> inline T open_range_check(T v, T maxv) { assert(v < maxv); BASISU_NOTE_UNUSED(maxv); return v; } + // Open interval + inline bool in_bounds(int v, int l, int h) + { + return (v >= l) && (v < h); + } + + // Closed interval + inline bool in_range(int v, int l, int h) + { + return (v >= l) && (v <= h); + } + inline uint32_t total_bits(uint32_t v) { uint32_t l = 0; for ( ; v > 0U; ++l) v >>= 1; return l; } template<typename T> inline T saturate(T val) { return clamp(val, 0.0f, 1.0f); } + inline uint32_t get_bit(uint32_t src, int ndx) + { + assert(in_bounds(ndx, 0, 32)); + return (src >> ndx) & 1; + } + + inline bool is_bit_set(uint32_t src, int ndx) + { + return get_bit(src, ndx) != 0; + } + + inline uint32_t get_bits(uint32_t val, int low, int high) + { + const int num_bits = (high - low) + 1; + assert(in_range(num_bits, 1, 32)); + + val >>= low; + if (num_bits != 32) + val &= ((1u << num_bits) - 1); + + return val; + } + template<typename T, typename R> inline void append_vector(T &vec, const R *pObjs, size_t n) { if (n) @@ -267,6 +315,11 @@ namespace basisu return true; } + static inline uint32_t read_le_word(const uint8_t* pBytes) + { + return (pBytes[1] << 8U) | (pBytes[0]); + } + static inline uint32_t read_le_dword(const uint8_t *pBytes) { return (pBytes[3] << 24U) | (pBytes[2] << 16U) | (pBytes[1] << 8U) | (pBytes[0]); @@ -303,6 +356,10 @@ namespace basisu return *this; } +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#endif inline operator uint32_t() const { switch (NumBytes) @@ -354,6 +411,9 @@ namespace basisu } } } +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif }; enum eZero { cZero }; @@ -402,8 +462,11 @@ namespace basisu cBC3, // DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block) cBC4, // DXT5A cBC5, // 3DC/DXN (two BC4/DXT5A blocks) + cBC6HSigned, // HDR + cBC6HUnsigned, // HDR cBC7, - cASTC4x4, // LDR only + cASTC_LDR_4x4, // ASTC 4x4 LDR only + cASTC_HDR_4x4, // ASTC 4x4 HDR only (but may use LDR ASTC blocks internally) cPVRTC1_4_RGB, cPVRTC1_4_RGBA, cATC_RGB, @@ -413,17 +476,22 @@ namespace basisu cETC2_R11_EAC, cETC2_RG11_EAC, cUASTC4x4, + cUASTC_HDR_4x4, cBC1_NV, cBC1_AMD, - + // Uncompressed/raw pixels cRGBA32, cRGB565, cBGR565, cRGBA4444, - cABGR4444 + cABGR4444, + cRGBA_HALF, + cRGB_HALF, + cRGB_9E5 }; + // This is bytes per block for GPU formats, or bytes per texel for uncompressed formats. inline uint32_t get_bytes_per_block(texture_format fmt) { switch (fmt) @@ -443,13 +511,27 @@ namespace basisu case texture_format::cETC2_R11_EAC: return 8; case texture_format::cRGBA32: - return sizeof(uint32_t) * 16; + case texture_format::cRGB_9E5: + return sizeof(uint32_t); + case texture_format::cRGB_HALF: + return sizeof(uint16_t) * 3; + case texture_format::cRGBA_HALF: + return sizeof(uint16_t) * 4; + case texture_format::cRGB565: + case texture_format::cBGR565: + case texture_format::cRGBA4444: + case texture_format::cABGR4444: + return sizeof(uint16_t); + default: break; } + + // Everything else is 16 bytes/block. return 16; } + // This is qwords per block for GPU formats, or not valid for uncompressed formats. inline uint32_t get_qwords_per_block(texture_format fmt) { return get_bytes_per_block(fmt) >> 3; @@ -473,6 +555,17 @@ namespace basisu BASISU_NOTE_UNUSED(fmt); return 4; } + + inline bool is_hdr_texture_format(texture_format fmt) + { + if (fmt == texture_format::cASTC_HDR_4x4) + return true; + if (fmt == texture_format::cUASTC_HDR_4x4) + return true; + if ((fmt == texture_format::cBC6HSigned) || (fmt == texture_format::cBC6HUnsigned)) + return true; + return false; + } } // namespace basisu diff --git a/thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h b/thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h new file mode 100644 index 0000000000..82dcd2bfe1 --- /dev/null +++ b/thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h @@ -0,0 +1,102 @@ +// File: basisu_astc_hdr_core.h +#pragma once +#include "basisu_astc_helpers.h" + +namespace basist +{ + struct astc_blk + { + uint8_t m_vals[16]; + }; + + // ASTC_HDR_MAX_VAL is the maximum color component value that can be encoded. + // If the input has values higher than this, they need to be linearly scaled so all values are between [0,ASTC_HDR_MAX_VAL], and the linear scaling inverted in the shader. + const float ASTC_HDR_MAX_VAL = 65216.0f; // actually MAX_QLOG12_VAL + + // Maximum usable QLOG encodings, and their floating point equivalent values, that don't result in NaN/Inf's. + const uint32_t MAX_QLOG7 = 123; + //const float MAX_QLOG7_VAL = 55296.0f; + + const uint32_t MAX_QLOG8 = 247; + //const float MAX_QLOG8_VAL = 60416.0f; + + const uint32_t MAX_QLOG9 = 495; + //const float MAX_QLOG9_VAL = 62976.0f; + + const uint32_t MAX_QLOG10 = 991; + //const float MAX_QLOG10_VAL = 64256.0f; + + const uint32_t MAX_QLOG11 = 1983; + //const float MAX_QLOG11_VAL = 64896.0f; + + const uint32_t MAX_QLOG12 = 3967; + //const float MAX_QLOG12_VAL = 65216.0f; + + const uint32_t MAX_QLOG16 = 63487; + const float MAX_QLOG16_VAL = 65504.0f; + + const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4; + + // Notes: + // qlog16_to_half(half_to_qlog16(half_val_as_int)) == half_val_as_int (is lossless) + // However, this is not lossless in the general sense. + inline half_float qlog16_to_half_slow(uint32_t qlog16) + { + assert(qlog16 <= 0xFFFF); + + int C = qlog16; + + int E = (C & 0xF800) >> 11; + int M = C & 0x7FF; + + int Mt; + if (M < 512) + Mt = 3 * M; + else if (M >= 1536) + Mt = 5 * M - 2048; + else + Mt = 4 * M - 512; + + int Cf = (E << 10) + (Mt >> 3); + return (half_float)Cf; + } + + // This is not lossless + inline half_float qlog_to_half_slow(uint32_t qlog, uint32_t bits) + { + assert((bits >= 7U) && (bits <= 16U)); + assert(qlog < (1U << bits)); + + int C = qlog << (16 - bits); + return qlog16_to_half_slow(C); + } + + void astc_hdr_core_init(); + + void decode_mode7_to_qlog12_ise20( + const uint8_t* pEndpoints, + int e[2][3], + int* pScale); + + bool decode_mode7_to_qlog12( + const uint8_t* pEndpoints, + int e[2][3], + int* pScale, + uint32_t ise_endpoint_range); + + void decode_mode11_to_qlog12_ise20( + const uint8_t* pEndpoints, + int e[2][3]); + + bool decode_mode11_to_qlog12( + const uint8_t* pEndpoints, + int e[2][3], + uint32_t ise_endpoint_range); + + bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk); + bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk); + + bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk); + bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk); + +} // namespace basist diff --git a/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h b/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h new file mode 100644 index 0000000000..09a234b2ae --- /dev/null +++ b/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h @@ -0,0 +1,3587 @@ +// basisu_astc_helpers.h +// Be sure to define ASTC_HELPERS_IMPLEMENTATION somewhere to get the implementation, otherwise you only get the header. +#pragma once +#ifndef BASISU_ASTC_HELPERS_HEADER +#define BASISU_ASTC_HELPERS_HEADER + +#include <stdlib.h> +#include <stdint.h> +#include <math.h> +#include <fenv.h> + +namespace astc_helpers +{ + const uint32_t MAX_WEIGHT_VALUE = 64; // grid texel weights must range from [0,64] + const uint32_t MIN_GRID_DIM = 2; // the minimum dimension of a block's weight grid + const uint32_t MIN_BLOCK_DIM = 4, MAX_BLOCK_DIM = 12; // the valid block dimensions in texels + const uint32_t MAX_GRID_WEIGHTS = 64; // a block may have a maximum of 64 weight grid values + + static const uint32_t NUM_ASTC_BLOCK_SIZES = 14; + extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2]; + + // The Color Endpoint Modes (CEM's) + enum cems + { + CEM_LDR_LUM_DIRECT = 0, + CEM_LDR_LUM_BASE_PLUS_OFS = 1, + CEM_HDR_LUM_LARGE_RANGE = 2, + CEM_HDR_LUM_SMALL_RANGE = 3, + CEM_LDR_LUM_ALPHA_DIRECT = 4, + CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS = 5, + CEM_LDR_RGB_BASE_SCALE = 6, + CEM_HDR_RGB_BASE_SCALE = 7, + CEM_LDR_RGB_DIRECT = 8, + CEM_LDR_RGB_BASE_PLUS_OFFSET = 9, + CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A = 10, + CEM_HDR_RGB = 11, + CEM_LDR_RGBA_DIRECT = 12, + CEM_LDR_RGBA_BASE_PLUS_OFFSET = 13, + CEM_HDR_RGB_LDR_ALPHA = 14, + CEM_HDR_RGB_HDR_ALPHA = 15 + }; + + // All Bounded Integer Sequence Coding (BISE or ISE) ranges. + // Weights: Ranges [0,11] are valid. + // Endpoints: Ranges [4,20] are valid. + enum bise_levels + { + BISE_2_LEVELS = 0, + BISE_3_LEVELS = 1, + BISE_4_LEVELS = 2, + BISE_5_LEVELS = 3, + BISE_6_LEVELS = 4, + BISE_8_LEVELS = 5, + BISE_10_LEVELS = 6, + BISE_12_LEVELS = 7, + BISE_16_LEVELS = 8, + BISE_20_LEVELS = 9, + BISE_24_LEVELS = 10, + BISE_32_LEVELS = 11, + BISE_40_LEVELS = 12, + BISE_48_LEVELS = 13, + BISE_64_LEVELS = 14, + BISE_80_LEVELS = 15, + BISE_96_LEVELS = 16, + BISE_128_LEVELS = 17, + BISE_160_LEVELS = 18, + BISE_192_LEVELS = 19, + BISE_256_LEVELS = 20 + }; + + const uint32_t TOTAL_ISE_RANGES = 21; + + // Valid endpoint ISE ranges + const uint32_t FIRST_VALID_ENDPOINT_ISE_RANGE = BISE_6_LEVELS; // 4 + const uint32_t LAST_VALID_ENDPOINT_ISE_RANGE = BISE_256_LEVELS; // 20 + const uint32_t TOTAL_ENDPOINT_ISE_RANGES = LAST_VALID_ENDPOINT_ISE_RANGE - FIRST_VALID_ENDPOINT_ISE_RANGE + 1; + + // Valid weight ISE ranges + const uint32_t FIRST_VALID_WEIGHT_ISE_RANGE = BISE_2_LEVELS; // 0 + const uint32_t LAST_VALID_WEIGHT_ISE_RANGE = BISE_32_LEVELS; // 11 + const uint32_t TOTAL_WEIGHT_ISE_RANGES = LAST_VALID_WEIGHT_ISE_RANGE - FIRST_VALID_WEIGHT_ISE_RANGE + 1; + + // The ISE range table. + extern const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3]; // 0=bits (0 to 8), 1=trits (0 or 1), 2=quints (0 or 1) + + // Possible Color Component Select values, used in dual plane mode. + // The CCS component will be interpolated using the 2nd weight plane. + enum ccs + { + CCS_GBA_R = 0, + CCS_RBA_G = 1, + CCS_RGA_B = 2, + CCS_RGB_A = 3 + }; + + struct astc_block + { + uint32_t m_vals[4]; + }; + + const uint32_t MAX_PARTITIONS = 4; // Max # of partitions or subsets for single plane mode + const uint32_t MAX_DUAL_PLANE_PARTITIONS = 3; // Max # of partitions or subsets for dual plane mode + const uint32_t NUM_PARTITION_PATTERNS = 1024; // Total # of partition pattern seeds (10-bits) + const uint32_t MAX_ENDPOINTS = 18; // Maximum # of endpoint values in a block + + struct log_astc_block + { + bool m_error_flag; + + bool m_solid_color_flag_ldr, m_solid_color_flag_hdr; + uint16_t m_solid_color[4]; + + // Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr + uint32_t m_grid_width, m_grid_height; // weight grid dimensions, not the dimension of the block + + bool m_dual_plane; + + uint32_t m_weight_ise_range; // 0-11 + uint32_t m_endpoint_ise_range; // 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking + + uint32_t m_color_component_selector; // 0-3, 0=GBA R, 1=RBA G, 2=RGA B, 3=RGB A, only used in dual plane mode + + uint32_t m_num_partitions; // or the # of subsets, 1-4 (1-3 if dual plane mode) + uint32_t m_partition_id; // 10-bits, must be 0 if m_num_partitions==1 + + uint32_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's + + // ISE weight grid values. In dual plane mode, the order is p0,p1, p0,p1, etc. + uint8_t m_weights[MAX_GRID_WEIGHTS]; + + // ISE endpoint values + // Endpoint order examples: + // 1 subset LA : LL0 LH0 AL0 AH0 + // 1 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 + // 1 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 + // 2 subset LA : LL0 LH0 AL0 AH0 LL1 LH1 AL1 AH1 + // 2 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 RL1 RH1 GL1 GH1 BL1 BH1 + // 2 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 RL1 RH1 GL1 GH1 BL1 BH1 AL1 AH1 + uint8_t m_endpoints[MAX_ENDPOINTS]; + + void clear() + { + memset(this, 0, sizeof(*this)); + } + }; + + // Open interval + inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } + inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } + + inline uint32_t get_bits(uint32_t val, int low, int high) + { + const int num_bits = (high - low) + 1; + assert((num_bits >= 1) && (num_bits <= 32)); + + val >>= low; + if (num_bits != 32) + val &= ((1u << num_bits) - 1); + + return val; + } + + // Returns the number of levels in the given ISE range. + inline uint32_t get_ise_levels(uint32_t ise_range) + { + assert(ise_range < TOTAL_ISE_RANGES); + return (1 + 2 * g_ise_range_table[ise_range][1] + 4 * g_ise_range_table[ise_range][2]) << g_ise_range_table[ise_range][0]; + } + + inline int get_ise_sequence_bits(int count, int range) + { + // See 18.22 Data Size Determination + int total_bits = g_ise_range_table[range][0] * count; + total_bits += (g_ise_range_table[range][1] * 8 * count + 4) / 5; + total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3; + return total_bits; + } + + inline uint32_t weight_interpolate(uint32_t l, uint32_t h, uint32_t w) + { + assert(w <= MAX_WEIGHT_VALUE); + return (l * (64 - w) + h * w + 32) >> 6; + } + + void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range); + + // Packs a logical to physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions. + bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr); + + // Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component. + void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a); + + // Pack HDR void extent (16-bit values are FP16/half floats - no NaN/Inf's) + void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah); + + // These helpers are all quite slow, but are useful for table preparation. + + // Dequantizes ISE encoded endpoint val to [0,255] + uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range); // ISE ranges 4-11 + + // Dequantizes ISE encoded weight val to [0,64] + uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range); // ISE ranges 0-10 + + uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range); + uint32_t find_nearest_bise_weight(int v, uint32_t ise_range); + + void create_quant_tables( + uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65] + uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels] + uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels] + uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels] + uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights + bool weight_flag); // false if block endpoints, true if weights + + // True if the CEM is LDR. + bool is_cem_ldr(uint32_t mode); + inline bool is_cem_hdr(uint32_t mode) { return !is_cem_ldr(mode); } + + // True if the passed in dimensions are a valid ASTC block size. There are 14 supported configs, from 4x4 (8bpp) to 12x12 (.89bpp). + bool is_valid_block_size(uint32_t w, uint32_t h); + + bool block_has_any_hdr_cems(const log_astc_block& log_blk); + bool block_has_any_ldr_cems(const log_astc_block& log_blk); + + // Returns the # of endpoint values for the given CEM. + inline uint32_t get_num_cem_values(uint32_t cem) { assert(cem <= 15); return 2 + 2 * (cem >> 2); } + + struct dequant_table + { + basisu::vector<uint8_t> m_val_to_ise; // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65] + basisu::vector<uint8_t> m_ISE_to_val; // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels] + basisu::vector<uint8_t> m_ISE_to_rank; // returns the level rank index given an ISE symbol, [levels] + basisu::vector<uint8_t> m_rank_to_ISE; // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels] + + void init(bool weight_flag, uint32_t num_levels, bool init_rank_tabs) + { + m_val_to_ise.resize(weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256); + m_ISE_to_val.resize(num_levels); + if (init_rank_tabs) + { + m_ISE_to_rank.resize(num_levels); + m_rank_to_ISE.resize(num_levels); + } + } + }; + + struct dequant_tables + { + dequant_table m_weights[TOTAL_WEIGHT_ISE_RANGES]; + dequant_table m_endpoints[TOTAL_ENDPOINT_ISE_RANGES]; + + const dequant_table& get_weight_tab(uint32_t range) const + { + assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE)); + return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE]; + } + + dequant_table& get_weight_tab(uint32_t range) + { + assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE)); + return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE]; + } + + const dequant_table& get_endpoint_tab(uint32_t range) const + { + assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE)); + return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE]; + } + + dequant_table& get_endpoint_tab(uint32_t range) + { + assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE)); + return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE]; + } + + void init(bool init_rank_tabs) + { + for (uint32_t range = FIRST_VALID_WEIGHT_ISE_RANGE; range <= LAST_VALID_WEIGHT_ISE_RANGE; range++) + { + const uint32_t num_levels = get_ise_levels(range); + dequant_table& tab = get_weight_tab(range); + + tab.init(true, num_levels, init_rank_tabs); + + create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, true); + } + + for (uint32_t range = FIRST_VALID_ENDPOINT_ISE_RANGE; range <= LAST_VALID_ENDPOINT_ISE_RANGE; range++) + { + const uint32_t num_levels = get_ise_levels(range); + dequant_table& tab = get_endpoint_tab(range); + + tab.init(false, num_levels, init_rank_tabs); + + create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, false); + } + } + }; + + extern dequant_tables g_dequant_tables; + void init_tables(bool init_rank_tabs); + + // Procedurally returns the texel partition/subset index given the block coordinate and config. + int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block); + + void blue_contract( + int r, int g, int b, int a, + int& dr, int& dg, int& db, int& da); + + void bit_transfer_signed(int& a, int& b); + + void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t* pE); + + typedef uint16_t half_float; + half_float float_to_half(float val, bool toward_zero); + float half_to_float(half_float hval); + + const int MAX_RGB9E5 = 0xff80; + void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b); + uint32_t pack_rgb9e5(float r, float g, float b); + + enum decode_mode + { + cDecodeModeSRGB8 = 0, // returns uint8_t's, not valid on HDR blocks + cDecodeModeLDR8 = 1, // returns uint8_t's, not valid on HDR blocks + cDecodeModeHDR16 = 2, // returns uint16_t's (half floats), valid on all LDR/HDR blocks + cDecodeModeRGB9E5 = 3 // returns uint32_t's, packed as RGB 9E5 (shared exponent), see https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt + }; + + // Decodes logical block to output pixels. + // pPixels must point to either 32-bit pixel values (SRGB8/LDR8/9E5) or 64-bit pixel values (HDR16) + bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode); + + void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t *pBits128, uint32_t bit_ofs); + + // Unpack a physical ASTC encoded GPU texture block to a logical block description. + bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height); + +} // namespace astc_helpers + +#endif // BASISU_ASTC_HELPERS_HEADER + +//------------------------------------------------------------------ + +#ifdef BASISU_ASTC_HELPERS_IMPLEMENTATION + +namespace astc_helpers +{ + template<typename T> inline T my_min(T a, T b) { return (a < b) ? a : b; } + template<typename T> inline T my_max(T a, T b) { return (a > b) ? a : b; } + + const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] = { + { 4, 4 }, { 5, 4 }, { 5, 5 }, { 6, 5 }, + { 6, 6 }, { 8, 5 }, { 8, 6 }, { 10, 5 }, + { 10, 6 }, { 8, 8 }, { 10, 8 }, { 10, 10 }, + { 12, 10 }, { 12, 12 } + }; + + const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3] = + { + //b t q + //2 3 5 // rng ise_index notes + { 1, 0, 0 }, // 0..1 0 + { 0, 1, 0 }, // 0..2 1 + { 2, 0, 0 }, // 0..3 2 + { 0, 0, 1 }, // 0..4 3 + { 1, 1, 0 }, // 0..5 4 min endpoint ISE index + { 3, 0, 0 }, // 0..7 5 + { 1, 0, 1 }, // 0..9 6 + { 2, 1, 0 }, // 0..11 7 + { 4, 0, 0 }, // 0..15 8 + { 2, 0, 1 }, // 0..19 9 + { 3, 1, 0 }, // 0..23 10 + { 5, 0, 0 }, // 0..31 11 max weight ISE index + { 3, 0, 1 }, // 0..39 12 + { 4, 1, 0 }, // 0..47 13 + { 6, 0, 0 }, // 0..63 14 + { 4, 0, 1 }, // 0..79 15 + { 5, 1, 0 }, // 0..95 16 + { 7, 0, 0 }, // 0..127 17 + { 5, 0, 1 }, // 0..159 18 + { 6, 1, 0 }, // 0..191 19 + { 8, 0, 0 }, // 0..255 20 + }; + + static inline void astc_set_bits_1_to_9(uint32_t* pDst, uint32_t& bit_offset, uint32_t code, uint32_t codesize) + { + uint8_t* pBuf = reinterpret_cast<uint8_t*>(pDst); + + assert(codesize <= 9); + if (codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t val = code << byte_bit_offset; + + uint32_t index = bit_offset >> 3; + pBuf[index] |= (uint8_t)val; + + if (codesize > (8 - byte_bit_offset)) + pBuf[index + 1] |= (uint8_t)(val >> 8); + + bit_offset += codesize; + } + } + + static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high) + { + return (bits >> low) & ((1 << (high - low + 1)) - 1); + } + + // Writes bits to output in an endian safe way + static inline void astc_set_bits(uint32_t* pOutput, uint32_t& bit_pos, uint32_t value, uint32_t total_bits) + { + assert(total_bits <= 31); + assert(value < (1u << total_bits)); + + uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput); + + while (total_bits) + { + const uint32_t bits_to_write = my_min<int>(total_bits, 8 - (bit_pos & 7)); + + pBytes[bit_pos >> 3] |= static_cast<uint8_t>(value << (bit_pos & 7)); + + bit_pos += bits_to_write; + total_bits -= bits_to_write; + value >>= bits_to_write; + } + } + + static const uint8_t g_astc_quint_encode[125] = + { + 0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57, + 58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104, + 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54, + 126, 127, 94, 95, 62, 39, 47, 55, 63, 7 /*31 - results in the same decode as 7*/ + }; + + // Encodes 3 values to output, usable for any range that uses quints and bits + static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n) + { + // First extract the quints and the bits from the 3 input values + int quints = 0, bits[3]; + const uint32_t bit_mask = (1 << n) - 1; + for (int i = 0; i < 3; i++) + { + static const int s_muls[3] = { 1, 5, 25 }; + + const int t = pValues[i] >> n; + + quints += t * s_muls[i]; + bits[i] = pValues[i] & bit_mask; + } + + // Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits. + // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding + + assert(quints < 125); + const int T = g_astc_quint_encode[quints]; + + // Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96. + astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) | + (bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3); + } + + static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39, + 43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154, + 131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202, + 208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224, + 225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159, + 191, 223, 124, 125, 126 }; + + // Encodes 5 values to output, usable for any range that uses trits and bits + static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n) + { + // First extract the trits and the bits from the 5 input values + int trits = 0, bits[5]; + const uint32_t bit_mask = (1 << n) - 1; + for (int i = 0; i < 5; i++) + { + static const int s_muls[5] = { 1, 3, 9, 27, 81 }; + + const int t = pValues[i] >> n; + + trits += t * s_muls[i]; + bits[i] = pValues[i] & bit_mask; + } + + // Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits. + // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding + + assert(trits < 243); + const int T = g_astc_trit_encode[trits]; + + // Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94. + astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2); + + astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) | + (bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6); + } + + // Packs values using ASTC's BISE to output buffer. + void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range) + { + uint32_t temp[5] = { 0 }; + + const int num_bits = g_ise_range_table[range][0]; + + int group_size = 0; + if (g_ise_range_table[range][1]) + group_size = 5; + else if (g_ise_range_table[range][2]) + group_size = 3; + +#ifndef NDEBUG + const uint32_t num_levels = get_ise_levels(range); + for (int i = 0; i < num_vals; i++) + { + assert(pSrc_vals[i] < num_levels); + } +#endif + + if (group_size) + { + // Range has trits or quints - pack each group of 5 or 3 values + const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3); + + for (int group_index = 0; group_index < total_groups; group_index++) + { + uint8_t vals[5] = { 0 }; + + const int limit = my_min(group_size, num_vals - group_index * group_size); + for (int i = 0; i < limit; i++) + vals[i] = pSrc_vals[group_index * group_size + i]; + + if (group_size == 5) + astc_encode_trits(temp, vals, bit_pos, num_bits); + else + astc_encode_quints(temp, vals, bit_pos, num_bits); + } + } + else + { + for (int i = 0; i < num_vals; i++) + astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits); + } + + // TODO: Could this write too many bits on incomplete blocks? + pDst[0] |= temp[0]; pDst[1] |= temp[1]; + pDst[2] |= temp[2]; pDst[3] |= temp[3]; + } + + inline uint32_t rev_dword(uint32_t bits) + { + uint32_t v = (bits << 16) | (bits >> 16); + v = ((v & 0x00ff00ff) << 8) | ((v & 0xff00ff00) >> 8); v = ((v & 0x0f0f0f0f) << 4) | ((v & 0xf0f0f0f0) >> 4); + v = ((v & 0x33333333) << 2) | ((v & 0xcccccccc) >> 2); v = ((v & 0x55555555) << 1) | ((v & 0xaaaaaaaa) >> 1); + return v; + } + + static inline bool is_packable(int value, int num_bits) { assert((num_bits >= 1) && (num_bits < 31)); return (value >= 0) && (value < (1 << num_bits)); } + + static bool get_config_bits(const log_astc_block &log_block, uint32_t &config_bits) + { + config_bits = 0; + + const int W = log_block.m_grid_width, H = log_block.m_grid_height; + + const uint32_t P = log_block.m_weight_ise_range >= 6; // high precision + const uint32_t Dp_P = (log_block.m_dual_plane << 1) | P; // pack dual plane+high precision bits + + // See Tables 81-82 + // Compute p from weight range + uint32_t p = 2 + log_block.m_weight_ise_range - (P ? 6 : 0); + + // Rearrange p's bits to p0 p2 p1 + p = (p >> 1) + ((p & 1) << 2); + + // Try encoding each row of table 82. + + // W+4 H+2 + if (is_packable(W - 4, 2) && is_packable(H - 2, 2)) + { + config_bits = (Dp_P << 9) | ((W - 4) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | (p & 3); + return true; + } + + // W+8 H+2 + if (is_packable(W - 8, 2) && is_packable(H - 2, 2)) + { + config_bits = (Dp_P << 9) | ((W - 8) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 4 | (p & 3); + return true; + } + + // W+2 H+8 + if (is_packable(W - 2, 2) && is_packable(H - 8, 2)) + { + config_bits = (Dp_P << 9) | ((H - 8) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 8 | (p & 3); + return true; + } + + // W+2 H+6 + if (is_packable(W - 2, 2) && is_packable(H - 6, 1)) + { + config_bits = (Dp_P << 9) | ((H - 6) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3); + return true; + } + + // W+2 H+2 + if (is_packable(W - 2, 1) && is_packable(H - 2, 2)) + { + config_bits = (Dp_P << 9) | ((W) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3); + return true; + } + + // 12 H+2 + if ((W == 12) && is_packable(H - 2, 2)) + { + config_bits = (Dp_P << 9) | ((H - 2) << 5) | (p << 2); + return true; + } + + // W+2 12 + if ((H == 12) && is_packable(W - 2, 2)) + { + config_bits = (Dp_P << 9) | (1 << 7) | ((W - 2) << 5) | (p << 2); + return true; + } + + // 6 10 + if ((W == 6) && (H == 10)) + { + config_bits = (Dp_P << 9) | (3 << 7) | (p << 2); + return true; + } + + // 10 6 + if ((W == 10) && (H == 6)) + { + config_bits = (Dp_P << 9) | (0b1101 << 5) | (p << 2); + return true; + } + + // W+6 H+6 (no dual plane or high prec) + if ((!Dp_P) && is_packable(W - 6, 2) && is_packable(H - 6, 2)) + { + config_bits = ((H - 6) << 9) | 256 | ((W - 6) << 5) | (p << 2); + return true; + } + + // Failed: unsupported weight grid dimensions or config. + return false; + } + + bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range) + { + memset(&phys_block, 0, sizeof(phys_block)); + + if (pExpected_endpoint_range) + *pExpected_endpoint_range = -1; + + assert(!log_block.m_error_flag); + if (log_block.m_error_flag) + return false; + + if (log_block.m_solid_color_flag_ldr) + { + pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]); + return true; + } + else if (log_block.m_solid_color_flag_hdr) + { + pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]); + return true; + } + + if ((log_block.m_num_partitions < 1) || (log_block.m_num_partitions > MAX_PARTITIONS)) + return false; + + // Max usable weight range is 11 + if (log_block.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE) + return false; + + // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints + if ((log_block.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_block.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE)) + return false; + + if (log_block.m_color_component_selector > 3) + return false; + + uint32_t config_bits = 0; + if (!get_config_bits(log_block, config_bits)) + return false; + + uint32_t bit_pos = 0; + astc_set_bits(&phys_block.m_vals[0], bit_pos, config_bits, 11); + + const uint32_t total_grid_weights = (log_block.m_dual_plane ? 2 : 1) * (log_block.m_grid_width * log_block.m_grid_height); + const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range); + + // 18.24 Illegal Encodings + if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96)) + return false; + + uint32_t total_extra_bits = 0; + + astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_num_partitions - 1, 2); + + if (log_block.m_num_partitions > 1) + { + if (log_block.m_partition_id >= NUM_PARTITION_PATTERNS) + return false; + + astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_partition_id, 10); + + uint32_t highest_cem = 0, lowest_cem = UINT32_MAX; + for (uint32_t j = 0; j < log_block.m_num_partitions; j++) + { + highest_cem = my_max(highest_cem, log_block.m_color_endpoint_modes[j]); + lowest_cem = my_min(lowest_cem, log_block.m_color_endpoint_modes[j]); + } + + if (highest_cem > 15) + return false; + + // Ensure CEM range is contiguous + if (((highest_cem >> 2) > (1 + (lowest_cem >> 2)))) + return false; + + // See tables 79/80 + uint32_t encoded_cem = log_block.m_color_endpoint_modes[0] << 2; + if (lowest_cem != highest_cem) + { + encoded_cem = my_min<uint32_t>(3, 1 + (lowest_cem >> 2)); + + // See tables at 23.11 Color Endpoint Mode + for (uint32_t j = 0; j < log_block.m_num_partitions; j++) + { + const int M = log_block.m_color_endpoint_modes[j] & 3; + + const int C = (log_block.m_color_endpoint_modes[j] >> 2) - ((encoded_cem & 3) - 1); + if ((C & 1) != C) + return false; + + encoded_cem |= (C << (2 + j)) | (M << (2 + log_block.m_num_partitions + 2 * j)); + } + + total_extra_bits = 3 * log_block.m_num_partitions - 4; + + if ((total_weight_bits + total_extra_bits) > 128) + return false; + + uint32_t cem_bit_pos = 128 - total_weight_bits - total_extra_bits; + astc_set_bits(&phys_block.m_vals[0], cem_bit_pos, encoded_cem >> 6, total_extra_bits); + } + + astc_set_bits(&phys_block.m_vals[0], bit_pos, encoded_cem & 0x3f, 6); + } + else + { + if (log_block.m_partition_id) + return false; + if (log_block.m_color_endpoint_modes[0] > 15) + return false; + + astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_color_endpoint_modes[0], 4); + } + + if (log_block.m_dual_plane) + { + if (log_block.m_num_partitions > 3) + return false; + + total_extra_bits += 2; + + uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits; + astc_set_bits(&phys_block.m_vals[0], ccs_bit_pos, log_block.m_color_component_selector, 2); + } + + const uint32_t total_config_bits = bit_pos + total_extra_bits; + const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits; + if (num_remaining_bits < 0) + return false; + + uint32_t total_cem_vals = 0; + for (uint32_t j = 0; j < log_block.m_num_partitions; j++) + total_cem_vals += 2 + 2 * (log_block.m_color_endpoint_modes[j] >> 2); + + if (total_cem_vals > MAX_ENDPOINTS) + return false; + + int endpoint_ise_range = -1; + for (int k = 20; k > 0; k--) + { + int bits = get_ise_sequence_bits(total_cem_vals, k); + if (bits <= num_remaining_bits) + { + endpoint_ise_range = k; + break; + } + } + + // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints + if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE) + return false; + + // Ensure the caller utilized the right endpoint ISE range. + if ((int)log_block.m_endpoint_ise_range != endpoint_ise_range) + { + if (pExpected_endpoint_range) + *pExpected_endpoint_range = endpoint_ise_range; + return false; + } + + // Pack endpoints forwards + encode_bise(&phys_block.m_vals[0], log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range); + + // Pack weights backwards + uint32_t weight_data[4] = { 0 }; + encode_bise(weight_data, log_block.m_weights, 0, total_grid_weights, log_block.m_weight_ise_range); + + for (uint32_t i = 0; i < 4; i++) + phys_block.m_vals[i] |= rev_dword(weight_data[3 - i]); + + return true; + } + + static inline uint32_t bit_replication_scale(uint32_t src, int num_src_bits, int num_dst_bits) + { + assert(num_src_bits <= num_dst_bits); + assert((src & ((1 << num_src_bits) - 1)) == src); + + uint32_t dst = 0; + for (int shift = num_dst_bits - num_src_bits; shift > -num_src_bits; shift -= num_src_bits) + dst |= (shift >= 0) ? (src << shift) : (src >> -shift); + + return dst; + } + + uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range) + { + assert((ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE)); + assert(val < get_ise_levels(ise_range)); + + uint32_t u = 0; + + switch (ise_range) + { + case 5: + { + u = bit_replication_scale(val, 3, 8); + break; + } + case 8: + { + u = bit_replication_scale(val, 4, 8); + break; + } + case 11: + { + u = bit_replication_scale(val, 5, 8); + break; + } + case 14: + { + u = bit_replication_scale(val, 6, 8); + break; + } + case 17: + { + u = bit_replication_scale(val, 7, 8); + break; + } + case 20: + { + u = val; + break; + } + case 4: + case 6: + case 7: + case 9: + case 10: + case 12: + case 13: + case 15: + case 16: + case 18: + case 19: + { + const uint32_t num_bits = g_ise_range_table[ise_range][0]; + const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits); + const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints); + + // compute Table 103 row index + const int range_index = (num_bits * 2 + (num_quints ? 1 : 0)) - 2; + + assert(range_index >= 0 && range_index <= 10); + + uint32_t bits = val & ((1 << num_bits) - 1); + uint32_t tval = val >> num_bits; + + assert(tval < (num_trits ? 3U : 5U)); + + uint32_t a = bits & 1; + uint32_t b = (bits >> 1) & 1; + uint32_t c = (bits >> 2) & 1; + uint32_t d = (bits >> 3) & 1; + uint32_t e = (bits >> 4) & 1; + uint32_t f = (bits >> 5) & 1; + + uint32_t A = a ? 511 : 0; + uint32_t B = 0; + + switch (range_index) + { + case 2: + { + // 876543210 + // b000b0bb0 + B = (b << 1) | (b << 2) | (b << 4) | (b << 8); + break; + } + case 3: + { + // 876543210 + // b0000bb00 + B = (b << 2) | (b << 3) | (b << 8); + break; + } + case 4: + { + // 876543210 + // cb000cbcb + B = b | (c << 1) | (b << 2) | (c << 3) | (b << 7) | (c << 8); + break; + } + case 5: + { + // 876543210 + // cb0000cbc + B = c | (b << 1) | (c << 2) | (b << 7) | (c << 8); + break; + } + case 6: + { + // 876543210 + // dcb000dcb + B = b | (c << 1) | (d << 2) | (b << 6) | (c << 7) | (d << 8); + break; + } + case 7: + { + // 876543210 + // dcb0000dc + B = c | (d << 1) | (b << 6) | (c << 7) | (d << 8); + break; + } + case 8: + { + // 876543210 + // edcb000ed + B = d | (e << 1) | (b << 5) | (c << 6) | (d << 7) | (e << 8); + break; + } + case 9: + { + // 876543210 + // edcb0000e + B = e | (b << 5) | (c << 6) | (d << 7) | (e << 8); + break; + } + case 10: + { + // 876543210 + // fedcb000f + B = f | (b << 4) | (c << 5) | (d << 6) | (e << 7) | (f << 8); + break; + } + default: + break; + } + + static uint8_t C_vals[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 }; + uint32_t C = C_vals[range_index]; + uint32_t D = tval; + + u = D * C + B; + u = u ^ A; + u = (A & 0x80) | (u >> 2); + + break; + } + default: + { + assert(0); + break; + } + } + + return u; + } + + uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range) + { + assert(val < get_ise_levels(ise_range)); + + uint32_t u = 0; + switch (ise_range) + { + case 0: + { + u = val ? 63 : 0; + break; + } + case 1: // 0-2 + { + const uint8_t s_tab_0_2[3] = { 0, 32, 63 }; + u = s_tab_0_2[val]; + break; + } + case 2: // 0-3 + { + u = bit_replication_scale(val, 2, 6); + break; + } + case 3: // 0-4 + { + const uint8_t s_tab_0_4[5] = { 0, 16, 32, 47, 63 }; + u = s_tab_0_4[val]; + break; + } + case 5: // 0-7 + { + u = bit_replication_scale(val, 3, 6); + break; + } + case 8: // 0-15 + { + u = bit_replication_scale(val, 4, 6); + break; + } + case 11: // 0-31 + { + u = bit_replication_scale(val, 5, 6); + break; + } + case 4: // 0-5 + case 6: // 0-9 + case 7: // 0-11 + case 9: // 0-19 + case 10: // 0-23 + { + const uint32_t num_bits = g_ise_range_table[ise_range][0]; + const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits); + const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints); + + // compute Table 103 row index + const int range_index = num_bits * 2 + (num_quints ? 1 : 0); + + // Extract bits and tris/quints from value + const uint32_t bits = val & ((1u << num_bits) - 1); + const uint32_t D = val >> num_bits; + + assert(D < (num_trits ? 3U : 5U)); + + // Now dequantize + // See Table 103. ASTC weight unquantization parameters + static const uint32_t C_table[5] = { 50, 28, 23, 13, 11 }; + + const uint32_t a = bits & 1, b = (bits >> 1) & 1, c = (bits >> 2) & 1; + + const uint32_t A = (a == 0) ? 0 : 0x7F; + + uint32_t B = 0; + if (range_index == 4) + B = ((b << 6) | (b << 2) | (b << 0)); + else if (range_index == 5) + B = ((b << 6) | (b << 1)); + else if (range_index == 6) + B = ((c << 6) | (b << 5) | (c << 1) | (b << 0)); + + const uint32_t C = C_table[range_index - 2]; + + u = D * C + B; + u = u ^ A; + u = (A & 0x20) | (u >> 2); + break; + } + default: + assert(0); + break; + } + + if (u > 32) + u++; + + return u; + } + + // Returns the nearest ISE symbol given a [0,255] endpoint value. + uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range) + { + assert(ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE && ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE); + + const uint32_t total_levels = get_ise_levels(ise_range); + int best_e = INT_MAX, best_index = 0; + for (uint32_t i = 0; i < total_levels; i++) + { + const int qv = dequant_bise_endpoint(i, ise_range); + int e = labs(v - qv); + if (e < best_e) + { + best_e = e; + best_index = i; + if (!best_e) + break; + } + } + return best_index; + } + + // Returns the nearest ISE weight given a [0,64] endpoint value. + uint32_t find_nearest_bise_weight(int v, uint32_t ise_range) + { + assert(ise_range >= FIRST_VALID_WEIGHT_ISE_RANGE && ise_range <= LAST_VALID_WEIGHT_ISE_RANGE); + assert(v <= (int)MAX_WEIGHT_VALUE); + + const uint32_t total_levels = get_ise_levels(ise_range); + int best_e = INT_MAX, best_index = 0; + for (uint32_t i = 0; i < total_levels; i++) + { + const int qv = dequant_bise_weight(i, ise_range); + int e = labs(v - qv); + if (e < best_e) + { + best_e = e; + best_index = i; + if (!best_e) + break; + } + } + return best_index; + } + + void create_quant_tables( + uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65] + uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels] + uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels] + uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels] + uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights + bool weight_flag) // false if block endpoints, true if weights + { + const uint32_t num_dequant_vals = weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256; + + for (uint32_t i = 0; i < num_dequant_vals; i++) + { + uint32_t bise_index = weight_flag ? astc_helpers::find_nearest_bise_weight(i, ise_range) : astc_helpers::find_nearest_bise_endpoint(i, ise_range); + + if (pVal_to_ise) + pVal_to_ise[i] = (uint8_t)bise_index; + + if (pISE_to_val) + pISE_to_val[bise_index] = weight_flag ? (uint8_t)astc_helpers::dequant_bise_weight(bise_index, ise_range) : (uint8_t)astc_helpers::dequant_bise_endpoint(bise_index, ise_range); + } + + if (pISE_to_rank || pRank_to_ISE) + { + const uint32_t num_levels = get_ise_levels(ise_range); + + if (!g_ise_range_table[ise_range][1] && !g_ise_range_table[ise_range][2]) + { + // Only bits + for (uint32_t i = 0; i < num_levels; i++) + { + if (pISE_to_rank) + pISE_to_rank[i] = (uint8_t)i; + + if (pRank_to_ISE) + pRank_to_ISE[i] = (uint8_t)i; + } + } + else + { + // Range has trits or quints + uint32_t vals[256]; + for (uint32_t i = 0; i < num_levels; i++) + { + uint32_t v = weight_flag ? astc_helpers::dequant_bise_weight(i, ise_range) : astc_helpers::dequant_bise_endpoint(i, ise_range); + + // Low=ISE value + // High=dequantized value + vals[i] = (v << 16) | i; + } + + // Sorts by dequantized value + std::sort(vals, vals + num_levels); + + for (uint32_t rank = 0; rank < num_levels; rank++) + { + uint32_t ise_val = (uint8_t)vals[rank]; + + if (pISE_to_rank) + pISE_to_rank[ise_val] = (uint8_t)rank; + + if (pRank_to_ISE) + pRank_to_ISE[rank] = (uint8_t)ise_val; + } + } + } + } + + void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah) + { + uint8_t* pDst = (uint8_t*)&blk.m_vals[0]; + memset(pDst, 0xFF, 16); + + pDst[0] = 0b11111100; + pDst[1] = 0b11111101; + + pDst[8] = (uint8_t)rh; + pDst[9] = (uint8_t)(rh >> 8); + pDst[10] = (uint8_t)gh; + pDst[11] = (uint8_t)(gh >> 8); + pDst[12] = (uint8_t)bh; + pDst[13] = (uint8_t)(bh >> 8); + pDst[14] = (uint8_t)ah; + pDst[15] = (uint8_t)(ah >> 8); + } + + // rh-ah are half-floats + void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah) + { + uint8_t* pDst = (uint8_t*)&blk.m_vals[0]; + memset(pDst, 0xFF, 16); + + pDst[0] = 0b11111100; + + pDst[8] = (uint8_t)rh; + pDst[9] = (uint8_t)(rh >> 8); + pDst[10] = (uint8_t)gh; + pDst[11] = (uint8_t)(gh >> 8); + pDst[12] = (uint8_t)bh; + pDst[13] = (uint8_t)(bh >> 8); + pDst[14] = (uint8_t)ah; + pDst[15] = (uint8_t)(ah >> 8); + } + + bool is_cem_ldr(uint32_t mode) + { + switch (mode) + { + case CEM_LDR_LUM_DIRECT: + case CEM_LDR_LUM_BASE_PLUS_OFS: + case CEM_LDR_LUM_ALPHA_DIRECT: + case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS: + case CEM_LDR_RGB_BASE_SCALE: + case CEM_LDR_RGB_DIRECT: + case CEM_LDR_RGB_BASE_PLUS_OFFSET: + case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: + case CEM_LDR_RGBA_DIRECT: + case CEM_LDR_RGBA_BASE_PLUS_OFFSET: + return true; + default: + break; + } + + return false; + } + + bool is_valid_block_size(uint32_t w, uint32_t h) + { + assert((w >= MIN_BLOCK_DIM) && (w <= MAX_BLOCK_DIM)); + assert((h >= MIN_BLOCK_DIM) && (h <= MAX_BLOCK_DIM)); + +#define SIZECHK(x, y) if ((w == (x)) && (h == (y))) return true; + SIZECHK(4, 4); + SIZECHK(5, 4); + + SIZECHK(5, 5); + + SIZECHK(6, 5); + SIZECHK(6, 6); + + SIZECHK(8, 5); + SIZECHK(8, 6); + SIZECHK(10, 5); + SIZECHK(10, 6); + + SIZECHK(8, 8); + SIZECHK(10, 8); + SIZECHK(10, 10); + + SIZECHK(12, 10); + SIZECHK(12, 12); +#undef SIZECHK + + return false; + } + + bool block_has_any_hdr_cems(const log_astc_block& log_blk) + { + assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS)); + + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + if (is_cem_hdr(log_blk.m_color_endpoint_modes[i])) + return true; + + return false; + } + + bool block_has_any_ldr_cems(const log_astc_block& log_blk) + { + assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS)); + + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + if (!is_cem_hdr(log_blk.m_color_endpoint_modes[i])) + return true; + + return false; + } + + dequant_tables g_dequant_tables; + + void precompute_texel_partitions_4x4(); + + void init_tables(bool init_rank_tabs) + { + g_dequant_tables.init(init_rank_tabs); + + precompute_texel_partitions_4x4(); + } + + struct weighted_sample + { + uint8_t m_src_x; + uint8_t m_src_y; + uint8_t m_weights[2][2]; // [y][x], scaled by 16, round by adding 8 + }; + + static void compute_upsample_weights( + int block_width, int block_height, + int weight_grid_width, int weight_grid_height, + weighted_sample* pWeights) // there will be block_width * block_height bilinear samples + { + const uint32_t scaleX = (1024 + block_width / 2) / (block_width - 1); + const uint32_t scaleY = (1024 + block_height / 2) / (block_height - 1); + + for (int texelY = 0; texelY < block_height; texelY++) + { + for (int texelX = 0; texelX < block_width; texelX++) + { + const uint32_t gX = (scaleX * texelX * (weight_grid_width - 1) + 32) >> 6; + const uint32_t gY = (scaleY * texelY * (weight_grid_height - 1) + 32) >> 6; + const uint32_t jX = gX >> 4; + const uint32_t jY = gY >> 4; + const uint32_t fX = gX & 0xf; + const uint32_t fY = gY & 0xf; + const uint32_t w11 = (fX * fY + 8) >> 4; + const uint32_t w10 = fY - w11; + const uint32_t w01 = fX - w11; + const uint32_t w00 = 16 - fX - fY + w11; + + weighted_sample& s = pWeights[texelX + texelY * block_width]; + s.m_src_x = (uint8_t)jX; + s.m_src_y = (uint8_t)jY; + s.m_weights[0][0] = (uint8_t)w00; + s.m_weights[0][1] = (uint8_t)w01; + s.m_weights[1][0] = (uint8_t)w10; + s.m_weights[1][1] = (uint8_t)w11; + } + } + } + + // Should be dequantized [0,64] weights + static void upsample_weight_grid( + uint32_t bx, uint32_t by, // destination/to dimension + uint32_t wx, uint32_t wy, // source/from dimension + const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx] + uint8_t* pDst_weights) // [by][bx] + { + assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12)); + assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by)); + + const uint32_t total_src_weights = wx * wy; + const uint32_t total_dst_weights = bx * by; + + if (total_src_weights == total_dst_weights) + { + memcpy(pDst_weights, pSrc_weights, total_src_weights); + return; + } + + weighted_sample weights[12 * 12]; + compute_upsample_weights(bx, by, wx, wy, weights); + + const weighted_sample* pS = weights; + + for (uint32_t y = 0; y < by; y++) + { + for (uint32_t x = 0; x < bx; x++, ++pS) + { + const uint32_t w00 = pS->m_weights[0][0]; + const uint32_t w01 = pS->m_weights[0][1]; + const uint32_t w10 = pS->m_weights[1][0]; + const uint32_t w11 = pS->m_weights[1][1]; + + assert(w00 || w01 || w10 || w11); + + const uint32_t sx = pS->m_src_x, sy = pS->m_src_y; + + uint32_t total = 8; + if (w00) total += pSrc_weights[bounds_check(sx + sy * wx, 0U, total_src_weights)] * w00; + if (w01) total += pSrc_weights[bounds_check(sx + 1 + sy * wx, 0U, total_src_weights)] * w01; + if (w10) total += pSrc_weights[bounds_check(sx + (sy + 1) * wx, 0U, total_src_weights)] * w10; + if (w11) total += pSrc_weights[bounds_check(sx + 1 + (sy + 1) * wx, 0U, total_src_weights)] * w11; + + pDst_weights[x + y * bx] = (uint8_t)(total >> 4); + } + } + } + + inline uint32_t hash52(uint32_t v) + { + uint32_t p = v; + p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4; + p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3; + p ^= p << 6; p ^= p >> 17; + return p; + } + + int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block) + { + assert(zIn == 0); + + const uint32_t x = small_block ? xIn << 1 : xIn; + const uint32_t y = small_block ? yIn << 1 : yIn; + const uint32_t z = small_block ? zIn << 1 : zIn; + const uint32_t seed = seedIn + 1024 * (num_partitions - 1); + const uint32_t rnum = hash52(seed); + + uint8_t seed1 = (uint8_t)(rnum & 0xf); + uint8_t seed2 = (uint8_t)((rnum >> 4) & 0xf); + uint8_t seed3 = (uint8_t)((rnum >> 8) & 0xf); + uint8_t seed4 = (uint8_t)((rnum >> 12) & 0xf); + uint8_t seed5 = (uint8_t)((rnum >> 16) & 0xf); + uint8_t seed6 = (uint8_t)((rnum >> 20) & 0xf); + uint8_t seed7 = (uint8_t)((rnum >> 24) & 0xf); + uint8_t seed8 = (uint8_t)((rnum >> 28) & 0xf); + uint8_t seed9 = (uint8_t)((rnum >> 18) & 0xf); + uint8_t seed10 = (uint8_t)((rnum >> 22) & 0xf); + uint8_t seed11 = (uint8_t)((rnum >> 26) & 0xf); + uint8_t seed12 = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf); + + seed1 = (uint8_t)(seed1 * seed1); + seed2 = (uint8_t)(seed2 * seed2); + seed3 = (uint8_t)(seed3 * seed3); + seed4 = (uint8_t)(seed4 * seed4); + seed5 = (uint8_t)(seed5 * seed5); + seed6 = (uint8_t)(seed6 * seed6); + seed7 = (uint8_t)(seed7 * seed7); + seed8 = (uint8_t)(seed8 * seed8); + seed9 = (uint8_t)(seed9 * seed9); + seed10 = (uint8_t)(seed10 * seed10); + seed11 = (uint8_t)(seed11 * seed11); + seed12 = (uint8_t)(seed12 * seed12); + + const int shA = (seed & 2) != 0 ? 4 : 5; + const int shB = (num_partitions == 3) ? 6 : 5; + const int sh1 = (seed & 1) != 0 ? shA : shB; + const int sh2 = (seed & 1) != 0 ? shB : shA; + const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2; + + seed1 = (uint8_t)(seed1 >> sh1); + seed2 = (uint8_t)(seed2 >> sh2); + seed3 = (uint8_t)(seed3 >> sh1); + seed4 = (uint8_t)(seed4 >> sh2); + seed5 = (uint8_t)(seed5 >> sh1); + seed6 = (uint8_t)(seed6 >> sh2); + seed7 = (uint8_t)(seed7 >> sh1); + seed8 = (uint8_t)(seed8 >> sh2); + seed9 = (uint8_t)(seed9 >> sh3); + seed10 = (uint8_t)(seed10 >> sh3); + seed11 = (uint8_t)(seed11 >> sh3); + seed12 = (uint8_t)(seed12 >> sh3); + + const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14)); + const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10)); + const int c = (num_partitions >= 3) ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0; + const int d = (num_partitions >= 4) ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0; + + return (a >= b && a >= c && a >= d) ? 0 + : (b >= c && b >= d) ? 1 + : (c >= d) ? 2 + : 3; + } + + static uint32_t g_texel_partitions_4x4[1024][2]; + + void precompute_texel_partitions_4x4() + { + for (uint32_t p = 0; p < 1024; p++) + { + uint32_t v2 = 0, v3 = 0; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t shift = x * 2 + y * 8; + v2 |= (compute_texel_partition(p, x, y, 0, 2, true) << shift); + v3 |= (compute_texel_partition(p, x, y, 0, 3, true) << shift); + } + } + + g_texel_partitions_4x4[p][0] = v2; + g_texel_partitions_4x4[p][1] = v3; + } + } + + static inline int get_precompute_texel_partitions_4x4(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions) + { + assert(g_texel_partitions_4x4[1][0]); + assert(seed < 1024); + assert((x <= 3) && (y <= 3)); + assert((num_partitions >= 2) && (num_partitions <= 3)); + + const uint32_t shift = x * 2 + y * 8; + return (g_texel_partitions_4x4[seed][num_partitions - 2] >> shift) & 3; + } + + void blue_contract( + int r, int g, int b, int a, + int &dr, int &dg, int &db, int &da) + { + dr = (r + b) >> 1; + dg = (g + b) >> 1; + db = b; + da = a; + } + + inline void bit_transfer_signed(int& a, int& b) + { + b >>= 1; + b |= (a & 0x80); + a >>= 1; + a &= 0x3F; + if ((a & 0x20) != 0) + a -= 0x40; + } + + static inline int clamp(int a, int l, int h) + { + if (a < l) + a = l; + else if (a > h) + a = h; + return a; + } + + static inline float clampf(float a, float l, float h) + { + if (a < l) + a = l; + else if (a > h) + a = h; + return a; + } + + inline int sign_extend(int src, int num_src_bits) + { + assert((num_src_bits >= 2) && (num_src_bits <= 31)); + + const bool negative = (src & (1 << (num_src_bits - 1))) != 0; + if (negative) + return src | ~((1 << num_src_bits) - 1); + else + return src & ((1 << num_src_bits) - 1); + } + + // endpoints is [4][2] + void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t *pE) + { + assert(cem_index <= CEM_HDR_RGB_HDR_ALPHA); + + int v0 = pE[0], v1 = pE[1]; + + int& e0_r = pEndpoints[0][0], &e0_g = pEndpoints[1][0], &e0_b = pEndpoints[2][0], &e0_a = pEndpoints[3][0]; + int& e1_r = pEndpoints[0][1], &e1_g = pEndpoints[1][1], &e1_b = pEndpoints[2][1], &e1_a = pEndpoints[3][1]; + + switch (cem_index) + { + case CEM_LDR_LUM_DIRECT: + { + e0_r = v0; e1_r = v1; + e0_g = v0; e1_g = v1; + e0_b = v0; e1_b = v1; + e0_a = 0xFF; e1_a = 0xFF; + break; + } + case CEM_LDR_LUM_BASE_PLUS_OFS: + { + int l0 = (v0 >> 2) | (v1 & 0xc0); + int l1 = l0 + (v1 & 0x3f); + + if (l1 > 0xFF) + l1 = 0xFF; + + e0_r = l0; e1_r = l1; + e0_g = l0; e1_g = l1; + e0_b = l0; e1_b = l1; + e0_a = 0xFF; e1_a = 0xFF; + break; + } + case CEM_LDR_LUM_ALPHA_DIRECT: + { + int v2 = pE[2], v3 = pE[3]; + + e0_r = v0; e1_r = v1; + e0_g = v0; e1_g = v1; + e0_b = v0; e1_b = v1; + e0_a = v2; e1_a = v3; + break; + } + case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS: + { + int v2 = pE[2], v3 = pE[3]; + + bit_transfer_signed(v1, v0); + bit_transfer_signed(v3, v2); + + e0_r = v0; e1_r = v0 + v1; + e0_g = v0; e1_g = v0 + v1; + e0_b = v0; e1_b = v0 + v1; + e0_a = v2; e1_a = v2 + v3; + + for (uint32_t c = 0; c < 4; c++) + { + pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255); + pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255); + } + + break; + } + case CEM_LDR_RGB_BASE_SCALE: + { + int v2 = pE[2], v3 = pE[3]; + + e0_r = (v0 * v3) >> 8; e1_r = v0; + e0_g = (v1 * v3) >> 8; e1_g = v1; + e0_b = (v2 * v3) >> 8; e1_b = v2; + e0_a = 0xFF; e1_a = 0xFF; + + break; + } + case CEM_LDR_RGB_DIRECT: + { + int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5]; + + if ((v1 + v3 + v5) >= (v0 + v2 + v4)) + { + e0_r = v0; e1_r = v1; + e0_g = v2; e1_g = v3; + e0_b = v4; e1_b = v5; + e0_a = 0xFF; e1_a = 0xFF; + } + else + { + blue_contract(v1, v3, v5, 0xFF, e0_r, e0_g, e0_b, e0_a); + blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a); + } + + break; + } + case CEM_LDR_RGB_BASE_PLUS_OFFSET: + { + int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5]; + + bit_transfer_signed(v1, v0); + bit_transfer_signed(v3, v2); + bit_transfer_signed(v5, v4); + + if ((v1 + v3 + v5) >= 0) + { + e0_r = v0; e1_r = v0 + v1; + e0_g = v2; e1_g = v2 + v3; + e0_b = v4; e1_b = v4 + v5; + e0_a = 0xFF; e1_a = 0xFF; + } + else + { + blue_contract(v0 + v1, v2 + v3, v4 + v5, 0xFF, e0_r, e0_g, e0_b, e0_a); + blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a); + } + + for (uint32_t c = 0; c < 4; c++) + { + pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255); + pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255); + } + + break; + } + case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: + { + int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5]; + + e0_r = (v0 * v3) >> 8; e1_r = v0; + e0_g = (v1 * v3) >> 8; e1_g = v1; + e0_b = (v2 * v3) >> 8; e1_b = v2; + e0_a = v4; e1_a = v5; + + break; + } + case CEM_LDR_RGBA_DIRECT: + { + int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7]; + + if ((v1 + v3 + v5) >= (v0 + v2 + v4)) + { + e0_r = v0; e1_r = v1; + e0_g = v2; e1_g = v3; + e0_b = v4; e1_b = v5; + e0_a = v6; e1_a = v7; + } + else + { + blue_contract(v1, v3, v5, v7, e0_r, e0_g, e0_b, e0_a); + blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a); + } + + break; + } + case CEM_LDR_RGBA_BASE_PLUS_OFFSET: + { + int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7]; + + bit_transfer_signed(v1, v0); + bit_transfer_signed(v3, v2); + bit_transfer_signed(v5, v4); + bit_transfer_signed(v7, v6); + + if ((v1 + v3 + v5) >= 0) + { + e0_r = v0; e1_r = v0 + v1; + e0_g = v2; e1_g = v2 + v3; + e0_b = v4; e1_b = v4 + v5; + e0_a = v6; e1_a = v6 + v7; + } + else + { + blue_contract(v0 + v1, v2 + v3, v4 + v5, v6 + v7, e0_r, e0_g, e0_b, e0_a); + blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a); + } + + for (uint32_t c = 0; c < 4; c++) + { + pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255); + pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255); + } + + break; + } + case CEM_HDR_LUM_LARGE_RANGE: + { + int y0, y1; + if (v1 >= v0) + { + y0 = (v0 << 4); + y1 = (v1 << 4); + } + else + { + y0 = (v1 << 4) + 8; + y1 = (v0 << 4) - 8; + } + + e0_r = y0; e1_r = y1; + e0_g = y0; e1_g = y1; + e0_b = y0; e1_b = y1; + e0_a = 0x780; e1_a = 0x780; + + break; + } + case CEM_HDR_LUM_SMALL_RANGE: + { + int y0, y1, d; + + if ((v0 & 0x80) != 0) + { + y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2); + d = (v1 & 0x1F) << 2; + } + else + { + y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1); + d = (v1 & 0x0F) << 1; + } + + y1 = y0 + d; + if (y1 > 0xFFF) + y1 = 0xFFF; + + e0_r = y0; e1_r = y1; + e0_g = y0; e1_g = y1; + e0_b = y0; e1_b = y1; + e0_a = 0x780; e1_a = 0x780; + + break; + } + case CEM_HDR_RGB_BASE_SCALE: + { + int v2 = pE[2], v3 = pE[3]; + + int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4); + + int majcomp, mode; + if ((modeval & 0xC) != 0xC) + { + majcomp = modeval >> 2; + mode = modeval & 3; + } + else if (modeval != 0xF) + { + majcomp = modeval & 3; + mode = 4; + } + else + { + majcomp = 0; + mode = 5; + } + + int red = v0 & 0x3f; + int green = v1 & 0x1f; + int blue = v2 & 0x1f; + int scale = v3 & 0x1f; + + int x0 = (v1 >> 6) & 1; + int x1 = (v1 >> 5) & 1; + int x2 = (v2 >> 6) & 1; + int x3 = (v2 >> 5) & 1; + int x4 = (v3 >> 7) & 1; + int x5 = (v3 >> 6) & 1; + int x6 = (v3 >> 5) & 1; + + int ohm = 1 << mode; + if (ohm & 0x30) green |= x0 << 6; + if (ohm & 0x3A) green |= x1 << 5; + if (ohm & 0x30) blue |= x2 << 6; + if (ohm & 0x3A) blue |= x3 << 5; + if (ohm & 0x3D) scale |= x6 << 5; + if (ohm & 0x2D) scale |= x5 << 6; + if (ohm & 0x04) scale |= x4 << 7; + if (ohm & 0x3B) red |= x4 << 6; + if (ohm & 0x04) red |= x3 << 6; + if (ohm & 0x10) red |= x5 << 7; + if (ohm & 0x0F) red |= x2 << 7; + if (ohm & 0x05) red |= x1 << 8; + if (ohm & 0x0A) red |= x0 << 8; + if (ohm & 0x05) red |= x0 << 9; + if (ohm & 0x02) red |= x6 << 9; + if (ohm & 0x01) red |= x3 << 10; + if (ohm & 0x02) red |= x5 << 10; + + static const int s_shamts[6] = { 1,1,2,3,4,5 }; + + const int shamt = s_shamts[mode]; + red <<= shamt; + green <<= shamt; + blue <<= shamt; + scale <<= shamt; + + if (mode != 5) + { + green = red - green; + blue = red - blue; + } + + if (majcomp == 1) + std::swap(red, green); + + if (majcomp == 2) + std::swap(red, blue); + + e1_r = clamp(red, 0, 0xFFF); + e1_g = clamp(green, 0, 0xFFF); + e1_b = clamp(blue, 0, 0xFFF); + e1_a = 0x780; + + e0_r = clamp(red - scale, 0, 0xFFF); + e0_g = clamp(green - scale, 0, 0xFFF); + e0_b = clamp(blue - scale, 0, 0xFFF); + e0_a = 0x780; + + break; + } + case CEM_HDR_RGB_HDR_ALPHA: + case CEM_HDR_RGB_LDR_ALPHA: + case CEM_HDR_RGB: + { + int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5]; + + int majcomp = ((v4 & 0x80) >> 7) | ((v5 & 0x80) >> 6); + + e0_a = 0x780; + e1_a = 0x780; + + if (majcomp == 3) + { + e0_r = v0 << 4; + e0_g = v2 << 4; + e0_b = (v4 & 0x7f) << 5; + + e1_r = v1 << 4; + e1_g = v3 << 4; + e1_b = (v5 & 0x7f) << 5; + } + else + { + int mode = ((v1 & 0x80) >> 7) | ((v2 & 0x80) >> 6) | ((v3 & 0x80) >> 5); + int va = v0 | ((v1 & 0x40) << 2); + int vb0 = v2 & 0x3f; + int vb1 = v3 & 0x3f; + int vc = v1 & 0x3f; + int vd0 = v4 & 0x7f; + int vd1 = v5 & 0x7f; + + static const int s_dbitstab[8] = { 7,6,7,6,5,6,5,6 }; + vd0 = sign_extend(vd0, s_dbitstab[mode]); + vd1 = sign_extend(vd1, s_dbitstab[mode]); + + int x0 = (v2 >> 6) & 1; + int x1 = (v3 >> 6) & 1; + int x2 = (v4 >> 6) & 1; + int x3 = (v5 >> 6) & 1; + int x4 = (v4 >> 5) & 1; + int x5 = (v5 >> 5) & 1; + + int ohm = 1 << mode; + if (ohm & 0xA4) va |= x0 << 9; + if (ohm & 0x08) va |= x2 << 9; + if (ohm & 0x50) va |= x4 << 9; + if (ohm & 0x50) va |= x5 << 10; + if (ohm & 0xA0) va |= x1 << 10; + if (ohm & 0xC0) va |= x2 << 11; + if (ohm & 0x04) vc |= x1 << 6; + if (ohm & 0xE8) vc |= x3 << 6; + if (ohm & 0x20) vc |= x2 << 7; + if (ohm & 0x5B) vb0 |= x0 << 6; + if (ohm & 0x5B) vb1 |= x1 << 6; + if (ohm & 0x12) vb0 |= x2 << 7; + if (ohm & 0x12) vb1 |= x3 << 7; + + int shamt = (mode >> 1) ^ 3; + va = (uint32_t)va << shamt; + vb0 = (uint32_t)vb0 << shamt; + vb1 = (uint32_t)vb1 << shamt; + vc = (uint32_t)vc << shamt; + vd0 = (uint32_t)vd0 << shamt; + vd1 = (uint32_t)vd1 << shamt; + + e1_r = clamp(va, 0, 0xFFF); + e1_g = clamp(va - vb0, 0, 0xFFF); + e1_b = clamp(va - vb1, 0, 0xFFF); + + e0_r = clamp(va - vc, 0, 0xFFF); + e0_g = clamp(va - vb0 - vc - vd0, 0, 0xFFF); + e0_b = clamp(va - vb1 - vc - vd1, 0, 0xFFF); + + if (majcomp == 1) + { + std::swap(e0_r, e0_g); + std::swap(e1_r, e1_g); + } + else if (majcomp == 2) + { + std::swap(e0_r, e0_b); + std::swap(e1_r, e1_b); + } + } + + if (cem_index == CEM_HDR_RGB_LDR_ALPHA) + { + int v6 = pE[6], v7 = pE[7]; + + e0_a = v6; + e1_a = v7; + } + else if (cem_index == CEM_HDR_RGB_HDR_ALPHA) + { + int v6 = pE[6], v7 = pE[7]; + + // Extract mode bits + int mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2); + v6 &= 0x7F; + v7 &= 0x7F; + + if (mode == 3) + { + e0_a = v6 << 5; + e1_a = v7 << 5; + } + else + { + v6 |= (v7 << (mode + 1)) & 0x780; + v7 &= (0x3F >> mode); + v7 ^= (0x20 >> mode); + v7 -= (0x20 >> mode); + v6 <<= (4 - mode); + v7 <<= (4 - mode); + + v7 += v6; + v7 = clamp(v7, 0, 0xFFF); + e0_a = v6; + e1_a = v7; + } + } + + break; + } + default: + { + assert(0); + for (uint32_t c = 0; c < 4; c++) + { + pEndpoints[c][0] = 0; + pEndpoints[c][1] = 0; + } + break; + } + } + } + + static inline bool is_half_inf_or_nan(half_float v) + { + return get_bits(v, 10, 14) == 31; + } + + // This float->half conversion matches how "F32TO16" works on Intel GPU's. + half_float float_to_half(float val, bool toward_zero) + { + union { float f; int32_t i; uint32_t u; } fi = { val }; + const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1; + int s = flt_s, e = 0, m = 0; + + // inf/NaN + if (flt_e == 0xff) + { + e = 31; + if (flt_m != 0) // NaN + m = 1; + } + // not zero or denormal + else if (flt_e != 0) + { + int new_exp = flt_e - 127; + if (new_exp > 15) + e = 31; + else if (new_exp < -14) + { + if (toward_zero) + m = (int)truncf((1 << 24) * fabsf(fi.f)); + else + m = lrintf((1 << 24) * fabsf(fi.f)); + } + else + { + e = new_exp + 15; + if (toward_zero) + m = (int)truncf((float)flt_m * (1.0f / (float)(1 << 13))); + else + m = lrintf((float)flt_m * (1.0f / (float)(1 << 13))); + } + } + + assert((0 <= m) && (m <= 1024)); + if (m == 1024) + { + e++; + m = 0; + } + + assert((s >= 0) && (s <= 1)); + assert((e >= 0) && (e <= 31)); + assert((m >= 0) && (m <= 1023)); + + half_float result = (half_float)((s << 15) | (e << 10) | m); + return result; + } + + float half_to_float(half_float hval) + { + union { float f; uint32_t u; } x = { 0 }; + + uint32_t s = ((uint32_t)hval >> 15) & 1; + uint32_t e = ((uint32_t)hval >> 10) & 0x1F; + uint32_t m = (uint32_t)hval & 0x3FF; + + if (!e) + { + if (!m) + { + // +- 0 + x.u = s << 31; + return x.f; + } + else + { + // denormalized + while (!(m & 0x00000400)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x00000400; + } + } + else if (e == 31) + { + if (m == 0) + { + // +/- INF + x.u = (s << 31) | 0x7f800000; + return x.f; + } + else + { + // +/- NaN + x.u = (s << 31) | 0x7f800000 | (m << 13); + return x.f; + } + } + + e = e + (127 - 15); + m = m << 13; + + assert(s <= 1); + assert(m <= 0x7FFFFF); + assert(e <= 255); + + x.u = m | (e << 23) | (s << 31); + return x.f; + } + + static inline half_float qlog16_to_half(int k) + { + assert((k >= 0) && (k <= 0xFFFF)); + + int E = (k & 0xF800) >> 11; + int M = k & 0x7FF; + + int Mt; + if (M < 512) + Mt = 3 * M; + else if (M >= 1536) + Mt = 5 * M - 2048; + else + Mt = 4 * M - 512; + + return (half_float)((E << 10) + (Mt >> 3)); + } + + // See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt + const int RGB9E5_EXPONENT_BITS = 5, RGB9E5_MANTISSA_BITS = 9, RGB9E5_EXP_BIAS = 15, RGB9E5_MAX_VALID_BIASED_EXP = 31; + const int MAX_RGB9E5_EXP = (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS); + const int RGB9E5_MANTISSA_VALUES = (1 << RGB9E5_MANTISSA_BITS); + const int MAX_RGB9E5_MANTISSA = (RGB9E5_MANTISSA_VALUES - 1); + //const int MAX_RGB9E5 = (int)(((float)MAX_RGB9E5_MANTISSA) / RGB9E5_MANTISSA_VALUES * (1 << MAX_RGB9E5_EXP)); + const int EPSILON_RGB9E5 = (int)((1.0f / (float)RGB9E5_MANTISSA_VALUES) / (float)(1 << RGB9E5_EXP_BIAS)); + + void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b) + { + int x = packed & 511; + int y = (packed >> 9) & 511; + int z = (packed >> 18) & 511; + int w = (packed >> 27) & 31; + + const float scale = powf(2.0f, static_cast<float>(w - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS)); + + r = x * scale; + g = y * scale; + b = z * scale; + } + + // floor_log2 is not correct for the denorm and zero values, but we are going to do a max of this value with the minimum rgb9e5 exponent that will hide these problem cases. + static inline int floor_log2(float x) + { + union float754 + { + unsigned int raw; + float value; + }; + + float754 f; + f.value = x; + // Extract float exponent + return ((f.raw >> 23) & 0xFF) - 127; + } + + static inline int maximumi(int a, int b) { return (a > b) ? a : b; } + static inline float maximumf(float a, float b) { return (a > b) ? a : b; } + + uint32_t pack_rgb9e5(float r, float g, float b) + { + r = clampf(r, 0.0f, MAX_RGB9E5); + g = clampf(g, 0.0f, MAX_RGB9E5); + b = clampf(b, 0.0f, MAX_RGB9E5); + + float maxrgb = maximumf(maximumf(r, g), b); + int exp_shared = maximumi(-RGB9E5_EXP_BIAS - 1, floor_log2(maxrgb)) + 1 + RGB9E5_EXP_BIAS; + assert((exp_shared >= 0) && (exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP)); + + float denom = powf(2.0f, (float)(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS)); + + int maxm = (int)floorf((maxrgb / denom) + 0.5f); + if (maxm == (MAX_RGB9E5_MANTISSA + 1)) + { + denom *= 2; + exp_shared += 1; + assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP); + } + else + { + assert(maxm <= MAX_RGB9E5_MANTISSA); + } + + int rm = (int)floorf((r / denom) + 0.5f); + int gm = (int)floorf((g / denom) + 0.5f); + int bm = (int)floorf((b / denom) + 0.5f); + + assert((rm >= 0) && (rm <= MAX_RGB9E5_MANTISSA)); + assert((gm >= 0) && (gm <= MAX_RGB9E5_MANTISSA)); + assert((bm >= 0) && (bm <= MAX_RGB9E5_MANTISSA)); + + return rm | (gm << 9) | (bm << 18) | (exp_shared << 27); + } + + static inline int clz17(uint32_t x) + { + assert(x <= 0x1FFFF); + x &= 0x1FFFF; + + if (!x) + return 17; + + uint32_t n = 0; + while ((x & 0x10000) == 0) + { + x <<= 1u; + n++; + } + + return n; + } + + static inline uint32_t pack_rgb9e5_ldr_astc(int Cr, int Cg, int Cb) + { + int lz = clz17(Cr | Cg | Cb | 1); + if (Cr == 65535) { Cr = 65536; lz = 0; } + if (Cg == 65535) { Cg = 65536; lz = 0; } + if (Cb == 65535) { Cb = 65536; lz = 0; } + Cr <<= lz; Cg <<= lz; Cb <<= lz; + Cr = (Cr >> 8) & 0x1FF; + Cg = (Cg >> 8) & 0x1FF; + Cb = (Cb >> 8) & 0x1FF; + uint32_t exponent = 16 - lz; + uint32_t texel = (exponent << 27) | (Cb << 18) | (Cg << 9) | Cr; + return texel; + } + + static inline uint32_t pack_rgb9e5_hdr_astc(int Cr, int Cg, int Cb) + { + if (Cr > 0x7c00) Cr = 0; else if (Cr == 0x7c00) Cr = 0x7bff; + if (Cg > 0x7c00) Cg = 0; else if (Cg == 0x7c00) Cg = 0x7bff; + if (Cb > 0x7c00) Cb = 0; else if (Cb == 0x7c00) Cb = 0x7bff; + int Re = (Cr >> 10) & 0x1F; + int Ge = (Cg >> 10) & 0x1F; + int Be = (Cb >> 10) & 0x1F; + int Rex = (Re == 0) ? 1 : Re; + int Gex = (Ge == 0) ? 1 : Ge; + int Bex = (Be == 0) ? 1 : Be; + int Xm = ((Cr | Cg | Cb) & 0x200) >> 9; + int Xe = Re | Ge | Be; + uint32_t rshift, gshift, bshift, expo; + + if (Xe == 0) + { + expo = rshift = gshift = bshift = Xm; + } + else if (Re >= Ge && Re >= Be) + { + expo = Rex + 1; + rshift = 2; + gshift = Rex - Gex + 2; + bshift = Rex - Bex + 2; + } + else if (Ge >= Be) + { + expo = Gex + 1; + rshift = Gex - Rex + 2; + gshift = 2; + bshift = Gex - Bex + 2; + } + else + { + expo = Bex + 1; + rshift = Bex - Rex + 2; + gshift = Bex - Gex + 2; + bshift = 2; + } + + int Rm = (Cr & 0x3FF) | (Re == 0 ? 0 : 0x400); + int Gm = (Cg & 0x3FF) | (Ge == 0 ? 0 : 0x400); + int Bm = (Cb & 0x3FF) | (Be == 0 ? 0 : 0x400); + Rm = (Rm >> rshift) & 0x1FF; + Gm = (Gm >> gshift) & 0x1FF; + Bm = (Bm >> bshift) & 0x1FF; + + uint32_t texel = (expo << 27) | (Bm << 18) | (Gm << 9) | (Rm << 0); + return texel; + } + + // Important: pPixels is either 32-bit/texel or 64-bit/texel. + bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode) + { + assert(is_valid_block_size(blk_width, blk_height)); + + assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()); + if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()) + return false; + + const uint32_t num_blk_pixels = blk_width * blk_height; + + // Write block error color + if (dec_mode == cDecodeModeHDR16) + { + // NaN's + memset(pPixels, 0xFF, num_blk_pixels * sizeof(half_float) * 4); + } + else if (dec_mode == cDecodeModeRGB9E5) + { + const uint32_t purple_9e5 = pack_rgb9e5(1.0f, 0.0f, 1.0f); + + for (uint32_t i = 0; i < num_blk_pixels; i++) + ((uint32_t*)pPixels)[i] = purple_9e5; + } + else + { + for (uint32_t i = 0; i < num_blk_pixels; i++) + ((uint32_t*)pPixels)[i] = 0xFFFF00FF; + } + + if (log_blk.m_error_flag) + { + // Should this return false? It's not an invalid logical block config, though. + return false; + } + + // Handle solid color blocks + if (log_blk.m_solid_color_flag_ldr) + { + // LDR solid block + if (dec_mode == cDecodeModeHDR16) + { + // Convert LDR pixels to half-float + half_float h[4]; + for (uint32_t c = 0; c < 4; c++) + h[c] = (log_blk.m_solid_color[c] == 0xFFFF) ? 0x3C00 : float_to_half((float)log_blk.m_solid_color[c] * (1.0f / 65536.0f), true); + + for (uint32_t i = 0; i < num_blk_pixels; i++) + memcpy((uint16_t*)pPixels + i * 4, h, sizeof(half_float) * 4); + } + else if (dec_mode == cDecodeModeRGB9E5) + { + float r = (log_blk.m_solid_color[0] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[0] * (1.0f / 65536.0f)); + float g = (log_blk.m_solid_color[1] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[1] * (1.0f / 65536.0f)); + float b = (log_blk.m_solid_color[2] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[2] * (1.0f / 65536.0f)); + + const uint32_t packed = pack_rgb9e5(r, g, b); + + for (uint32_t i = 0; i < num_blk_pixels; i++) + ((uint32_t*)pPixels)[i] = packed; + } + else + { + // Convert LDR pixels to 8-bits + for (uint32_t i = 0; i < num_blk_pixels; i++) + for (uint32_t c = 0; c < 4; c++) + ((uint8_t*)pPixels)[i * 4 + c] = (log_blk.m_solid_color[c] >> 8); + } + + return true; + } + else if (log_blk.m_solid_color_flag_hdr) + { + // HDR solid block, decode mode must be half-float or RGB9E5 + if (dec_mode == cDecodeModeHDR16) + { + for (uint32_t i = 0; i < num_blk_pixels; i++) + memcpy((uint16_t*)pPixels + i * 4, log_blk.m_solid_color, sizeof(half_float) * 4); + } + else if (dec_mode == cDecodeModeRGB9E5) + { + float r = half_to_float(log_blk.m_solid_color[0]); + float g = half_to_float(log_blk.m_solid_color[1]); + float b = half_to_float(log_blk.m_solid_color[2]); + + const uint32_t packed = pack_rgb9e5(r, g, b); + + for (uint32_t i = 0; i < num_blk_pixels; i++) + ((uint32_t*)pPixels)[i] = packed; + } + else + { + return false; + } + + return true; + } + + // Sanity check block's config + if ((log_blk.m_grid_width < 2) || (log_blk.m_grid_height < 2)) + return false; + if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height)) + return false; + + if ((log_blk.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_blk.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE)) + return false; + if ((log_blk.m_weight_ise_range < FIRST_VALID_WEIGHT_ISE_RANGE) || (log_blk.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE)) + return false; + if ((log_blk.m_num_partitions < 1) || (log_blk.m_num_partitions > MAX_PARTITIONS)) + return false; + if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > MAX_DUAL_PLANE_PARTITIONS)) + return false; + if (log_blk.m_partition_id >= NUM_PARTITION_PATTERNS) + return false; + if ((log_blk.m_num_partitions == 1) && (log_blk.m_partition_id > 0)) + return false; + if (log_blk.m_color_component_selector > 3) + return false; + + const uint32_t total_endpoint_levels = get_ise_levels(log_blk.m_endpoint_ise_range); + const uint32_t total_weight_levels = get_ise_levels(log_blk.m_weight_ise_range); + + bool is_ldr_endpoints[MAX_PARTITIONS]; + + // Check CEM's + uint32_t total_cem_vals = 0; + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + { + if (log_blk.m_color_endpoint_modes[i] > 15) + return false; + + total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[i]); + + is_ldr_endpoints[i] = is_cem_ldr(log_blk.m_color_endpoint_modes[i]); + } + + if (total_cem_vals > MAX_ENDPOINTS) + return false; + + const dequant_table& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range); + const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data(); + + // Dequantized endpoints to [0,255] + uint8_t dequantized_endpoints[MAX_ENDPOINTS]; + for (uint32_t i = 0; i < total_cem_vals; i++) + { + if (log_blk.m_endpoints[i] >= total_endpoint_levels) + return false; + dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]]; + } + + // Dequantize weights to [0,64] + uint8_t dequantized_weights[2][12 * 12]; + + const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range); + const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data(); + + const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height; + for (uint32_t i = 0; i < total_weight_vals; i++) + { + if (log_blk.m_weights[i] >= total_weight_levels) + return false; + + const uint32_t plane_index = log_blk.m_dual_plane ? (i & 1) : 0; + const uint32_t grid_index = log_blk.m_dual_plane ? (i >> 1) : i; + + dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]]; + } + + // Upsample weight grid. [0,64] weights + uint8_t upsampled_weights[2][12 * 12]; + + upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[0][0], &upsampled_weights[0][0]); + if (log_blk.m_dual_plane) + upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[1][0], &upsampled_weights[1][0]); + + // Decode CEM's + int endpoints[4][4][2]; // [subset][comp][l/h] + + uint32_t endpoint_val_index = 0; + for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++) + { + const uint32_t cem_index = log_blk.m_color_endpoint_modes[subset]; + + decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]); + + endpoint_val_index += get_num_cem_values(cem_index); + } + + // Decode texels + const bool small_block = num_blk_pixels < 31; + const bool use_precomputed_texel_partitions = (blk_width == 4) && (blk_height == 4) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3); + const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX; + + bool success = true; + + if (dec_mode == cDecodeModeRGB9E5) + { + // returns uint32_t's + for (uint32_t y = 0; y < blk_height; y++) + { + for (uint32_t x = 0; x < blk_width; x++) + { + const uint32_t pixel_index = x + y * blk_width; + const uint32_t subset = (log_blk.m_num_partitions > 1) ? + (use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block)) + : 0; + + int comp[3]; + + for (uint32_t c = 0; c < 3; c++) + { + const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index]; + + if (is_ldr_endpoints[subset]) + { + assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF)); + assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF)); + + int le = endpoints[subset][c][0]; + int he = endpoints[subset][c][1]; + + le = (le << 8) | le; + he = (he << 8) | he; + + int k = weight_interpolate(le, he, w); + assert((k >= 0) && (k <= 0xFFFF)); + + comp[c] = k; // 1.0 + } + else + { + assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF)); + assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF)); + + int le = endpoints[subset][c][0] << 4; + int he = endpoints[subset][c][1] << 4; + + int qlog16 = weight_interpolate(le, he, w); + + comp[c] = qlog16_to_half(qlog16); + + if (is_half_inf_or_nan((half_float)comp[c])) + comp[c] = 0x7BFF; + } + + } // c + + uint32_t packed; + if (is_ldr_endpoints[subset]) + packed = pack_rgb9e5_ldr_astc(comp[0], comp[1], comp[2]); + else + packed = pack_rgb9e5_hdr_astc(comp[0], comp[1], comp[2]); + + ((uint32_t*)pPixels)[pixel_index] = packed; + + } // x + } // y + } + else if (dec_mode == cDecodeModeHDR16) + { + // Note: must round towards zero when converting float to half for ASTC (18.19 Weight Application) + + // returns half floats + for (uint32_t y = 0; y < blk_height; y++) + { + for (uint32_t x = 0; x < blk_width; x++) + { + const uint32_t pixel_index = x + y * blk_width; + const uint32_t subset = (log_blk.m_num_partitions > 1) ? + (use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block)) + : 0; + + for (uint32_t c = 0; c < 4; c++) + { + const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index]; + + half_float o; + + if ( (is_ldr_endpoints[subset]) || + ((log_blk.m_color_endpoint_modes[subset] == CEM_HDR_RGB_LDR_ALPHA) && (c == 3)) ) + { + assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF)); + assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF)); + + int le = endpoints[subset][c][0]; + int he = endpoints[subset][c][1]; + + le = (le << 8) | le; + he = (he << 8) | he; + + int k = weight_interpolate(le, he, w); + assert((k >= 0) && (k <= 0xFFFF)); + + if (k == 0xFFFF) + o = 0x3C00; // 1.0 + else + o = float_to_half((float)k * (1.0f / 65536.0f), true); + } + else + { + assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF)); + assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF)); + + int le = endpoints[subset][c][0] << 4; + int he = endpoints[subset][c][1] << 4; + + int qlog16 = weight_interpolate(le, he, w); + + o = qlog16_to_half(qlog16); + + if (is_half_inf_or_nan(o)) + o = 0x7BFF; + } + + ((half_float*)pPixels)[pixel_index * 4 + c] = o; + } + + } // x + } // y + } + else + { + // returns uint8_t's + for (uint32_t y = 0; y < blk_height; y++) + { + for (uint32_t x = 0; x < blk_width; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + const uint32_t subset = (log_blk.m_num_partitions > 1) ? + (use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block)) + : 0; + + if (!is_ldr_endpoints[subset]) + { + ((uint32_t*)pPixels)[pixel_index * 4] = 0xFFFF00FF; + success = false; + } + else + { + for (uint32_t c = 0; c < 4; c++) + { + const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index]; + + int le = endpoints[subset][c][0]; + int he = endpoints[subset][c][1]; + + // FIXME: the spec is apparently wrong? this matches ARM's and Google's decoder + //if ((dec_mode == cDecodeModeSRGB8) && (c <= 2)) + // See https://github.com/ARM-software/astc-encoder/issues/447 + if (dec_mode == cDecodeModeSRGB8) + { + le = (le << 8) | 0x80; + he = (he << 8) | 0x80; + } + else + { + le = (le << 8) | le; + he = (he << 8) | he; + } + + uint32_t k = weight_interpolate(le, he, w); + + // FIXME: This is what the spec says to do in LDR mode, but this is not what ARM's decoder does + // See decompress_symbolic_block(), decode_texel() and unorm16_to_sf16. + // It seems to effectively divide by 65535.0 and convert to FP16, then back to float, mul by 255.0, add .5 and then convert to 8-bit. + ((uint8_t*)pPixels)[pixel_index * 4 + c] = (uint8_t)(k >> 8); + } + } + + } // x + } // y + } + + return success; + } + + //------------------------------------------------ + // Physical to logical block decoding + + // unsigned 128-bit int, with some signed helpers + class uint128 + { + uint64_t m_lo, m_hi; + + public: + uint128() = default; + inline uint128(uint64_t lo) : m_lo(lo), m_hi(0) { } + inline uint128(uint64_t lo, uint64_t hi) : m_lo(lo), m_hi(hi) { } + inline uint128(const uint128& other) : m_lo(other.m_lo), m_hi(other.m_hi) { } + + inline uint128& set_signed(int64_t lo) { m_lo = lo; m_hi = (lo < 0) ? UINT64_MAX : 0; return *this; } + inline uint128& set(uint64_t lo) { m_lo = lo; m_hi = 0; return *this; } + + inline explicit operator uint8_t () const { return (uint8_t)m_lo; } + inline explicit operator uint16_t () const { return (uint16_t)m_lo; } + inline explicit operator uint32_t () const { return (uint32_t)m_lo; } + inline explicit operator uint64_t () const { return m_lo; } + + inline uint128& operator= (const uint128& rhs) { m_lo = rhs.m_lo; m_hi = rhs.m_hi; return *this; } + inline uint128& operator= (const uint64_t val) { m_lo = val; m_hi = 0; return *this; } + + inline uint64_t get_low() const { return m_lo; } + inline uint64_t& get_low() { return m_lo; } + + inline uint64_t get_high() const { return m_hi; } + inline uint64_t& get_high() { return m_hi; } + + inline bool operator== (const uint128& rhs) const { return (m_lo == rhs.m_lo) && (m_hi == rhs.m_hi); } + inline bool operator!= (const uint128& rhs) const { return (m_lo != rhs.m_lo) || (m_hi != rhs.m_hi); } + + inline bool operator< (const uint128& rhs) const + { + if (m_hi < rhs.m_hi) + return true; + + if (m_hi == rhs.m_hi) + { + if (m_lo < rhs.m_lo) + return true; + } + + return false; + } + + inline bool operator> (const uint128& rhs) const { return (rhs < *this); } + + inline bool operator<= (const uint128& rhs) const { return (*this == rhs) || (*this < rhs); } + inline bool operator>= (const uint128& rhs) const { return (*this == rhs) || (*this > rhs); } + + inline bool is_zero() const { return (m_lo == 0) && (m_hi == 0); } + inline bool is_all_ones() const { return (m_lo == UINT64_MAX) && (m_hi == UINT64_MAX); } + inline bool is_non_zero() const { return (m_lo != 0) || (m_hi != 0); } + inline explicit operator bool() const { return is_non_zero(); } + inline bool is_signed() const { return ((int64_t)m_hi) < 0; } + + inline bool signed_less(const uint128& rhs) const + { + const bool l_signed = is_signed(), r_signed = rhs.is_signed(); + + if (l_signed == r_signed) + return *this < rhs; + + if (l_signed && !r_signed) + return true; + + assert(!l_signed && r_signed); + return false; + } + + inline bool signed_greater(const uint128& rhs) const { return rhs.signed_less(*this); } + inline bool signed_less_equal(const uint128& rhs) const { return !rhs.signed_less(*this); } + inline bool signed_greater_equal(const uint128& rhs) const { return !signed_less(rhs); } + + double get_double() const + { + double res = 0; + + if (m_hi) + res = (double)m_hi * pow(2.0f, 64.0f); + + res += (double)m_lo; + + return res; + } + + double get_signed_double() const + { + if (is_signed()) + return -(uint128(*this).abs().get_double()); + else + return get_double(); + } + + inline uint128 abs() const + { + uint128 res(*this); + if (res.is_signed()) + res = -res; + return res; + } + + inline uint128& operator<<= (int shift) + { + assert(shift >= 0); + if (shift < 0) + return *this; + + m_hi = (shift >= 64) ? ((shift >= 128) ? 0 : (m_lo << (shift - 64))) : (m_hi << shift); + + if ((shift) && (shift < 64)) + m_hi |= (m_lo >> (64 - shift)); + + m_lo = (shift >= 64) ? 0 : (m_lo << shift); + + return *this; + } + + inline uint128 operator<< (int shift) const { uint128 res(*this); res <<= shift; return res; } + + inline uint128& operator>>= (int shift) + { + assert(shift >= 0); + if (shift < 0) + return *this; + + m_lo = (shift >= 64) ? ((shift >= 128) ? 0 : (m_hi >> (shift - 64))) : (m_lo >> shift); + + if ((shift) && (shift < 64)) + m_lo |= (m_hi << (64 - shift)); + + m_hi = (shift >= 64) ? 0 : (m_hi >> shift); + + return *this; + } + + inline uint128 operator>> (int shift) const { uint128 res(*this); res >>= shift; return res; } + + inline uint128 signed_shift_right(int shift) const + { + uint128 res(*this); + res >>= shift; + + if (is_signed()) + { + uint128 x(0U); + x = ~x; + x >>= shift; + res |= (~x); + } + + return res; + } + + inline uint128& operator |= (const uint128& rhs) { m_lo |= rhs.m_lo; m_hi |= rhs.m_hi; return *this; } + inline uint128 operator | (const uint128& rhs) const { uint128 res(*this); res |= rhs; return res; } + + inline uint128& operator &= (const uint128& rhs) { m_lo &= rhs.m_lo; m_hi &= rhs.m_hi; return *this; } + inline uint128 operator & (const uint128& rhs) const { uint128 res(*this); res &= rhs; return res; } + + inline uint128& operator ^= (const uint128& rhs) { m_lo ^= rhs.m_lo; m_hi ^= rhs.m_hi; return *this; } + inline uint128 operator ^ (const uint128& rhs) const { uint128 res(*this); res ^= rhs; return res; } + + inline uint128 operator ~() const { return uint128(~m_lo, ~m_hi); } + + inline uint128 operator -() const { uint128 res(~*this); if (++res.m_lo == 0) ++res.m_hi; return res; } + + // prefix + inline uint128 operator ++() + { + if (++m_lo == 0) + ++m_hi; + return *this; + } + + // postfix + inline uint128 operator ++(int) + { + uint128 res(*this); + if (++m_lo == 0) + ++m_hi; + return res; + } + + // prefix + inline uint128 operator --() + { + const uint64_t t = m_lo; + if (--m_lo > t) + --m_hi; + return *this; + } + + // postfix + inline uint128 operator --(int) + { + const uint64_t t = m_lo; + uint128 res(*this); + if (--m_lo > t) + --m_hi; + return res; + } + + inline uint128& operator+= (const uint128& rhs) + { + const uint64_t t = m_lo + rhs.m_lo; + m_hi = m_hi + rhs.m_hi + (t < m_lo); + m_lo = t; + return *this; + } + + inline uint128 operator+ (const uint128& rhs) const { uint128 res(*this); res += rhs; return res; } + + inline uint128& operator-= (const uint128& rhs) + { + const uint64_t t = m_lo - rhs.m_lo; + m_hi = m_hi - rhs.m_hi - (t > m_lo); + m_lo = t; + return *this; + } + + inline uint128 operator- (const uint128& rhs) const { uint128 res(*this); res -= rhs; return res; } + + // computes bit by bit, very slow + uint128& operator*=(const uint128& rhs) + { + uint128 temp(*this), result(0U); + + for (uint128 bitmask(rhs); bitmask; bitmask >>= 1, temp <<= 1) + if (bitmask.get_low() & 1) + result += temp; + + *this = result; + return *this; + } + + uint128 operator*(const uint128& rhs) const { uint128 res(*this); res *= rhs; return res; } + + // computes bit by bit, very slow + friend uint128 divide(const uint128& dividend, const uint128& divisor, uint128& remainder) + { + remainder = 0; + + if (!divisor) + { + assert(0); + return ~uint128(0U); + } + + uint128 quotient(0), one(1); + + for (int i = 127; i >= 0; i--) + { + remainder = (remainder << 1) | ((dividend >> i) & one); + if (remainder >= divisor) + { + remainder -= divisor; + quotient |= (one << i); + } + } + + return quotient; + } + + uint128 operator/(const uint128& rhs) const { uint128 remainder, res; res = divide(*this, rhs, remainder); return res; } + uint128 operator/=(const uint128& rhs) { uint128 remainder; *this = divide(*this, rhs, remainder); return *this; } + + uint128 operator%(const uint128& rhs) const { uint128 remainder; divide(*this, rhs, remainder); return remainder; } + uint128 operator%=(const uint128& rhs) { uint128 remainder; divide(*this, rhs, remainder); *this = remainder; return *this; } + + void print_hex(FILE* pFile) const + { + fprintf(pFile, "0x%016llx%016llx", (unsigned long long int)m_hi, (unsigned long long int)m_lo); + } + + void format_unsigned(std::string& res) const + { + basisu::vector<uint8_t> digits; + digits.reserve(39 + 1); + + uint128 k(*this), ten(10); + do + { + uint128 r; + k = divide(k, ten, r); + digits.push_back((uint8_t)r); + } while (k); + + for (int i = (int)digits.size() - 1; i >= 0; i--) + res += ('0' + digits[i]); + } + + void format_signed(std::string& res) const + { + uint128 val(*this); + + if (val.is_signed()) + { + res.push_back('-'); + val = -val; + } + + val.format_unsigned(res); + } + + void print_unsigned(FILE* pFile) + { + std::string str; + format_unsigned(str); + fprintf(pFile, "%s", str.c_str()); + } + + void print_signed(FILE* pFile) + { + std::string str; + format_signed(str); + fprintf(pFile, "%s", str.c_str()); + } + + uint128 get_reversed_bits() const + { + uint128 res; + + const uint32_t* pSrc = (const uint32_t*)this; + uint32_t* pDst = (uint32_t*)&res; + + pDst[0] = rev_dword(pSrc[3]); + pDst[1] = rev_dword(pSrc[2]); + pDst[2] = rev_dword(pSrc[1]); + pDst[3] = rev_dword(pSrc[0]); + + return res; + } + + uint128 get_byteswapped() const + { + uint128 res; + + const uint8_t* pSrc = (const uint8_t*)this; + uint8_t* pDst = (uint8_t*)&res; + + for (uint32_t i = 0; i < 16; i++) + pDst[i] = pSrc[15 - i]; + + return res; + } + + inline uint64_t get_bits64(uint32_t bit_ofs, uint32_t bit_len) const + { + assert(bit_ofs < 128); + assert(bit_len && (bit_len <= 64) && ((bit_ofs + bit_len) <= 128)); + + uint128 res(*this); + res >>= bit_ofs; + + const uint64_t bitmask = (bit_len == 64) ? UINT64_MAX : ((1ull << bit_len) - 1); + return res.get_low() & bitmask; + } + + inline uint32_t get_bits(uint32_t bit_ofs, uint32_t bit_len) const + { + assert(bit_len <= 32); + return (uint32_t)get_bits64(bit_ofs, bit_len); + } + + inline uint32_t next_bits(uint32_t& bit_ofs, uint32_t len) const + { + assert(len && (len <= 32)); + uint32_t x = get_bits(bit_ofs, len); + bit_ofs += len; + return x; + } + + inline uint128& set_bits(uint64_t val, uint32_t bit_ofs, uint32_t num_bits) + { + assert(bit_ofs < 128); + assert(num_bits && (num_bits <= 64) && ((bit_ofs + num_bits) <= 128)); + + uint128 bitmask(1); + bitmask = (bitmask << num_bits) - 1; + assert(uint128(val) <= bitmask); + + bitmask <<= bit_ofs; + *this &= ~bitmask; + + *this = *this | (uint128(val) << bit_ofs); + return *this; + } + }; + + static bool decode_void_extent(const uint128& bits, log_astc_block& log_blk) + { + if (bits.get_bits(10, 2) != 0b11) + return false; + + uint32_t bit_ofs = 12; + const uint32_t min_s = bits.next_bits(bit_ofs, 13); + const uint32_t max_s = bits.next_bits(bit_ofs, 13); + const uint32_t min_t = bits.next_bits(bit_ofs, 13); + const uint32_t max_t = bits.next_bits(bit_ofs, 13); + assert(bit_ofs == 64); + + const bool all_extents_all_ones = (min_s == 0x1FFF) && (max_s == 0x1FFF) && (min_t == 0x1FFF) && (max_t == 0x1FFF); + + if (!all_extents_all_ones && ((min_s >= max_s) || (min_t >= max_t))) + return false; + + const bool hdr_flag = bits.get_bits(9, 1) != 0; + + if (hdr_flag) + log_blk.m_solid_color_flag_hdr = true; + else + log_blk.m_solid_color_flag_ldr = true; + + log_blk.m_solid_color[0] = (uint16_t)bits.get_bits(64, 16); + log_blk.m_solid_color[1] = (uint16_t)bits.get_bits(80, 16); + log_blk.m_solid_color[2] = (uint16_t)bits.get_bits(96, 16); + log_blk.m_solid_color[3] = (uint16_t)bits.get_bits(112, 16); + + if (log_blk.m_solid_color_flag_hdr) + { + for (uint32_t c = 0; c < 4; c++) + if (is_half_inf_or_nan(log_blk.m_solid_color[c])) + return false; + } + + return true; + } + + struct astc_dec_row + { + int8_t Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs; + }; + + static const astc_dec_row s_dec_rows[10] = + { + // Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs; + { 10, 9, 7, 2, 5, 2, 4, 2, 4, 0, 1 }, // 4 2 + { 10, 9, 7, 2, 5, 2, 8, 2, 4, 0, 1 }, // 8 2 + { 10, 9, 5, 2, 7, 2, 2, 8, 4, 0, 1 }, // 2 8 + { 10, 9, 5, 2, 7, 1, 2, 6, 4, 0, 1 }, // 2 6 + + { 10, 9, 7, 1, 5, 2, 2, 2, 4, 0, 1 }, // 2 2 + { 10, 9, 0, 0, 5, 2, 12, 2, 4, 2, 3 }, // 12 2 + { 10, 9, 5, 2, 0, 0, 2, 12, 4, 2, 3 }, // 2 12 + { 10, 9, 0, 0, 0, 0, 6, 10, 4, 2, 3 }, // 6 10 + + { 10, 9, 0, 0, 0, 0, 10, 6, 4, 2, 3 }, // 10 6 + { -1, -1, 5, 2, 9, 2, 6, 6, 4, 2, 3 }, // 6 6 + }; + + static bool decode_config(const uint128& bits, log_astc_block& log_blk) + { + // Reserved + if (bits.get_bits(0, 4) == 0) + return false; + + // Reserved + if ((bits.get_bits(0, 2) == 0) && (bits.get_bits(6, 3) == 0b111)) + { + if (bits.get_bits(2, 4) != 0b1111) + return false; + } + + // Void extent + if (bits.get_bits(0, 9) == 0b111111100) + return decode_void_extent(bits, log_blk); + + // Check rows + const uint32_t x0_2 = bits.get_bits(0, 2), x2_2 = bits.get_bits(2, 2); + const uint32_t x5_4 = bits.get_bits(5, 4), x8_1 = bits.get_bits(8, 1); + const uint32_t x7_2 = bits.get_bits(7, 2); + + int row_index = -1; + if (x0_2 == 0) + { + if (x7_2 == 0b00) + row_index = 5; + else if (x7_2 == 0b01) + row_index = 6; + else if (x5_4 == 0b1100) + row_index = 7; + else if (x5_4 == 0b1101) + row_index = 8; + else if (x7_2 == 0b10) + row_index = 9; + } + else + { + if (x2_2 == 0b00) + row_index = 0; + else if (x2_2 == 0b01) + row_index = 1; + else if (x2_2 == 0b10) + row_index = 2; + else if ((x2_2 == 0b11) && (x8_1 == 0)) + row_index = 3; + else if ((x2_2 == 0b11) && (x8_1 == 1)) + row_index = 4; + } + if (row_index < 0) + return false; + + const astc_dec_row& r = s_dec_rows[row_index]; + + bool P = false, Dp = false; + uint32_t W = r.W_bias, H = r.H_bias; + + if (r.P_ofs >= 0) + P = bits.get_bits(r.P_ofs, 1) != 0; + + if (r.Dp_ofs >= 0) + Dp = bits.get_bits(r.Dp_ofs, 1) != 0; + + if (r.W_size) + W += bits.get_bits(r.W_ofs, r.W_size); + + if (r.H_size) + H += bits.get_bits(r.H_ofs, r.H_size); + + assert((W >= MIN_GRID_DIM) && (W <= MAX_BLOCK_DIM)); + assert((H >= MIN_GRID_DIM) && (H <= MAX_BLOCK_DIM)); + + int p0 = bits.get_bits(r.p0_ofs, 1); + int p1 = bits.get_bits(r.p1_ofs, 1); + int p2 = bits.get_bits(r.p2_ofs, 1); + + uint32_t p = p0 | (p1 << 1) | (p2 << 2); + if (p < 2) + return false; + + log_blk.m_grid_width = W; + log_blk.m_grid_height = H; + + log_blk.m_weight_ise_range = (p - 2) + (P * BISE_10_LEVELS); + assert(log_blk.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE); + + log_blk.m_dual_plane = Dp; + + return true; + } + + static inline uint32_t read_le_dword(const uint8_t* pBytes) + { + return (pBytes[0]) | (pBytes[1] << 8U) | (pBytes[2] << 16U) | (pBytes[3] << 24U); + } + + // See 18.12.Integer Sequence Encoding - tables computed by executing the decoder functions with all possible 8/7-bit inputs. + static const uint8_t s_trit_decode[256][5] = + { + {0,0,0,0,0},{1,0,0,0,0},{2,0,0,0,0},{0,0,2,0,0},{0,1,0,0,0},{1,1,0,0,0},{2,1,0,0,0},{1,0,2,0,0}, + {0,2,0,0,0},{1,2,0,0,0},{2,2,0,0,0},{2,0,2,0,0},{0,2,2,0,0},{1,2,2,0,0},{2,2,2,0,0},{2,0,2,0,0}, + {0,0,1,0,0},{1,0,1,0,0},{2,0,1,0,0},{0,1,2,0,0},{0,1,1,0,0},{1,1,1,0,0},{2,1,1,0,0},{1,1,2,0,0}, + {0,2,1,0,0},{1,2,1,0,0},{2,2,1,0,0},{2,1,2,0,0},{0,0,0,2,2},{1,0,0,2,2},{2,0,0,2,2},{0,0,2,2,2}, + {0,0,0,1,0},{1,0,0,1,0},{2,0,0,1,0},{0,0,2,1,0},{0,1,0,1,0},{1,1,0,1,0},{2,1,0,1,0},{1,0,2,1,0}, + {0,2,0,1,0},{1,2,0,1,0},{2,2,0,1,0},{2,0,2,1,0},{0,2,2,1,0},{1,2,2,1,0},{2,2,2,1,0},{2,0,2,1,0}, + {0,0,1,1,0},{1,0,1,1,0},{2,0,1,1,0},{0,1,2,1,0},{0,1,1,1,0},{1,1,1,1,0},{2,1,1,1,0},{1,1,2,1,0}, + {0,2,1,1,0},{1,2,1,1,0},{2,2,1,1,0},{2,1,2,1,0},{0,1,0,2,2},{1,1,0,2,2},{2,1,0,2,2},{1,0,2,2,2}, + {0,0,0,2,0},{1,0,0,2,0},{2,0,0,2,0},{0,0,2,2,0},{0,1,0,2,0},{1,1,0,2,0},{2,1,0,2,0},{1,0,2,2,0}, + {0,2,0,2,0},{1,2,0,2,0},{2,2,0,2,0},{2,0,2,2,0},{0,2,2,2,0},{1,2,2,2,0},{2,2,2,2,0},{2,0,2,2,0}, + {0,0,1,2,0},{1,0,1,2,0},{2,0,1,2,0},{0,1,2,2,0},{0,1,1,2,0},{1,1,1,2,0},{2,1,1,2,0},{1,1,2,2,0}, + {0,2,1,2,0},{1,2,1,2,0},{2,2,1,2,0},{2,1,2,2,0},{0,2,0,2,2},{1,2,0,2,2},{2,2,0,2,2},{2,0,2,2,2}, + {0,0,0,0,2},{1,0,0,0,2},{2,0,0,0,2},{0,0,2,0,2},{0,1,0,0,2},{1,1,0,0,2},{2,1,0,0,2},{1,0,2,0,2}, + {0,2,0,0,2},{1,2,0,0,2},{2,2,0,0,2},{2,0,2,0,2},{0,2,2,0,2},{1,2,2,0,2},{2,2,2,0,2},{2,0,2,0,2}, + {0,0,1,0,2},{1,0,1,0,2},{2,0,1,0,2},{0,1,2,0,2},{0,1,1,0,2},{1,1,1,0,2},{2,1,1,0,2},{1,1,2,0,2}, + {0,2,1,0,2},{1,2,1,0,2},{2,2,1,0,2},{2,1,2,0,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,0,2,2,2}, + {0,0,0,0,1},{1,0,0,0,1},{2,0,0,0,1},{0,0,2,0,1},{0,1,0,0,1},{1,1,0,0,1},{2,1,0,0,1},{1,0,2,0,1}, + {0,2,0,0,1},{1,2,0,0,1},{2,2,0,0,1},{2,0,2,0,1},{0,2,2,0,1},{1,2,2,0,1},{2,2,2,0,1},{2,0,2,0,1}, + {0,0,1,0,1},{1,0,1,0,1},{2,0,1,0,1},{0,1,2,0,1},{0,1,1,0,1},{1,1,1,0,1},{2,1,1,0,1},{1,1,2,0,1}, + {0,2,1,0,1},{1,2,1,0,1},{2,2,1,0,1},{2,1,2,0,1},{0,0,1,2,2},{1,0,1,2,2},{2,0,1,2,2},{0,1,2,2,2}, + {0,0,0,1,1},{1,0,0,1,1},{2,0,0,1,1},{0,0,2,1,1},{0,1,0,1,1},{1,1,0,1,1},{2,1,0,1,1},{1,0,2,1,1}, + {0,2,0,1,1},{1,2,0,1,1},{2,2,0,1,1},{2,0,2,1,1},{0,2,2,1,1},{1,2,2,1,1},{2,2,2,1,1},{2,0,2,1,1}, + {0,0,1,1,1},{1,0,1,1,1},{2,0,1,1,1},{0,1,2,1,1},{0,1,1,1,1},{1,1,1,1,1},{2,1,1,1,1},{1,1,2,1,1}, + {0,2,1,1,1},{1,2,1,1,1},{2,2,1,1,1},{2,1,2,1,1},{0,1,1,2,2},{1,1,1,2,2},{2,1,1,2,2},{1,1,2,2,2}, + {0,0,0,2,1},{1,0,0,2,1},{2,0,0,2,1},{0,0,2,2,1},{0,1,0,2,1},{1,1,0,2,1},{2,1,0,2,1},{1,0,2,2,1}, + {0,2,0,2,1},{1,2,0,2,1},{2,2,0,2,1},{2,0,2,2,1},{0,2,2,2,1},{1,2,2,2,1},{2,2,2,2,1},{2,0,2,2,1}, + {0,0,1,2,1},{1,0,1,2,1},{2,0,1,2,1},{0,1,2,2,1},{0,1,1,2,1},{1,1,1,2,1},{2,1,1,2,1},{1,1,2,2,1}, + {0,2,1,2,1},{1,2,1,2,1},{2,2,1,2,1},{2,1,2,2,1},{0,2,1,2,2},{1,2,1,2,2},{2,2,1,2,2},{2,1,2,2,2}, + {0,0,0,1,2},{1,0,0,1,2},{2,0,0,1,2},{0,0,2,1,2},{0,1,0,1,2},{1,1,0,1,2},{2,1,0,1,2},{1,0,2,1,2}, + {0,2,0,1,2},{1,2,0,1,2},{2,2,0,1,2},{2,0,2,1,2},{0,2,2,1,2},{1,2,2,1,2},{2,2,2,1,2},{2,0,2,1,2}, + {0,0,1,1,2},{1,0,1,1,2},{2,0,1,1,2},{0,1,2,1,2},{0,1,1,1,2},{1,1,1,1,2},{2,1,1,1,2},{1,1,2,1,2}, + {0,2,1,1,2},{1,2,1,1,2},{2,2,1,1,2},{2,1,2,1,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,1,2,2,2} + }; + + static const uint8_t s_quint_decode[128][3] = + { + {0,0,0},{1,0,0},{2,0,0},{3,0,0},{4,0,0},{0,4,0},{4,4,0},{4,4,4}, + {0,1,0},{1,1,0},{2,1,0},{3,1,0},{4,1,0},{1,4,0},{4,4,1},{4,4,4}, + {0,2,0},{1,2,0},{2,2,0},{3,2,0},{4,2,0},{2,4,0},{4,4,2},{4,4,4}, + {0,3,0},{1,3,0},{2,3,0},{3,3,0},{4,3,0},{3,4,0},{4,4,3},{4,4,4}, + {0,0,1},{1,0,1},{2,0,1},{3,0,1},{4,0,1},{0,4,1},{4,0,4},{0,4,4}, + {0,1,1},{1,1,1},{2,1,1},{3,1,1},{4,1,1},{1,4,1},{4,1,4},{1,4,4}, + {0,2,1},{1,2,1},{2,2,1},{3,2,1},{4,2,1},{2,4,1},{4,2,4},{2,4,4}, + {0,3,1},{1,3,1},{2,3,1},{3,3,1},{4,3,1},{3,4,1},{4,3,4},{3,4,4}, + {0,0,2},{1,0,2},{2,0,2},{3,0,2},{4,0,2},{0,4,2},{2,0,4},{3,0,4}, + {0,1,2},{1,1,2},{2,1,2},{3,1,2},{4,1,2},{1,4,2},{2,1,4},{3,1,4}, + {0,2,2},{1,2,2},{2,2,2},{3,2,2},{4,2,2},{2,4,2},{2,2,4},{3,2,4}, + {0,3,2},{1,3,2},{2,3,2},{3,3,2},{4,3,2},{3,4,2},{2,3,4},{3,3,4}, + {0,0,3},{1,0,3},{2,0,3},{3,0,3},{4,0,3},{0,4,3},{0,0,4},{1,0,4}, + {0,1,3},{1,1,3},{2,1,3},{3,1,3},{4,1,3},{1,4,3},{0,1,4},{1,1,4}, + {0,2,3},{1,2,3},{2,2,3},{3,2,3},{4,2,3},{2,4,3},{0,2,4},{1,2,4}, + {0,3,3},{1,3,3},{2,3,3},{3,3,3},{4,3,3},{3,4,3},{0,3,4},{1,3,4} + }; + + static void decode_trit_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val) + { + assert((num_vals >= 1) && (num_vals <= 5)); + uint32_t m[5] = { 0 }, T = 0; + + static const uint8_t s_t_bits[5] = { 2, 2, 1, 2, 1 }; + + for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++) + { + if (bits_per_val) + m[c] = bits.next_bits(bit_ofs, bits_per_val); + T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs); + T_ofs += s_t_bits[c]; + } + + const uint8_t (&p_trits)[5] = s_trit_decode[T]; + + for (uint32_t i = 0; i < num_vals; i++) + pVals[i] = (uint8_t)((p_trits[i] << bits_per_val) | m[i]); + } + + static void decode_quint_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val) + { + assert((num_vals >= 1) && (num_vals <= 3)); + uint32_t m[3] = { 0 }, T = 0; + + static const uint8_t s_t_bits[3] = { 3, 2, 2 }; + + for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++) + { + if (bits_per_val) + m[c] = bits.next_bits(bit_ofs, bits_per_val); + T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs); + T_ofs += s_t_bits[c]; + } + + const uint8_t (&p_quints)[3] = s_quint_decode[T]; + + for (uint32_t i = 0; i < num_vals; i++) + pVals[i] = (uint8_t)((p_quints[i] << bits_per_val) | m[i]); + } + + static void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t bit_ofs) + { + assert(num_vals && (ise_range < TOTAL_ISE_RANGES)); + + const uint32_t bits_per_val = g_ise_range_table[ise_range][0]; + + if (g_ise_range_table[ise_range][1]) + { + // Trits+bits, 5 vals per block, 7 bits extra per block + const uint32_t total_blocks = (num_vals + 4) / 5; + for (uint32_t b = 0; b < total_blocks; b++) + { + const uint32_t num_vals_in_block = std::min<int>(num_vals - 5 * b, 5); + decode_trit_block(pVals + 5 * b, num_vals_in_block, bits, bit_ofs, bits_per_val); + } + } + else if (g_ise_range_table[ise_range][2]) + { + // Quints+bits, 3 vals per block, 8 bits extra per block + const uint32_t total_blocks = (num_vals + 2) / 3; + for (uint32_t b = 0; b < total_blocks; b++) + { + const uint32_t num_vals_in_block = std::min<int>(num_vals - 3 * b, 3); + decode_quint_block(pVals + 3 * b, num_vals_in_block, bits, bit_ofs, bits_per_val); + } + } + else + { + assert(bits_per_val); + + // Only bits + for (uint32_t i = 0; i < num_vals; i++) + pVals[i] = (uint8_t)bits.next_bits(bit_ofs, bits_per_val); + } + } + + void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t* pBits128, uint32_t bit_ofs) + { + const uint128 bits( + (uint64_t)read_le_dword(pBits128) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t))) << 32), + (uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 3)) << 32)); + + return decode_bise(ise_range, pVals, num_vals, bits, bit_ofs); + } + + // Decodes a physical ASTC block to a logical ASTC block. + // blk_width/blk_height are only used to validate the weight grid's dimensions. + bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height) + { + assert(is_valid_block_size(blk_width, blk_height)); + + const uint8_t* pS = (uint8_t*)pASTC_block; + + log_blk.clear(); + log_blk.m_error_flag = true; + + const uint128 bits( + (uint64_t)read_le_dword(pS) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t))) << 32), + (uint64_t)read_le_dword(pS + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t) * 3)) << 32)); + + const uint128 rev_bits(bits.get_reversed_bits()); + + if (!decode_config(bits, log_blk)) + return false; + + if (log_blk.m_solid_color_flag_hdr || log_blk.m_solid_color_flag_ldr) + { + // Void extent + log_blk.m_error_flag = false; + return true; + } + + // Check grid dimensions + if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height)) + return false; + + // Now we have the grid width/height, dual plane, weight ISE range + + const uint32_t total_grid_weights = (log_blk.m_dual_plane ? 2 : 1) * (log_blk.m_grid_width * log_blk.m_grid_height); + const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_blk.m_weight_ise_range); + + // 18.24 Illegal Encodings + if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96)) + return false; + + const uint32_t end_of_weight_bit_ofs = 128 - total_weight_bits; + + uint32_t total_extra_bits = 0; + + // Right before the weight bits, there may be extra CEM bits, then the 2 CCS bits if dual plane. + + log_blk.m_num_partitions = bits.get_bits(11, 2) + 1; + if (log_blk.m_num_partitions == 1) + log_blk.m_color_endpoint_modes[0] = bits.get_bits(13, 4); // read CEM bits + else + { + // 2 or more partitions + if (log_blk.m_dual_plane && (log_blk.m_num_partitions == 4)) + return false; + + log_blk.m_partition_id = bits.get_bits(13, 10); + + uint32_t cem_bits = bits.get_bits(23, 6); + + if ((cem_bits & 3) == 0) + { + // All CEM's the same + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + log_blk.m_color_endpoint_modes[i] = cem_bits >> 2; + } + else + { + // CEM's different, but within up to 2 adjacent classes + const uint32_t first_cem_index = ((cem_bits & 3) - 1) * 4; + + total_extra_bits = 3 * log_blk.m_num_partitions - 4; + + if ((total_weight_bits + total_extra_bits) > 128) + return false; + + uint32_t cem_bit_pos = end_of_weight_bit_ofs - total_extra_bits; + + uint32_t c[4] = { 0 }, m[4] = { 0 }; + + cem_bits >>= 2; + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++, cem_bits >>= 1) + c[i] = cem_bits & 1; + + switch (log_blk.m_num_partitions) + { + case 2: + { + m[0] = cem_bits & 3; + m[1] = bits.next_bits(cem_bit_pos, 2); + break; + } + case 3: + { + m[0] = cem_bits & 1; + m[0] |= (bits.next_bits(cem_bit_pos, 1) << 1); + m[1] = bits.next_bits(cem_bit_pos, 2); + m[2] = bits.next_bits(cem_bit_pos, 2); + break; + } + case 4: + { + for (uint32_t i = 0; i < 4; i++) + m[i] = bits.next_bits(cem_bit_pos, 2); + break; + } + default: + { + assert(0); + break; + } + } + + assert(cem_bit_pos == end_of_weight_bit_ofs); + + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + { + log_blk.m_color_endpoint_modes[i] = first_cem_index + (c[i] * 4) + m[i]; + assert(log_blk.m_color_endpoint_modes[i] <= 15); + } + } + } + + // Now we have all the CEM indices. + + if (log_blk.m_dual_plane) + { + // Read CCS bits, beneath any CEM bits + total_extra_bits += 2; + + if (total_extra_bits > end_of_weight_bit_ofs) + return false; + + uint32_t ccs_bit_pos = end_of_weight_bit_ofs - total_extra_bits; + log_blk.m_color_component_selector = bits.get_bits(ccs_bit_pos, 2); + } + + uint32_t config_bit_pos = 11 + 2; // config+num_parts + if (log_blk.m_num_partitions == 1) + config_bit_pos += 4; // CEM bits + else + config_bit_pos += 10 + 6; // part_id+CEM bits + + // config+num_parts+total_extra_bits (CEM extra+CCS) + uint32_t total_config_bits = config_bit_pos + total_extra_bits; + + // Compute number of remaining bits in block + const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits; + if (num_remaining_bits < 0) + return false; + + // Compute total number of ISE encoded color endpoint mode values + uint32_t total_cem_vals = 0; + for (uint32_t j = 0; j < log_blk.m_num_partitions; j++) + total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[j]); + + if (total_cem_vals > MAX_ENDPOINTS) + return false; + + // Infer endpoint ISE range based off the # of values we need to encode, and the # of remaining bits in the block + int endpoint_ise_range = -1; + for (int k = 20; k > 0; k--) + { + int b = get_ise_sequence_bits(total_cem_vals, k); + if (b <= num_remaining_bits) + { + endpoint_ise_range = k; + break; + } + } + + // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints + if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE) + return false; + + log_blk.m_endpoint_ise_range = endpoint_ise_range; + + // Decode endpoints forwards in block + decode_bise(log_blk.m_endpoint_ise_range, log_blk.m_endpoints, total_cem_vals, bits, config_bit_pos); + + // Decode grid weights backwards in block + decode_bise(log_blk.m_weight_ise_range, log_blk.m_weights, total_grid_weights, rev_bits, 0); + + log_blk.m_error_flag = false; + + return true; + } + +} // namespace astc_helpers + +#endif //BASISU_ASTC_HELPERS_IMPLEMENTATION diff --git a/thirdparty/basis_universal/transcoder/basisu_containers.h b/thirdparty/basis_universal/transcoder/basisu_containers.h index d3e14369ba..bfc51bb499 100644 --- a/thirdparty/basis_universal/transcoder/basisu_containers.h +++ b/thirdparty/basis_universal/transcoder/basisu_containers.h @@ -188,8 +188,9 @@ namespace basisu #define BASISU_IS_SCALAR_TYPE(T) (scalar_type<T>::cFlag) -#if defined(__GNUC__) && __GNUC__<5 - #define BASISU_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) +#if !defined(BASISU_HAVE_STD_TRIVIALLY_COPYABLE) && defined(__GNUC__) && __GNUC__<5 + //#define BASISU_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) + #define BASISU_IS_TRIVIALLY_COPYABLE(...) __is_trivially_copyable(__VA_ARGS__) #else #define BASISU_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value #endif @@ -286,8 +287,19 @@ namespace basisu if (BASISU_IS_BITWISE_COPYABLE(T)) { +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif +#endif if ((m_p) && (other.m_p)) memcpy(m_p, other.m_p, m_size * sizeof(T)); +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif } else { @@ -330,8 +342,19 @@ namespace basisu if (BASISU_IS_BITWISE_COPYABLE(T)) { +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif +#endif if ((m_p) && (other.m_p)) memcpy(m_p, other.m_p, other.m_size * sizeof(T)); +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif } else { @@ -501,7 +524,7 @@ namespace basisu if (new_capacity > m_capacity) { - if (!increase_capacity(new_capacity, false)) + if (!increase_capacity(new_capacity, false, true)) return false; } else if (new_capacity < m_capacity) @@ -509,7 +532,8 @@ namespace basisu // Must work around the lack of a "decrease_capacity()" method. // This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize. vector tmp; - tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false); + if (!tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false, true)) + return false; tmp = *this; swap(tmp); } @@ -750,7 +774,21 @@ namespace basisu } // Copy "down" the objects to preserve, filling in the empty slots. + +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif +#endif + memmove(pDst, pSrc, num_to_move * sizeof(T)); + +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif } else { @@ -1003,7 +1041,21 @@ namespace basisu inline void set_all(const T& o) { if ((sizeof(T) == 1) && (scalar_type<T>::cFlag)) + { +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif +#endif memset(m_p, *reinterpret_cast<const uint8_t*>(&o), m_size); + +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif + } else { T* pDst = m_p; @@ -1029,7 +1081,7 @@ namespace basisu // Important: This method is used in Basis Universal. If you change how this container allocates memory, you'll need to change any users of this method. inline bool grant_ownership(T* p, uint32_t size, uint32_t capacity) { - // To to prevent the caller from obviously shooting themselves in the foot. + // To prevent the caller from obviously shooting themselves in the foot. if (((p + capacity) > m_p) && (p < (m_p + m_capacity))) { // Can grant ownership of a block inside the container itself! diff --git a/thirdparty/basis_universal/transcoder/basisu_containers_impl.h b/thirdparty/basis_universal/transcoder/basisu_containers_impl.h index d5cb61569b..60c0b3d89f 100644 --- a/thirdparty/basis_universal/transcoder/basisu_containers_impl.h +++ b/thirdparty/basis_universal/transcoder/basisu_containers_impl.h @@ -19,23 +19,30 @@ namespace basisu if (m_capacity >= min_new_capacity) return true; - size_t new_capacity = min_new_capacity; - if ((grow_hint) && (!helpers::is_power_of_2((uint64_t)new_capacity))) - { - new_capacity = (size_t)helpers::next_pow2((uint64_t)new_capacity); - - assert(new_capacity && (new_capacity > m_capacity)); + uint64_t new_capacity_u64 = min_new_capacity; + if ((grow_hint) && (!helpers::is_power_of_2(new_capacity_u64))) + new_capacity_u64 = helpers::next_pow2(new_capacity_u64); - if (new_capacity < min_new_capacity) - { - if (nofail) - return false; - fprintf(stderr, "vector too large\n"); - abort(); - } + size_t new_capacity = (size_t)new_capacity_u64; + if (new_capacity != new_capacity_u64) + { + if (nofail) + return false; + fprintf(stderr, "elemental_vector::increase_capacity: vector too large\n"); + abort(); } - const size_t desired_size = element_size * new_capacity; + const uint64_t desired_size_u64 = (uint64_t)element_size * new_capacity; + + const size_t desired_size = (size_t)desired_size_u64; + if (desired_size_u64 != desired_size) + { + if (nofail) + return false; + fprintf(stderr, "elemental_vector::increase_capacity: vector too large\n"); + abort(); + } + size_t actual_size = 0; if (!pMover) { @@ -46,11 +53,7 @@ namespace basisu return false; char buf[256]; -#ifdef _MSC_VER - sprintf_s(buf, sizeof(buf), "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size); -#else - sprintf(buf, "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size); -#endif + snprintf(buf, sizeof(buf), "elemental_vector::increase_capacity: realloc() failed allocating %zu bytes", desired_size); fprintf(stderr, "%s", buf); abort(); } @@ -75,11 +78,7 @@ namespace basisu return false; char buf[256]; -#ifdef _MSC_VER - sprintf_s(buf, sizeof(buf), "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size); -#else - sprintf(buf, "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size); -#endif + snprintf(buf, sizeof(buf), "elemental_vector::increase_capacity: malloc() failed allocating %zu bytes", desired_size); fprintf(stderr, "%s", buf); abort(); } diff --git a/thirdparty/basis_universal/transcoder/basisu_file_headers.h b/thirdparty/basis_universal/transcoder/basisu_file_headers.h index 4316d738e6..d29e3feb03 100644 --- a/thirdparty/basis_universal/transcoder/basisu_file_headers.h +++ b/thirdparty/basis_universal/transcoder/basisu_file_headers.h @@ -1,5 +1,5 @@ // basis_file_headers.h -// Copyright (C) 2019-2020 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -89,7 +89,8 @@ namespace basist enum class basis_tex_format { cETC1S = 0, - cUASTC4x4 = 1 + cUASTC4x4 = 1, + cUASTC_HDR_4x4 = 2 }; struct basis_file_header diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp index c698861f3b..32018cd282 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp @@ -1,5 +1,5 @@ // basisu_transcoder.cpp -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,6 +17,11 @@ #include <limits.h> #include "basisu_containers_impl.h" +#define BASISU_ASTC_HELPERS_IMPLEMENTATION +#include "basisu_astc_helpers.h" + +#include "basisu_astc_hdr_core.h" + #ifndef BASISD_IS_BIG_ENDIAN // TODO: This doesn't work on OSX. How can this be so difficult? //#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN) @@ -139,6 +144,10 @@ #endif #endif +#ifndef BASISD_SUPPORT_UASTC_HDR + #define BASISD_SUPPORT_UASTC_HDR 1 +#endif + #define BASISD_WRITE_NEW_BC7_MODE5_TABLES 0 #define BASISD_WRITE_NEW_DXT1_TABLES 0 #define BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES 0 @@ -1908,17 +1917,24 @@ namespace basist void basisu_transcoder_init() { if (g_transcoder_initialized) - { - BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); + { + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); return; - } + } - BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); #if BASISD_SUPPORT_UASTC uastc_init(); #endif +#if BASISD_SUPPORT_UASTC_HDR + // TODO: Examine this, optimize for startup time/mem utilization. + astc_helpers::init_tables(false); + + astc_hdr_core_init(); +#endif + #if BASISD_SUPPORT_ASTC transcoder_init_astc(); #endif @@ -2027,6 +2043,10 @@ namespace basist transcoder_init_pvrtc2(); #endif +#if BASISD_SUPPORT_UASTC_HDR + bc6h_enc_init(); +#endif + g_transcoder_initialized = true; } @@ -6928,7 +6948,7 @@ namespace basist static inline int sq(int x) { return x * x; } - // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0. + // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0. // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! // And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it. static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook) @@ -7515,6 +7535,8 @@ namespace basist } #endif // BASISD_SUPPORT_PVRTC2 + //------------------------------------------------------------------------------------------------ + basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder() : m_pGlobal_codebook(nullptr), m_selector_history_buf_size(0) @@ -8620,7 +8642,7 @@ namespace basist // Now make sure the output buffer is large enough, or we'll overwrite memory. if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n"); + BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n"); return false; } } @@ -8632,7 +8654,7 @@ namespace basist if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1) { - BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n"); + BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n"); return false; } } @@ -8640,7 +8662,7 @@ namespace basist { if (output_blocks_buf_size_in_blocks_or_pixels < total_slice_blocks) { - BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n"); + BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n"); return false; } } @@ -9242,13 +9264,17 @@ namespace basist return status; } + + //------------------------------------------------------------------------------------------------ basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder() { } - bool basisu_lowlevel_uastc_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, - uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, + bool basisu_lowlevel_uastc_transcoder::transcode_slice( + void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, + const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags) { BASISU_NOTE_UNUSED(pState); @@ -9784,6 +9810,317 @@ namespace basist return status; } + + //------------------------------------------------------------------------------------------------ + + basisu_lowlevel_uastc_hdr_transcoder::basisu_lowlevel_uastc_hdr_transcoder() + { + } + + bool basisu_lowlevel_uastc_hdr_transcoder::transcode_slice( + void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, + const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags) + { + BASISU_NOTE_UNUSED(pState); + BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks); + BASISU_NOTE_UNUSED(has_alpha); + BASISU_NOTE_UNUSED(channel0); + BASISU_NOTE_UNUSED(channel1); + BASISU_NOTE_UNUSED(decode_flags); + + assert(g_transcoder_initialized); + if (!g_transcoder_initialized) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder not globally initialized.\n"); + return false; + } + +#if BASISD_SUPPORT_UASTC_HDR + const uint32_t total_blocks = num_blocks_x * num_blocks_y; + + if (!output_row_pitch_in_blocks_or_pixels) + { + if (basis_block_format_is_uncompressed(fmt)) + output_row_pitch_in_blocks_or_pixels = orig_width; + else + output_row_pitch_in_blocks_or_pixels = num_blocks_x; + } + + if (basis_block_format_is_uncompressed(fmt)) + { + if (!output_rows_in_pixels) + output_rows_in_pixels = orig_height; + } + + uint32_t total_expected_block_bytes = sizeof(astc_blk) * total_blocks; + if (image_data_size < total_expected_block_bytes) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n"); + return false; + } + + const astc_blk* pSource_block = reinterpret_cast<const astc_blk*>(pImage_data); + + bool status = false; + + // TODO: Optimize pure memcpy() case. + + for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y) + { + void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; + + for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes) + { + switch (fmt) + { + case block_format::cUASTC_HDR_4x4: + case block_format::cASTC_HDR_4x4: + { + // Nothing to do, UASTC HDR is just ASTC. + memcpy(pDst_block, pSource_block, sizeof(uastc_block)); + status = true; + break; + } + case block_format::cBC6H: + { + status = astc_hdr_transcode_to_bc6h(*pSource_block, *(bc6h_block *)pDst_block); + break; + } + case block_format::cRGB_9E5: + { + astc_helpers::log_astc_block log_blk; + status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4); + if (status) + { + uint32_t* pDst_pixels = reinterpret_cast<uint32_t*>( + static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t) + ); + + uint32_t blk_texels[4][4]; + + status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeRGB9E5); + + if (status) + { + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x); + + pDst_pixels += output_row_pitch_in_blocks_or_pixels; + } // y + } + } + + break; + } + case block_format::cRGBA_HALF: + { + astc_helpers::log_astc_block log_blk; + status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4); + if (status) + { + half_float* pDst_pixels = reinterpret_cast<half_float*>( + static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4 + ); + + half_float blk_texels[4][4][4]; + status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16); + + if (status) + { + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + pDst_pixels[0 + 4 * x] = blk_texels[y][x][0]; + pDst_pixels[1 + 4 * x] = blk_texels[y][x][1]; + pDst_pixels[2 + 4 * x] = blk_texels[y][x][2]; + pDst_pixels[3 + 4 * x] = blk_texels[y][x][3]; + } // x + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4; + } // y + } + } + + break; + } + case block_format::cRGB_HALF: + { + astc_helpers:: log_astc_block log_blk; + status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4); + if (status) + { + half_float* pDst_pixels = + reinterpret_cast<half_float*>(static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3); + + half_float blk_texels[4][4][4]; + status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16); + if (status) + { + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + pDst_pixels[0 + 3 * x] = blk_texels[y][x][0]; + pDst_pixels[1 + 3 * x] = blk_texels[y][x][1]; + pDst_pixels[2 + 3 * x] = blk_texels[y][x][2]; + } // x + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3; + } // y + } + } + + break; + } + default: + assert(0); + break; + + } + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder failed to unpack a UASTC HDR block - this is a bug, or the data was corrupted\n"); return false; + } + + } // block_x + + } // block_y + + return true; +#else + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: UASTC_HDR is unsupported\n"); + + BASISU_NOTE_UNUSED(decode_flags); + BASISU_NOTE_UNUSED(channel0); + BASISU_NOTE_UNUSED(channel1); + BASISU_NOTE_UNUSED(output_rows_in_pixels); + BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels); + BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes); + BASISU_NOTE_UNUSED(fmt); + BASISU_NOTE_UNUSED(image_data_size); + BASISU_NOTE_UNUSED(pImage_data); + BASISU_NOTE_UNUSED(num_blocks_x); + BASISU_NOTE_UNUSED(num_blocks_y); + BASISU_NOTE_UNUSED(pDst_blocks); + + return false; +#endif + } + + bool basisu_lowlevel_uastc_hdr_transcoder::transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint32_t slice_offset, uint32_t slice_length, + uint32_t decode_flags, + bool has_alpha, + bool is_video, + uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, + uint32_t output_rows_in_pixels, + int channel0, int channel1) + { + BASISU_NOTE_UNUSED(is_video); + BASISU_NOTE_UNUSED(level_index); + BASISU_NOTE_UNUSED(decode_flags); + + if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: source data buffer too small\n"); + return false; + } + + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); + const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; + + if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: output buffer size too small\n"); + return false; + } + + bool status = false; + + switch (target_format) + { + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_4x4, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1); + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC6H: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to BC6H failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGB_HALF: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGBA_HALF: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGB_9E5: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n"); + } + break; + } + default: + { + assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: Invalid format\n"); + break; + } + } + + return status; + } + + //------------------------------------------------------------------------------------------------ basisu_transcoder::basisu_transcoder() : m_ready_to_transcode(false) @@ -10390,7 +10727,7 @@ namespace basist } else { - // Nothing special to do for UASTC. + // Nothing special to do for UASTC/UASTC HDR. if (m_lowlevel_etc1s_decoder.m_local_endpoints.size()) { m_lowlevel_etc1s_decoder.clear(); @@ -10510,7 +10847,14 @@ namespace basist return false; } - if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4) + { + return m_lowlevel_uastc_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, + pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, + fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, + output_rows_in_pixels, channel0, channel1, decode_flags); + } + else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) { return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, @@ -10742,7 +11086,18 @@ namespace basist memset(static_cast<uint8_t*>(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel); } - if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4) + { + const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; + + // Use the container independent image transcode method. + status = m_lowlevel_uastc_hdr_decoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, + pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, + decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + } + else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) { const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; @@ -10808,20 +11163,27 @@ namespace basist return 8; case transcoder_texture_format::cTFBC7_RGBA: case transcoder_texture_format::cTFBC7_ALT: + case transcoder_texture_format::cTFBC6H: case transcoder_texture_format::cTFETC2_RGBA: case transcoder_texture_format::cTFBC3_RGBA: case transcoder_texture_format::cTFBC5_RG: case transcoder_texture_format::cTFASTC_4x4_RGBA: + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: case transcoder_texture_format::cTFATC_RGBA: case transcoder_texture_format::cTFFXT1_RGB: case transcoder_texture_format::cTFETC2_EAC_RG11: return 16; case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGB_9E5: return sizeof(uint32_t); case transcoder_texture_format::cTFRGB565: case transcoder_texture_format::cTFBGR565: case transcoder_texture_format::cTFRGBA4444: return sizeof(uint16_t); + case transcoder_texture_format::cTFRGB_HALF: + return sizeof(half_float) * 3; + case transcoder_texture_format::cTFRGBA_HALF: + return sizeof(half_float) * 4; default: assert(0); BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); @@ -10845,17 +11207,22 @@ namespace basist case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA"; case transcoder_texture_format::cTFBC5_RG: return "BC5_RG"; case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA"; + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return "ASTC_HDR_RGBA"; case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB"; case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA"; case transcoder_texture_format::cTFRGBA32: return "RGBA32"; case transcoder_texture_format::cTFRGB565: return "RGB565"; case transcoder_texture_format::cTFBGR565: return "BGR565"; case transcoder_texture_format::cTFRGBA4444: return "RGBA4444"; + case transcoder_texture_format::cTFRGBA_HALF: return "RGBA_HALF"; + case transcoder_texture_format::cTFRGB_9E5: return "RGB_9E5"; + case transcoder_texture_format::cTFRGB_HALF: return "RGB_HALF"; case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB"; case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB"; case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA"; case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11"; case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11"; + case transcoder_texture_format::cTFBC6H: return "BC6H"; default: assert(0); BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); @@ -10881,7 +11248,13 @@ namespace basist case block_format::cRGB565: return "RGB565"; case block_format::cBGR565: return "BGR565"; case block_format::cRGBA4444: return "RGBA4444"; + case block_format::cRGBA_HALF: return "RGBA_HALF"; + case block_format::cRGB_HALF: return "RGB_HALF"; + case block_format::cRGB_9E5: return "RGB_9E5"; case block_format::cUASTC_4x4: return "UASTC_4x4"; + case block_format::cUASTC_HDR_4x4: return "UASTC_HDR_4x4"; + case block_format::cBC6H: return "BC6H"; + case block_format::cASTC_HDR_4x4: return "ASTC_HDR_4x4"; case block_format::cFXT1_RGB: return "FXT1_RGB"; case block_format::cPVRTC2_4_RGB: return "PVRTC2_4_RGB"; case block_format::cPVRTC2_4_RGBA: return "PVRTC2_4_RGBA"; @@ -10914,11 +11287,13 @@ namespace basist bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt) { + // TODO: Technically ASTC_HDR does support alpha, but UASTC_HDR doesn't yet support it. Unsure what to do here. switch (fmt) { case transcoder_texture_format::cTFETC2_RGBA: case transcoder_texture_format::cTFBC3_RGBA: case transcoder_texture_format::cTFASTC_4x4_RGBA: + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: case transcoder_texture_format::cTFBC7_RGBA: case transcoder_texture_format::cTFBC7_ALT: case transcoder_texture_format::cTFPVRTC1_4_RGBA: @@ -10926,6 +11301,23 @@ namespace basist case transcoder_texture_format::cTFATC_RGBA: case transcoder_texture_format::cTFRGBA32: case transcoder_texture_format::cTFRGBA4444: + case transcoder_texture_format::cTFRGBA_HALF: + return true; + default: + break; + } + return false; + } + + bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: + case transcoder_texture_format::cTFBC6H: + case transcoder_texture_format::cTFRGBA_HALF: + case transcoder_texture_format::cTFRGB_HALF: + case transcoder_texture_format::cTFRGB_9E5: return true; default: break; @@ -10947,13 +11339,18 @@ namespace basist case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA; case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3; case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5; - case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC4x4; + case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC_LDR_4x4; + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return basisu::texture_format::cASTC_HDR_4x4; + case transcoder_texture_format::cTFBC6H: return basisu::texture_format::cBC6HUnsigned; case transcoder_texture_format::cTFATC_RGB: return basisu::texture_format::cATC_RGB; case transcoder_texture_format::cTFATC_RGBA: return basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA; case transcoder_texture_format::cTFRGBA32: return basisu::texture_format::cRGBA32; case transcoder_texture_format::cTFRGB565: return basisu::texture_format::cRGB565; case transcoder_texture_format::cTFBGR565: return basisu::texture_format::cBGR565; case transcoder_texture_format::cTFRGBA4444: return basisu::texture_format::cRGBA4444; + case transcoder_texture_format::cTFRGBA_HALF: return basisu::texture_format::cRGBA_HALF; + case transcoder_texture_format::cTFRGB_9E5: return basisu::texture_format::cRGB_9E5; + case transcoder_texture_format::cTFRGB_HALF: return basisu::texture_format::cRGB_HALF; case transcoder_texture_format::cTFFXT1_RGB: return basisu::texture_format::cFXT1_RGB; case transcoder_texture_format::cTFPVRTC2_4_RGB: return basisu::texture_format::cPVRTC2_4_RGBA; case transcoder_texture_format::cTFPVRTC2_4_RGBA: return basisu::texture_format::cPVRTC2_4_RGBA; @@ -10975,6 +11372,9 @@ namespace basist case transcoder_texture_format::cTFRGB565: case transcoder_texture_format::cTFBGR565: case transcoder_texture_format::cTFRGBA4444: + case transcoder_texture_format::cTFRGB_HALF: + case transcoder_texture_format::cTFRGBA_HALF: + case transcoder_texture_format::cTFRGB_9E5: return true; default: break; @@ -10995,6 +11395,9 @@ namespace basist case block_format::cRGBA4444_COLOR: case block_format::cRGBA4444_ALPHA: case block_format::cRGBA4444_COLOR_OPAQUE: + case block_format::cRGBA_HALF: + case block_format::cRGB_HALF: + case block_format::cRGB_9E5: return true; default: break; @@ -11007,11 +11410,16 @@ namespace basist switch (fmt) { case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGB_9E5: return sizeof(uint32_t); case transcoder_texture_format::cTFRGB565: case transcoder_texture_format::cTFBGR565: case transcoder_texture_format::cTFRGBA4444: return sizeof(uint16_t); + case transcoder_texture_format::cTFRGB_HALF: + return sizeof(half_float) * 3; + case transcoder_texture_format::cTFRGBA_HALF: + return sizeof(half_float) * 4; default: break; } @@ -11038,8 +11446,26 @@ namespace basist bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt) { - if (fmt == basis_tex_format::cUASTC4x4) + if (fmt == basis_tex_format::cUASTC_HDR_4x4) + { + // UASTC HDR +#if BASISD_SUPPORT_UASTC_HDR + switch (tex_type) + { + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: + case transcoder_texture_format::cTFBC6H: + case transcoder_texture_format::cTFRGBA_HALF: + case transcoder_texture_format::cTFRGB_HALF: + case transcoder_texture_format::cTFRGB_9E5: + return true; + default: + break; + } +#endif + } + else if (fmt == basis_tex_format::cUASTC4x4) { + // UASTC LDR #if BASISD_SUPPORT_UASTC switch (tex_type) { @@ -11049,6 +11475,12 @@ namespace basist case transcoder_texture_format::cTFATC_RGB: case transcoder_texture_format::cTFATC_RGBA: case transcoder_texture_format::cTFFXT1_RGB: + // UASTC LDR doesn't support transcoding to HDR formats + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: + case transcoder_texture_format::cTFBC6H: + case transcoder_texture_format::cTFRGBA_HALF: + case transcoder_texture_format::cTFRGB_HALF: + case transcoder_texture_format::cTFRGB_9E5: return false; default: return true; @@ -11057,6 +11489,7 @@ namespace basist } else { + // ETC1S switch (tex_type) { // ETC1 and uncompressed are always supported. @@ -11812,7 +12245,7 @@ namespace basist // Encodes 3 values to output, usable for any range that uses quints and bits static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n) { - // First extract the trits and the bits from the 5 input values + // First extract the quints and the bits from the 3 input values int quints = 0, bits[3]; const uint32_t bit_mask = (1 << n) - 1; for (int i = 0; i < 3; i++) @@ -12131,11 +12564,13 @@ namespace basist return bits & ((1U << codesize) - 1U); } - - uint32_t byte_bit_offset = bit_offset & 7U; - const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]); - bit_offset += codesize; - return (w >> byte_bit_offset)& ((1U << codesize) - 1U); + else + { + uint32_t byte_bit_offset = bit_offset & 7U; + const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]); + bit_offset += codesize; + return (w >> byte_bit_offset) & ((1U << codesize) - 1U); + } } bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints) @@ -12170,6 +12605,7 @@ namespace basist return false; unpacked.m_mode = mode; + unpacked.m_common_pattern = 0; uint32_t bit_ofs = g_uastc_mode_huff_codes[mode][1]; @@ -16663,10 +17099,12 @@ namespace basist memcpy(&m_header, pData, sizeof(m_header)); - // We only support UASTC and ETC1S - if (m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) + // We only support UASTC LDR, UASTC HDR and ETC1S. + // Note the DFD's contents are what we are guided by for decoding the KTX2 file, not this format field (currently). + if ((m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) && + (m_header.m_vk_format != basist::KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK)) { - BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC format\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC LDR/HDR format\n"); return false; } @@ -16890,6 +17328,16 @@ namespace basist // We're assuming "DATA" means RGBA so it has alpha. m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG); } + else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_HDR) + { + m_format = basist::basis_tex_format::cUASTC_HDR_4x4; + + m_dfd_samples = 1; + m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); + + // We're assuming "DATA" means RGBA so it has alpha. + m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG); + } else { // Unsupported DFD color model. @@ -17167,7 +17615,8 @@ namespace basist return false; } } - else if (m_format == basist::basis_tex_format::cUASTC4x4) + else if ((m_format == basist::basis_tex_format::cUASTC4x4) || + (m_format == basist::basis_tex_format::cUASTC_HDR_4x4)) { // Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices. assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length); @@ -17188,14 +17637,29 @@ namespace basist return false; } - if (!m_uastc_transcoder.transcode_image(fmt, - pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, - (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index, - 0, (uint32_t)total_2D_image_size, - decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) + if (m_format == basist::basis_tex_format::cUASTC_HDR_4x4) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); - return false; + if (!m_uastc_hdr_transcoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index, + 0, (uint32_t)total_2D_image_size, + decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + return false; + } + } + else + { + if (!m_uastc_transcoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index, + 0, (uint32_t)total_2D_image_size, + decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + return false; + } } } else @@ -17476,4 +17940,1531 @@ namespace basist #endif } + //------------------------------- + +#ifdef BASISD_SUPPORT_UASTC_HDR + // This float->half conversion matches how "F32TO16" works on Intel GPU's. + basist::half_float float_to_half(float val) + { + union { float f; int32_t i; uint32_t u; } fi = { val }; + const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1; + int s = flt_s, e = 0, m = 0; + + // inf/NaN + if (flt_e == 0xff) + { + e = 31; + if (flt_m != 0) // NaN + m = 1; + } + // not zero or denormal + else if (flt_e != 0) + { + int new_exp = flt_e - 127; + if (new_exp > 15) + e = 31; + else if (new_exp < -14) + m = lrintf((1 << 24) * fabsf(fi.f)); + else + { + e = new_exp + 15; + m = lrintf(flt_m * (1.0f / ((float)(1 << 13)))); + } + } + + assert((0 <= m) && (m <= 1024)); + if (m == 1024) + { + e++; + m = 0; + } + + assert((s >= 0) && (s <= 1)); + assert((e >= 0) && (e <= 31)); + assert((m >= 0) && (m <= 1023)); + + basist::half_float result = (basist::half_float)((s << 15) | (e << 10) | m); + return result; + } + + //------------------------------------------------------------------------------------------------ + // HDR support + // + // Originally from bc6h_enc.cpp + // BC6H decoder fuzzed vs. DirectXTex's for unsigned/signed + + const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4] = // base bits, r, g, b + { + // 2 subsets + { 10, 5, 5, 5, }, // 0, mode 1 in MS/D3D docs + { 7, 6, 6, 6, }, // 1 + { 11, 5, 4, 4, }, // 2 + { 11, 4, 5, 4, }, // 3 + { 11, 4, 4, 5, }, // 4 + { 9, 5, 5, 5, }, // 5 + { 8, 6, 5, 5, }, // 6 + { 8, 5, 6, 5, }, // 7 + { 8, 5, 5, 6, }, // 8 + { 6, 6, 6, 6, }, // 9, endpoints not delta encoded, mode 10 in MS/D3D docs + // 1 subset + { 10, 10, 10, 10, }, // 10, endpoints not delta encoded, mode 11 in MS/D3D docs + { 11, 9, 9, 9, }, // 11 + { 12, 8, 8, 8, }, // 12 + { 16, 4, 4, 4, } // 13, also useful for solid blocks + }; + + const int8_t g_bc6h_mode_lookup[32] = { 0, 1, 2, 10, 0, 1, 3, 11, 0, 1, 4, 12, 0, 1, 5, 13, 0, 1, 6, -1, 0, 1, 7, -1, 0, 1, 8, -1, 0, 1, 9, -1 }; + + const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX] = + { + // comp_index, subset*2+lh_index, last_bit, first_bit + //------------------------ mode 0: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (10.555, 10.555, 10.555), delta + { { 1, 2, 4, -1 }, { 2, 2, 4, -1 }, { 2, 3, 4, -1 }, { 0, 0, 9, 0 }, { 1, 0, 9, 0 }, { 2, 0, 9, 0 }, { 0, 1, 4, 0 }, + { 1, 3, 4, -1 }, { 1, 2, 3, 0 }, { 1, 1, 4, 0 }, { 2, 3, 0, -1 }, { 1, 3, 3, 0 }, { 2, 1, 4, 0 }, { 2, 3, 1, -1 }, + { 2, 2, 3, 0 }, { 0, 2, 4, 0 }, { 2, 3, 2, -1 }, { 0, 3, 4, 0 }, { 2, 3, 3, -1 }, { 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 1: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (7.666, 7.666, 7.666), delta + { { 1, 2, 5, -1 },{ 1, 3, 4, -1 },{ 1, 3, 5, -1 },{ 0, 0, 6, 0 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 }, + { 1, 0, 6, 0 },{ 2, 2, 5, -1 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 6, 0 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 }, + { 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 }, + { 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 2: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.555, 11.444, 11.444), delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 4, 0 },{ 0, 0, 10, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },{ 1, 0, 10, -1 }, + { 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 }, + { 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 3: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.555, 11.444), delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 }, + { 1, 0, 10, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 0, -1 }, + { 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 1, 2, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 4: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.444, 11.555), delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 2, 2, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 }, + { 1, 0, 10, -1 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 0, 10, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 1, -1 }, + { 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 2, 3, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 5: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (9.555, 9.555, 9.555), delta + { { 0, 0, 8, 0 },{ 2, 2, 4, -1 },{ 1, 0, 8, 0 },{ 1, 2, 4, -1 },{ 2, 0, 8, 0 },{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 }, + { 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 }, + { 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 6: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.666, 8.555, 8.555), delta + { { 0, 0, 7, 0 },{ 1, 3, 4, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 3, -1 }, + { 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 }, + { 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 7: 2 subsets, Weight bits: 46 bits, Endpoints bits: 72 bits (8.555, 8.666, 8.555), delta + { { 0, 0, 7, 0 },{ 2, 3, 0, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 1, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 1, 3, 5, -1 }, + { 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 }, + { 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 8: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.555, 8.555, 8.666), delta + { { 0, 0, 7, 0 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 5, -1 }, + { 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 }, + { 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 9: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (6.6.6.6, 6.6.6.6, 6.6.6.6), NO delta + { { 0, 0, 5, 0 },{ 1, 3, 4, -1 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 5, 0 },{ 1, 2, 5, -1 },{ 2, 2, 5, -1 }, + { 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 5, 0 },{ 1, 3, 5, -1 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },{ 2, 3, 4, -1 },{ 0, 1, 5, 0 }, + { 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 10: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (10.10, 10.10, 10.10), NO delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 9, 0 },{ 1, 1, 9, 0 },{ 2, 1, 9, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 11: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (11.9, 11.9, 11.9), delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 8, 0 },{ 0, 0, 10, -1 },{ 1, 1, 8, 0 },{ 1, 0, 10, -1 },{ 2, 1, 8, 0 },{ 2, 0, 10, -1 }, {-1, 0, 0, 0} }, + //------------------------ mode 12: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (12.8, 12.8, 12.8), delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 7, 0 },{ 0, 0, 10, 11 },{ 1, 1, 7, 0 },{ 1, 0, 10, 11 },{ 2, 1, 7, 0 },{ 2, 0, 10, 11 }, {-1, 0, 0, 0} }, + //------------------------ mode 13: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (16.4, 16.4, 16.4), delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, 15 },{ 1, 1, 3, 0 },{ 1, 0, 10, 15 },{ 2, 1, 3, 0 },{ 2, 0, 10, 15 }, {-1, 0, 0, 0} } + }; + + // The same as the first 32 2-subset patterns in BC7. + // Bit 7 is a flag indicating that the weight uses 1 less bit than usual. + const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4] = // [pat][y][x] + { + { {0x80, 0, 1, 1}, { 0, 0, 1, 1 }, { 0, 0, 1, 1 }, { 0, 0, 1, 0x81 }}, { {0x80, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0x81} }, + { {0x80, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 0x81} }, { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} }, + { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, + { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} }, + { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, + { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 0x81} }, + { {0x80, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, + { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 0x81} }, + { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 1, 0}, {1, 1, 1, 0x81} }, { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} }, + { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 1, 0x81, 1}, {0, 0, 1, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} }, + { {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 1, 0, 0}, {1, 1, 1, 0} }, + { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} }, { {0x80, 1, 1, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 0, 0, 0x81} }, + { {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} }, + { {0x80, 1, 0x81, 0}, {0, 1, 1, 0}, {0, 1, 1, 0}, {0, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {0, 1, 1, 0}, {0, 1, 1, 0}, {1, 1, 0, 0} }, + { {0x80, 0, 0, 1}, {0, 1, 1, 1}, {0x81, 1, 1, 0}, {1, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {0x81, 1, 1, 1}, {0, 0, 0, 0} }, + { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {1, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {1, 0, 0, 1}, {1, 0, 0, 1}, {1, 1, 0, 0} } + }; + + const uint8_t g_bc6h_weight3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; + const uint8_t g_bc6h_weight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + + struct bc6h_logical_block + { + uint32_t m_mode; + uint32_t m_partition_pattern; // must be 0 if 1 subset + uint32_t m_endpoints[3][4]; // [comp][subset*2+lh_index] - must be already properly packed + uint8_t m_weights[16]; // weights must be of the proper size, taking into account skipped MSB's which must be 0 + + void clear() + { + basisu::clear_obj(*this); + } + }; + + static inline void write_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h) + { + assert((num_bits) && (num_bits < 64) && (bit_pos < 128)); + assert(val < (1ULL << num_bits)); + + if (bit_pos < 64) + { + l |= (val << bit_pos); + + if ((bit_pos + num_bits) > 64) + h |= (val >> (64 - bit_pos)); + } + else + { + h |= (val << (bit_pos - 64)); + } + + bit_pos += num_bits; + assert(bit_pos <= 128); + } + + static inline void write_rev_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h) + { + assert((num_bits) && (num_bits < 64) && (bit_pos < 128)); + assert(val < (1ULL << num_bits)); + + for (uint32_t i = 0; i < num_bits; i++) + write_bits((val >> (num_bits - 1u - i)) & 1, 1, bit_pos, l, h); + } + + static void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk) + { + const uint8_t s_mode_bits[NUM_BC6H_MODES] = { 0b00, 0b01, 0b00010, 0b00110, 0b01010, 0b01110, 0b10010, 0b10110, 0b11010, 0b11110, 0b00011, 0b00111, 0b01011, 0b01111 }; + + const uint32_t mode = log_blk.m_mode; + assert(mode < NUM_BC6H_MODES); + + uint64_t l = s_mode_bits[mode], h = 0; + uint32_t bit_pos = (mode >= 2) ? 5 : 2; + + const uint32_t num_subsets = (mode >= BC6H_FIRST_1SUBSET_MODE_INDEX) ? 1 : 2; + + assert(((num_subsets == 2) && (log_blk.m_partition_pattern < TOTAL_BC6H_PARTITION_PATTERNS)) || + ((num_subsets == 1) && (!log_blk.m_partition_pattern))); + + // Sanity checks + for (uint32_t c = 0; c < 3; c++) + { + assert(log_blk.m_endpoints[c][0] < (1u << g_bc6h_mode_sig_bits[mode][0])); // 1st subset l, base bits + assert(log_blk.m_endpoints[c][1] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 1st subset h, these are deltas except for modes 9,10 + assert(log_blk.m_endpoints[c][2] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset l + assert(log_blk.m_endpoints[c][3] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset h + } + + const bc6h_bit_layout* pLayout = &g_bc6h_bit_layouts[mode][0]; + + while (pLayout->m_comp != -1) + { + uint32_t v = (pLayout->m_comp == 3) ? log_blk.m_partition_pattern : log_blk.m_endpoints[pLayout->m_comp][pLayout->m_index]; + + if (pLayout->m_first_bit == -1) + { + write_bits((v >> pLayout->m_last_bit) & 1, 1, bit_pos, l, h); + } + else + { + const uint32_t total_bits = basisu::iabs(pLayout->m_last_bit - pLayout->m_first_bit) + 1; + + v >>= basisu::minimum(pLayout->m_first_bit, pLayout->m_last_bit); + v &= ((1 << total_bits) - 1); + + if (pLayout->m_first_bit > pLayout->m_last_bit) + write_rev_bits(v, total_bits, bit_pos, l, h); + else + write_bits(v, total_bits, bit_pos, l, h); + } + + pLayout++; + } + + const uint32_t num_mode_sel_bits = (num_subsets == 1) ? 4 : 3; + const uint8_t* pPat = &g_bc6h_2subset_patterns[log_blk.m_partition_pattern][0][0]; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t sel = log_blk.m_weights[i]; + + uint32_t num_bits = num_mode_sel_bits; + if (num_subsets == 2) + { + const uint32_t subset_index = pPat[i]; + num_bits -= (subset_index >> 7); + } + else if (!i) + { + num_bits--; + } + + assert(sel < (1u << num_bits)); + + write_bits(sel, num_bits, bit_pos, l, h); + } + + assert(bit_pos == 128); + + basisu::write_le_dword(&dst_blk.m_bytes[0], (uint32_t)l); + basisu::write_le_dword(&dst_blk.m_bytes[4], (uint32_t)(l >> 32u)); + basisu::write_le_dword(&dst_blk.m_bytes[8], (uint32_t)h); + basisu::write_le_dword(&dst_blk.m_bytes[12], (uint32_t)(h >> 32u)); + } + +#if 0 + static inline uint32_t bc6h_blog_dequantize_to_blog16(uint32_t comp, uint32_t bits_per_comp) + { + int unq; + + if (bits_per_comp >= 15) + unq = comp; + else if (comp == 0) + unq = 0; + else if (comp == ((1u << bits_per_comp) - 1u)) + unq = 0xFFFFu; + else + unq = ((comp << 16u) + 0x8000u) >> bits_per_comp; + + return unq; + } +#endif + + // Suboptimal, but very close. + static inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits) + { + assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT); + return (h * 64 + 30) / (31 * (1 << (16 - num_bits))); + } + + // 6,7,8,9,10,11,12 + const uint32_t BC6H_BLOG_TAB_MIN = 6; + const uint32_t BC6H_BLOG_TAB_MAX = 12; + //const uint32_t BC6H_BLOG_TAB_NUM = BC6H_BLOG_TAB_MAX - BC6H_BLOG_TAB_MIN + 1; + + // Handles 16, or 6-12 bits. Others assert. + static inline uint32_t half_to_blog_tab(half_float h, uint32_t num_bits) + { + BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MIN); + BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MAX); + + assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT); + + if (num_bits == 16) + { + return bc6h_half_to_blog(h, 16); + } + else + { + assert((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX)); + + // Note: This used to be done using a table lookup, but it required ~224KB of tables. This isn't quite as accurate, but the error is very slight (+-1 half values as ints). + return bc6h_half_to_blog(h, num_bits); + } + } + + bool g_bc6h_enc_initialized; + + void bc6h_enc_init() + { + if (g_bc6h_enc_initialized) + return; + + g_bc6h_enc_initialized = true; + } + + // mode 10, 4-bit weights + void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights) + { + assert(g_bc6h_enc_initialized); + + for (uint32_t i = 0; i < 16; i++) + { + assert(pWeights[i] <= 15); + } + + bc6h_logical_block log_blk; + log_blk.clear(); + + // Convert half endpoints to blog10 (mode 10 doesn't use delta encoding) + for (uint32_t c = 0; c < 3; c++) + { + log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 10); + log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 10); + } + + memcpy(log_blk.m_weights, pWeights, 16); + + if (log_blk.m_weights[0] & 8) + { + for (uint32_t i = 0; i < 16; i++) + log_blk.m_weights[i] = 15 - log_blk.m_weights[i]; + + for (uint32_t c = 0; c < 3; c++) + { + std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]); + } + } + + log_blk.m_mode = BC6H_FIRST_1SUBSET_MODE_INDEX; + pack_bc6h_block(*pPacked_block, log_blk); + } + + // Tries modes 11-13 (delta endpoint) encoding, falling back to mode 10 only when necessary, 4-bit weights + void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights) + { + assert(g_bc6h_enc_initialized); + + for (uint32_t i = 0; i < 16; i++) + { + assert(pWeights[i] <= 15); + } + + bc6h_logical_block log_blk; + log_blk.clear(); + + for (uint32_t mode = BC6H_LAST_MODE_INDEX; mode > BC6H_FIRST_1SUBSET_MODE_INDEX; mode--) + { + const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0], num_delta_bits = g_bc6h_mode_sig_bits[mode][1]; + const int base_bitmask = (1 << num_base_bits) - 1; + const int delta_bitmask = (1 << num_delta_bits) - 1; + BASISU_NOTE_UNUSED(base_bitmask); + + assert(num_delta_bits < num_base_bits); + assert((num_delta_bits == g_bc6h_mode_sig_bits[mode][2]) && (num_delta_bits == g_bc6h_mode_sig_bits[mode][3])); + + uint32_t blog_endpoints[3][2]; + + // Convert half endpoints to blog 16, 12, or 11 + for (uint32_t c = 0; c < 3; c++) + { + blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits); + assert((int)blog_endpoints[c][0] <= base_bitmask); + + blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits); + assert((int)blog_endpoints[c][1] <= base_bitmask); + } + + // Copy weights + memcpy(log_blk.m_weights, pWeights, 16); + + // Ensure first weight MSB is 0 + if (log_blk.m_weights[0] & 8) + { + // Invert weights + for (uint32_t i = 0; i < 16; i++) + log_blk.m_weights[i] = 15 - log_blk.m_weights[i]; + + // Swap blog quantized endpoints + for (uint32_t c = 0; c < 3; c++) + { + std::swap(blog_endpoints[c][0], blog_endpoints[c][1]); + } + } + + const int max_delta = (1 << (num_delta_bits - 1)) - 1; + const int min_delta = -(max_delta + 1); + assert((max_delta - min_delta) == delta_bitmask); + + bool failed_flag = false; + for (uint32_t c = 0; c < 3; c++) + { + log_blk.m_endpoints[c][0] = blog_endpoints[c][0]; + + int delta = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0]; + if ((delta < min_delta) || (delta > max_delta)) + { + failed_flag = true; + break; + } + + log_blk.m_endpoints[c][1] = delta & delta_bitmask; + } + + if (failed_flag) + continue; + + log_blk.m_mode = mode; + pack_bc6h_block(*pPacked_block, log_blk); + + return; + } + + // Worst case fall back to mode 10, which can handle any endpoints + bc6h_enc_block_mode10(pPacked_block, pEndpoints, pWeights); + } + + // Mode 9 (direct endpoint encoding), 3-bit weights, but only 1 subset + void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights) + { + assert(g_bc6h_enc_initialized); + + for (uint32_t i = 0; i < 16; i++) + { + assert(pWeights[i] <= 7); + } + + bc6h_logical_block log_blk; + log_blk.clear(); + + // Convert half endpoints to blog6 (mode 9 doesn't use delta encoding) + for (uint32_t c = 0; c < 3; c++) + { + log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 6); + log_blk.m_endpoints[c][2] = log_blk.m_endpoints[c][0]; + + log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 6); + log_blk.m_endpoints[c][3] = log_blk.m_endpoints[c][1]; + } + + memcpy(log_blk.m_weights, pWeights, 16); + + const uint32_t pat_index = 0; + const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0]; + + if (log_blk.m_weights[0] & 4) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 0) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + if (log_blk.m_weights[15] & 4) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 1) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + log_blk.m_mode = 9; + log_blk.m_partition_pattern = pat_index; + pack_bc6h_block(*pPacked_block, log_blk); + } + + // Tries modes 0-8, falls back to mode 9 + void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights) + { + assert(g_bc6h_enc_initialized); + + for (uint32_t i = 0; i < 16; i++) + { + assert(pWeights[i] <= 7); + } + + bc6h_logical_block log_blk; + log_blk.clear(); + + for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++) + { + static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least + const uint32_t mode = s_mode_order[mode_iter]; + + const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0]; + const int base_bitmask = (1 << num_base_bits) - 1; + BASISU_NOTE_UNUSED(base_bitmask); + + const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] }; + const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 }; + + uint32_t blog_endpoints[3][4]; + + // Convert half endpoints to blog 7-11 + for (uint32_t c = 0; c < 3; c++) + { + blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits); + blog_endpoints[c][2] = blog_endpoints[c][0]; + assert((int)blog_endpoints[c][0] <= base_bitmask); + + blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits); + blog_endpoints[c][3] = blog_endpoints[c][1]; + assert((int)blog_endpoints[c][1] <= base_bitmask); + } + + const uint32_t pat_index = 0; + const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0]; + + memcpy(log_blk.m_weights, pWeights, 16); + + if (log_blk.m_weights[0] & 4) + { + // Swap part 0's endpoints/weights + for (uint32_t c = 0; c < 3; c++) + std::swap(blog_endpoints[c][0], blog_endpoints[c][1]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 0) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + if (log_blk.m_weights[15] & 4) + { + // Swap part 1's endpoints/weights + for (uint32_t c = 0; c < 3; c++) + std::swap(blog_endpoints[c][2], blog_endpoints[c][3]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 1) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + bool failed_flag = false; + + for (uint32_t c = 0; c < 3; c++) + { + const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1; + + const int min_delta = -(max_delta + 1); + assert((max_delta - min_delta) == delta_bitmasks[c]); + + log_blk.m_endpoints[c][0] = blog_endpoints[c][0]; + + int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0]; + int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0]; + int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0]; + + if ((delta0 < min_delta) || (delta0 > max_delta) || + (delta1 < min_delta) || (delta1 > max_delta) || + (delta2 < min_delta) || (delta2 > max_delta)) + { + failed_flag = true; + break; + } + + log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c]; + log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c]; + log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c]; + + if (failed_flag) + break; + } + if (failed_flag) + continue; + + log_blk.m_mode = mode; + log_blk.m_partition_pattern = pat_index; + pack_bc6h_block(*pPacked_block, log_blk); + + return; + + } // mode_iter + + bc6h_enc_block_1subset_mode9_3bit_weights(pPacked_block, pEndpoints, pWeights); + } + + // pEndpoints[subset][comp][lh_index] + void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights) + { + assert(g_bc6h_enc_initialized); + assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2); + + for (uint32_t i = 0; i < 16; i++) + { + assert(pWeights[i] <= 7); + } + + bc6h_logical_block log_blk; + log_blk.clear(); + + // Convert half endpoints to blog6 (mode 9 doesn't use delta encoding) + for (uint32_t s = 0; s < 2; s++) + { + for (uint32_t c = 0; c < 3; c++) + { + log_blk.m_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], 6); + log_blk.m_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], 6); + } + } + + memcpy(log_blk.m_weights, pWeights, 16); + + //const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc; + const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7; + + const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert; + if (invert_flag) + { + for (uint32_t c = 0; c < 3; c++) + { + std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][2]); + std::swap(log_blk.m_endpoints[c][1], log_blk.m_endpoints[c][3]); + } + } + + const uint32_t pat_index = bc7_pattern; + assert(pat_index < 32); + const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0]; + + bool swap_flags[2] = { false, false }; + for (uint32_t i = 0; i < 16; i++) + { + if ((pPat[i] & 0x80) == 0) + continue; + + if (log_blk.m_weights[i] & 4) + { + const uint32_t p = pPat[i] & 1; + swap_flags[p] = true; + } + } + + if (swap_flags[0]) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 0) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + if (swap_flags[1]) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 1) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + log_blk.m_mode = 9; + log_blk.m_partition_pattern = pat_index; + pack_bc6h_block(*pPacked_block, log_blk); + } + + void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights) + { + assert(g_bc6h_enc_initialized); + + for (uint32_t i = 0; i < 16; i++) + { + assert(pWeights[i] <= 7); + } + + bc6h_logical_block log_blk; + log_blk.clear(); + + for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++) + { + static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least + const uint32_t mode = s_mode_order[mode_iter]; + + const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0]; + const int base_bitmask = (1 << num_base_bits) - 1; + BASISU_NOTE_UNUSED(base_bitmask); + + const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] }; + const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 }; + + uint32_t blog_endpoints[3][4]; + + // Convert half endpoints to blog 7-11 + for (uint32_t s = 0; s < 2; s++) + { + for (uint32_t c = 0; c < 3; c++) + { + blog_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], num_base_bits); + blog_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], num_base_bits); + } + } + + memcpy(log_blk.m_weights, pWeights, 16); + + //const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc; + const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7; + + const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert; + if (invert_flag) + { + for (uint32_t c = 0; c < 3; c++) + { + std::swap(blog_endpoints[c][0], blog_endpoints[c][2]); + std::swap(blog_endpoints[c][1], blog_endpoints[c][3]); + } + } + + const uint32_t pat_index = bc7_pattern; + assert(pat_index < 32); + const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0]; + + bool swap_flags[2] = { false, false }; + for (uint32_t i = 0; i < 16; i++) + { + if ((pPat[i] & 0x80) == 0) + continue; + + if (log_blk.m_weights[i] & 4) + { + const uint32_t p = pPat[i] & 1; + swap_flags[p] = true; + } + } + + if (swap_flags[0]) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(blog_endpoints[c][0], blog_endpoints[c][1]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 0) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + if (swap_flags[1]) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(blog_endpoints[c][2], blog_endpoints[c][3]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 1) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + // Try packing the endpoints + bool failed_flag = false; + + for (uint32_t c = 0; c < 3; c++) + { + const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1; + + const int min_delta = -(max_delta + 1); + assert((max_delta - min_delta) == delta_bitmasks[c]); + + log_blk.m_endpoints[c][0] = blog_endpoints[c][0]; + + int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0]; + int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0]; + int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0]; + + if ((delta0 < min_delta) || (delta0 > max_delta) || + (delta1 < min_delta) || (delta1 > max_delta) || + (delta2 < min_delta) || (delta2 > max_delta)) + { + failed_flag = true; + break; + } + + log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c]; + log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c]; + log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c]; + + if (failed_flag) + break; + } + if (failed_flag) + continue; + + log_blk.m_mode = mode; + log_blk.m_partition_pattern = pat_index; + pack_bc6h_block(*pPacked_block, log_blk); + + //half_float blk[16 * 3]; + //unpack_bc6h(pPacked_block, blk, false); + + return; + } + + bc6h_enc_block_2subset_mode9_3bit_weights(pPacked_block, common_part_index, pEndpoints, pWeights); + } + + bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]) + { + assert(g_bc6h_enc_initialized); + + if ((pColor[0] | pColor[1] | pColor[2]) & 0x8000) + return false; + + // ASTC block unpacker won't allow Inf/NaN's to come through. + //if (is_half_inf_or_nan(pColor[0]) || is_half_inf_or_nan(pColor[1]) || is_half_inf_or_nan(pColor[2])) + // return false; + + uint8_t weights[16]; + memset(weights, 0, sizeof(weights)); + + half_float endpoints[3][2]; + endpoints[0][0] = pColor[0]; + endpoints[0][1] = pColor[0]; + + endpoints[1][0] = pColor[1]; + endpoints[1][1] = pColor[1]; + + endpoints[2][0] = pColor[2]; + endpoints[2][1] = pColor[2]; + + bc6h_enc_block_1subset_4bit_weights(pPacked_block, endpoints, weights); + + return true; + } + + //-------------------------------------------------------------------------------------------------------------------------- + // basisu_astc_hdr_core.cpp + + static bool g_astc_hdr_core_initialized; + static int8_t g_astc_partition_id_to_common_bc7_pat_index[1024]; + + //-------------------------------------------------------------------------------------------------------------------------- + + void astc_hdr_core_init() + { + if (g_astc_hdr_core_initialized) + return; + + memset(g_astc_partition_id_to_common_bc7_pat_index, 0xFF, sizeof(g_astc_partition_id_to_common_bc7_pat_index)); + + for (uint32_t part_index = 0; part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; ++part_index) + { + const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc; + //const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7; + + assert(astc_pattern < 1024); + g_astc_partition_id_to_common_bc7_pat_index[astc_pattern] = (int8_t)part_index; + } + + g_astc_hdr_core_initialized = true; + } + + //-------------------------------------------------------------------------------------------------------------------------- + + static inline int astc_hdr_sign_extend(int src, int num_src_bits) + { + assert(basisu::in_range(num_src_bits, 2, 31)); + + const bool negative = (src & (1 << (num_src_bits - 1))) != 0; + if (negative) + return src | ~((1 << num_src_bits) - 1); + else + return src & ((1 << num_src_bits) - 1); + } + + static inline void astc_hdr_pack_bit( + int& dst, int dst_bit, + int src_val, int src_bit = 0) + { + assert(dst_bit >= 0 && dst_bit <= 31); + int bit = basisu::get_bit(src_val, src_bit); + dst |= (bit << dst_bit); + } + + //-------------------------------------------------------------------------------------------------------------------------- + + void decode_mode7_to_qlog12_ise20( + const uint8_t* pEndpoints, + int e[2][3], + int* pScale) + { + assert(g_astc_hdr_core_initialized); + + for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++) + { + assert(pEndpoints[i] <= 255); + } + + const int v0 = pEndpoints[0], v1 = pEndpoints[1], v2 = pEndpoints[2], v3 = pEndpoints[3]; + + // Extract mode bits and unpack to major component and mode. + const int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4); + + int majcomp, mode; + if ((modeval & 0xC) != 0xC) + { + majcomp = modeval >> 2; + mode = modeval & 3; + } + else if (modeval != 0xF) + { + majcomp = modeval & 3; + mode = 4; + } + else + { + majcomp = 0; + mode = 5; + } + + // Extract low-order bits of r, g, b, and s. + int red = v0 & 0x3f; + int green = v1 & 0x1f; + int blue = v2 & 0x1f; + int scale = v3 & 0x1f; + + // Extract high-order bits, which may be assigned depending on mode + int x0 = (v1 >> 6) & 1; + int x1 = (v1 >> 5) & 1; + int x2 = (v2 >> 6) & 1; + int x3 = (v2 >> 5) & 1; + int x4 = (v3 >> 7) & 1; + int x5 = (v3 >> 6) & 1; + int x6 = (v3 >> 5) & 1; + + // Now move the high-order xs into the right place. + const int ohm = 1 << mode; + if (ohm & 0x30) green |= x0 << 6; + if (ohm & 0x3A) green |= x1 << 5; + if (ohm & 0x30) blue |= x2 << 6; + if (ohm & 0x3A) blue |= x3 << 5; + if (ohm & 0x3D) scale |= x6 << 5; + if (ohm & 0x2D) scale |= x5 << 6; + if (ohm & 0x04) scale |= x4 << 7; + if (ohm & 0x3B) red |= x4 << 6; + if (ohm & 0x04) red |= x3 << 6; + if (ohm & 0x10) red |= x5 << 7; + if (ohm & 0x0F) red |= x2 << 7; + if (ohm & 0x05) red |= x1 << 8; + if (ohm & 0x0A) red |= x0 << 8; + if (ohm & 0x05) red |= x0 << 9; + if (ohm & 0x02) red |= x6 << 9; + if (ohm & 0x01) red |= x3 << 10; + if (ohm & 0x02) red |= x5 << 10; + + // Shift the bits to the top of the 12-bit result. + static const int s_shamts[6] = { 1,1,2,3,4,5 }; + + const int shamt = s_shamts[mode]; + red <<= shamt; + green <<= shamt; + blue <<= shamt; + scale <<= shamt; + + // Minor components are stored as differences + if (mode != 5) + { + green = red - green; + blue = red - blue; + } + + // Swizzle major component into place + if (majcomp == 1) + std::swap(red, green); + + if (majcomp == 2) + std::swap(red, blue); + + // Clamp output values, set alpha to 1.0 + e[1][0] = basisu::clamp(red, 0, 0xFFF); + e[1][1] = basisu::clamp(green, 0, 0xFFF); + e[1][2] = basisu::clamp(blue, 0, 0xFFF); + + e[0][0] = basisu::clamp(red - scale, 0, 0xFFF); + e[0][1] = basisu::clamp(green - scale, 0, 0xFFF); + e[0][2] = basisu::clamp(blue - scale, 0, 0xFFF); + + if (pScale) + *pScale = scale; + } + + //-------------------------------------------------------------------------------------------------------------------------- + + bool decode_mode7_to_qlog12( + const uint8_t* pEndpoints, + int e[2][3], + int* pScale, + uint32_t ise_endpoint_range) + { + assert(g_astc_hdr_core_initialized); + + if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS) + { + decode_mode7_to_qlog12_ise20(pEndpoints, e, pScale); + } + else + { + uint8_t dequantized_endpoints[NUM_MODE7_ENDPOINTS]; + + for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++) + dequantized_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]]; + + decode_mode7_to_qlog12_ise20(dequantized_endpoints, e, pScale); + } + + for (uint32_t i = 0; i < 2; i++) + { + if (e[i][0] > (int)MAX_QLOG12) + return false; + + if (e[i][1] > (int)MAX_QLOG12) + return false; + + if (e[i][2] > (int)MAX_QLOG12) + return false; + } + + return true; + } + + //-------------------------------------------------------------------------------------------------------------------------- + + void decode_mode11_to_qlog12_ise20( + const uint8_t* pEndpoints, + int e[2][3]) + { +#ifdef _DEBUG + for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++) + { + assert(pEndpoints[i] <= 255); + } +#endif + + const uint32_t maj_comp = basisu::get_bit(pEndpoints[4], 7) | (basisu::get_bit(pEndpoints[5], 7) << 1); + + if (maj_comp == 3) + { + // Direct, qlog8 and qlog7 + e[0][0] = pEndpoints[0] << 4; + e[1][0] = pEndpoints[1] << 4; + + e[0][1] = pEndpoints[2] << 4; + e[1][1] = pEndpoints[3] << 4; + + e[0][2] = (pEndpoints[4] & 127) << 5; + e[1][2] = (pEndpoints[5] & 127) << 5; + } + else + { + int v0 = pEndpoints[0]; + int v1 = pEndpoints[1]; + int v2 = pEndpoints[2]; + int v3 = pEndpoints[3]; + int v4 = pEndpoints[4]; + int v5 = pEndpoints[5]; + + int mode = 0; + astc_hdr_pack_bit(mode, 0, v1, 7); + astc_hdr_pack_bit(mode, 1, v2, 7); + astc_hdr_pack_bit(mode, 2, v3, 7); + + int va = v0; + astc_hdr_pack_bit(va, 8, v1, 6); + + int vb0 = v2 & 63; + int vb1 = v3 & 63; + int vc = v1 & 63; + + int vd0 = v4 & 0x7F; // this takes more bits than is sometimes needed + int vd1 = v5 & 0x7F; // this takes more bits than is sometimes needed + static const int8_t dbitstab[8] = { 7,6,7,6,5,6,5,6 }; + vd0 = astc_hdr_sign_extend(vd0, dbitstab[mode]); + vd1 = astc_hdr_sign_extend(vd1, dbitstab[mode]); + + int x0 = basisu::get_bit(v2, 6); + int x1 = basisu::get_bit(v3, 6); + int x2 = basisu::get_bit(v4, 6); + int x3 = basisu::get_bit(v5, 6); + int x4 = basisu::get_bit(v4, 5); + int x5 = basisu::get_bit(v5, 5); + + const uint32_t ohm = 1U << mode; + if (ohm & 0xA4) va |= (x0 << 9); + if (ohm & 0x08) va |= (x2 << 9); + if (ohm & 0x50) va |= (x4 << 9); + if (ohm & 0x50) va |= (x5 << 10); + if (ohm & 0xA0) va |= (x1 << 10); + if (ohm & 0xC0) va |= (x2 << 11); + if (ohm & 0x04) vc |= (x1 << 6); + if (ohm & 0xE8) vc |= (x3 << 6); + if (ohm & 0x20) vc |= (x2 << 7); + if (ohm & 0x5B) vb0 |= (x0 << 6); + if (ohm & 0x5B) vb1 |= (x1 << 6); + if (ohm & 0x12) vb0 |= (x2 << 7); + if (ohm & 0x12) vb1 |= (x3 << 7); + + const int shamt = (mode >> 1) ^ 3; + + va = (uint32_t)va << shamt; + vb0 = (uint32_t)vb0 << shamt; + vb1 = (uint32_t)vb1 << shamt; + vc = (uint32_t)vc << shamt; + vd0 = (uint32_t)vd0 << shamt; + vd1 = (uint32_t)vd1 << shamt; + + // qlog12 + e[1][0] = basisu::clamp<int>(va, 0, 0xFFF); + e[1][1] = basisu::clamp<int>(va - vb0, 0, 0xFFF); + e[1][2] = basisu::clamp<int>(va - vb1, 0, 0xFFF); + + e[0][0] = basisu::clamp<int>(va - vc, 0, 0xFFF); + e[0][1] = basisu::clamp<int>(va - vb0 - vc - vd0, 0, 0xFFF); + e[0][2] = basisu::clamp<int>(va - vb1 - vc - vd1, 0, 0xFFF); + + if (maj_comp) + { + std::swap(e[0][0], e[0][maj_comp]); + std::swap(e[1][0], e[1][maj_comp]); + } + } + } + + //-------------------------------------------------------------------------------------------------------------------------- + + bool decode_mode11_to_qlog12( + const uint8_t* pEndpoints, + int e[2][3], + uint32_t ise_endpoint_range) + { + assert(g_astc_hdr_core_initialized); + assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + + if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS) + { + decode_mode11_to_qlog12_ise20(pEndpoints, e); + } + else + { + uint8_t dequantized_endpoints[NUM_MODE11_ENDPOINTS]; + + for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++) + dequantized_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]]; + + decode_mode11_to_qlog12_ise20(dequantized_endpoints, e); + } + + for (uint32_t i = 0; i < 2; i++) + { + if (e[i][0] > (int)MAX_QLOG12) + return false; + + if (e[i][1] > (int)MAX_QLOG12) + return false; + + if (e[i][2] > (int)MAX_QLOG12) + return false; + } + + return true; + } + + //-------------------------------------------------------------------------------------------------------------------------- + + bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk) + { + assert(g_astc_hdr_core_initialized); + assert((best_blk.m_weight_ise_range >= 1) && (best_blk.m_weight_ise_range <= 8)); + + if (best_blk.m_weight_ise_range == 5) + { + // Use 3-bit BC6H weights which are a perfect match for 3-bit ASTC weights, but encode 1-subset as 2 equal subsets + bc6h_enc_block_1subset_3bit_weights(&transcoded_bc6h_blk, h_e, best_blk.m_weights); + } + else + { + uint8_t bc6h_weights[16]; + + if (best_blk.m_weight_ise_range == 1) + { + // weight ISE 1: 3 levels + static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 8, 15 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc1_to_bc6h_3[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 2) + { + // weight ISE 2: 4 levels + static const uint8_t s_astc2_to_bc6h_4[4] = { 0, 5, 10, 15 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc2_to_bc6h_4[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 3) + { + // weight ISE 3: 5 levels + static const uint8_t s_astc3_to_bc6h_4[5] = { 0, 4, 7, 11, 15 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc3_to_bc6h_4[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 4) + { + // weight ISE 4: 6 levels + static const uint8_t s_astc4_to_bc6h_4[6] = { 0, 15, 3, 12, 6, 9 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc4_to_bc6h_4[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 6) + { + // weight ISE 6: 10 levels + static const uint8_t s_astc6_to_bc6h_4[10] = { 0, 15, 2, 13, 3, 12, 5, 10, 6, 9 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc6_to_bc6h_4[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 7) + { + // weight ISE 7: 12 levels + static const uint8_t s_astc7_to_bc6h_4[12] = { 0, 15, 4, 11, 1, 14, 5, 10, 2, 13, 6, 9 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc7_to_bc6h_4[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 8) + { + // 16 levels + memcpy(bc6h_weights, best_blk.m_weights, 16); + } + else + { + assert(0); + return false; + } + + bc6h_enc_block_1subset_4bit_weights(&transcoded_bc6h_blk, h_e, bc6h_weights); + } + + return true; + } + + //-------------------------------------------------------------------------------------------------------------------------- + + bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk) + { + assert(g_astc_hdr_core_initialized); + assert(best_blk.m_num_partitions == 2); + assert(common_part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); + + half_float bc6h_endpoints[2][3][2]; // [subset][comp][lh_index] + + // UASTC HDR checks + // Both CEM's must be equal in 2-subset UASTC HDR. + if (best_blk.m_color_endpoint_modes[0] != best_blk.m_color_endpoint_modes[1]) + return false; + if ((best_blk.m_color_endpoint_modes[0] != 7) && (best_blk.m_color_endpoint_modes[0] != 11)) + return false; + + if (best_blk.m_color_endpoint_modes[0] == 7) + { + if (!(((best_blk.m_weight_ise_range == 1) && (best_blk.m_endpoint_ise_range == 20)) || + ((best_blk.m_weight_ise_range == 2) && (best_blk.m_endpoint_ise_range == 20)) || + ((best_blk.m_weight_ise_range == 3) && (best_blk.m_endpoint_ise_range == 19)) || + ((best_blk.m_weight_ise_range == 4) && (best_blk.m_endpoint_ise_range == 17)) || + ((best_blk.m_weight_ise_range == 5) && (best_blk.m_endpoint_ise_range == 15)))) + { + return false; + } + } + else + { + if (!(((best_blk.m_weight_ise_range == 1) && (best_blk.m_endpoint_ise_range == 14)) || + ((best_blk.m_weight_ise_range == 2) && (best_blk.m_endpoint_ise_range == 12)))) + { + return false; + } + } + + for (uint32_t s = 0; s < 2; s++) + { + int e[2][3]; + if (best_blk.m_color_endpoint_modes[0] == 7) + { + bool success = decode_mode7_to_qlog12(best_blk.m_endpoints + s * NUM_MODE7_ENDPOINTS, e, nullptr, best_blk.m_endpoint_ise_range); + if (!success) + return false; + } + else + { + bool success = decode_mode11_to_qlog12(best_blk.m_endpoints + s * NUM_MODE11_ENDPOINTS, e, best_blk.m_endpoint_ise_range); + if (!success) + return false; + } + + for (uint32_t c = 0; c < 3; c++) + { + bc6h_endpoints[s][c][0] = qlog_to_half_slow(e[0][c], 12); + if (is_half_inf_or_nan(bc6h_endpoints[s][c][0])) + return false; + + bc6h_endpoints[s][c][1] = qlog_to_half_slow(e[1][c], 12); + if (is_half_inf_or_nan(bc6h_endpoints[s][c][1])) + return false; + } + } + + uint8_t bc6h_weights[16]; + if (best_blk.m_weight_ise_range == 1) + { + static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 4, 7 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc1_to_bc6h_3[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 2) + { + static const uint8_t s_astc2_to_bc6h_3[4] = { 0, 2, 5, 7 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc2_to_bc6h_3[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 3) + { + static const uint8_t s_astc3_to_bc6h_3[5] = { 0, 2, 4, 5, 7 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc3_to_bc6h_3[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 4) + { + static const uint8_t s_astc4_to_bc6h_3[6] = { 0, 7, 1, 6, 3, 4 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc4_to_bc6h_3[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 5) + { + memcpy(bc6h_weights, best_blk.m_weights, 16); + } + else + { + assert(0); + return false; + } + + bc6h_enc_block_2subset_3bit_weights(&transcoded_bc6h_blk, common_part_index, bc6h_endpoints, bc6h_weights); + + return true; + } + + //-------------------------------------------------------------------------------------------------------------------------- + // Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails. + bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk) + { + assert(g_astc_hdr_core_initialized); + if (!g_astc_hdr_core_initialized) + { + assert(0); + return false; + } + + astc_helpers::log_astc_block log_blk; + + if (!astc_helpers::unpack_block(&src_blk, log_blk, 4, 4)) + { + // Failed unpacking ASTC data + return false; + } + + return astc_hdr_transcode_to_bc6h(log_blk, dst_blk); + } + + //-------------------------------------------------------------------------------------------------------------------------- + // Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails. + bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk) + { + assert(g_astc_hdr_core_initialized); + if (!g_astc_hdr_core_initialized) + { + assert(0); + return false; + } + + if (log_blk.m_solid_color_flag_ldr) + { + // Don't support LDR solid colors. + return false; + } + + if (log_blk.m_solid_color_flag_hdr) + { + // Solid color HDR block + return bc6h_enc_block_solid_color(&dst_blk, log_blk.m_solid_color); + } + + // Only support 4x4 grid sizes + if ((log_blk.m_grid_width != 4) || (log_blk.m_grid_height != 4)) + return false; + + // Don't support dual plane encoding + if (log_blk.m_dual_plane) + return false; + + if (log_blk.m_num_partitions == 1) + { + // Handle 1 partition (or subset) + + // UASTC HDR checks + if ((log_blk.m_weight_ise_range < 1) || (log_blk.m_weight_ise_range > 8)) + return false; + + int e[2][3]; + bool success; + + if (log_blk.m_color_endpoint_modes[0] == 7) + { + if (log_blk.m_endpoint_ise_range != 20) + return false; + + success = decode_mode7_to_qlog12(log_blk.m_endpoints, e, nullptr, log_blk.m_endpoint_ise_range); + } + else if (log_blk.m_color_endpoint_modes[0] == 11) + { + // UASTC HDR checks + if (log_blk.m_weight_ise_range <= 7) + { + if (log_blk.m_endpoint_ise_range != 20) + return false; + } + else if (log_blk.m_endpoint_ise_range != 19) + { + return false; + } + + success = decode_mode11_to_qlog12(log_blk.m_endpoints, e, log_blk.m_endpoint_ise_range); + } + else + { + return false; + } + + if (!success) + return false; + + // Transform endpoints to half float + half_float h_e[3][2] = + { + { qlog_to_half_slow(e[0][0], 12), qlog_to_half_slow(e[1][0], 12) }, + { qlog_to_half_slow(e[0][1], 12), qlog_to_half_slow(e[1][1], 12) }, + { qlog_to_half_slow(e[0][2], 12), qlog_to_half_slow(e[1][2], 12) } + }; + + // Sanity check for NaN/Inf + for (uint32_t i = 0; i < 2; i++) + if (is_half_inf_or_nan(h_e[0][i]) || is_half_inf_or_nan(h_e[1][i]) || is_half_inf_or_nan(h_e[2][i])) + return false; + + // Transcode to bc6h + if (!transcode_bc6h_1subset(h_e, log_blk, dst_blk)) + return false; + } + else if (log_blk.m_num_partitions == 2) + { + // Handle 2 partition (or subset) + int common_bc7_pat_index = g_astc_partition_id_to_common_bc7_pat_index[log_blk.m_partition_id]; + if (common_bc7_pat_index < 0) + return false; + + assert(common_bc7_pat_index < (int)basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); + + if (!transcode_bc6h_2subsets(common_bc7_pat_index, log_blk, dst_blk)) + return false; + } + else + { + // Only supports 1 or 2 partitions (or subsets) + return false; + } + + return true; + } +#endif // BASISD_SUPPORT_UASTC_HDR + } // namespace basist diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder.h b/thirdparty/basis_universal/transcoder/basisu_transcoder.h index 3327e8ddb7..8324e99698 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder.h +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.h @@ -1,5 +1,5 @@ // basisu_transcoder.h -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,6 +29,7 @@ // Set BASISU_FORCE_DEVEL_MESSAGES to 1 to enable debug printf()'s whenever an error occurs, for easier debugging during development. #ifndef BASISU_FORCE_DEVEL_MESSAGES + // TODO - disable before checking in #define BASISU_FORCE_DEVEL_MESSAGES 0 #endif @@ -55,7 +56,7 @@ namespace basist cTFETC2_RGBA = 1, // Opaque+alpha, ETC2_EAC_A8 block followed by a ETC1 block, alpha channel will be opaque for opaque .basis files // BC1-5, BC7 (desktop, some mobile devices) - cTFBC1_RGB = 2, // Opaque only, no punchthrough alpha support yet, transcodes alpha slice if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified + cTFBC1_RGB = 2, // Opaque only, no punchthrough alpha support yet, transcodes alpha slice if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified cTFBC3_RGBA = 3, // Opaque+alpha, BC4 followed by a BC1 block, alpha channel will be opaque for opaque .basis files cTFBC4_R = 4, // Red only, alpha slice is transcoded to output if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified cTFBC5_RG = 5, // XY: Two BC4 blocks, X=R and Y=Alpha, .basis file should have alpha data (if not Y will be all 255's) @@ -63,10 +64,11 @@ namespace basist // PVRTC1 4bpp (mobile, PowerVR devices) cTFPVRTC1_4_RGB = 8, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified, nearly lowest quality of any texture format. - cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format. + cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format. // ASTC (mobile, Intel devices, hopefully all desktop GPU's one day) - cTFASTC_4x4_RGBA = 10, // Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions. + cTFASTC_4x4_RGBA = 10, // LDR. Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. + // LDR: Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions. // ATC (mobile, Adreno devices, this is a niche format) cTFATC_RGB = 11, // Opaque, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. ATI ATC (GL_ATC_RGB_AMD) @@ -74,8 +76,8 @@ namespace basist // FXT1 (desktop, Intel devices, this is a super obscure format) cTFFXT1_RGB = 17, // Opaque only, uses exclusively CC_MIXED blocks. Notable for having a 8x4 block size. GL_3DFX_texture_compression_FXT1 is supported on Intel integrated GPU's (such as HD 630). - // Punch-through alpha is relatively easy to support, but full alpha is harder. This format is only here for completeness so opaque-only is fine for now. - // See the BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING macro in basisu_transcoder_internal.h. + // Punch-through alpha is relatively easy to support, but full alpha is harder. This format is only here for completeness so opaque-only is fine for now. + // See the BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING macro in basisu_transcoder_internal.h. cTFPVRTC2_4_RGB = 18, // Opaque-only, almost BC1 quality, much faster to transcode and supports arbitrary texture dimensions (unlike PVRTC1 RGB). cTFPVRTC2_4_RGBA = 19, // Opaque+alpha, slower to encode than cTFPVRTC2_4_RGB. Premultiplied alpha is highly recommended, otherwise the color channel can leak into the alpha channel on transparent blocks. @@ -83,13 +85,22 @@ namespace basist cTFETC2_EAC_R11 = 20, // R only (ETC2 EAC R11 unsigned) cTFETC2_EAC_RG11 = 21, // RG only (ETC2 EAC RG11 unsigned), R=opaque.r, G=alpha - for tangent space normal maps + cTFBC6H = 22, // HDR, RGB only, unsigned + cTFASTC_HDR_4x4_RGBA = 23, // HDR, RGBA (currently UASTC HDR is only RGB), unsigned + // Uncompressed (raw pixel) formats + // Note these uncompressed formats (RGBA32, 565, and 4444) can only be transcoded to from LDR input files (ETC1S or UASTC LDR). cTFRGBA32 = 13, // 32bpp RGBA image stored in raster (not block) order in memory, R is first byte, A is last byte. cTFRGB565 = 14, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 11 cTFBGR565 = 15, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 0 - cTFRGBA4444 = 16, // 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0 + cTFRGBA4444 = 16, // 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0 + + // Note these uncompressed formats (HALF and 9E5) can only be transcoded to from HDR input files (UASTC HDR). + cTFRGB_HALF = 24, // 48bpp RGB half (16-bits/component, 3 components) + cTFRGBA_HALF = 25, // 64bpp RGBA half (16-bits/component, 4 components) (A will always currently 1.0, UASTC_HDR doesn't support alpha) + cTFRGB_9E5 = 26, // 32bpp RGB 9E5 (shared exponent, positive only, see GL_EXT_texture_shared_exponent) - cTFTotalTextureFormats = 22, + cTFTotalTextureFormats = 27, // Old enums for compatibility with code compiled against previous versions cTFETC1 = cTFETC1_RGB, @@ -124,6 +135,9 @@ namespace basist // Returns true if the format supports an alpha channel. bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt); + // Returns true if the format is HDR. + bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt); + // Returns the basisu::texture_format corresponding to the specified transcoder_texture_format. basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt); @@ -142,7 +156,7 @@ namespace basist // Returns the block height for the specified texture format, which is currently always 4. uint32_t basis_get_block_height(transcoder_texture_format tex_type); - // Returns true if the specified format was enabled at compile time. + // Returns true if the specified format was enabled at compile time, and is supported for the specific basis/ktx2 texture format (ETC1S, UASTC, or UASTC HDR). bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt = basis_tex_format::cETC1S); // Validates that the output buffer is large enough to hold the entire transcoded texture. @@ -317,6 +331,42 @@ namespace basist int channel0 = -1, int channel1 = -1); }; + class basisu_lowlevel_uastc_hdr_transcoder + { + friend class basisu_transcoder; + + public: + basisu_lowlevel_uastc_hdr_transcoder(); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, + output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, + pState, output_rows_in_pixels, channel0, channel1, decode_flags); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint32_t slice_offset, uint32_t slice_length, + uint32_t decode_flags = 0, + bool has_alpha = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0, + int channel0 = -1, int channel1 = -1); + }; + struct basisu_slice_info { uint32_t m_orig_width; @@ -530,6 +580,7 @@ namespace basist private: mutable basisu_lowlevel_etc1s_transcoder m_lowlevel_etc1s_decoder; mutable basisu_lowlevel_uastc_transcoder m_lowlevel_uastc_decoder; + mutable basisu_lowlevel_uastc_hdr_transcoder m_lowlevel_uastc_hdr_decoder; bool m_ready_to_transcode; @@ -612,10 +663,12 @@ namespace basist #pragma pack(pop) const uint32_t KTX2_VK_FORMAT_UNDEFINED = 0; + const uint32_t KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK = 1000066000; // TODO, is this correct? const uint32_t KTX2_KDF_DF_MODEL_UASTC = 166; + const uint32_t KTX2_KDF_DF_MODEL_UASTC_HDR = 167; const uint32_t KTX2_KDF_DF_MODEL_ETC1S = 163; const uint32_t KTX2_IMAGE_IS_P_FRAME = 2; - const uint32_t KTX2_UASTC_BLOCK_SIZE = 16; + const uint32_t KTX2_UASTC_BLOCK_SIZE = 16; // also the block size for UASTC_HDR const uint32_t KTX2_MAX_SUPPORTED_LEVEL_COUNT = 16; // this is an implementation specific constraint and can be increased // The KTX2 transfer functions supported by KTX2 @@ -800,13 +853,15 @@ namespace basist // Returns 0 or the number of layers in the texture array or texture video. Valid after init(). uint32_t get_layers() const { return m_header.m_layer_count; } - // Returns cETC1S or cUASTC4x4. Valid after init(). + // Returns cETC1S, cUASTC4x4, or cUASTC_HDR_4x4. Valid after init(). basist::basis_tex_format get_format() const { return m_format; } - + bool is_etc1s() const { return get_format() == basist::basis_tex_format::cETC1S; } bool is_uastc() const { return get_format() == basist::basis_tex_format::cUASTC4x4; } + bool is_hdr() const { return get_format() == basist::basis_tex_format::cUASTC_HDR_4x4; } + // Returns true if the ETC1S file has two planes (typically RGBA, or RRRG), or true if the UASTC file has alpha data. Valid after init(). uint32_t get_has_alpha() const { return m_has_alpha; } @@ -913,6 +968,7 @@ namespace basist basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder; basist::basisu_lowlevel_uastc_transcoder m_uastc_transcoder; + basist::basisu_lowlevel_uastc_hdr_transcoder m_uastc_hdr_transcoder; ktx2_transcoder_state m_def_transcoder_state; diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h b/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h index 0505df6ea6..17c9dc7c8c 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h @@ -1,5 +1,5 @@ // basisu_transcoder_internal.h - Universal texture format transcoder library. -// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. // // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing // @@ -20,8 +20,9 @@ #pragma warning (disable: 4127) // conditional expression is constant #endif -#define BASISD_LIB_VERSION 116 -#define BASISD_VERSION_STRING "01.16" +// v1.50: Added UASTC HDR support +#define BASISD_LIB_VERSION 150 +#define BASISD_VERSION_STRING "01.50" #ifdef _DEBUG #define BASISD_BUILD_DEBUG @@ -82,9 +83,15 @@ namespace basist cRGBA4444_ALPHA, cRGBA4444_COLOR_OPAQUE, cRGBA4444, - - cUASTC_4x4, - + cRGBA_HALF, + cRGB_HALF, + cRGB_9E5, + + cUASTC_4x4, // LDR, universal + cUASTC_HDR_4x4, // HDR, transcodes only to 4x4 HDR ASTC, BC6H, or uncompressed + cBC6H, + cASTC_HDR_4x4, + cTotalBlockFormats }; @@ -264,8 +271,8 @@ namespace basist } const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; } - const basisu::int_vec get_lookup() const { return m_lookup; } - const basisu::int16_vec get_tree() const { return m_tree; } + const basisu::int_vec &get_lookup() const { return m_lookup; } + const basisu::int16_vec &get_tree() const { return m_tree; } bool is_valid() const { return m_code_sizes.size() > 0; } @@ -789,7 +796,198 @@ namespace basist }; bool basis_block_format_is_uncompressed(block_format tex_type); - + + //------------------------------------ + + typedef uint16_t half_float; + + const double MIN_DENORM_HALF_FLOAT = 0.000000059604645; // smallest positive subnormal number + const double MIN_HALF_FLOAT = 0.00006103515625; // smallest positive normal number + const double MAX_HALF_FLOAT = 65504.0; // largest normal number + + inline uint32_t get_bits(uint32_t val, int low, int high) + { + const int num_bits = (high - low) + 1; + assert((num_bits >= 1) && (num_bits <= 32)); + + val >>= low; + if (num_bits != 32) + val &= ((1u << num_bits) - 1); + + return val; + } + + inline bool is_half_inf_or_nan(half_float v) + { + return get_bits(v, 10, 14) == 31; + } + + inline bool is_half_denorm(half_float v) + { + int e = (v >> 10) & 31; + return !e; + } + + inline int get_half_exp(half_float v) + { + int e = ((v >> 10) & 31); + return e ? (e - 15) : -14; + } + + inline int get_half_mantissa(half_float v) + { + if (is_half_denorm(v)) + return v & 0x3FF; + return (v & 0x3FF) | 0x400; + } + + inline float get_half_mantissaf(half_float v) + { + return ((float)get_half_mantissa(v)) / 1024.0f; + } + + inline int get_half_sign(half_float v) + { + return v ? ((v & 0x8000) ? -1 : 1) : 0; + } + + inline bool half_is_signed(half_float v) + { + return (v & 0x8000) != 0; + } + +#if 0 + int hexp = get_half_exp(Cf); + float hman = get_half_mantissaf(Cf); + int hsign = get_half_sign(Cf); + float k = powf(2.0f, hexp) * hman * hsign; + if (is_half_inf_or_nan(Cf)) + k = std::numeric_limits<float>::quiet_NaN(); +#endif + + half_float float_to_half(float val); + + inline float half_to_float(half_float hval) + { + union { float f; uint32_t u; } x = { 0 }; + + uint32_t s = ((uint32_t)hval >> 15) & 1; + uint32_t e = ((uint32_t)hval >> 10) & 0x1F; + uint32_t m = (uint32_t)hval & 0x3FF; + + if (!e) + { + if (!m) + { + // +- 0 + x.u = s << 31; + return x.f; + } + else + { + // denormalized + while (!(m & 0x00000400)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x00000400; + } + } + else if (e == 31) + { + if (m == 0) + { + // +/- INF + x.u = (s << 31) | 0x7f800000; + return x.f; + } + else + { + // +/- NaN + x.u = (s << 31) | 0x7f800000 | (m << 13); + return x.f; + } + } + + e = e + (127 - 15); + m = m << 13; + + assert(s <= 1); + assert(m <= 0x7FFFFF); + assert(e <= 255); + + x.u = m | (e << 23) | (s << 31); + return x.f; + } + + // Originally from bc6h_enc.h + + void bc6h_enc_init(); + + const uint32_t MAX_BLOG16_VAL = 0xFFFF; + + // BC6H internals + const uint32_t NUM_BC6H_MODES = 14; + const uint32_t BC6H_LAST_MODE_INDEX = 13; + const uint32_t BC6H_FIRST_1SUBSET_MODE_INDEX = 10; // in the MS docs, this is "mode 11" (where the first mode is 1), 60 bits for endpoints (10.10, 10.10, 10.10), 63 bits for weights + const uint32_t TOTAL_BC6H_PARTITION_PATTERNS = 32; + + extern const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4]; // base, r, g, b + + struct bc6h_bit_layout + { + int8_t m_comp; // R=0,G=1,B=2,D=3 (D=partition index) + int8_t m_index; // 0-3, 0-1 Low/High subset 1, 2-3 Low/High subset 2, -1=partition index (d) + int8_t m_last_bit; + int8_t m_first_bit; // may be -1 if a single bit, may be >m_last_bit if reversed + }; + + const uint32_t MAX_BC6H_LAYOUT_INDEX = 25; + extern const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX]; + + extern const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4]; // [y][x] + + extern const uint8_t g_bc6h_weight3[8]; + extern const uint8_t g_bc6h_weight4[16]; + + extern const int8_t g_bc6h_mode_lookup[32]; + + // Converts b16 to half float + inline half_float bc6h_blog16_to_half(uint32_t comp) + { + assert(comp <= 0xFFFF); + + // scale the magnitude by 31/64 + comp = (comp * 31u) >> 6u; + return (half_float)comp; + } + + const uint32_t MAX_BC6H_HALF_FLOAT_AS_UINT = 0x7BFF; + + // Inverts bc6h_blog16_to_half(). + // Returns the nearest blog16 given a half value. + inline uint32_t bc6h_half_to_blog16(half_float h) + { + assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT); + return (h * 64 + 30) / 31; + } + + struct bc6h_block + { + uint8_t m_bytes[16]; + }; + + void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights); + void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights); + void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights); + void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights); + void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index] + void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index] + bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]); + } // namespace basist diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc index 8244550959..205758b3d7 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc @@ -1,4 +1,4 @@ -// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2017-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc index fad45fe22d..f2d324fcc3 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc @@ -1,4 +1,4 @@ -// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2017-2024 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h b/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h index f91314f4ff..457bd51e30 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h @@ -13,6 +13,7 @@ namespace basist const uint32_t UASTC_MODE_INDEX_SOLID_COLOR = 8; const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS2 = 30; + const uint32_t TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 = 27; // BC6H only supports only 5-bit pattern indices, BC7 supports 4-bit or 6-bit const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS3 = 11; const uint32_t TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS = 19; |