diff options
Diffstat (limited to 'core')
-rw-r--r-- | core/error/error_macros.cpp | 51 | ||||
-rw-r--r-- | core/error/error_macros.h | 13 | ||||
-rw-r--r-- | core/extension/gdextension.cpp | 11 | ||||
-rw-r--r-- | core/io/file_access_pack.cpp | 4 | ||||
-rw-r--r-- | core/io/image.cpp | 106 | ||||
-rw-r--r-- | core/io/resource_loader.cpp | 10 | ||||
-rw-r--r-- | core/math/math_funcs.h | 12 | ||||
-rw-r--r-- | core/math/transform_interpolator.cpp | 338 | ||||
-rw-r--r-- | core/math/transform_interpolator.h | 51 | ||||
-rw-r--r-- | core/os/main_loop.h | 1 | ||||
-rw-r--r-- | core/string/ustring.cpp | 83 | ||||
-rw-r--r-- | core/templates/cowdata.h | 13 |
12 files changed, 651 insertions, 42 deletions
diff --git a/core/error/error_macros.cpp b/core/error/error_macros.cpp index 8376c0aaf8..813ee7684f 100644 --- a/core/error/error_macros.cpp +++ b/core/error/error_macros.cpp @@ -34,6 +34,12 @@ #include "core/os/os.h" #include "core/string/ustring.h" +// Optional physics interpolation warnings try to include the path to the relevant node. +#if defined(DEBUG_ENABLED) && defined(TOOLS_ENABLED) +#include "core/config/project_settings.h" +#include "scene/main/node.h" +#endif + static ErrorHandlerList *error_handler_list = nullptr; void add_error_handler(ErrorHandlerList *p_handler) { @@ -128,3 +134,48 @@ void _err_print_index_error(const char *p_function, const char *p_file, int p_li void _err_flush_stdout() { fflush(stdout); } + +// Prevent error spam by limiting the warnings to a certain frequency. +void _physics_interpolation_warning(const char *p_function, const char *p_file, int p_line, ObjectID p_id, const char *p_warn_string) { +#if defined(DEBUG_ENABLED) && defined(TOOLS_ENABLED) + const uint32_t warn_max = 2048; + const uint32_t warn_timeout_seconds = 15; + + static uint32_t warn_count = warn_max; + static uint32_t warn_timeout = warn_timeout_seconds; + + uint32_t time_now = UINT32_MAX; + + if (warn_count) { + warn_count--; + } + + if (!warn_count) { + time_now = OS::get_singleton()->get_ticks_msec() / 1000; + } + + if ((warn_count == 0) && (time_now >= warn_timeout)) { + warn_count = warn_max; + warn_timeout = time_now + warn_timeout_seconds; + + if (GLOBAL_GET("debug/settings/physics_interpolation/enable_warnings")) { + // UINT64_MAX means unused. + if (p_id.operator uint64_t() == UINT64_MAX) { + _err_print_error(p_function, p_file, p_line, "[Physics interpolation] " + String(p_warn_string) + " (possibly benign).", false, ERR_HANDLER_WARNING); + } else { + String node_name; + if (p_id.is_valid()) { + Node *node = Object::cast_to<Node>(ObjectDB::get_instance(p_id)); + if (node && node->is_inside_tree()) { + node_name = "\"" + String(node->get_path()) + "\""; + } else { + node_name = "\"unknown\""; + } + } + + _err_print_error(p_function, p_file, p_line, "[Physics interpolation] " + String(p_warn_string) + ": " + node_name + " (possibly benign).", false, ERR_HANDLER_WARNING); + } + } + } +#endif +} diff --git a/core/error/error_macros.h b/core/error/error_macros.h index ab7dbcbd44..d31adb72be 100644 --- a/core/error/error_macros.h +++ b/core/error/error_macros.h @@ -31,6 +31,7 @@ #ifndef ERROR_MACROS_H #define ERROR_MACROS_H +#include "core/object/object_id.h" #include "core/typedefs.h" #include <atomic> // We'd normally use safe_refcount.h, but that would cause circular includes. @@ -71,6 +72,8 @@ void _err_print_index_error(const char *p_function, const char *p_file, int p_li void _err_print_index_error(const char *p_function, const char *p_file, int p_line, int64_t p_index, int64_t p_size, const char *p_index_str, const char *p_size_str, const String &p_message, bool p_editor_notify = false, bool fatal = false); void _err_flush_stdout(); +void _physics_interpolation_warning(const char *p_function, const char *p_file, int p_line, ObjectID p_id, const char *p_warn_string); + #ifdef __GNUC__ //#define FUNCTION_STR __PRETTY_FUNCTION__ - too annoying #define FUNCTION_STR __FUNCTION__ @@ -832,4 +835,14 @@ void _err_flush_stdout(); #define DEV_CHECK_ONCE(m_cond) #endif +/** + * Physics Interpolation warnings. + * These are spam protection warnings. + */ +#define PHYSICS_INTERPOLATION_NODE_WARNING(m_object_id, m_string) \ + _physics_interpolation_warning(FUNCTION_STR, __FILE__, __LINE__, m_object_id, m_string) + +#define PHYSICS_INTERPOLATION_WARNING(m_string) \ + _physics_interpolation_warning(FUNCTION_STR, __FILE__, __LINE__, UINT64_MAX, m_string) + #endif // ERROR_MACROS_H diff --git a/core/extension/gdextension.cpp b/core/extension/gdextension.cpp index 8e2366fc95..cb6832ea39 100644 --- a/core/extension/gdextension.cpp +++ b/core/extension/gdextension.cpp @@ -781,23 +781,14 @@ Error GDExtension::open_library(const String &p_path, const String &p_entry_symb } } - String actual_lib_path; OS::GDExtensionData data = { true, // also_set_library_path - &actual_lib_path, // r_resolved_path + &library_path, // r_resolved_path Engine::get_singleton()->is_editor_hint(), // generate_temp_files &abs_dependencies_paths, // library_dependencies }; Error err = OS::get_singleton()->open_dynamic_library(abs_path, library, &data); - if (actual_lib_path.get_file() != abs_path.get_file()) { - // If temporary files are generated, let's change the library path to point at the original, - // because that's what we want to check to see if it's changed. - library_path = actual_lib_path.get_base_dir().path_join(p_path.get_file()); - } else { - library_path = actual_lib_path; - } - ERR_FAIL_COND_V_MSG(err == ERR_FILE_NOT_FOUND, err, "GDExtension dynamic library not found: " + abs_path); ERR_FAIL_COND_V_MSG(err != OK, err, "Can't open GDExtension dynamic library: " + abs_path); diff --git a/core/io/file_access_pack.cpp b/core/io/file_access_pack.cpp index 991b94db38..02bf0a6039 100644 --- a/core/io/file_access_pack.cpp +++ b/core/io/file_access_pack.cpp @@ -119,6 +119,10 @@ void PackedData::_free_packed_dirs(PackedDir *p_dir) { } PackedData::~PackedData() { + if (singleton == this) { + singleton = nullptr; + } + for (int i = 0; i < sources.size(); i++) { memdelete(sources[i]); } diff --git a/core/io/image.cpp b/core/io/image.cpp index d0598e4dc6..b35d405662 100644 --- a/core/io/image.cpp +++ b/core/io/image.cpp @@ -501,6 +501,38 @@ static void _convert(int p_width, int p_height, const uint8_t *p_src, uint8_t *p } } +template <typename T, uint32_t read_channels, uint32_t write_channels, T def_zero, T def_one> +static void _convert_fast(int p_width, int p_height, const T *p_src, T *p_dst) { + uint32_t dst_count = 0; + uint32_t src_count = 0; + + const int resolution = p_width * p_height; + + for (int i = 0; i < resolution; i++) { + memcpy(p_dst + dst_count, p_src + src_count, MIN(read_channels, write_channels) * sizeof(T)); + + if constexpr (write_channels > read_channels) { + const T def_value[4] = { def_zero, def_zero, def_zero, def_one }; + memcpy(p_dst + dst_count + read_channels, &def_value[read_channels], (write_channels - read_channels) * sizeof(T)); + } + + dst_count += write_channels; + src_count += read_channels; + } +} + +static bool _are_formats_compatible(Image::Format p_format0, Image::Format p_format1) { + if (p_format0 <= Image::FORMAT_RGBA8 && p_format1 <= Image::FORMAT_RGBA8) { + return true; + } else if (p_format0 <= Image::FORMAT_RGBAH && p_format0 >= Image::FORMAT_RH && p_format1 <= Image::FORMAT_RGBAH && p_format1 >= Image::FORMAT_RH) { + return true; + } else if (p_format0 <= Image::FORMAT_RGBAF && p_format0 >= Image::FORMAT_RF && p_format1 <= Image::FORMAT_RGBAF && p_format1 >= Image::FORMAT_RF) { + return true; + } + + return false; +} + void Image::convert(Format p_new_format) { ERR_FAIL_INDEX_MSG(p_new_format, FORMAT_MAX, "The Image format specified (" + itos(p_new_format) + ") is out of range. See Image's Format enum."); if (data.size() == 0) { @@ -517,7 +549,7 @@ void Image::convert(Format p_new_format) { if (Image::is_format_compressed(format) || Image::is_format_compressed(p_new_format)) { ERR_FAIL_MSG("Cannot convert to <-> from compressed formats. Use compress() and decompress() instead."); - } else if (format > FORMAT_RGBA8 || p_new_format > FORMAT_RGBA8) { + } else if (!_are_formats_compatible(format, p_new_format)) { //use put/set pixel which is slower but works with non byte formats Image new_img(width, height, mipmaps, p_new_format); @@ -648,6 +680,78 @@ void Image::convert(Format p_new_format) { case FORMAT_RGBA8 | (FORMAT_RGB8 << 8): _convert<3, true, 3, false, false, false>(mip_width, mip_height, rptr, wptr); break; + case FORMAT_RH | (FORMAT_RGH << 8): + _convert_fast<uint16_t, 1, 2, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr); + break; + case FORMAT_RH | (FORMAT_RGBH << 8): + _convert_fast<uint16_t, 1, 3, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr); + break; + case FORMAT_RH | (FORMAT_RGBAH << 8): + _convert_fast<uint16_t, 1, 4, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr); + break; + case FORMAT_RGH | (FORMAT_RH << 8): + _convert_fast<uint16_t, 2, 1, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr); + break; + case FORMAT_RGH | (FORMAT_RGBH << 8): + _convert_fast<uint16_t, 2, 3, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr); + break; + case FORMAT_RGH | (FORMAT_RGBAH << 8): + _convert_fast<uint16_t, 2, 4, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr); + break; + case FORMAT_RGBH | (FORMAT_RH << 8): + _convert_fast<uint16_t, 3, 1, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr); + break; + case FORMAT_RGBH | (FORMAT_RGH << 8): + _convert_fast<uint16_t, 3, 2, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr); + break; + case FORMAT_RGBH | (FORMAT_RGBAH << 8): + _convert_fast<uint16_t, 3, 4, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr); + break; + case FORMAT_RGBAH | (FORMAT_RH << 8): + _convert_fast<uint16_t, 4, 1, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr); + break; + case FORMAT_RGBAH | (FORMAT_RGH << 8): + _convert_fast<uint16_t, 4, 2, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr); + break; + case FORMAT_RGBAH | (FORMAT_RGBH << 8): + _convert_fast<uint16_t, 4, 3, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr); + break; + case FORMAT_RF | (FORMAT_RGF << 8): + _convert_fast<uint32_t, 1, 2, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr); + break; + case FORMAT_RF | (FORMAT_RGBF << 8): + _convert_fast<uint32_t, 1, 3, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr); + break; + case FORMAT_RF | (FORMAT_RGBAF << 8): + _convert_fast<uint32_t, 1, 4, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr); + break; + case FORMAT_RGF | (FORMAT_RF << 8): + _convert_fast<uint32_t, 2, 1, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr); + break; + case FORMAT_RGF | (FORMAT_RGBF << 8): + _convert_fast<uint32_t, 2, 3, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr); + break; + case FORMAT_RGF | (FORMAT_RGBAF << 8): + _convert_fast<uint32_t, 2, 4, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr); + break; + case FORMAT_RGBF | (FORMAT_RF << 8): + _convert_fast<uint32_t, 3, 1, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr); + break; + case FORMAT_RGBF | (FORMAT_RGF << 8): + _convert_fast<uint32_t, 3, 2, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr); + break; + case FORMAT_RGBF | (FORMAT_RGBAF << 8): + _convert_fast<uint32_t, 3, 4, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr); + break; + case FORMAT_RGBAF | (FORMAT_RF << 8): + _convert_fast<uint32_t, 4, 1, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr); + break; + case FORMAT_RGBAF | (FORMAT_RGF << 8): + _convert_fast<uint32_t, 4, 2, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr); + break; + case FORMAT_RGBAF | (FORMAT_RGBF << 8): + _convert_fast<uint32_t, 4, 3, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr); + break; } } diff --git a/core/io/resource_loader.cpp b/core/io/resource_loader.cpp index c5582ad231..928bb95de3 100644 --- a/core/io/resource_loader.cpp +++ b/core/io/resource_loader.cpp @@ -474,6 +474,7 @@ Ref<ResourceLoader::LoadToken> ResourceLoader::_load_start(const String &p_path, bool ignoring_cache = p_cache_mode == ResourceFormatLoader::CACHE_MODE_IGNORE || p_cache_mode == ResourceFormatLoader::CACHE_MODE_IGNORE_DEEP; Ref<LoadToken> load_token; + bool must_not_register = false; ThreadLoadTask unregistered_load_task; // Once set, must be valid up to the call to do the load. ThreadLoadTask *load_task_ptr = nullptr; bool run_on_current_thread = false; @@ -516,8 +517,9 @@ Ref<ResourceLoader::LoadToken> ResourceLoader::_load_start(const String &p_path, } } - // Cache-ignoring tasks aren't registered in the map and so must finish within scope. - if (ignoring_cache) { + // If we want to ignore cache, but there's another task loading it, we can't add this one to the map and we also have to finish within scope. + must_not_register = ignoring_cache && thread_load_tasks.has(local_path); + if (must_not_register) { load_token->local_path.clear(); unregistered_load_task = load_task; load_task_ptr = &unregistered_load_task; @@ -528,7 +530,7 @@ Ref<ResourceLoader::LoadToken> ResourceLoader::_load_start(const String &p_path, } } - run_on_current_thread = ignoring_cache || p_thread_mode == LOAD_THREAD_FROM_CURRENT; + run_on_current_thread = must_not_register || p_thread_mode == LOAD_THREAD_FROM_CURRENT; if (run_on_current_thread) { load_task_ptr->thread_id = Thread::get_caller_id(); @@ -539,7 +541,7 @@ Ref<ResourceLoader::LoadToken> ResourceLoader::_load_start(const String &p_path, if (run_on_current_thread) { _thread_load_function(load_task_ptr); - if (ignoring_cache) { + if (must_not_register) { load_token->res_if_unregistered = load_task_ptr->resource; } } diff --git a/core/math/math_funcs.h b/core/math/math_funcs.h index 3060f31970..fd53ed28fd 100644 --- a/core/math/math_funcs.h +++ b/core/math/math_funcs.h @@ -447,14 +447,22 @@ public: static _ALWAYS_INLINE_ double smoothstep(double p_from, double p_to, double p_s) { if (is_equal_approx(p_from, p_to)) { - return p_from; + if (likely(p_from <= p_to)) { + return p_s <= p_from ? 0.0 : 1.0; + } else { + return p_s <= p_to ? 1.0 : 0.0; + } } double s = CLAMP((p_s - p_from) / (p_to - p_from), 0.0, 1.0); return s * s * (3.0 - 2.0 * s); } static _ALWAYS_INLINE_ float smoothstep(float p_from, float p_to, float p_s) { if (is_equal_approx(p_from, p_to)) { - return p_from; + if (likely(p_from <= p_to)) { + return p_s <= p_from ? 0.0f : 1.0f; + } else { + return p_s <= p_to ? 1.0f : 0.0f; + } } float s = CLAMP((p_s - p_from) / (p_to - p_from), 0.0f, 1.0f); return s * s * (3.0f - 2.0f * s); diff --git a/core/math/transform_interpolator.cpp b/core/math/transform_interpolator.cpp index 6a564b0ca7..1cd35b3d1a 100644 --- a/core/math/transform_interpolator.cpp +++ b/core/math/transform_interpolator.cpp @@ -31,6 +31,7 @@ #include "transform_interpolator.h" #include "core/math/transform_2d.h" +#include "core/math/transform_3d.h" void TransformInterpolator::interpolate_transform_2d(const Transform2D &p_prev, const Transform2D &p_curr, Transform2D &r_result, real_t p_fraction) { // Special case for physics interpolation, if flipping, don't interpolate basis. @@ -44,3 +45,340 @@ void TransformInterpolator::interpolate_transform_2d(const Transform2D &p_prev, r_result = p_prev.interpolate_with(p_curr, p_fraction); } + +void TransformInterpolator::interpolate_transform_3d(const Transform3D &p_prev, const Transform3D &p_curr, Transform3D &r_result, real_t p_fraction) { + r_result.origin = p_prev.origin + ((p_curr.origin - p_prev.origin) * p_fraction); + interpolate_basis(p_prev.basis, p_curr.basis, r_result.basis, p_fraction); +} + +void TransformInterpolator::interpolate_basis(const Basis &p_prev, const Basis &p_curr, Basis &r_result, real_t p_fraction) { + Method method = find_method(p_prev, p_curr); + interpolate_basis_via_method(p_prev, p_curr, r_result, p_fraction, method); +} + +void TransformInterpolator::interpolate_transform_3d_via_method(const Transform3D &p_prev, const Transform3D &p_curr, Transform3D &r_result, real_t p_fraction, Method p_method) { + r_result.origin = p_prev.origin + ((p_curr.origin - p_prev.origin) * p_fraction); + interpolate_basis_via_method(p_prev.basis, p_curr.basis, r_result.basis, p_fraction, p_method); +} + +void TransformInterpolator::interpolate_basis_via_method(const Basis &p_prev, const Basis &p_curr, Basis &r_result, real_t p_fraction, Method p_method) { + switch (p_method) { + default: { + interpolate_basis_linear(p_prev, p_curr, r_result, p_fraction); + } break; + case INTERP_SLERP: { + r_result = _basis_slerp_unchecked(p_prev, p_curr, p_fraction); + } break; + case INTERP_SCALED_SLERP: { + interpolate_basis_scaled_slerp(p_prev, p_curr, r_result, p_fraction); + } break; + } +} + +Quaternion TransformInterpolator::_basis_to_quat_unchecked(const Basis &p_basis) { + Basis m = p_basis; + real_t trace = m.rows[0][0] + m.rows[1][1] + m.rows[2][2]; + real_t temp[4]; + + if (trace > 0.0) { + real_t s = Math::sqrt(trace + 1.0f); + temp[3] = (s * 0.5f); + s = 0.5f / s; + + temp[0] = ((m.rows[2][1] - m.rows[1][2]) * s); + temp[1] = ((m.rows[0][2] - m.rows[2][0]) * s); + temp[2] = ((m.rows[1][0] - m.rows[0][1]) * s); + } else { + int i = m.rows[0][0] < m.rows[1][1] + ? (m.rows[1][1] < m.rows[2][2] ? 2 : 1) + : (m.rows[0][0] < m.rows[2][2] ? 2 : 0); + int j = (i + 1) % 3; + int k = (i + 2) % 3; + + real_t s = Math::sqrt(m.rows[i][i] - m.rows[j][j] - m.rows[k][k] + 1.0f); + temp[i] = s * 0.5f; + s = 0.5f / s; + + temp[3] = (m.rows[k][j] - m.rows[j][k]) * s; + temp[j] = (m.rows[j][i] + m.rows[i][j]) * s; + temp[k] = (m.rows[k][i] + m.rows[i][k]) * s; + } + + return Quaternion(temp[0], temp[1], temp[2], temp[3]); +} + +Quaternion TransformInterpolator::_quat_slerp_unchecked(const Quaternion &p_from, const Quaternion &p_to, real_t p_fraction) { + Quaternion to1; + real_t omega, cosom, sinom, scale0, scale1; + + // Calculate cosine. + cosom = p_from.dot(p_to); + + // Adjust signs (if necessary) + if (cosom < 0.0f) { + cosom = -cosom; + to1.x = -p_to.x; + to1.y = -p_to.y; + to1.z = -p_to.z; + to1.w = -p_to.w; + } else { + to1.x = p_to.x; + to1.y = p_to.y; + to1.z = p_to.z; + to1.w = p_to.w; + } + + // Calculate coefficients. + + // This check could possibly be removed as we dealt with this + // case in the find_method() function, but is left for safety, it probably + // isn't a bottleneck. + if ((1.0f - cosom) > (real_t)CMP_EPSILON) { + // standard case (slerp) + omega = Math::acos(cosom); + sinom = Math::sin(omega); + scale0 = Math::sin((1.0f - p_fraction) * omega) / sinom; + scale1 = Math::sin(p_fraction * omega) / sinom; + } else { + // "from" and "to" quaternions are very close + // ... so we can do a linear interpolation + scale0 = 1.0f - p_fraction; + scale1 = p_fraction; + } + // Calculate final values. + return Quaternion( + scale0 * p_from.x + scale1 * to1.x, + scale0 * p_from.y + scale1 * to1.y, + scale0 * p_from.z + scale1 * to1.z, + scale0 * p_from.w + scale1 * to1.w); +} + +Basis TransformInterpolator::_basis_slerp_unchecked(Basis p_from, Basis p_to, real_t p_fraction) { + Quaternion from = _basis_to_quat_unchecked(p_from); + Quaternion to = _basis_to_quat_unchecked(p_to); + + Basis b(_quat_slerp_unchecked(from, to, p_fraction)); + return b; +} + +void TransformInterpolator::interpolate_basis_scaled_slerp(Basis p_prev, Basis p_curr, Basis &r_result, real_t p_fraction) { + // Normalize both and find lengths. + Vector3 lengths_prev = _basis_orthonormalize(p_prev); + Vector3 lengths_curr = _basis_orthonormalize(p_curr); + + r_result = _basis_slerp_unchecked(p_prev, p_curr, p_fraction); + + // Now the result is unit length basis, we need to scale. + Vector3 lengths_lerped = lengths_prev + ((lengths_curr - lengths_prev) * p_fraction); + + // Keep a note that the column / row order of the basis is weird, + // so keep an eye for bugs with this. + r_result[0] *= lengths_lerped; + r_result[1] *= lengths_lerped; + r_result[2] *= lengths_lerped; +} + +void TransformInterpolator::interpolate_basis_linear(const Basis &p_prev, const Basis &p_curr, Basis &r_result, real_t p_fraction) { + // Interpolate basis. + r_result = p_prev.lerp(p_curr, p_fraction); + + // It turns out we need to guard against zero scale basis. + // This is kind of silly, as we should probably fix the bugs elsewhere in Godot that can't deal with + // zero scale, but until that time... + for (int n = 0; n < 3; n++) { + Vector3 &axis = r_result[n]; + + // Not ok, this could cause errors due to bugs elsewhere, + // so we will bodge set this to a small value. + const real_t smallest = 0.0001f; + const real_t smallest_squared = smallest * smallest; + if (axis.length_squared() < smallest_squared) { + // Setting a different component to the smallest + // helps prevent the situation where all the axes are pointing in the same direction, + // which could be a problem for e.g. cross products... + axis[n] = smallest; + } + } +} + +// Returns length. +real_t TransformInterpolator::_vec3_normalize(Vector3 &p_vec) { + real_t lengthsq = p_vec.length_squared(); + if (lengthsq == 0.0f) { + p_vec.x = p_vec.y = p_vec.z = 0.0f; + return 0.0f; + } + real_t length = Math::sqrt(lengthsq); + p_vec.x /= length; + p_vec.y /= length; + p_vec.z /= length; + return length; +} + +// Returns lengths. +Vector3 TransformInterpolator::_basis_orthonormalize(Basis &r_basis) { + // Gram-Schmidt Process. + + Vector3 x = r_basis.get_column(0); + Vector3 y = r_basis.get_column(1); + Vector3 z = r_basis.get_column(2); + + Vector3 lengths; + + lengths.x = _vec3_normalize(x); + y = (y - x * (x.dot(y))); + lengths.y = _vec3_normalize(y); + z = (z - x * (x.dot(z)) - y * (y.dot(z))); + lengths.z = _vec3_normalize(z); + + r_basis.set_column(0, x); + r_basis.set_column(1, y); + r_basis.set_column(2, z); + + return lengths; +} + +TransformInterpolator::Method TransformInterpolator::_test_basis(Basis p_basis, bool r_needed_normalize, Quaternion &r_quat) { + // Axis lengths. + Vector3 al = Vector3(p_basis.get_column(0).length_squared(), + p_basis.get_column(1).length_squared(), + p_basis.get_column(2).length_squared()); + + // Non unit scale? + if (r_needed_normalize || !_vec3_is_equal_approx(al, Vector3(1.0, 1.0, 1.0), (real_t)0.001f)) { + // If the basis is not normalized (at least approximately), it will fail the checks needed for slerp. + // So we try to detect a scaled (but not sheared) basis, which we *can* slerp by normalizing first, + // and lerping the scales separately. + + // If any of the axes are really small, it is unlikely to be a valid rotation, or is scaled too small to deal with float error. + const real_t sl_epsilon = 0.00001f; + if ((al.x < sl_epsilon) || + (al.y < sl_epsilon) || + (al.z < sl_epsilon)) { + return INTERP_LERP; + } + + // Normalize the basis. + Basis norm_basis = p_basis; + + al.x = Math::sqrt(al.x); + al.y = Math::sqrt(al.y); + al.z = Math::sqrt(al.z); + + norm_basis.set_column(0, norm_basis.get_column(0) / al.x); + norm_basis.set_column(1, norm_basis.get_column(1) / al.y); + norm_basis.set_column(2, norm_basis.get_column(2) / al.z); + + // This doesn't appear necessary, as the later checks will catch it. + // if (!_basis_is_orthogonal_any_scale(norm_basis)) { + // return INTERP_LERP; + // } + + p_basis = norm_basis; + + // Orthonormalize not necessary as normal normalization(!) works if the + // axes are orthonormal. + // p_basis.orthonormalize(); + + // If we needed to normalize one of the two bases, we will need to normalize both, + // regardless of whether the 2nd needs it, just to make sure it takes the path to return + // INTERP_SCALED_LERP on the 2nd call of _test_basis. + r_needed_normalize = true; + } + + // Apply less stringent tests than the built in slerp, the standard Godot slerp + // is too susceptible to float error to be useful. + real_t det = p_basis.determinant(); + if (!Math::is_equal_approx(det, 1, (real_t)0.01f)) { + return INTERP_LERP; + } + + if (!_basis_is_orthogonal(p_basis)) { + return INTERP_LERP; + } + + // TODO: This could possibly be less stringent too, check this. + r_quat = _basis_to_quat_unchecked(p_basis); + if (!r_quat.is_normalized()) { + return INTERP_LERP; + } + + return r_needed_normalize ? INTERP_SCALED_SLERP : INTERP_SLERP; +} + +// This check doesn't seem to be needed but is preserved in case of bugs. +bool TransformInterpolator::_basis_is_orthogonal_any_scale(const Basis &p_basis) { + Vector3 cross = p_basis.get_column(0).cross(p_basis.get_column(1)); + real_t l = _vec3_normalize(cross); + // Too small numbers, revert to lerp. + if (l < 0.001f) { + return false; + } + + const real_t epsilon = 0.9995f; + + real_t dot = cross.dot(p_basis.get_column(2)); + if (dot < epsilon) { + return false; + } + + cross = p_basis.get_column(1).cross(p_basis.get_column(2)); + l = _vec3_normalize(cross); + // Too small numbers, revert to lerp. + if (l < 0.001f) { + return false; + } + + dot = cross.dot(p_basis.get_column(0)); + if (dot < epsilon) { + return false; + } + + return true; +} + +bool TransformInterpolator::_basis_is_orthogonal(const Basis &p_basis, real_t p_epsilon) { + Basis identity; + Basis m = p_basis * p_basis.transposed(); + + // Less stringent tests than the standard Godot slerp. + if (!_vec3_is_equal_approx(m[0], identity[0], p_epsilon) || !_vec3_is_equal_approx(m[1], identity[1], p_epsilon) || !_vec3_is_equal_approx(m[2], identity[2], p_epsilon)) { + return false; + } + return true; +} + +real_t TransformInterpolator::checksum_transform_3d(const Transform3D &p_transform) { + // just a really basic checksum, this can probably be improved + real_t sum = _vec3_sum(p_transform.origin); + sum -= _vec3_sum(p_transform.basis.rows[0]); + sum += _vec3_sum(p_transform.basis.rows[1]); + sum -= _vec3_sum(p_transform.basis.rows[2]); + return sum; +} + +TransformInterpolator::Method TransformInterpolator::find_method(const Basis &p_a, const Basis &p_b) { + bool needed_normalize = false; + + Quaternion q0; + Method method = _test_basis(p_a, needed_normalize, q0); + if (method == INTERP_LERP) { + return method; + } + + Quaternion q1; + method = _test_basis(p_b, needed_normalize, q1); + if (method == INTERP_LERP) { + return method; + } + + // Are they close together? + // Apply the same test that will revert to lerp as is present in the slerp routine. + // Calculate cosine. + real_t cosom = Math::abs(q0.dot(q1)); + if ((1.0f - cosom) <= (real_t)CMP_EPSILON) { + return INTERP_LERP; + } + + return method; +} diff --git a/core/math/transform_interpolator.h b/core/math/transform_interpolator.h index a9bce2bd7f..cc556707e4 100644 --- a/core/math/transform_interpolator.h +++ b/core/math/transform_interpolator.h @@ -32,15 +32,64 @@ #define TRANSFORM_INTERPOLATOR_H #include "core/math/math_defs.h" +#include "core/math/vector3.h" + +// Keep all the functions for fixed timestep interpolation together. +// There are two stages involved: +// Finding a method, for determining the interpolation method between two +// keyframes (which are physics ticks). +// And applying that pre-determined method. + +// Pre-determining the method makes sense because it is expensive and often +// several frames may occur between each physics tick, which will make it cheaper +// than performing every frame. struct Transform2D; +struct Transform3D; +struct Basis; +struct Quaternion; class TransformInterpolator { +public: + enum Method { + INTERP_LERP, + INTERP_SLERP, + INTERP_SCALED_SLERP, + }; + private: - static bool _sign(real_t p_val) { return p_val >= 0; } + _FORCE_INLINE_ static bool _sign(real_t p_val) { return p_val >= 0; } + static real_t _vec3_sum(const Vector3 &p_pt) { return p_pt.x + p_pt.y + p_pt.z; } + static real_t _vec3_normalize(Vector3 &p_vec); + _FORCE_INLINE_ static bool _vec3_is_equal_approx(const Vector3 &p_a, const Vector3 &p_b, real_t p_tolerance) { + return Math::is_equal_approx(p_a.x, p_b.x, p_tolerance) && Math::is_equal_approx(p_a.y, p_b.y, p_tolerance) && Math::is_equal_approx(p_a.z, p_b.z, p_tolerance); + } + static Vector3 _basis_orthonormalize(Basis &r_basis); + static Method _test_basis(Basis p_basis, bool r_needed_normalize, Quaternion &r_quat); + static Basis _basis_slerp_unchecked(Basis p_from, Basis p_to, real_t p_fraction); + static Quaternion _quat_slerp_unchecked(const Quaternion &p_from, const Quaternion &p_to, real_t p_fraction); + static Quaternion _basis_to_quat_unchecked(const Basis &p_basis); + static bool _basis_is_orthogonal(const Basis &p_basis, real_t p_epsilon = 0.01f); + static bool _basis_is_orthogonal_any_scale(const Basis &p_basis); + + static void interpolate_basis_linear(const Basis &p_prev, const Basis &p_curr, Basis &r_result, real_t p_fraction); + static void interpolate_basis_scaled_slerp(Basis p_prev, Basis p_curr, Basis &r_result, real_t p_fraction); public: static void interpolate_transform_2d(const Transform2D &p_prev, const Transform2D &p_curr, Transform2D &r_result, real_t p_fraction); + + // Generic functions, use when you don't know what method should be used, e.g. from GDScript. + // These will be slower. + static void interpolate_transform_3d(const Transform3D &p_prev, const Transform3D &p_curr, Transform3D &r_result, real_t p_fraction); + static void interpolate_basis(const Basis &p_prev, const Basis &p_curr, Basis &r_result, real_t p_fraction); + + // Optimized function when you know ahead of time the method. + static void interpolate_transform_3d_via_method(const Transform3D &p_prev, const Transform3D &p_curr, Transform3D &r_result, real_t p_fraction, Method p_method); + static void interpolate_basis_via_method(const Basis &p_prev, const Basis &p_curr, Basis &r_result, real_t p_fraction, Method p_method); + + static real_t checksum_transform_3d(const Transform3D &p_transform); + + static Method find_method(const Basis &p_a, const Basis &p_b); }; #endif // TRANSFORM_INTERPOLATOR_H diff --git a/core/os/main_loop.h b/core/os/main_loop.h index e48541d074..9c22cbaf3c 100644 --- a/core/os/main_loop.h +++ b/core/os/main_loop.h @@ -64,6 +64,7 @@ public: virtual void initialize(); virtual void iteration_prepare() {} virtual bool physics_process(double p_time); + virtual void iteration_end() {} virtual bool process(double p_time); virtual void finalize(); diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 3d37e17ef8..cf19a1d48f 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -1639,13 +1639,43 @@ Vector<int> String::split_ints_mk(const Vector<String> &p_splitters, bool p_allo } String String::join(const Vector<String> &parts) const { + if (parts.is_empty()) { + return String(); + } else if (parts.size() == 1) { + return parts[0]; + } + + const int this_length = length(); + + int new_size = (parts.size() - 1) * this_length; + for (const String &part : parts) { + new_size += part.length(); + } + new_size += 1; + String ret; - for (int i = 0; i < parts.size(); ++i) { - if (i > 0) { - ret += *this; + ret.resize(new_size); + char32_t *ret_ptrw = ret.ptrw(); + const char32_t *this_ptr = ptr(); + + bool first = true; + for (const String &part : parts) { + if (first) { + first = false; + } else if (this_length) { + memcpy(ret_ptrw, this_ptr, this_length * sizeof(char32_t)); + ret_ptrw += this_length; + } + + const int part_length = part.length(); + if (part_length) { + memcpy(ret_ptrw, part.ptr(), part_length * sizeof(char32_t)); + ret_ptrw += part_length; } - ret += parts[i]; } + + *ret_ptrw = 0; + return ret; } @@ -3149,7 +3179,7 @@ Vector<uint8_t> String::sha256_buffer() const { } String String::insert(int p_at_pos, const String &p_string) const { - if (p_at_pos < 0) { + if (p_string.is_empty() || p_at_pos < 0) { return *this; } @@ -3157,17 +3187,27 @@ String String::insert(int p_at_pos, const String &p_string) const { p_at_pos = length(); } - String pre; + String ret; + ret.resize(length() + p_string.length() + 1); + char32_t *ret_ptrw = ret.ptrw(); + const char32_t *this_ptr = ptr(); + if (p_at_pos > 0) { - pre = substr(0, p_at_pos); + memcpy(ret_ptrw, this_ptr, p_at_pos * sizeof(char32_t)); + ret_ptrw += p_at_pos; } - String post; + memcpy(ret_ptrw, p_string.ptr(), p_string.length() * sizeof(char32_t)); + ret_ptrw += p_string.length(); + if (p_at_pos < length()) { - post = substr(p_at_pos, length() - p_at_pos); + memcpy(ret_ptrw, this_ptr + p_at_pos, (length() - p_at_pos) * sizeof(char32_t)); + ret_ptrw += length() - p_at_pos; } - return pre + p_string + post; + *ret_ptrw = 0; + + return ret; } String String::erase(int p_pos, int p_chars) const { @@ -5321,6 +5361,11 @@ String String::lpad(int min_length, const String &character) const { // "fish %s %d pie" % ["frog", 12] // In case of an error, the string returned is the error description and "error" is true. String String::sprintf(const Array &values, bool *error) const { + static const String ZERO("0"); + static const String SPACE(" "); + static const String MINUS("-"); + static const String PLUS("+"); + String formatted; char32_t *self = (char32_t *)get_data(); bool in_format = false; @@ -5343,7 +5388,7 @@ String String::sprintf(const Array &values, bool *error) const { if (in_format) { // We have % - let's see what else we get. switch (c) { case '%': { // Replace %% with % - formatted += chr(c); + formatted += c; in_format = false; break; } @@ -5393,7 +5438,7 @@ String String::sprintf(const Array &values, bool *error) const { // Padding. int pad_chars_count = (negative || show_sign) ? min_chars - 1 : min_chars; - String pad_char = pad_with_zeros ? String("0") : String(" "); + const String &pad_char = pad_with_zeros ? ZERO : SPACE; if (left_justified) { str = str.rpad(pad_chars_count, pad_char); } else { @@ -5402,7 +5447,7 @@ String String::sprintf(const Array &values, bool *error) const { // Sign. if (show_sign || negative) { - String sign_char = negative ? "-" : "+"; + const String &sign_char = negative ? MINUS : PLUS; if (left_justified) { str = str.insert(0, sign_char); } else { @@ -5439,7 +5484,7 @@ String String::sprintf(const Array &values, bool *error) const { // Padding. Leave room for sign later if required. int pad_chars_count = (is_negative || show_sign) ? min_chars - 1 : min_chars; - String pad_char = (pad_with_zeros && is_finite) ? String("0") : String(" "); // Never pad NaN or inf with zeros + const String &pad_char = (pad_with_zeros && is_finite) ? ZERO : SPACE; // Never pad NaN or inf with zeros if (left_justified) { str = str.rpad(pad_chars_count, pad_char); } else { @@ -5448,7 +5493,7 @@ String String::sprintf(const Array &values, bool *error) const { // Add sign if needed. if (show_sign || is_negative) { - String sign_char = is_negative ? "-" : "+"; + const String &sign_char = is_negative ? MINUS : PLUS; if (left_justified) { str = str.insert(0, sign_char); } else { @@ -5501,7 +5546,7 @@ String String::sprintf(const Array &values, bool *error) const { // Padding. Leave room for sign later if required. int pad_chars_count = val < 0 ? min_chars - 1 : min_chars; - String pad_char = (pad_with_zeros && is_finite) ? String("0") : String(" "); // Never pad NaN or inf with zeros + const String &pad_char = (pad_with_zeros && is_finite) ? ZERO : SPACE; // Never pad NaN or inf with zeros if (left_justified) { number_str = number_str.rpad(pad_chars_count, pad_char); } else { @@ -5511,9 +5556,9 @@ String String::sprintf(const Array &values, bool *error) const { // Add sign if needed. if (val < 0) { if (left_justified) { - number_str = number_str.insert(0, "-"); + number_str = number_str.insert(0, MINUS); } else { - number_str = number_str.insert(pad_with_zeros ? 0 : number_str.length() - initial_len, "-"); + number_str = number_str.insert(pad_with_zeros ? 0 : number_str.length() - initial_len, MINUS); } } @@ -5678,7 +5723,7 @@ String String::sprintf(const Array &values, bool *error) const { in_decimals = false; break; default: - formatted += chr(c); + formatted += c; } } } diff --git a/core/templates/cowdata.h b/core/templates/cowdata.h index f22ae1f1d3..6f818956ea 100644 --- a/core/templates/cowdata.h +++ b/core/templates/cowdata.h @@ -222,12 +222,15 @@ public: } Error insert(Size p_pos, const T &p_val) { - ERR_FAIL_INDEX_V(p_pos, size() + 1, ERR_INVALID_PARAMETER); - resize(size() + 1); - for (Size i = (size() - 1); i > p_pos; i--) { - set(i, get(i - 1)); + Size new_size = size() + 1; + ERR_FAIL_INDEX_V(p_pos, new_size, ERR_INVALID_PARAMETER); + Error err = resize(new_size); + ERR_FAIL_COND_V(err, err); + T *p = ptrw(); + for (Size i = new_size - 1; i > p_pos; i--) { + p[i] = p[i - 1]; } - set(p_pos, p_val); + p[p_pos] = p_val; return OK; } |