24 files changed, 489 insertions, 100 deletions
diff --git a/core/math/a_star_grid_2d.cpp b/core/math/a_star_grid_2d.cpp
index 82bf13cc1f..ad69748452 100644
--- a/core/math/a_star_grid_2d.cpp
+++ b/core/math/a_star_grid_2d.cpp
@@ -122,6 +122,10 @@ AStarGrid2D::CellShape AStarGrid2D::get_cell_shape() const {
 }
 
 void AStarGrid2D::update() {
+	if (!dirty) {
+		return;
+	}
+
 	points.clear();
 
 	const int32_t end_x = region.get_end().x;
diff --git a/core/math/aabb.h b/core/math/aabb.h
index 9a74266ff7..7a5581b5d4 100644
--- a/core/math/aabb.h
+++ b/core/math/aabb.h
@@ -41,7 +41,7 @@
 
 class Variant;
 
-struct _NO_DISCARD_ AABB {
+struct [[nodiscard]] AABB {
 	Vector3 position;
 	Vector3 size;
 
@@ -85,7 +85,7 @@ struct _NO_DISCARD_ AABB {
 	bool intersects_plane(const Plane &p_plane) const;
 
 	_FORCE_INLINE_ bool has_point(const Vector3 &p_point) const;
-	_FORCE_INLINE_ Vector3 get_support(const Vector3 &p_normal) const;
+	_FORCE_INLINE_ Vector3 get_support(const Vector3 &p_direction) const;
 
 	Vector3 get_longest_axis() const;
 	int get_longest_axis_index() const;
@@ -212,15 +212,18 @@ inline bool AABB::encloses(const AABB &p_aabb) const {
 			(src_max.z >= dst_max.z));
 }
 
-Vector3 AABB::get_support(const Vector3 &p_normal) const {
-	Vector3 half_extents = size * 0.5f;
-	Vector3 ofs = position + half_extents;
-
-	return Vector3(
-				   (p_normal.x > 0) ? half_extents.x : -half_extents.x,
-				   (p_normal.y > 0) ? half_extents.y : -half_extents.y,
-				   (p_normal.z > 0) ? half_extents.z : -half_extents.z) +
-			ofs;
+Vector3 AABB::get_support(const Vector3 &p_direction) const {
+	Vector3 support = position;
+	if (p_direction.x > 0.0f) {
+		support.x += size.x;
+	}
+	if (p_direction.y > 0.0f) {
+		support.y += size.y;
+	}
+	if (p_direction.z > 0.0f) {
+		support.z += size.z;
+	}
+	return support;
 }
 
 Vector3 AABB::get_endpoint(int p_point) const {
diff --git a/core/math/basis.h b/core/math/basis.h
index 918cbc18d4..5c1a5fbdda 100644
--- a/core/math/basis.h
+++ b/core/math/basis.h
@@ -34,7 +34,7 @@
 #include "core/math/quaternion.h"
 #include "core/math/vector3.h"
 
-struct _NO_DISCARD_ Basis {
+struct [[nodiscard]] Basis {
 	Vector3 rows[3] = {
 		Vector3(1, 0, 0),
 		Vector3(0, 1, 0),
diff --git a/core/math/color.cpp b/core/math/color.cpp
index d36306d968..1638acd74d 100644
--- a/core/math/color.cpp
+++ b/core/math/color.cpp
@@ -33,7 +33,7 @@
 #include "color_names.inc"
 #include "core/math/math_funcs.h"
 #include "core/string/ustring.h"
-#include "core/templates/rb_map.h"
+#include "core/templates/hash_map.h"
 
 #include "thirdparty/misc/ok_color.h"
 
@@ -414,7 +414,7 @@ Color Color::named(const String &p_name, const Color &p_default) {
 
 int Color::find_named_color(const String &p_name) {
 	String name = p_name;
-	// Normalize name
+	// Normalize name.
 	name = name.replace(" ", "");
 	name = name.replace("-", "");
 	name = name.replace("_", "");
@@ -422,23 +422,24 @@ int Color::find_named_color(const String &p_name) {
 	name = name.replace(".", "");
 	name = name.to_upper();
 
-	int idx = 0;
-	while (named_colors[idx].name != nullptr) {
-		if (name == String(named_colors[idx].name).replace("_", "")) {
-			return idx;
+	static HashMap<String, int> named_colors_hashmap;
+	if (unlikely(named_colors_hashmap.is_empty())) {
+		const int named_color_count = get_named_color_count();
+		for (int i = 0; i < named_color_count; i++) {
+			named_colors_hashmap[String(named_colors[i].name).replace("_", "")] = i;
 		}
-		idx++;
+	}
+
+	const HashMap<String, int>::ConstIterator E = named_colors_hashmap.find(name);
+	if (E) {
+		return E->value;
 	}
 
 	return -1;
 }
 
 int Color::get_named_color_count() {
-	int idx = 0;
-	while (named_colors[idx].name != nullptr) {
-		idx++;
-	}
-	return idx;
+	return sizeof(named_colors) / sizeof(NamedColor);
 }
 
 String Color::get_named_color_name(int p_idx) {
diff --git a/core/math/color.h b/core/math/color.h
index 65d7377c1c..70fad78acb 100644
--- a/core/math/color.h
+++ b/core/math/color.h
@@ -35,7 +35,7 @@
 
 class String;
 
-struct _NO_DISCARD_ Color {
+struct [[nodiscard]] Color {
 	union {
 		struct {
 			float r;
@@ -129,33 +129,46 @@ struct _NO_DISCARD_ Color {
 	}
 
 	_FORCE_INLINE_ uint32_t to_rgbe9995() const {
-		const float pow2to9 = 512.0f;
-		const float B = 15.0f;
-		const float N = 9.0f;
-
-		float sharedexp = 65408.000f; // Result of: ((pow2to9 - 1.0f) / pow2to9) * powf(2.0f, 31.0f - 15.0f)
-
-		float cRed = MAX(0.0f, MIN(sharedexp, r));
-		float cGreen = MAX(0.0f, MIN(sharedexp, g));
-		float cBlue = MAX(0.0f, MIN(sharedexp, b));
-
-		float cMax = MAX(cRed, MAX(cGreen, cBlue));
-
-		float expp = MAX(-B - 1.0f, floor(Math::log(cMax) / (real_t)Math_LN2)) + 1.0f + B;
-
-		float sMax = (float)floor((cMax / Math::pow(2.0f, expp - B - N)) + 0.5f);
-
-		float exps = expp + 1.0f;
-
-		if (0.0f <= sMax && sMax < pow2to9) {
-			exps = expp;
-		}
-
-		float sRed = Math::floor((cRed / pow(2.0f, exps - B - N)) + 0.5f);
-		float sGreen = Math::floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f);
-		float sBlue = Math::floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f);
-
-		return (uint32_t(Math::fast_ftoi(sRed)) & 0x1FF) | ((uint32_t(Math::fast_ftoi(sGreen)) & 0x1FF) << 9) | ((uint32_t(Math::fast_ftoi(sBlue)) & 0x1FF) << 18) | ((uint32_t(Math::fast_ftoi(exps)) & 0x1F) << 27);
+		// https://github.com/microsoft/DirectX-Graphics-Samples/blob/v10.0.19041.0/MiniEngine/Core/Color.cpp
+		static const float kMaxVal = float(0x1FF << 7);
+		static const float kMinVal = float(1.f / (1 << 16));
+
+		// Clamp RGB to [0, kMaxVal], where kMaxVal = 0x1FF * 2^7 = 65408.
+		const float _r = CLAMP(r, 0.0f, kMaxVal);
+		const float _g = CLAMP(g, 0.0f, kMaxVal);
+		const float _b = CLAMP(b, 0.0f, kMaxVal);
+
+		// Compute the maximum channel, no less than kMinVal = 2^-16.
+		const float MaxChannel = MAX(MAX(_r, _g), MAX(_b, kMinVal));
+
+		// Take the exponent of the maximum channel (rounding up the 9th bit) and add 15 to it.
+		// When added to the channels, it causes the implicit '1.0' bit and the first 8 mantissa bits
+		// to be shifted down to the low 9 bits of the mantissa, rounding the truncated bits.
+		// NOTE(review): reading `.i` after writing `.f` is union type punning; confirm all target compilers support it.
+		union {
+			float f;
+			int32_t i;
+		} R, G, B, E;
+
+		E.f = MaxChannel;
+		E.i += 0x07804000; // Add 15 to the exponent and 0x4000 to the mantissa
+		E.i &= 0x7F800000; // Zero the mantissa
+
+		// This shifts the 9-bit values we need into the lowest bits, rounding as
+		// needed. Note that if the channel has a smaller exponent than the max
+		// channel, it will shift even more.  This is intentional.
+		R.f = _r + E.f;
+		G.f = _g + E.f;
+		B.f = _b + E.f;
+
+		// Convert the Bias to the correct exponent in the upper 5 bits.
+		E.i <<= 4;
+		E.i += 0x10000000;
+
+		// Combine the fields. RGB floats have unwanted data in the upper 9
+		// bits. Only red needs to mask them off because green and blue shift
+		// it out to the left.
+		return E.i | (B.i << 18) | (G.i << 9) | (R.i & 511);
 	}
 
 	_FORCE_INLINE_ Color blend(const Color &p_over) const {
diff --git a/core/math/color_names.inc b/core/math/color_names.inc
index eaa1b1087f..6c0d2a4bfd 100644
--- a/core/math/color_names.inc
+++ b/core/math/color_names.inc
@@ -189,5 +189,4 @@ static NamedColor named_colors[] = {
 	{ "WHITE_SMOKE", Color::hex(0xF5F5F5FF) },
 	{ "YELLOW", Color::hex(0xFFFF00FF) },
 	{ "YELLOW_GREEN", Color::hex(0x9ACD32FF) },
-	{ nullptr, Color() },
 };
diff --git a/core/math/face3.h b/core/math/face3.h
index 3dd47d0226..519dcb6414 100644
--- a/core/math/face3.h
+++ b/core/math/face3.h
@@ -36,7 +36,7 @@
 #include "core/math/transform_3d.h"
 #include "core/math/vector3.h"
 
-struct _NO_DISCARD_ Face3 {
+struct [[nodiscard]] Face3 {
 	enum Side {
 		SIDE_OVER,
 		SIDE_UNDER,
diff --git a/core/math/geometry_2d.h b/core/math/geometry_2d.h
index 1502b2807c..83ebdc5a84 100644
--- a/core/math/geometry_2d.h
+++ b/core/math/geometry_2d.h
@@ -350,6 +350,8 @@ public:
 		return triangles;
 	}
 
+	// Assumes a Cartesian coordinate system with +x to the right and +y up.
+	// If using screen coordinates (+x to the right, +y down), the result will need to be flipped.
 	static bool is_polygon_clockwise(const Vector<Vector2> &p_polygon) {
 		int c = p_polygon.size();
 		if (c < 3) {
diff --git a/core/math/math_funcs.h b/core/math/math_funcs.h
index 3060f31970..fd53ed28fd 100644
--- a/core/math/math_funcs.h
+++ b/core/math/math_funcs.h
@@ -447,14 +447,22 @@ public:
 
 	static _ALWAYS_INLINE_ double smoothstep(double p_from, double p_to, double p_s) {
 		if (is_equal_approx(p_from, p_to)) {
-			return p_from;
+			if (likely(p_from <= p_to)) {
+				return p_s <= p_from ? 0.0 : 1.0;
+			} else {
+				return p_s <= p_to ? 1.0 : 0.0;
+			}
 		}
 		double s = CLAMP((p_s - p_from) / (p_to - p_from), 0.0, 1.0);
 		return s * s * (3.0 - 2.0 * s);
 	}
 	static _ALWAYS_INLINE_ float smoothstep(float p_from, float p_to, float p_s) {
 		if (is_equal_approx(p_from, p_to)) {
-			return p_from;
+			if (likely(p_from <= p_to)) {
+				return p_s <= p_from ? 0.0f : 1.0f;
+			} else {
+				return p_s <= p_to ? 1.0f : 0.0f;
+			}
 		}
 		float s = CLAMP((p_s - p_from) / (p_to - p_from), 0.0f, 1.0f);
 		return s * s * (3.0f - 2.0f * s);
diff --git a/core/math/plane.h b/core/math/plane.h
index 8159f25342..6529fea60a 100644
--- a/core/math/plane.h
+++ b/core/math/plane.h
@@ -35,7 +35,7 @@
 
 class Variant;
 
-struct _NO_DISCARD_ Plane {
+struct [[nodiscard]] Plane {
 	Vector3 normal;
 	real_t d = 0;
 
diff --git a/core/math/projection.h b/core/math/projection.h
index f3ed9d7b1c..5af43561c0 100644
--- a/core/math/projection.h
+++ b/core/math/projection.h
@@ -43,7 +43,7 @@ struct Rect2;
 struct Transform3D;
 struct Vector2;
 
-struct _NO_DISCARD_ Projection {
+struct [[nodiscard]] Projection {
 	enum Planes {
 		PLANE_NEAR,
 		PLANE_FAR,
diff --git a/core/math/quaternion.h b/core/math/quaternion.h
index 868a2916f5..655e55e0a2 100644
--- a/core/math/quaternion.h
+++ b/core/math/quaternion.h
@@ -35,7 +35,7 @@
 #include "core/math/vector3.h"
 #include "core/string/ustring.h"
 
-struct _NO_DISCARD_ Quaternion {
+struct [[nodiscard]] Quaternion {
 	union {
 		struct {
 			real_t x;
diff --git a/core/math/rect2.h b/core/math/rect2.h
index b4069ae86a..817923c134 100644
--- a/core/math/rect2.h
+++ b/core/math/rect2.h
@@ -38,7 +38,7 @@ class String;
 struct Rect2i;
 struct Transform2D;
 
-struct _NO_DISCARD_ Rect2 {
+struct [[nodiscard]] Rect2 {
 	Point2 position;
 	Size2 size;
 
@@ -285,13 +285,15 @@ struct _NO_DISCARD_ Rect2 {
 		return Rect2(position.round(), size.round());
 	}
 
-	Vector2 get_support(const Vector2 &p_normal) const {
-		Vector2 half_extents = size * 0.5f;
-		Vector2 ofs = position + half_extents;
-		return Vector2(
-					   (p_normal.x > 0) ? -half_extents.x : half_extents.x,
-					   (p_normal.y > 0) ? -half_extents.y : half_extents.y) +
-				ofs;
+	Vector2 get_support(const Vector2 &p_direction) const {
+		Vector2 support = position;
+		if (p_direction.x > 0.0f) {
+			support.x += size.x;
+		}
+		if (p_direction.y > 0.0f) {
+			support.y += size.y;
+		}
+		return support;
 	}
 
 	_FORCE_INLINE_ bool intersects_filled_polygon(const Vector2 *p_points, int p_point_count) const {
diff --git a/core/math/rect2i.h b/core/math/rect2i.h
index a1338da0bb..5f3a3d54f5 100644
--- a/core/math/rect2i.h
+++ b/core/math/rect2i.h
@@ -37,7 +37,7 @@
 class String;
 struct Rect2;
 
-struct _NO_DISCARD_ Rect2i {
+struct [[nodiscard]] Rect2i {
 	Point2i position;
 	Size2i size;
 
diff --git a/core/math/transform_2d.h b/core/math/transform_2d.h
index 4ec2dc119c..476577508f 100644
--- a/core/math/transform_2d.h
+++ b/core/math/transform_2d.h
@@ -38,7 +38,7 @@
 
 class String;
 
-struct _NO_DISCARD_ Transform2D {
+struct [[nodiscard]] Transform2D {
 	// Warning #1: basis of Transform2D is stored differently from Basis. In terms of columns array, the basis matrix looks like "on paper":
 	// M = (columns[0][0] columns[1][0])
 	//     (columns[0][1] columns[1][1])
diff --git a/core/math/transform_3d.h b/core/math/transform_3d.h
index 7d89b86c75..b1de233445 100644
--- a/core/math/transform_3d.h
+++ b/core/math/transform_3d.h
@@ -36,7 +36,7 @@
 #include "core/math/plane.h"
 #include "core/templates/vector.h"
 
-struct _NO_DISCARD_ Transform3D {
+struct [[nodiscard]] Transform3D {
 	Basis basis;
 	Vector3 origin;
 
diff --git a/core/math/transform_interpolator.cpp b/core/math/transform_interpolator.cpp
index 7cfe880b5a..1cd35b3d1a 100644
--- a/core/math/transform_interpolator.cpp
+++ b/core/math/transform_interpolator.cpp
@@ -31,46 +31,354 @@
 #include "transform_interpolator.h"
 
 #include "core/math/transform_2d.h"
+#include "core/math/transform_3d.h"
 
 void TransformInterpolator::interpolate_transform_2d(const Transform2D &p_prev, const Transform2D &p_curr, Transform2D &r_result, real_t p_fraction) {
-	// Extract parameters.
-	Vector2 p1 = p_prev.get_origin();
-	Vector2 p2 = p_curr.get_origin();
-
 	// Special case for physics interpolation, if flipping, don't interpolate basis.
 	// If the determinant polarity changes, the handedness of the coordinate system changes.
 	if (_sign(p_prev.determinant()) != _sign(p_curr.determinant())) {
 		r_result.columns[0] = p_curr.columns[0];
 		r_result.columns[1] = p_curr.columns[1];
-		r_result.set_origin(p1.lerp(p2, p_fraction));
+		r_result.set_origin(p_prev.get_origin().lerp(p_curr.get_origin(), p_fraction));
 		return;
 	}
 
-	real_t r1 = p_prev.get_rotation();
-	real_t r2 = p_curr.get_rotation();
+	r_result = p_prev.interpolate_with(p_curr, p_fraction);
+}
+
+void TransformInterpolator::interpolate_transform_3d(const Transform3D &p_prev, const Transform3D &p_curr, Transform3D &r_result, real_t p_fraction) {
+	r_result.origin = p_prev.origin + ((p_curr.origin - p_prev.origin) * p_fraction);
+	interpolate_basis(p_prev.basis, p_curr.basis, r_result.basis, p_fraction);
+}
+
+void TransformInterpolator::interpolate_basis(const Basis &p_prev, const Basis &p_curr, Basis &r_result, real_t p_fraction) {
+	Method method = find_method(p_prev, p_curr);
+	interpolate_basis_via_method(p_prev, p_curr, r_result, p_fraction, method);
+}
+
+void TransformInterpolator::interpolate_transform_3d_via_method(const Transform3D &p_prev, const Transform3D &p_curr, Transform3D &r_result, real_t p_fraction, Method p_method) {
+	r_result.origin = p_prev.origin + ((p_curr.origin - p_prev.origin) * p_fraction);
+	interpolate_basis_via_method(p_prev.basis, p_curr.basis, r_result.basis, p_fraction, p_method);
+}
+
+void TransformInterpolator::interpolate_basis_via_method(const Basis &p_prev, const Basis &p_curr, Basis &r_result, real_t p_fraction, Method p_method) {
+	switch (p_method) {
+		default: {
+			interpolate_basis_linear(p_prev, p_curr, r_result, p_fraction);
+		} break;
+		case INTERP_SLERP: {
+			r_result = _basis_slerp_unchecked(p_prev, p_curr, p_fraction);
+		} break;
+		case INTERP_SCALED_SLERP: {
+			interpolate_basis_scaled_slerp(p_prev, p_curr, r_result, p_fraction);
+		} break;
+	}
+}
+
+Quaternion TransformInterpolator::_basis_to_quat_unchecked(const Basis &p_basis) {
+	Basis m = p_basis;
+	real_t trace = m.rows[0][0] + m.rows[1][1] + m.rows[2][2];
+	real_t temp[4];
+
+	if (trace > 0.0) {
+		real_t s = Math::sqrt(trace + 1.0f);
+		temp[3] = (s * 0.5f);
+		s = 0.5f / s;
+
+		temp[0] = ((m.rows[2][1] - m.rows[1][2]) * s);
+		temp[1] = ((m.rows[0][2] - m.rows[2][0]) * s);
+		temp[2] = ((m.rows[1][0] - m.rows[0][1]) * s);
+	} else {
+		int i = m.rows[0][0] < m.rows[1][1]
+				? (m.rows[1][1] < m.rows[2][2] ? 2 : 1)
+				: (m.rows[0][0] < m.rows[2][2] ? 2 : 0);
+		int j = (i + 1) % 3;
+		int k = (i + 2) % 3;
 
-	Size2 s1 = p_prev.get_scale();
-	Size2 s2 = p_curr.get_scale();
+		real_t s = Math::sqrt(m.rows[i][i] - m.rows[j][j] - m.rows[k][k] + 1.0f);
+		temp[i] = s * 0.5f;
+		s = 0.5f / s;
 
-	// Slerp rotation.
-	Vector2 v1(Math::cos(r1), Math::sin(r1));
-	Vector2 v2(Math::cos(r2), Math::sin(r2));
+		temp[3] = (m.rows[k][j] - m.rows[j][k]) * s;
+		temp[j] = (m.rows[j][i] + m.rows[i][j]) * s;
+		temp[k] = (m.rows[k][i] + m.rows[i][k]) * s;
+	}
 
-	real_t dot = v1.dot(v2);
+	return Quaternion(temp[0], temp[1], temp[2], temp[3]);
+}
 
-	dot = CLAMP(dot, -1, 1);
+Quaternion TransformInterpolator::_quat_slerp_unchecked(const Quaternion &p_from, const Quaternion &p_to, real_t p_fraction) {
+	Quaternion to1;
+	real_t omega, cosom, sinom, scale0, scale1;
 
-	Vector2 v;
+	// Calculate cosine.
+	cosom = p_from.dot(p_to);
 
-	if (dot > 0.9995f) {
-		v = v1.lerp(v2, p_fraction).normalized(); // Linearly interpolate to avoid numerical precision issues.
+	// Adjust signs (if necessary)
+	if (cosom < 0.0f) {
+		cosom = -cosom;
+		to1.x = -p_to.x;
+		to1.y = -p_to.y;
+		to1.z = -p_to.z;
+		to1.w = -p_to.w;
 	} else {
-		real_t angle = p_fraction * Math::acos(dot);
-		Vector2 v3 = (v2 - v1 * dot).normalized();
-		v = v1 * Math::cos(angle) + v3 * Math::sin(angle);
+		to1.x = p_to.x;
+		to1.y = p_to.y;
+		to1.z = p_to.z;
+		to1.w = p_to.w;
+	}
+
+	// Calculate coefficients.
+
+	// This check could possibly be removed as we dealt with this
+	// case in the find_method() function, but is left for safety, it probably
+	// isn't a bottleneck.
+	if ((1.0f - cosom) > (real_t)CMP_EPSILON) {
+		// standard case (slerp)
+		omega = Math::acos(cosom);
+		sinom = Math::sin(omega);
+		scale0 = Math::sin((1.0f - p_fraction) * omega) / sinom;
+		scale1 = Math::sin(p_fraction * omega) / sinom;
+	} else {
+		// "from" and "to" quaternions are very close
+		//  ... so we can do a linear interpolation
+		scale0 = 1.0f - p_fraction;
+		scale1 = p_fraction;
+	}
+	// Calculate final values.
+	return Quaternion(
+			scale0 * p_from.x + scale1 * to1.x,
+			scale0 * p_from.y + scale1 * to1.y,
+			scale0 * p_from.z + scale1 * to1.z,
+			scale0 * p_from.w + scale1 * to1.w);
+}
+
+Basis TransformInterpolator::_basis_slerp_unchecked(Basis p_from, Basis p_to, real_t p_fraction) {
+	Quaternion from = _basis_to_quat_unchecked(p_from);
+	Quaternion to = _basis_to_quat_unchecked(p_to);
+
+	Basis b(_quat_slerp_unchecked(from, to, p_fraction));
+	return b;
+}
+
+void TransformInterpolator::interpolate_basis_scaled_slerp(Basis p_prev, Basis p_curr, Basis &r_result, real_t p_fraction) {
+	// Normalize both and find lengths.
+	Vector3 lengths_prev = _basis_orthonormalize(p_prev);
+	Vector3 lengths_curr = _basis_orthonormalize(p_curr);
+
+	r_result = _basis_slerp_unchecked(p_prev, p_curr, p_fraction);
+
+	// Now the result is unit length basis, we need to scale.
+	Vector3 lengths_lerped = lengths_prev + ((lengths_curr - lengths_prev) * p_fraction);
+
+	// Keep a note that the column / row order of the basis is weird,
+	// so keep an eye for bugs with this.
+	r_result[0] *= lengths_lerped;
+	r_result[1] *= lengths_lerped;
+	r_result[2] *= lengths_lerped;
+}
+
+void TransformInterpolator::interpolate_basis_linear(const Basis &p_prev, const Basis &p_curr, Basis &r_result, real_t p_fraction) {
+	// Interpolate basis.
+	r_result = p_prev.lerp(p_curr, p_fraction);
+
+	// It turns out we need to guard against zero scale basis.
+	// This is kind of silly, as we should probably fix the bugs elsewhere in Godot that can't deal with
+	// zero scale, but until that time...
+	for (int n = 0; n < 3; n++) {
+		Vector3 &axis = r_result[n];
+
+		// Not ok, this could cause errors due to bugs elsewhere,
+		// so we will bodge set this to a small value.
+		const real_t smallest = 0.0001f;
+		const real_t smallest_squared = smallest * smallest;
+		if (axis.length_squared() < smallest_squared) {
+			// Setting a different component to the smallest
+			// helps prevent the situation where all the axes are pointing in the same direction,
+			// which could be a problem for e.g. cross products...
+			axis[n] = smallest;
+		}
+	}
+}
+
+// Returns length.
+real_t TransformInterpolator::_vec3_normalize(Vector3 &p_vec) {
+	real_t lengthsq = p_vec.length_squared();
+	if (lengthsq == 0.0f) {
+		p_vec.x = p_vec.y = p_vec.z = 0.0f;
+		return 0.0f;
+	}
+	real_t length = Math::sqrt(lengthsq);
+	p_vec.x /= length;
+	p_vec.y /= length;
+	p_vec.z /= length;
+	return length;
+}
+
+// Returns lengths.
+Vector3 TransformInterpolator::_basis_orthonormalize(Basis &r_basis) {
+	// Gram-Schmidt Process.
+
+	Vector3 x = r_basis.get_column(0);
+	Vector3 y = r_basis.get_column(1);
+	Vector3 z = r_basis.get_column(2);
+
+	Vector3 lengths;
+
+	lengths.x = _vec3_normalize(x);
+	y = (y - x * (x.dot(y)));
+	lengths.y = _vec3_normalize(y);
+	z = (z - x * (x.dot(z)) - y * (y.dot(z)));
+	lengths.z = _vec3_normalize(z);
+
+	r_basis.set_column(0, x);
+	r_basis.set_column(1, y);
+	r_basis.set_column(2, z);
+
+	return lengths;
+}
+
+// Classifies one basis for find_method(): INTERP_LERP when any check fails, otherwise a slerp variant with r_quat set from the (normalized) basis.
+TransformInterpolator::Method TransformInterpolator::_test_basis(Basis p_basis, bool r_needed_normalize, Quaternion &r_quat) {
+	// Squared axis lengths (square-rooted further down only if normalization is needed).
+	Vector3 al = Vector3(p_basis.get_column(0).length_squared(),
+			p_basis.get_column(1).length_squared(),
+			p_basis.get_column(2).length_squared());
+
+	// Non unit scale?
+	if (r_needed_normalize || !_vec3_is_equal_approx(al, Vector3(1.0, 1.0, 1.0), (real_t)0.001f)) {
+		// If the basis is not normalized (at least approximately), it will fail the checks needed for slerp.
+		// So we try to detect a scaled (but not sheared) basis, which we *can* slerp by normalizing first,
+		// and lerping the scales separately.
+
+		// If any of the axes are really small, it is unlikely to be a valid rotation, or is scaled too small to deal with float error.
+		const real_t sl_epsilon = 0.00001f;
+		if ((al.x < sl_epsilon) ||
+				(al.y < sl_epsilon) ||
+				(al.z < sl_epsilon)) {
+			return INTERP_LERP;
+		}
+
+		// Normalize the basis.
+		Basis norm_basis = p_basis;
+
+		al.x = Math::sqrt(al.x);
+		al.y = Math::sqrt(al.y);
+		al.z = Math::sqrt(al.z);
+
+		norm_basis.set_column(0, norm_basis.get_column(0) / al.x);
+		norm_basis.set_column(1, norm_basis.get_column(1) / al.y);
+		norm_basis.set_column(2, norm_basis.get_column(2) / al.z);
+
+		// This doesn't appear necessary, as the later checks will catch it.
+		// if (!_basis_is_orthogonal_any_scale(norm_basis)) {
+		// return INTERP_LERP;
+		// }
+
+		p_basis = norm_basis;
+
+		// Orthonormalize is not necessary, as plain normalization works when the axes are already orthogonal.
+		// p_basis.orthonormalize();
+
+		// If we needed to normalize one of the two bases, we will need to normalize both,
+		// regardless of whether the 2nd needs it, so that the 2nd call of _test_basis also returns
+		// INTERP_SCALED_SLERP. NOTE(review): the flag is passed by value, so this write is local only.
+		r_needed_normalize = true;
+	}
+
+	// Apply less stringent tests than the built in slerp, the standard Godot slerp
+	// is too susceptible to float error to be useful.
+	real_t det = p_basis.determinant();
+	if (!Math::is_equal_approx(det, 1, (real_t)0.01f)) {
+		return INTERP_LERP;
+	}
+
+	if (!_basis_is_orthogonal(p_basis)) {
+		return INTERP_LERP;
+	}
+
+	// TODO: This could possibly be less stringent too, check this.
+	r_quat = _basis_to_quat_unchecked(p_basis);
+	if (!r_quat.is_normalized()) {
+		return INTERP_LERP;
+	}
+
+	return r_needed_normalize ? INTERP_SCALED_SLERP : INTERP_SLERP;
+}
+
+// This check doesn't seem to be needed but is preserved in case of bugs.
+bool TransformInterpolator::_basis_is_orthogonal_any_scale(const Basis &p_basis) {
+	Vector3 cross = p_basis.get_column(0).cross(p_basis.get_column(1));
+	real_t l = _vec3_normalize(cross);
+	// Too small numbers, revert to lerp.
+	if (l < 0.001f) {
+		return false;
+	}
+
+	const real_t epsilon = 0.9995f;
+
+	real_t dot = cross.dot(p_basis.get_column(2));
+	if (dot < epsilon) {
+		return false;
+	}
+
+	cross = p_basis.get_column(1).cross(p_basis.get_column(2));
+	l = _vec3_normalize(cross);
+	// Too small numbers, revert to lerp.
+	if (l < 0.001f) {
+		return false;
+	}
+
+	dot = cross.dot(p_basis.get_column(0));
+	if (dot < epsilon) {
+		return false;
+	}
+
+	return true;
+}
+
+bool TransformInterpolator::_basis_is_orthogonal(const Basis &p_basis, real_t p_epsilon) {
+	Basis identity;
+	Basis m = p_basis * p_basis.transposed();
+
+	// Less stringent tests than the standard Godot slerp.
+	if (!_vec3_is_equal_approx(m[0], identity[0], p_epsilon) || !_vec3_is_equal_approx(m[1], identity[1], p_epsilon) || !_vec3_is_equal_approx(m[2], identity[2], p_epsilon)) {
+		return false;
+	}
+	return true;
+}
+
+real_t TransformInterpolator::checksum_transform_3d(const Transform3D &p_transform) {
+	// just a really basic checksum, this can probably be improved
+	real_t sum = _vec3_sum(p_transform.origin);
+	sum -= _vec3_sum(p_transform.basis.rows[0]);
+	sum += _vec3_sum(p_transform.basis.rows[1]);
+	sum -= _vec3_sum(p_transform.basis.rows[2]);
+	return sum;
+}
+
+// Picks the interpolation method for a pair of bases: INTERP_LERP if either fails the slerp checks or the rotations nearly coincide.
+TransformInterpolator::Method TransformInterpolator::find_method(const Basis &p_a, const Basis &p_b) {
+	bool needed_normalize = false;
+	Quaternion q0;
+	Method method = _test_basis(p_a, needed_normalize, q0);
+	if (method == INTERP_LERP) {
+		return method;
+	}
+
+	// _test_basis() takes the flag by value, so forward the first verdict by hand to put the second basis on the scaled path too.
+	needed_normalize = (method == INTERP_SCALED_SLERP);
+	Quaternion q1;
+	method = _test_basis(p_b, needed_normalize, q1);
+	if (method == INTERP_LERP) {
+		return method;
+	}
+
+	// Are they close together? Apply the same cosine test that reverts to lerp in the slerp routine.
+	real_t cosom = Math::abs(q0.dot(q1));
+	if ((1.0f - cosom) <= (real_t)CMP_EPSILON) {
+		return INTERP_LERP;
 	}
 
-	// Construct matrix.
-	r_result = Transform2D(Math::atan2(v.y, v.x), p1.lerp(p2, p_fraction));
-	r_result.scale_basis(s1.lerp(s2, p_fraction));
+	return method;
 }
diff --git a/core/math/transform_interpolator.h b/core/math/transform_interpolator.h
index a9bce2bd7f..cc556707e4 100644
--- a/core/math/transform_interpolator.h
+++ b/core/math/transform_interpolator.h
@@ -32,15 +32,64 @@
 #define TRANSFORM_INTERPOLATOR_H
 
 #include "core/math/math_defs.h"
+#include "core/math/vector3.h"
+
+// Keep all the functions for fixed timestep interpolation together.
+// There are two stages involved:
+// 1. Finding the interpolation method to use between two keyframes
+//    (which are physics ticks).
+// 2. Applying that pre-determined method.
+
+// Pre-determining the method makes sense because finding it is expensive, and several
+// frames often occur between physics ticks, so doing it once per tick is cheaper
+// than doing it every frame.
 
 struct Transform2D;
+struct Transform3D;
+struct Basis;
+struct Quaternion;
 
 class TransformInterpolator {
+public:
+	enum Method {
+		INTERP_LERP,
+		INTERP_SLERP,
+		INTERP_SCALED_SLERP,
+	};
+
 private:
-	static bool _sign(real_t p_val) { return p_val >= 0; }
+	_FORCE_INLINE_ static bool _sign(real_t p_val) { return p_val >= 0; }
+	static real_t _vec3_sum(const Vector3 &p_pt) { return p_pt.x + p_pt.y + p_pt.z; }
+	static real_t _vec3_normalize(Vector3 &p_vec);
+	_FORCE_INLINE_ static bool _vec3_is_equal_approx(const Vector3 &p_a, const Vector3 &p_b, real_t p_tolerance) {
+		return Math::is_equal_approx(p_a.x, p_b.x, p_tolerance) && Math::is_equal_approx(p_a.y, p_b.y, p_tolerance) && Math::is_equal_approx(p_a.z, p_b.z, p_tolerance);
+	}
+	static Vector3 _basis_orthonormalize(Basis &r_basis);
+	static Method _test_basis(Basis p_basis, bool r_needed_normalize, Quaternion &r_quat);
+	static Basis _basis_slerp_unchecked(Basis p_from, Basis p_to, real_t p_fraction);
+	static Quaternion _quat_slerp_unchecked(const Quaternion &p_from, const Quaternion &p_to, real_t p_fraction);
+	static Quaternion _basis_to_quat_unchecked(const Basis &p_basis);
+	static bool _basis_is_orthogonal(const Basis &p_basis, real_t p_epsilon = 0.01f);
+	static bool _basis_is_orthogonal_any_scale(const Basis &p_basis);
+
+	static void interpolate_basis_linear(const Basis &p_prev, const Basis &p_curr, Basis &r_result, real_t p_fraction);
+	static void interpolate_basis_scaled_slerp(Basis p_prev, Basis p_curr, Basis &r_result, real_t p_fraction);
 
 public:
 	static void interpolate_transform_2d(const Transform2D &p_prev, const Transform2D &p_curr, Transform2D &r_result, real_t p_fraction);
+
+	// Generic functions, use when you don't know what method should be used, e.g. from GDScript.
+	// These will be slower.
+	static void interpolate_transform_3d(const Transform3D &p_prev, const Transform3D &p_curr, Transform3D &r_result, real_t p_fraction);
+	static void interpolate_basis(const Basis &p_prev, const Basis &p_curr, Basis &r_result, real_t p_fraction);
+
+	// Optimized function when you know ahead of time the method.
+	static void interpolate_transform_3d_via_method(const Transform3D &p_prev, const Transform3D &p_curr, Transform3D &r_result, real_t p_fraction, Method p_method);
+	static void interpolate_basis_via_method(const Basis &p_prev, const Basis &p_curr, Basis &r_result, real_t p_fraction, Method p_method);
+
+	static real_t checksum_transform_3d(const Transform3D &p_transform);
+
+	static Method find_method(const Basis &p_a, const Basis &p_b);
 };
 
 #endif // TRANSFORM_INTERPOLATOR_H
diff --git a/core/math/vector2.h b/core/math/vector2.h
index 8851942cdd..edb47db6fd 100644
--- a/core/math/vector2.h
+++ b/core/math/vector2.h
@@ -37,7 +37,7 @@
 class String;
 struct Vector2i;
 
-struct _NO_DISCARD_ Vector2 {
+struct [[nodiscard]] Vector2 {
 	static const int AXIS_COUNT = 2;
 
 	enum Axis {
diff --git a/core/math/vector2i.h b/core/math/vector2i.h
index aca9ae8272..fff9b0a658 100644
--- a/core/math/vector2i.h
+++ b/core/math/vector2i.h
@@ -37,7 +37,7 @@
 class String;
 struct Vector2;
 
-struct _NO_DISCARD_ Vector2i {
+struct [[nodiscard]] Vector2i {
 	static const int AXIS_COUNT = 2;
 
 	enum Axis {
diff --git a/core/math/vector3.h b/core/math/vector3.h
index 2313eb557a..14bc44c4e7 100644
--- a/core/math/vector3.h
+++ b/core/math/vector3.h
@@ -39,7 +39,7 @@ struct Basis;
 struct Vector2;
 struct Vector3i;
 
-struct _NO_DISCARD_ Vector3 {
+struct [[nodiscard]] Vector3 {
 	static const int AXIS_COUNT = 3;
 
 	enum Axis {
diff --git a/core/math/vector3i.h b/core/math/vector3i.h
index 035cfcf9e2..40d0700bf7 100644
--- a/core/math/vector3i.h
+++ b/core/math/vector3i.h
@@ -37,7 +37,7 @@
 class String;
 struct Vector3;
 
-struct _NO_DISCARD_ Vector3i {
+struct [[nodiscard]] Vector3i {
 	static const int AXIS_COUNT = 3;
 
 	enum Axis {
diff --git a/core/math/vector4.h b/core/math/vector4.h
index f69b4752bb..8632f69f57 100644
--- a/core/math/vector4.h
+++ b/core/math/vector4.h
@@ -38,7 +38,7 @@
 class String;
 struct Vector4i;
 
-struct _NO_DISCARD_ Vector4 {
+struct [[nodiscard]] Vector4 {
 	static const int AXIS_COUNT = 4;
 
 	enum Axis {
diff --git a/core/math/vector4i.h b/core/math/vector4i.h
index 8a9c580bc1..a9036d684a 100644
--- a/core/math/vector4i.h
+++ b/core/math/vector4i.h
@@ -37,7 +37,7 @@
 class String;
 struct Vector4;
 
-struct _NO_DISCARD_ Vector4i {
+struct [[nodiscard]] Vector4i {
 	static const int AXIS_COUNT = 4;
 
 	enum Axis {